diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..ad58833ab5a203710294bf6ef5a921f123241cef 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-6700/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-6800/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-6840/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7e6e7f8f85c4b42f4bab8068a8b01bda24772101 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: peft +license: other +base_model: Qwen/Qwen2.5-VL-7B-Instruct +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +pipeline_tag: text-generation +model-index: +- name: Qwen2.5-VL-7B-sft-valid + results: [] +--- + + + +# Qwen2.5-VL-7B-sft-valid + +This model is a fine-tuned version of [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on the rule_filter_valid dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- gradient_accumulation_steps: 2 +- total_train_batch_size: 4 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 0.05 +- num_epochs: 2 + +### Training results + + + +### Framework versions + +- PEFT 0.18.1 +- Transformers 5.2.0 +- Pytorch 2.5.1+cu124 +- Datasets 4.0.0 +- Tokenizers 0.22.2 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2358f759370f40d042b47e8407cdc2843daac45e --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "layers.6.mlp.up_proj", + "layers.24.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.24.mlp.down_proj", + 
"layers.12.mlp.up_proj", + "layers.20.mlp.gate_proj", + "layers.23.mlp.up_proj", + "layers.19.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.11.mlp.gate_proj", + "layers.8.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.7.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.13.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.9.mlp.gate_proj", + "layers.0.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.0.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.10.mlp.gate_proj", + "layers.10.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.25.mlp.up_proj", + "layers.12.mlp.gate_proj", + "layers.20.mlp.down_proj", + "layers.0.mlp.down_proj", + "layers.5.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.19.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.21.mlp.up_proj", + "layers.2.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.21.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "q_proj", + "layers.20.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.21.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.18.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.27.mlp.up_proj", + "v_proj", + "layers.7.mlp.gate_proj", + "layers.10.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.17.mlp.up_proj", + "layers.5.mlp.gate_proj", + "layers.3.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.26.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.17.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.13.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.15.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.16.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.4.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.15.mlp.gate_proj", + "layers.27.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.25.mlp.gate_proj", + "layers.26.mlp.gate_proj", + "o_proj", + 
"layers.6.mlp.gate_proj", + "layers.11.mlp.up_proj", + "layers.9.mlp.up_proj", + "layers.16.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.8.mlp.down_proj", + "layers.1.mlp.up_proj", + "k_proj", + "layers.16.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.9.mlp.down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1934484a824571d95faca5f608a429363d9dce22 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae497a5fd3e2f347345d7a0e67639f7faf92e4f40e35c1c3afa4428486cbc1b +size 323020440 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e045f36c49d9be60827f700c2e7d8175dcd5d1df --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 2089529852362752.0, + "train_loss": 0.6033765813455596, + "train_runtime": 36066.319, + "train_samples_per_second": 0.758, + "train_steps_per_second": 0.19 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set 
image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-6700/README.md b/checkpoint-6700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-6700/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the 
risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-6700/adapter_config.json b/checkpoint-6700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2358f759370f40d042b47e8407cdc2843daac45e --- /dev/null +++ b/checkpoint-6700/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.6.mlp.up_proj", + "layers.24.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.24.mlp.down_proj", + "layers.12.mlp.up_proj", + "layers.20.mlp.gate_proj", + "layers.23.mlp.up_proj", + "layers.19.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.11.mlp.gate_proj", + "layers.8.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.7.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.13.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.9.mlp.gate_proj", + "layers.0.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.0.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.10.mlp.gate_proj", + "layers.10.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.25.mlp.up_proj", + "layers.12.mlp.gate_proj", + "layers.20.mlp.down_proj", + "layers.0.mlp.down_proj", + "layers.5.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.19.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.21.mlp.up_proj", + "layers.2.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.21.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "q_proj", + "layers.20.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.21.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.18.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.27.mlp.up_proj", + "v_proj", + "layers.7.mlp.gate_proj", + "layers.10.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.17.mlp.up_proj", + "layers.5.mlp.gate_proj", + "layers.3.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.26.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.17.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.13.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.15.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.16.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.4.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.15.mlp.gate_proj", + 
"layers.27.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.25.mlp.gate_proj", + "layers.26.mlp.gate_proj", + "o_proj", + "layers.6.mlp.gate_proj", + "layers.11.mlp.up_proj", + "layers.9.mlp.up_proj", + "layers.16.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.8.mlp.down_proj", + "layers.1.mlp.up_proj", + "k_proj", + "layers.16.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.9.mlp.down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-6700/adapter_model.safetensors b/checkpoint-6700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6db3849516797f20b143e500412e28746c7e19a --- /dev/null +++ b/checkpoint-6700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86f43e03354e29ebbe9714c9e363d222a433c1dcc9ac8f32f302df247e3c4a31 +size 323020440 diff --git a/checkpoint-6700/chat_template.jinja b/checkpoint-6700/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-6700/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% 
set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-6700/global_step6700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-6700/global_step6700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cbab4a7c227e5ca163830839859bf17aac9476f --- /dev/null +++ b/checkpoint-6700/global_step6700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8d5991589977bde766c96f0ad223327a9248247dc5ef834078d2a659dfdd57 +size 1937772272 diff --git a/checkpoint-6700/global_step6700/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-6700/global_step6700/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..58aaa0281543a28821d3950ce4e049c15bf5c92f --- /dev/null +++ b/checkpoint-6700/global_step6700/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9acd890a8d41caadd896c3e173330353f7c02f9e5669e9a51c17503cf26464e2 +size 460630 diff --git a/checkpoint-6700/latest b/checkpoint-6700/latest new file mode 100644 index 0000000000000000000000000000000000000000..d0309d42ac44da7df0351262f58e048ee32843d6 --- /dev/null +++ b/checkpoint-6700/latest @@ -0,0 +1 @@ +global_step6700 \ No newline at end of file diff --git a/checkpoint-6700/processor_config.json b/checkpoint-6700/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-6700/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-6700/rng_state.pth b/checkpoint-6700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5663fb28f8c802f765a03d8595ec1f4dd2a20e92 --- /dev/null +++ b/checkpoint-6700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd74ed7d4b54f85bd7a87224f7ff4be0c45e7d302a2518cae00ce473fe6bb661 +size 14244 diff --git a/checkpoint-6700/scheduler.pt b/checkpoint-6700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0ac8f9e7dbb348acf4e61741de34a84c5543962 --- /dev/null +++ b/checkpoint-6700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a028fe048aa673a93a440e22780f04f9aa256c565d6754067e4ac0c0c3eee5cd +size 
1000 diff --git a/checkpoint-6700/tokenizer.json b/checkpoint-6700/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-6700/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-6700/tokenizer_config.json b/checkpoint-6700/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-6700/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-6700/trainer_state.json b/checkpoint-6700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..21e54ba2fa1e2bc615b2d944aa97151d3cd995a6 --- /dev/null +++ b/checkpoint-6700/trainer_state.json @@ -0,0 +1,46934 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.959204562070478, + "eval_steps": 500, + "global_step": 6700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00029244041526538966, + "grad_norm": 
1.376689135449382, + "learning_rate": 0.0, + "loss": 1.2599382400512695, + "step": 1 + }, + { + "epoch": 0.0005848808305307793, + "grad_norm": 1.234681838317607, + "learning_rate": 5.847953216374269e-08, + "loss": 0.9314937591552734, + "step": 2 + }, + { + "epoch": 0.000877321245796169, + "grad_norm": 1.3874138849382744, + "learning_rate": 1.1695906432748539e-07, + "loss": 1.1433629989624023, + "step": 3 + }, + { + "epoch": 0.0011697616610615586, + "grad_norm": 1.4618979511530414, + "learning_rate": 1.7543859649122808e-07, + "loss": 1.2224640846252441, + "step": 4 + }, + { + "epoch": 0.0014622020763269484, + "grad_norm": 1.236340065064986, + "learning_rate": 2.3391812865497077e-07, + "loss": 1.0468370914459229, + "step": 5 + }, + { + "epoch": 0.001754642491592338, + "grad_norm": 1.358651453520776, + "learning_rate": 2.9239766081871344e-07, + "loss": 1.1314436197280884, + "step": 6 + }, + { + "epoch": 0.0020470829068577278, + "grad_norm": 1.3850033876300505, + "learning_rate": 3.5087719298245616e-07, + "loss": 0.9903597831726074, + "step": 7 + }, + { + "epoch": 0.0023395233221231173, + "grad_norm": 1.326993456005612, + "learning_rate": 4.093567251461988e-07, + "loss": 1.1988611221313477, + "step": 8 + }, + { + "epoch": 0.002631963737388507, + "grad_norm": 1.3313234883955534, + "learning_rate": 4.6783625730994155e-07, + "loss": 1.1209533214569092, + "step": 9 + }, + { + "epoch": 0.0029244041526538967, + "grad_norm": 1.3471142230235869, + "learning_rate": 5.263157894736843e-07, + "loss": 1.1582586765289307, + "step": 10 + }, + { + "epoch": 0.0032168445679192866, + "grad_norm": 1.3073172655293792, + "learning_rate": 5.847953216374269e-07, + "loss": 1.2469007968902588, + "step": 11 + }, + { + "epoch": 0.003509284983184676, + "grad_norm": 1.500493931988472, + "learning_rate": 6.432748538011696e-07, + "loss": 1.115494728088379, + "step": 12 + }, + { + "epoch": 0.0038017253984500656, + "grad_norm": 1.4157975190751417, + "learning_rate": 7.017543859649123e-07, + "loss": 
1.1927871704101562, + "step": 13 + }, + { + "epoch": 0.0040941658137154556, + "grad_norm": 1.4273551735693608, + "learning_rate": 7.60233918128655e-07, + "loss": 1.1014869213104248, + "step": 14 + }, + { + "epoch": 0.004386606228980845, + "grad_norm": 1.214320734942881, + "learning_rate": 8.187134502923977e-07, + "loss": 1.1055865287780762, + "step": 15 + }, + { + "epoch": 0.0046790466442462346, + "grad_norm": 1.2962699407775686, + "learning_rate": 8.771929824561404e-07, + "loss": 1.1071349382400513, + "step": 16 + }, + { + "epoch": 0.004971487059511625, + "grad_norm": 1.2885224717964352, + "learning_rate": 9.356725146198831e-07, + "loss": 1.1737473011016846, + "step": 17 + }, + { + "epoch": 0.005263927474777014, + "grad_norm": 1.407390623938155, + "learning_rate": 9.941520467836258e-07, + "loss": 1.283717155456543, + "step": 18 + }, + { + "epoch": 0.005556367890042404, + "grad_norm": 1.4470139877184414, + "learning_rate": 1.0526315789473685e-06, + "loss": 1.2509160041809082, + "step": 19 + }, + { + "epoch": 0.005848808305307793, + "grad_norm": 1.3242663031296102, + "learning_rate": 1.111111111111111e-06, + "loss": 0.9722317457199097, + "step": 20 + }, + { + "epoch": 0.006141248720573183, + "grad_norm": 1.7221218211796423, + "learning_rate": 1.1695906432748538e-06, + "loss": 1.1927049160003662, + "step": 21 + }, + { + "epoch": 0.006433689135838573, + "grad_norm": 1.4346324267765085, + "learning_rate": 1.2280701754385965e-06, + "loss": 1.2133033275604248, + "step": 22 + }, + { + "epoch": 0.006726129551103963, + "grad_norm": 1.449278395489955, + "learning_rate": 1.2865497076023392e-06, + "loss": 1.2373273372650146, + "step": 23 + }, + { + "epoch": 0.007018569966369352, + "grad_norm": 1.6650860096596214, + "learning_rate": 1.345029239766082e-06, + "loss": 0.9476668834686279, + "step": 24 + }, + { + "epoch": 0.007311010381634742, + "grad_norm": 1.2748998150534738, + "learning_rate": 1.4035087719298246e-06, + "loss": 1.1171324253082275, + "step": 25 + }, + { + "epoch": 
0.007603450796900131, + "grad_norm": 1.4396688825039674, + "learning_rate": 1.4619883040935674e-06, + "loss": 1.1276075839996338, + "step": 26 + }, + { + "epoch": 0.007895891212165522, + "grad_norm": 1.4009443443291978, + "learning_rate": 1.52046783625731e-06, + "loss": 1.190751314163208, + "step": 27 + }, + { + "epoch": 0.008188331627430911, + "grad_norm": 1.3912141798418658, + "learning_rate": 1.5789473684210526e-06, + "loss": 1.2171813249588013, + "step": 28 + }, + { + "epoch": 0.0084807720426963, + "grad_norm": 1.3073224250652524, + "learning_rate": 1.6374269005847953e-06, + "loss": 0.8595987558364868, + "step": 29 + }, + { + "epoch": 0.00877321245796169, + "grad_norm": 1.2671914308960317, + "learning_rate": 1.695906432748538e-06, + "loss": 1.0270106792449951, + "step": 30 + }, + { + "epoch": 0.00906565287322708, + "grad_norm": 1.5005896829818803, + "learning_rate": 1.7543859649122807e-06, + "loss": 1.068537712097168, + "step": 31 + }, + { + "epoch": 0.009358093288492469, + "grad_norm": 1.2766478202995049, + "learning_rate": 1.8128654970760235e-06, + "loss": 1.1307867765426636, + "step": 32 + }, + { + "epoch": 0.009650533703757859, + "grad_norm": 1.5582616996952416, + "learning_rate": 1.8713450292397662e-06, + "loss": 1.0837950706481934, + "step": 33 + }, + { + "epoch": 0.00994297411902325, + "grad_norm": 1.4304945053464713, + "learning_rate": 1.929824561403509e-06, + "loss": 1.1506178379058838, + "step": 34 + }, + { + "epoch": 0.01023541453428864, + "grad_norm": 1.4722243618391941, + "learning_rate": 1.9883040935672516e-06, + "loss": 0.9450151324272156, + "step": 35 + }, + { + "epoch": 0.010527854949554029, + "grad_norm": 1.4847744449229108, + "learning_rate": 2.0467836257309943e-06, + "loss": 1.2040901184082031, + "step": 36 + }, + { + "epoch": 0.010820295364819418, + "grad_norm": 1.4600954284408973, + "learning_rate": 2.105263157894737e-06, + "loss": 1.2316429615020752, + "step": 37 + }, + { + "epoch": 0.011112735780084808, + "grad_norm": 1.479845514016971, 
+ "learning_rate": 2.1637426900584798e-06, + "loss": 1.2119100093841553, + "step": 38 + }, + { + "epoch": 0.011405176195350197, + "grad_norm": 1.353351745720387, + "learning_rate": 2.222222222222222e-06, + "loss": 1.276926875114441, + "step": 39 + }, + { + "epoch": 0.011697616610615587, + "grad_norm": 1.256680621146734, + "learning_rate": 2.280701754385965e-06, + "loss": 0.9357824921607971, + "step": 40 + }, + { + "epoch": 0.011990057025880976, + "grad_norm": 1.3348703609284243, + "learning_rate": 2.3391812865497075e-06, + "loss": 1.1861131191253662, + "step": 41 + }, + { + "epoch": 0.012282497441146366, + "grad_norm": 1.3287978940598948, + "learning_rate": 2.3976608187134502e-06, + "loss": 1.1745539903640747, + "step": 42 + }, + { + "epoch": 0.012574937856411755, + "grad_norm": 1.1561631937443322, + "learning_rate": 2.456140350877193e-06, + "loss": 1.0291770696640015, + "step": 43 + }, + { + "epoch": 0.012867378271677147, + "grad_norm": 1.2176771446345134, + "learning_rate": 2.5146198830409357e-06, + "loss": 1.2361294031143188, + "step": 44 + }, + { + "epoch": 0.013159818686942536, + "grad_norm": 1.3295063710563702, + "learning_rate": 2.5730994152046784e-06, + "loss": 1.1909143924713135, + "step": 45 + }, + { + "epoch": 0.013452259102207926, + "grad_norm": 1.2650643173778968, + "learning_rate": 2.631578947368421e-06, + "loss": 1.1998133659362793, + "step": 46 + }, + { + "epoch": 0.013744699517473315, + "grad_norm": 1.1278701463292995, + "learning_rate": 2.690058479532164e-06, + "loss": 1.0011268854141235, + "step": 47 + }, + { + "epoch": 0.014037139932738705, + "grad_norm": 1.4726969666937608, + "learning_rate": 2.7485380116959066e-06, + "loss": 1.0552136898040771, + "step": 48 + }, + { + "epoch": 0.014329580348004094, + "grad_norm": 1.0797124442917296, + "learning_rate": 2.8070175438596493e-06, + "loss": 0.9727921485900879, + "step": 49 + }, + { + "epoch": 0.014622020763269484, + "grad_norm": 1.1798592697113668, + "learning_rate": 2.865497076023392e-06, + "loss": 
0.9361351728439331, + "step": 50 + }, + { + "epoch": 0.014914461178534873, + "grad_norm": 1.1254749584923542, + "learning_rate": 2.9239766081871347e-06, + "loss": 1.140329360961914, + "step": 51 + }, + { + "epoch": 0.015206901593800263, + "grad_norm": 1.1050662639156084, + "learning_rate": 2.9824561403508774e-06, + "loss": 0.991325855255127, + "step": 52 + }, + { + "epoch": 0.015499342009065652, + "grad_norm": 1.364923415701691, + "learning_rate": 3.04093567251462e-06, + "loss": 1.3082914352416992, + "step": 53 + }, + { + "epoch": 0.015791782424331043, + "grad_norm": 1.1357483626397489, + "learning_rate": 3.0994152046783624e-06, + "loss": 0.9767723083496094, + "step": 54 + }, + { + "epoch": 0.016084222839596433, + "grad_norm": 1.1338887919712684, + "learning_rate": 3.157894736842105e-06, + "loss": 1.193568229675293, + "step": 55 + }, + { + "epoch": 0.016376663254861822, + "grad_norm": 1.176328275981774, + "learning_rate": 3.216374269005848e-06, + "loss": 0.9767440557479858, + "step": 56 + }, + { + "epoch": 0.016669103670127212, + "grad_norm": 1.0263265896491178, + "learning_rate": 3.2748538011695906e-06, + "loss": 0.8888605833053589, + "step": 57 + }, + { + "epoch": 0.0169615440853926, + "grad_norm": 1.0668435517314094, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.087357997894287, + "step": 58 + }, + { + "epoch": 0.01725398450065799, + "grad_norm": 1.1952584851106463, + "learning_rate": 3.391812865497076e-06, + "loss": 1.0217459201812744, + "step": 59 + }, + { + "epoch": 0.01754642491592338, + "grad_norm": 1.1279843674972485, + "learning_rate": 3.4502923976608188e-06, + "loss": 1.0783777236938477, + "step": 60 + }, + { + "epoch": 0.01783886533118877, + "grad_norm": 0.9080265579264722, + "learning_rate": 3.5087719298245615e-06, + "loss": 0.85099196434021, + "step": 61 + }, + { + "epoch": 0.01813130574645416, + "grad_norm": 1.0228765689803359, + "learning_rate": 3.567251461988304e-06, + "loss": 0.9322569966316223, + "step": 62 + }, + { + "epoch": 
0.01842374616171955, + "grad_norm": 0.991842254830473, + "learning_rate": 3.625730994152047e-06, + "loss": 0.8749685287475586, + "step": 63 + }, + { + "epoch": 0.018716186576984938, + "grad_norm": 0.9789077968505817, + "learning_rate": 3.6842105263157896e-06, + "loss": 0.857900857925415, + "step": 64 + }, + { + "epoch": 0.019008626992250328, + "grad_norm": 0.8092242526335478, + "learning_rate": 3.7426900584795324e-06, + "loss": 0.8891770243644714, + "step": 65 + }, + { + "epoch": 0.019301067407515717, + "grad_norm": 1.0526332302181824, + "learning_rate": 3.801169590643275e-06, + "loss": 1.0730159282684326, + "step": 66 + }, + { + "epoch": 0.019593507822781107, + "grad_norm": 1.124329301516788, + "learning_rate": 3.859649122807018e-06, + "loss": 1.108138084411621, + "step": 67 + }, + { + "epoch": 0.0198859482380465, + "grad_norm": 1.3581659451048562, + "learning_rate": 3.9181286549707605e-06, + "loss": 1.2126305103302002, + "step": 68 + }, + { + "epoch": 0.02017838865331189, + "grad_norm": 1.1108109420327934, + "learning_rate": 3.976608187134503e-06, + "loss": 0.9527193307876587, + "step": 69 + }, + { + "epoch": 0.02047082906857728, + "grad_norm": 0.9965971604796123, + "learning_rate": 4.035087719298246e-06, + "loss": 1.0454832315444946, + "step": 70 + }, + { + "epoch": 0.020763269483842668, + "grad_norm": 0.821178202034714, + "learning_rate": 4.093567251461989e-06, + "loss": 0.7075237035751343, + "step": 71 + }, + { + "epoch": 0.021055709899108058, + "grad_norm": 1.2413273222740282, + "learning_rate": 4.152046783625731e-06, + "loss": 1.0972111225128174, + "step": 72 + }, + { + "epoch": 0.021348150314373447, + "grad_norm": 0.9838475362870381, + "learning_rate": 4.210526315789474e-06, + "loss": 1.0400984287261963, + "step": 73 + }, + { + "epoch": 0.021640590729638837, + "grad_norm": 0.8577987626348056, + "learning_rate": 4.269005847953217e-06, + "loss": 0.7712557315826416, + "step": 74 + }, + { + "epoch": 0.021933031144904226, + "grad_norm": 1.0937426764383058, + 
"learning_rate": 4.3274853801169596e-06, + "loss": 1.1733636856079102, + "step": 75 + }, + { + "epoch": 0.022225471560169616, + "grad_norm": 0.9896291906902066, + "learning_rate": 4.385964912280702e-06, + "loss": 0.8653621673583984, + "step": 76 + }, + { + "epoch": 0.022517911975435005, + "grad_norm": 0.9059062097735997, + "learning_rate": 4.444444444444444e-06, + "loss": 0.8797299861907959, + "step": 77 + }, + { + "epoch": 0.022810352390700395, + "grad_norm": 1.0128235878781693, + "learning_rate": 4.502923976608187e-06, + "loss": 0.8357750177383423, + "step": 78 + }, + { + "epoch": 0.023102792805965784, + "grad_norm": 1.241636412088512, + "learning_rate": 4.56140350877193e-06, + "loss": 1.1249456405639648, + "step": 79 + }, + { + "epoch": 0.023395233221231174, + "grad_norm": 1.2743547410748093, + "learning_rate": 4.619883040935672e-06, + "loss": 0.9920758008956909, + "step": 80 + }, + { + "epoch": 0.023687673636496563, + "grad_norm": 1.0290847197991744, + "learning_rate": 4.678362573099415e-06, + "loss": 0.8115094900131226, + "step": 81 + }, + { + "epoch": 0.023980114051761953, + "grad_norm": 0.9339898981913745, + "learning_rate": 4.736842105263158e-06, + "loss": 1.060575246810913, + "step": 82 + }, + { + "epoch": 0.024272554467027342, + "grad_norm": 1.1898301512766587, + "learning_rate": 4.7953216374269005e-06, + "loss": 1.028218150138855, + "step": 83 + }, + { + "epoch": 0.02456499488229273, + "grad_norm": 0.9840324243241313, + "learning_rate": 4.853801169590643e-06, + "loss": 1.090872049331665, + "step": 84 + }, + { + "epoch": 0.02485743529755812, + "grad_norm": 1.110956193223445, + "learning_rate": 4.912280701754386e-06, + "loss": 1.0069574117660522, + "step": 85 + }, + { + "epoch": 0.02514987571282351, + "grad_norm": 1.0134868000559825, + "learning_rate": 4.970760233918129e-06, + "loss": 0.9391698837280273, + "step": 86 + }, + { + "epoch": 0.025442316128088904, + "grad_norm": 1.0912235029106665, + "learning_rate": 5.029239766081871e-06, + "loss": 
0.881995677947998, + "step": 87 + }, + { + "epoch": 0.025734756543354293, + "grad_norm": 1.0399116507679627, + "learning_rate": 5.087719298245615e-06, + "loss": 0.87871253490448, + "step": 88 + }, + { + "epoch": 0.026027196958619683, + "grad_norm": 1.0265015868708693, + "learning_rate": 5.146198830409357e-06, + "loss": 1.005904197692871, + "step": 89 + }, + { + "epoch": 0.026319637373885072, + "grad_norm": 1.0161210383553128, + "learning_rate": 5.2046783625731e-06, + "loss": 0.8624223470687866, + "step": 90 + }, + { + "epoch": 0.02661207778915046, + "grad_norm": 1.0154040401745301, + "learning_rate": 5.263157894736842e-06, + "loss": 0.9976427555084229, + "step": 91 + }, + { + "epoch": 0.02690451820441585, + "grad_norm": 1.157266795240935, + "learning_rate": 5.321637426900586e-06, + "loss": 0.7743148803710938, + "step": 92 + }, + { + "epoch": 0.02719695861968124, + "grad_norm": 1.0027983307117943, + "learning_rate": 5.380116959064328e-06, + "loss": 0.8541792631149292, + "step": 93 + }, + { + "epoch": 0.02748939903494663, + "grad_norm": 1.0195872536359372, + "learning_rate": 5.438596491228071e-06, + "loss": 0.9141846895217896, + "step": 94 + }, + { + "epoch": 0.02778183945021202, + "grad_norm": 0.9964676811589505, + "learning_rate": 5.497076023391813e-06, + "loss": 0.9762974977493286, + "step": 95 + }, + { + "epoch": 0.02807427986547741, + "grad_norm": 1.086834377136063, + "learning_rate": 5.555555555555557e-06, + "loss": 0.8039775490760803, + "step": 96 + }, + { + "epoch": 0.0283667202807428, + "grad_norm": 1.0288673358640383, + "learning_rate": 5.6140350877192985e-06, + "loss": 0.9464477300643921, + "step": 97 + }, + { + "epoch": 0.028659160696008188, + "grad_norm": 0.9989091266376411, + "learning_rate": 5.672514619883041e-06, + "loss": 0.8264896869659424, + "step": 98 + }, + { + "epoch": 0.028951601111273578, + "grad_norm": 1.239452647422259, + "learning_rate": 5.730994152046784e-06, + "loss": 0.8347363471984863, + "step": 99 + }, + { + "epoch": 
0.029244041526538967, + "grad_norm": 1.1482101557047766, + "learning_rate": 5.789473684210527e-06, + "loss": 0.7974327802658081, + "step": 100 + }, + { + "epoch": 0.029536481941804357, + "grad_norm": 1.040746567320999, + "learning_rate": 5.847953216374269e-06, + "loss": 0.7953752875328064, + "step": 101 + }, + { + "epoch": 0.029828922357069746, + "grad_norm": 1.0186289029859024, + "learning_rate": 5.906432748538012e-06, + "loss": 0.8652607798576355, + "step": 102 + }, + { + "epoch": 0.030121362772335136, + "grad_norm": 1.0719829766550855, + "learning_rate": 5.964912280701755e-06, + "loss": 0.973792552947998, + "step": 103 + }, + { + "epoch": 0.030413803187600525, + "grad_norm": 0.9226382056883017, + "learning_rate": 6.023391812865498e-06, + "loss": 0.8093612194061279, + "step": 104 + }, + { + "epoch": 0.030706243602865915, + "grad_norm": 0.9154711374479992, + "learning_rate": 6.08187134502924e-06, + "loss": 0.8463394045829773, + "step": 105 + }, + { + "epoch": 0.030998684018131304, + "grad_norm": 1.2769916053670627, + "learning_rate": 6.140350877192983e-06, + "loss": 0.7898350358009338, + "step": 106 + }, + { + "epoch": 0.0312911244333967, + "grad_norm": 1.298220618549192, + "learning_rate": 6.198830409356725e-06, + "loss": 0.9750698804855347, + "step": 107 + }, + { + "epoch": 0.031583564848662087, + "grad_norm": 1.000315516155276, + "learning_rate": 6.2573099415204685e-06, + "loss": 0.8137387633323669, + "step": 108 + }, + { + "epoch": 0.031876005263927476, + "grad_norm": 1.082436003075408, + "learning_rate": 6.31578947368421e-06, + "loss": 1.0641593933105469, + "step": 109 + }, + { + "epoch": 0.032168445679192866, + "grad_norm": 1.0363310086535433, + "learning_rate": 6.374269005847954e-06, + "loss": 0.9647193551063538, + "step": 110 + }, + { + "epoch": 0.032460886094458255, + "grad_norm": 1.1062097211432278, + "learning_rate": 6.432748538011696e-06, + "loss": 0.9693200588226318, + "step": 111 + }, + { + "epoch": 0.032753326509723645, + "grad_norm": 
1.145031857661525, + "learning_rate": 6.491228070175439e-06, + "loss": 0.9600590467453003, + "step": 112 + }, + { + "epoch": 0.033045766924989034, + "grad_norm": 1.0203404188427831, + "learning_rate": 6.549707602339181e-06, + "loss": 0.8908880949020386, + "step": 113 + }, + { + "epoch": 0.033338207340254424, + "grad_norm": 1.2162435709165451, + "learning_rate": 6.608187134502925e-06, + "loss": 0.9803124666213989, + "step": 114 + }, + { + "epoch": 0.03363064775551981, + "grad_norm": 1.1738875143751093, + "learning_rate": 6.666666666666667e-06, + "loss": 0.8288271427154541, + "step": 115 + }, + { + "epoch": 0.0339230881707852, + "grad_norm": 0.9490473067752526, + "learning_rate": 6.72514619883041e-06, + "loss": 0.7203798890113831, + "step": 116 + }, + { + "epoch": 0.03421552858605059, + "grad_norm": 1.0046253156347025, + "learning_rate": 6.783625730994152e-06, + "loss": 0.7670629024505615, + "step": 117 + }, + { + "epoch": 0.03450796900131598, + "grad_norm": 1.0563125407630551, + "learning_rate": 6.842105263157896e-06, + "loss": 0.8487929105758667, + "step": 118 + }, + { + "epoch": 0.03480040941658137, + "grad_norm": 1.1292147521599132, + "learning_rate": 6.9005847953216375e-06, + "loss": 0.8332704305648804, + "step": 119 + }, + { + "epoch": 0.03509284983184676, + "grad_norm": 1.2138847310663696, + "learning_rate": 6.959064327485381e-06, + "loss": 0.9984017610549927, + "step": 120 + }, + { + "epoch": 0.03538529024711215, + "grad_norm": 1.126543099330432, + "learning_rate": 7.017543859649123e-06, + "loss": 0.788459062576294, + "step": 121 + }, + { + "epoch": 0.03567773066237754, + "grad_norm": 1.5166585395762038, + "learning_rate": 7.0760233918128665e-06, + "loss": 1.0288443565368652, + "step": 122 + }, + { + "epoch": 0.03597017107764293, + "grad_norm": 1.0086777607738802, + "learning_rate": 7.134502923976608e-06, + "loss": 0.7939552664756775, + "step": 123 + }, + { + "epoch": 0.03626261149290832, + "grad_norm": 1.0254521267017753, + "learning_rate": 
7.192982456140352e-06, + "loss": 0.8816506862640381, + "step": 124 + }, + { + "epoch": 0.03655505190817371, + "grad_norm": 1.0223917066157164, + "learning_rate": 7.251461988304094e-06, + "loss": 0.8864353895187378, + "step": 125 + }, + { + "epoch": 0.0368474923234391, + "grad_norm": 1.2363556273996017, + "learning_rate": 7.309941520467837e-06, + "loss": 0.9817954897880554, + "step": 126 + }, + { + "epoch": 0.03713993273870449, + "grad_norm": 1.0757650534793346, + "learning_rate": 7.368421052631579e-06, + "loss": 0.8423842787742615, + "step": 127 + }, + { + "epoch": 0.037432373153969876, + "grad_norm": 1.1636915661730252, + "learning_rate": 7.426900584795322e-06, + "loss": 0.8375135660171509, + "step": 128 + }, + { + "epoch": 0.037724813569235266, + "grad_norm": 1.2215328884976426, + "learning_rate": 7.485380116959065e-06, + "loss": 0.9105685949325562, + "step": 129 + }, + { + "epoch": 0.038017253984500655, + "grad_norm": 1.1346801425180852, + "learning_rate": 7.5438596491228074e-06, + "loss": 0.8784557580947876, + "step": 130 + }, + { + "epoch": 0.038309694399766045, + "grad_norm": 1.0071578019284073, + "learning_rate": 7.60233918128655e-06, + "loss": 0.7557879686355591, + "step": 131 + }, + { + "epoch": 0.038602134815031434, + "grad_norm": 1.228942961434803, + "learning_rate": 7.660818713450294e-06, + "loss": 0.8966819047927856, + "step": 132 + }, + { + "epoch": 0.038894575230296824, + "grad_norm": 1.0961114842309465, + "learning_rate": 7.719298245614036e-06, + "loss": 0.7642185091972351, + "step": 133 + }, + { + "epoch": 0.03918701564556221, + "grad_norm": 1.062961529950125, + "learning_rate": 7.77777777777778e-06, + "loss": 0.8313230276107788, + "step": 134 + }, + { + "epoch": 0.0394794560608276, + "grad_norm": 1.3350623914867434, + "learning_rate": 7.836257309941521e-06, + "loss": 0.8388677835464478, + "step": 135 + }, + { + "epoch": 0.039771896476093, + "grad_norm": 1.2027686314521255, + "learning_rate": 7.894736842105265e-06, + "loss": 0.9065952301025391, + 
"step": 136 + }, + { + "epoch": 0.04006433689135839, + "grad_norm": 1.123144368922916, + "learning_rate": 7.953216374269006e-06, + "loss": 0.8153767585754395, + "step": 137 + }, + { + "epoch": 0.04035677730662378, + "grad_norm": 1.163761684167935, + "learning_rate": 8.01169590643275e-06, + "loss": 0.8976421356201172, + "step": 138 + }, + { + "epoch": 0.04064921772188917, + "grad_norm": 1.1354333989669174, + "learning_rate": 8.070175438596492e-06, + "loss": 0.7360264658927917, + "step": 139 + }, + { + "epoch": 0.04094165813715456, + "grad_norm": 1.1009203930924998, + "learning_rate": 8.128654970760235e-06, + "loss": 0.8442148566246033, + "step": 140 + }, + { + "epoch": 0.04123409855241995, + "grad_norm": 1.0872796831159965, + "learning_rate": 8.187134502923977e-06, + "loss": 0.6541435718536377, + "step": 141 + }, + { + "epoch": 0.041526538967685336, + "grad_norm": 1.2792221696979318, + "learning_rate": 8.24561403508772e-06, + "loss": 0.7492353916168213, + "step": 142 + }, + { + "epoch": 0.041818979382950726, + "grad_norm": 1.0406728730985955, + "learning_rate": 8.304093567251463e-06, + "loss": 0.6681893467903137, + "step": 143 + }, + { + "epoch": 0.042111419798216115, + "grad_norm": 1.2507905783247102, + "learning_rate": 8.362573099415205e-06, + "loss": 0.8384866714477539, + "step": 144 + }, + { + "epoch": 0.042403860213481505, + "grad_norm": 1.125680624680095, + "learning_rate": 8.421052631578948e-06, + "loss": 0.8338214159011841, + "step": 145 + }, + { + "epoch": 0.042696300628746894, + "grad_norm": 1.3441065562284606, + "learning_rate": 8.47953216374269e-06, + "loss": 0.8549021482467651, + "step": 146 + }, + { + "epoch": 0.042988741044012284, + "grad_norm": 1.0226139512096055, + "learning_rate": 8.538011695906434e-06, + "loss": 0.8324464559555054, + "step": 147 + }, + { + "epoch": 0.04328118145927767, + "grad_norm": 1.3742681865566602, + "learning_rate": 8.596491228070176e-06, + "loss": 0.9247474670410156, + "step": 148 + }, + { + "epoch": 0.04357362187454306, + 
"grad_norm": 1.3295257009133983, + "learning_rate": 8.654970760233919e-06, + "loss": 0.8488880395889282, + "step": 149 + }, + { + "epoch": 0.04386606228980845, + "grad_norm": 1.244174459745273, + "learning_rate": 8.713450292397661e-06, + "loss": 0.7844473123550415, + "step": 150 + }, + { + "epoch": 0.04415850270507384, + "grad_norm": 1.3605735346558072, + "learning_rate": 8.771929824561405e-06, + "loss": 1.0540976524353027, + "step": 151 + }, + { + "epoch": 0.04445094312033923, + "grad_norm": 1.096092225329518, + "learning_rate": 8.830409356725146e-06, + "loss": 0.7919446229934692, + "step": 152 + }, + { + "epoch": 0.04474338353560462, + "grad_norm": 1.1577837223865697, + "learning_rate": 8.888888888888888e-06, + "loss": 0.818670928478241, + "step": 153 + }, + { + "epoch": 0.04503582395087001, + "grad_norm": 1.4320201209257988, + "learning_rate": 8.947368421052632e-06, + "loss": 0.8491114377975464, + "step": 154 + }, + { + "epoch": 0.0453282643661354, + "grad_norm": 1.8326606844764444, + "learning_rate": 9.005847953216374e-06, + "loss": 0.660563588142395, + "step": 155 + }, + { + "epoch": 0.04562070478140079, + "grad_norm": 1.1838649114458772, + "learning_rate": 9.064327485380117e-06, + "loss": 0.8559159636497498, + "step": 156 + }, + { + "epoch": 0.04591314519666618, + "grad_norm": 1.0968958293675206, + "learning_rate": 9.12280701754386e-06, + "loss": 0.8478386402130127, + "step": 157 + }, + { + "epoch": 0.04620558561193157, + "grad_norm": 1.1272218094040445, + "learning_rate": 9.181286549707603e-06, + "loss": 0.758915901184082, + "step": 158 + }, + { + "epoch": 0.04649802602719696, + "grad_norm": 1.3159367769875163, + "learning_rate": 9.239766081871345e-06, + "loss": 0.773307204246521, + "step": 159 + }, + { + "epoch": 0.04679046644246235, + "grad_norm": 1.29739510285095, + "learning_rate": 9.298245614035088e-06, + "loss": 0.8948490023612976, + "step": 160 + }, + { + "epoch": 0.04708290685772774, + "grad_norm": 1.2170406448830853, + "learning_rate": 
9.35672514619883e-06, + "loss": 0.83086097240448, + "step": 161 + }, + { + "epoch": 0.047375347272993126, + "grad_norm": 1.474814122834776, + "learning_rate": 9.415204678362574e-06, + "loss": 0.7683168649673462, + "step": 162 + }, + { + "epoch": 0.047667787688258516, + "grad_norm": 1.2546637555360107, + "learning_rate": 9.473684210526315e-06, + "loss": 0.9267748594284058, + "step": 163 + }, + { + "epoch": 0.047960228103523905, + "grad_norm": 1.1945733924353639, + "learning_rate": 9.532163742690059e-06, + "loss": 0.9243365526199341, + "step": 164 + }, + { + "epoch": 0.048252668518789295, + "grad_norm": 1.1508961292698372, + "learning_rate": 9.590643274853801e-06, + "loss": 0.7841176986694336, + "step": 165 + }, + { + "epoch": 0.048545108934054684, + "grad_norm": 1.1853174404309834, + "learning_rate": 9.649122807017545e-06, + "loss": 0.8318643569946289, + "step": 166 + }, + { + "epoch": 0.048837549349320074, + "grad_norm": 1.3089312801161905, + "learning_rate": 9.707602339181286e-06, + "loss": 0.866286039352417, + "step": 167 + }, + { + "epoch": 0.04912998976458546, + "grad_norm": 1.32215003396801, + "learning_rate": 9.76608187134503e-06, + "loss": 0.8232241868972778, + "step": 168 + }, + { + "epoch": 0.04942243017985085, + "grad_norm": 1.4759162272800292, + "learning_rate": 9.824561403508772e-06, + "loss": 0.874968945980072, + "step": 169 + }, + { + "epoch": 0.04971487059511624, + "grad_norm": 1.3247540509223557, + "learning_rate": 9.883040935672515e-06, + "loss": 0.9048999547958374, + "step": 170 + }, + { + "epoch": 0.05000731101038163, + "grad_norm": 1.4647995646715117, + "learning_rate": 9.941520467836257e-06, + "loss": 0.9220215082168579, + "step": 171 + }, + { + "epoch": 0.05029975142564702, + "grad_norm": 1.3290504006044366, + "learning_rate": 1e-05, + "loss": 0.8326996564865112, + "step": 172 + }, + { + "epoch": 0.05059219184091241, + "grad_norm": 1.0687285940591045, + "learning_rate": 1.0058479532163743e-05, + "loss": 0.8023662567138672, + "step": 173 + }, + 
{ + "epoch": 0.05088463225617781, + "grad_norm": 1.4370267362244613, + "learning_rate": 1.0116959064327488e-05, + "loss": 0.9172271490097046, + "step": 174 + }, + { + "epoch": 0.0511770726714432, + "grad_norm": 1.2538172153184461, + "learning_rate": 1.017543859649123e-05, + "loss": 0.8016377687454224, + "step": 175 + }, + { + "epoch": 0.051469513086708586, + "grad_norm": 1.1436252675754246, + "learning_rate": 1.0233918128654972e-05, + "loss": 0.7656369805335999, + "step": 176 + }, + { + "epoch": 0.051761953501973976, + "grad_norm": 1.1951944941269466, + "learning_rate": 1.0292397660818714e-05, + "loss": 0.7769640684127808, + "step": 177 + }, + { + "epoch": 0.052054393917239365, + "grad_norm": 1.3791114600068226, + "learning_rate": 1.0350877192982459e-05, + "loss": 0.9830589294433594, + "step": 178 + }, + { + "epoch": 0.052346834332504755, + "grad_norm": 1.1501081025808126, + "learning_rate": 1.04093567251462e-05, + "loss": 0.8002523183822632, + "step": 179 + }, + { + "epoch": 0.052639274747770144, + "grad_norm": 1.3726838653365003, + "learning_rate": 1.0467836257309943e-05, + "loss": 0.879243016242981, + "step": 180 + }, + { + "epoch": 0.052931715163035534, + "grad_norm": 1.2863425151805854, + "learning_rate": 1.0526315789473684e-05, + "loss": 0.7266525030136108, + "step": 181 + }, + { + "epoch": 0.05322415557830092, + "grad_norm": 1.350994010752117, + "learning_rate": 1.0584795321637428e-05, + "loss": 0.784702479839325, + "step": 182 + }, + { + "epoch": 0.05351659599356631, + "grad_norm": 1.415897619399055, + "learning_rate": 1.0643274853801172e-05, + "loss": 0.8419734239578247, + "step": 183 + }, + { + "epoch": 0.0538090364088317, + "grad_norm": 1.201782404599289, + "learning_rate": 1.0701754385964913e-05, + "loss": 0.8462855815887451, + "step": 184 + }, + { + "epoch": 0.05410147682409709, + "grad_norm": 1.361501494219251, + "learning_rate": 1.0760233918128655e-05, + "loss": 0.8888737559318542, + "step": 185 + }, + { + "epoch": 0.05439391723936248, + "grad_norm": 
1.3305576553150047, + "learning_rate": 1.0818713450292399e-05, + "loss": 0.8063781261444092, + "step": 186 + }, + { + "epoch": 0.05468635765462787, + "grad_norm": 1.2109684966022718, + "learning_rate": 1.0877192982456142e-05, + "loss": 0.7981499433517456, + "step": 187 + }, + { + "epoch": 0.05497879806989326, + "grad_norm": 1.5415785509759563, + "learning_rate": 1.0935672514619884e-05, + "loss": 0.8474490642547607, + "step": 188 + }, + { + "epoch": 0.05527123848515865, + "grad_norm": 1.300197838887535, + "learning_rate": 1.0994152046783626e-05, + "loss": 0.818732500076294, + "step": 189 + }, + { + "epoch": 0.05556367890042404, + "grad_norm": 1.3192619521811115, + "learning_rate": 1.105263157894737e-05, + "loss": 0.7660291194915771, + "step": 190 + }, + { + "epoch": 0.05585611931568943, + "grad_norm": 1.2626389127660034, + "learning_rate": 1.1111111111111113e-05, + "loss": 0.8240147233009338, + "step": 191 + }, + { + "epoch": 0.05614855973095482, + "grad_norm": 1.340830231936402, + "learning_rate": 1.1169590643274855e-05, + "loss": 0.9377203583717346, + "step": 192 + }, + { + "epoch": 0.05644100014622021, + "grad_norm": 1.416661564809907, + "learning_rate": 1.1228070175438597e-05, + "loss": 0.8662704229354858, + "step": 193 + }, + { + "epoch": 0.0567334405614856, + "grad_norm": 1.3274611257173192, + "learning_rate": 1.128654970760234e-05, + "loss": 0.717308759689331, + "step": 194 + }, + { + "epoch": 0.05702588097675099, + "grad_norm": 1.1942152308113003, + "learning_rate": 1.1345029239766083e-05, + "loss": 0.8538037538528442, + "step": 195 + }, + { + "epoch": 0.057318321392016376, + "grad_norm": 1.4411136610170212, + "learning_rate": 1.1403508771929826e-05, + "loss": 0.9016960859298706, + "step": 196 + }, + { + "epoch": 0.057610761807281766, + "grad_norm": 1.4664426354083508, + "learning_rate": 1.1461988304093568e-05, + "loss": 0.9313502311706543, + "step": 197 + }, + { + "epoch": 0.057903202222547155, + "grad_norm": 1.2885330427126278, + "learning_rate": 
1.1520467836257312e-05, + "loss": 0.7330124974250793, + "step": 198 + }, + { + "epoch": 0.058195642637812545, + "grad_norm": 1.272277327326545, + "learning_rate": 1.1578947368421053e-05, + "loss": 0.8904056549072266, + "step": 199 + }, + { + "epoch": 0.058488083053077934, + "grad_norm": 1.4761275028472136, + "learning_rate": 1.1637426900584797e-05, + "loss": 0.7816377878189087, + "step": 200 + }, + { + "epoch": 0.058780523468343324, + "grad_norm": 1.3244130760300052, + "learning_rate": 1.1695906432748539e-05, + "loss": 0.7109910249710083, + "step": 201 + }, + { + "epoch": 0.05907296388360871, + "grad_norm": 1.499082853070359, + "learning_rate": 1.1754385964912282e-05, + "loss": 0.7657924890518188, + "step": 202 + }, + { + "epoch": 0.0593654042988741, + "grad_norm": 1.5632309821036996, + "learning_rate": 1.1812865497076024e-05, + "loss": 0.8521978259086609, + "step": 203 + }, + { + "epoch": 0.05965784471413949, + "grad_norm": 1.3625729366507646, + "learning_rate": 1.1871345029239766e-05, + "loss": 0.7558364868164062, + "step": 204 + }, + { + "epoch": 0.05995028512940488, + "grad_norm": 1.3362044158661328, + "learning_rate": 1.192982456140351e-05, + "loss": 0.8488497734069824, + "step": 205 + }, + { + "epoch": 0.06024272554467027, + "grad_norm": 1.5823695803446844, + "learning_rate": 1.1988304093567253e-05, + "loss": 0.7905591726303101, + "step": 206 + }, + { + "epoch": 0.06053516595993566, + "grad_norm": 1.324069880941127, + "learning_rate": 1.2046783625730995e-05, + "loss": 0.747936487197876, + "step": 207 + }, + { + "epoch": 0.06082760637520105, + "grad_norm": 1.3370127883002023, + "learning_rate": 1.2105263157894737e-05, + "loss": 0.8653486967086792, + "step": 208 + }, + { + "epoch": 0.06112004679046644, + "grad_norm": 1.295171295812896, + "learning_rate": 1.216374269005848e-05, + "loss": 0.8662437200546265, + "step": 209 + }, + { + "epoch": 0.06141248720573183, + "grad_norm": 1.6369328366726996, + "learning_rate": 1.2222222222222224e-05, + "loss": 
0.9567133188247681, + "step": 210 + }, + { + "epoch": 0.06170492762099722, + "grad_norm": 1.4011109813275144, + "learning_rate": 1.2280701754385966e-05, + "loss": 0.8994660377502441, + "step": 211 + }, + { + "epoch": 0.06199736803626261, + "grad_norm": 1.2989562892904951, + "learning_rate": 1.2339181286549708e-05, + "loss": 0.7889316082000732, + "step": 212 + }, + { + "epoch": 0.062289808451528005, + "grad_norm": 1.2266327731037636, + "learning_rate": 1.239766081871345e-05, + "loss": 0.883985161781311, + "step": 213 + }, + { + "epoch": 0.0625822488667934, + "grad_norm": 1.2190679056716556, + "learning_rate": 1.2456140350877195e-05, + "loss": 0.7780495882034302, + "step": 214 + }, + { + "epoch": 0.06287468928205878, + "grad_norm": 1.3596314866008754, + "learning_rate": 1.2514619883040937e-05, + "loss": 0.6514906883239746, + "step": 215 + }, + { + "epoch": 0.06316712969732417, + "grad_norm": 1.3008367711622892, + "learning_rate": 1.2573099415204679e-05, + "loss": 0.750559389591217, + "step": 216 + }, + { + "epoch": 0.06345957011258956, + "grad_norm": 1.4761536100726258, + "learning_rate": 1.263157894736842e-05, + "loss": 0.8330573439598083, + "step": 217 + }, + { + "epoch": 0.06375201052785495, + "grad_norm": 1.4144186396910836, + "learning_rate": 1.2690058479532166e-05, + "loss": 0.8075361847877502, + "step": 218 + }, + { + "epoch": 0.06404445094312033, + "grad_norm": 1.2867265784947997, + "learning_rate": 1.2748538011695908e-05, + "loss": 0.7636772394180298, + "step": 219 + }, + { + "epoch": 0.06433689135838573, + "grad_norm": 1.1905704140813884, + "learning_rate": 1.280701754385965e-05, + "loss": 0.8241903185844421, + "step": 220 + }, + { + "epoch": 0.06462933177365111, + "grad_norm": 1.261461662230418, + "learning_rate": 1.2865497076023392e-05, + "loss": 0.6582514047622681, + "step": 221 + }, + { + "epoch": 0.06492177218891651, + "grad_norm": 1.461492259499335, + "learning_rate": 1.2923976608187137e-05, + "loss": 0.6363992691040039, + "step": 222 + }, + { + 
"epoch": 0.06521421260418189, + "grad_norm": 1.5776709499534403, + "learning_rate": 1.2982456140350879e-05, + "loss": 0.8093860149383545, + "step": 223 + }, + { + "epoch": 0.06550665301944729, + "grad_norm": 1.5281675606912017, + "learning_rate": 1.304093567251462e-05, + "loss": 0.7719511985778809, + "step": 224 + }, + { + "epoch": 0.06579909343471267, + "grad_norm": 1.4484434101459598, + "learning_rate": 1.3099415204678362e-05, + "loss": 0.8314809799194336, + "step": 225 + }, + { + "epoch": 0.06609153384997807, + "grad_norm": 1.3751378156667435, + "learning_rate": 1.3157894736842108e-05, + "loss": 0.8752902746200562, + "step": 226 + }, + { + "epoch": 0.06638397426524345, + "grad_norm": 1.4660956062146326, + "learning_rate": 1.321637426900585e-05, + "loss": 0.7564839124679565, + "step": 227 + }, + { + "epoch": 0.06667641468050885, + "grad_norm": 1.6744274403459947, + "learning_rate": 1.3274853801169591e-05, + "loss": 0.7377971410751343, + "step": 228 + }, + { + "epoch": 0.06696885509577423, + "grad_norm": 1.3046915227989528, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.7298087477684021, + "step": 229 + }, + { + "epoch": 0.06726129551103963, + "grad_norm": 1.4026797729918719, + "learning_rate": 1.3391812865497079e-05, + "loss": 0.7291176915168762, + "step": 230 + }, + { + "epoch": 0.06755373592630501, + "grad_norm": 1.3421785664914363, + "learning_rate": 1.345029239766082e-05, + "loss": 0.8226944208145142, + "step": 231 + }, + { + "epoch": 0.0678461763415704, + "grad_norm": 1.4277073905518047, + "learning_rate": 1.3508771929824562e-05, + "loss": 0.7185185551643372, + "step": 232 + }, + { + "epoch": 0.0681386167568358, + "grad_norm": 1.2950151686673683, + "learning_rate": 1.3567251461988304e-05, + "loss": 0.7028212547302246, + "step": 233 + }, + { + "epoch": 0.06843105717210118, + "grad_norm": 1.6157016450339874, + "learning_rate": 1.362573099415205e-05, + "loss": 0.8809897899627686, + "step": 234 + }, + { + "epoch": 0.06872349758736658, + "grad_norm": 
1.388536739112073, + "learning_rate": 1.3684210526315791e-05, + "loss": 0.7779085040092468, + "step": 235 + }, + { + "epoch": 0.06901593800263196, + "grad_norm": 1.5070530641919806, + "learning_rate": 1.3742690058479533e-05, + "loss": 0.731019139289856, + "step": 236 + }, + { + "epoch": 0.06930837841789736, + "grad_norm": 1.4005389899518954, + "learning_rate": 1.3801169590643275e-05, + "loss": 0.7495850920677185, + "step": 237 + }, + { + "epoch": 0.06960081883316274, + "grad_norm": 1.2241508662035476, + "learning_rate": 1.385964912280702e-05, + "loss": 0.7018189430236816, + "step": 238 + }, + { + "epoch": 0.06989325924842814, + "grad_norm": 1.2596692368793962, + "learning_rate": 1.3918128654970762e-05, + "loss": 0.7072417736053467, + "step": 239 + }, + { + "epoch": 0.07018569966369352, + "grad_norm": 1.3606864903220994, + "learning_rate": 1.3976608187134504e-05, + "loss": 0.8125720620155334, + "step": 240 + }, + { + "epoch": 0.07047814007895892, + "grad_norm": 1.442924901417446, + "learning_rate": 1.4035087719298246e-05, + "loss": 0.6101655960083008, + "step": 241 + }, + { + "epoch": 0.0707705804942243, + "grad_norm": 1.3725413795436465, + "learning_rate": 1.409356725146199e-05, + "loss": 0.9005568623542786, + "step": 242 + }, + { + "epoch": 0.0710630209094897, + "grad_norm": 1.4215646059439664, + "learning_rate": 1.4152046783625733e-05, + "loss": 0.7678338289260864, + "step": 243 + }, + { + "epoch": 0.07135546132475508, + "grad_norm": 1.4745728838056915, + "learning_rate": 1.4210526315789475e-05, + "loss": 0.7563410997390747, + "step": 244 + }, + { + "epoch": 0.07164790174002048, + "grad_norm": 1.3043448641122064, + "learning_rate": 1.4269005847953217e-05, + "loss": 0.7497583627700806, + "step": 245 + }, + { + "epoch": 0.07194034215528586, + "grad_norm": 1.8237088246729396, + "learning_rate": 1.432748538011696e-05, + "loss": 0.8913442492485046, + "step": 246 + }, + { + "epoch": 0.07223278257055125, + "grad_norm": 1.446976759622428, + "learning_rate": 
1.4385964912280704e-05, + "loss": 0.7714704871177673, + "step": 247 + }, + { + "epoch": 0.07252522298581664, + "grad_norm": 1.4721214924941617, + "learning_rate": 1.4444444444444446e-05, + "loss": 0.6752789616584778, + "step": 248 + }, + { + "epoch": 0.07281766340108203, + "grad_norm": 1.4015875441769006, + "learning_rate": 1.4502923976608188e-05, + "loss": 0.6092795133590698, + "step": 249 + }, + { + "epoch": 0.07311010381634742, + "grad_norm": 1.4602535650914903, + "learning_rate": 1.4561403508771931e-05, + "loss": 0.9300343990325928, + "step": 250 + }, + { + "epoch": 0.07340254423161281, + "grad_norm": 1.3884630911660603, + "learning_rate": 1.4619883040935675e-05, + "loss": 0.8005613088607788, + "step": 251 + }, + { + "epoch": 0.0736949846468782, + "grad_norm": 1.2918508056771596, + "learning_rate": 1.4678362573099417e-05, + "loss": 0.7188931703567505, + "step": 252 + }, + { + "epoch": 0.07398742506214359, + "grad_norm": 1.3258314938186555, + "learning_rate": 1.4736842105263159e-05, + "loss": 0.6967242956161499, + "step": 253 + }, + { + "epoch": 0.07427986547740897, + "grad_norm": 1.300875000270566, + "learning_rate": 1.4795321637426902e-05, + "loss": 0.6921653747558594, + "step": 254 + }, + { + "epoch": 0.07457230589267437, + "grad_norm": 1.4258732788152875, + "learning_rate": 1.4853801169590644e-05, + "loss": 0.8498743772506714, + "step": 255 + }, + { + "epoch": 0.07486474630793975, + "grad_norm": 1.4311730434285577, + "learning_rate": 1.4912280701754388e-05, + "loss": 0.6420027017593384, + "step": 256 + }, + { + "epoch": 0.07515718672320515, + "grad_norm": 1.3747073212413874, + "learning_rate": 1.497076023391813e-05, + "loss": 0.7101434469223022, + "step": 257 + }, + { + "epoch": 0.07544962713847053, + "grad_norm": 1.562801712624193, + "learning_rate": 1.5029239766081873e-05, + "loss": 0.740740180015564, + "step": 258 + }, + { + "epoch": 0.07574206755373593, + "grad_norm": 1.726645998674187, + "learning_rate": 1.5087719298245615e-05, + "loss": 
0.891905665397644, + "step": 259 + }, + { + "epoch": 0.07603450796900131, + "grad_norm": 1.5486677390214905, + "learning_rate": 1.5146198830409358e-05, + "loss": 0.867740273475647, + "step": 260 + }, + { + "epoch": 0.07632694838426671, + "grad_norm": 1.5072500165891534, + "learning_rate": 1.52046783625731e-05, + "loss": 0.7895220518112183, + "step": 261 + }, + { + "epoch": 0.07661938879953209, + "grad_norm": 1.5579945503860015, + "learning_rate": 1.5263157894736846e-05, + "loss": 0.7987008094787598, + "step": 262 + }, + { + "epoch": 0.07691182921479749, + "grad_norm": 1.4014455476427317, + "learning_rate": 1.5321637426900587e-05, + "loss": 0.7780282497406006, + "step": 263 + }, + { + "epoch": 0.07720426963006287, + "grad_norm": 1.2290290646079385, + "learning_rate": 1.538011695906433e-05, + "loss": 0.6265891194343567, + "step": 264 + }, + { + "epoch": 0.07749671004532827, + "grad_norm": 1.4917276843875658, + "learning_rate": 1.543859649122807e-05, + "loss": 0.6559646129608154, + "step": 265 + }, + { + "epoch": 0.07778915046059365, + "grad_norm": 1.4406503206723986, + "learning_rate": 1.5497076023391816e-05, + "loss": 0.8362047672271729, + "step": 266 + }, + { + "epoch": 0.07808159087585904, + "grad_norm": 1.481487764499426, + "learning_rate": 1.555555555555556e-05, + "loss": 0.707663357257843, + "step": 267 + }, + { + "epoch": 0.07837403129112443, + "grad_norm": 1.398507930714671, + "learning_rate": 1.56140350877193e-05, + "loss": 0.67903071641922, + "step": 268 + }, + { + "epoch": 0.07866647170638982, + "grad_norm": 1.3187056037490035, + "learning_rate": 1.5672514619883042e-05, + "loss": 0.7634894251823425, + "step": 269 + }, + { + "epoch": 0.0789589121216552, + "grad_norm": 1.3791372975152867, + "learning_rate": 1.5730994152046787e-05, + "loss": 0.6395117044448853, + "step": 270 + }, + { + "epoch": 0.0792513525369206, + "grad_norm": 1.4273746235266698, + "learning_rate": 1.578947368421053e-05, + "loss": 0.6948165893554688, + "step": 271 + }, + { + "epoch": 
0.079543792952186, + "grad_norm": 1.342718294320327, + "learning_rate": 1.584795321637427e-05, + "loss": 0.9288383722305298, + "step": 272 + }, + { + "epoch": 0.07983623336745138, + "grad_norm": 1.4727633207578312, + "learning_rate": 1.5906432748538013e-05, + "loss": 0.9291346073150635, + "step": 273 + }, + { + "epoch": 0.08012867378271678, + "grad_norm": 1.3613936763496384, + "learning_rate": 1.5964912280701755e-05, + "loss": 0.7399512529373169, + "step": 274 + }, + { + "epoch": 0.08042111419798216, + "grad_norm": 1.5856072060707183, + "learning_rate": 1.60233918128655e-05, + "loss": 0.6890764236450195, + "step": 275 + }, + { + "epoch": 0.08071355461324756, + "grad_norm": 1.1844012071470522, + "learning_rate": 1.6081871345029242e-05, + "loss": 0.6520324349403381, + "step": 276 + }, + { + "epoch": 0.08100599502851294, + "grad_norm": 1.4161353486782806, + "learning_rate": 1.6140350877192984e-05, + "loss": 0.6726658344268799, + "step": 277 + }, + { + "epoch": 0.08129843544377834, + "grad_norm": 1.5076627116667636, + "learning_rate": 1.6198830409356726e-05, + "loss": 0.7453294992446899, + "step": 278 + }, + { + "epoch": 0.08159087585904372, + "grad_norm": 1.6796077609043067, + "learning_rate": 1.625730994152047e-05, + "loss": 0.755578875541687, + "step": 279 + }, + { + "epoch": 0.08188331627430911, + "grad_norm": 1.576837195920435, + "learning_rate": 1.6315789473684213e-05, + "loss": 0.713086724281311, + "step": 280 + }, + { + "epoch": 0.0821757566895745, + "grad_norm": 1.5223162841340931, + "learning_rate": 1.6374269005847955e-05, + "loss": 0.8714310526847839, + "step": 281 + }, + { + "epoch": 0.0824681971048399, + "grad_norm": 1.4999918578300349, + "learning_rate": 1.6432748538011697e-05, + "loss": 0.6827348470687866, + "step": 282 + }, + { + "epoch": 0.08276063752010528, + "grad_norm": 1.5263417760460645, + "learning_rate": 1.649122807017544e-05, + "loss": 0.8613482713699341, + "step": 283 + }, + { + "epoch": 0.08305307793537067, + "grad_norm": 1.3847261162959308, 
+ "learning_rate": 1.6549707602339184e-05, + "loss": 0.7442763447761536, + "step": 284 + }, + { + "epoch": 0.08334551835063606, + "grad_norm": 1.3784508201309091, + "learning_rate": 1.6608187134502926e-05, + "loss": 0.7505494356155396, + "step": 285 + }, + { + "epoch": 0.08363795876590145, + "grad_norm": 1.3042392110114591, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.7720779776573181, + "step": 286 + }, + { + "epoch": 0.08393039918116683, + "grad_norm": 1.5516828033558783, + "learning_rate": 1.672514619883041e-05, + "loss": 0.7746216654777527, + "step": 287 + }, + { + "epoch": 0.08422283959643223, + "grad_norm": 1.4429865955911445, + "learning_rate": 1.6783625730994155e-05, + "loss": 0.8471436500549316, + "step": 288 + }, + { + "epoch": 0.08451528001169761, + "grad_norm": 1.4116704654777366, + "learning_rate": 1.6842105263157896e-05, + "loss": 0.7117248773574829, + "step": 289 + }, + { + "epoch": 0.08480772042696301, + "grad_norm": 1.4428575448924124, + "learning_rate": 1.690058479532164e-05, + "loss": 0.758680522441864, + "step": 290 + }, + { + "epoch": 0.08510016084222839, + "grad_norm": 1.4632326474117294, + "learning_rate": 1.695906432748538e-05, + "loss": 0.9083560705184937, + "step": 291 + }, + { + "epoch": 0.08539260125749379, + "grad_norm": 1.3444847997489586, + "learning_rate": 1.7017543859649125e-05, + "loss": 0.7457551956176758, + "step": 292 + }, + { + "epoch": 0.08568504167275917, + "grad_norm": 1.423532632485526, + "learning_rate": 1.7076023391812867e-05, + "loss": 0.7463638782501221, + "step": 293 + }, + { + "epoch": 0.08597748208802457, + "grad_norm": 1.4584931442713187, + "learning_rate": 1.713450292397661e-05, + "loss": 0.6983559131622314, + "step": 294 + }, + { + "epoch": 0.08626992250328995, + "grad_norm": 1.3612667828489424, + "learning_rate": 1.719298245614035e-05, + "loss": 0.8043842911720276, + "step": 295 + }, + { + "epoch": 0.08656236291855535, + "grad_norm": 1.5042924331122234, + "learning_rate": 1.7251461988304093e-05, + 
"loss": 0.7150747776031494, + "step": 296 + }, + { + "epoch": 0.08685480333382073, + "grad_norm": 2.0308017082996326, + "learning_rate": 1.7309941520467838e-05, + "loss": 0.7805558443069458, + "step": 297 + }, + { + "epoch": 0.08714724374908613, + "grad_norm": 1.4326584270734728, + "learning_rate": 1.736842105263158e-05, + "loss": 0.7158486843109131, + "step": 298 + }, + { + "epoch": 0.08743968416435151, + "grad_norm": 1.2329719748746066, + "learning_rate": 1.7426900584795322e-05, + "loss": 0.6496458053588867, + "step": 299 + }, + { + "epoch": 0.0877321245796169, + "grad_norm": 1.3255444740397837, + "learning_rate": 1.7485380116959064e-05, + "loss": 0.7488506436347961, + "step": 300 + }, + { + "epoch": 0.08802456499488229, + "grad_norm": 1.5658056782887144, + "learning_rate": 1.754385964912281e-05, + "loss": 0.8370999097824097, + "step": 301 + }, + { + "epoch": 0.08831700541014768, + "grad_norm": 1.3342670844496862, + "learning_rate": 1.760233918128655e-05, + "loss": 0.6624353528022766, + "step": 302 + }, + { + "epoch": 0.08860944582541307, + "grad_norm": 1.4627534576360353, + "learning_rate": 1.7660818713450293e-05, + "loss": 0.6861047148704529, + "step": 303 + }, + { + "epoch": 0.08890188624067846, + "grad_norm": 1.6532053166188327, + "learning_rate": 1.7719298245614035e-05, + "loss": 0.746711015701294, + "step": 304 + }, + { + "epoch": 0.08919432665594385, + "grad_norm": 1.554160121250669, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.7794955968856812, + "step": 305 + }, + { + "epoch": 0.08948676707120924, + "grad_norm": 1.7649976265227958, + "learning_rate": 1.7836257309941522e-05, + "loss": 0.7202489972114563, + "step": 306 + }, + { + "epoch": 0.08977920748647462, + "grad_norm": 1.6262384567896693, + "learning_rate": 1.7894736842105264e-05, + "loss": 0.7252119183540344, + "step": 307 + }, + { + "epoch": 0.09007164790174002, + "grad_norm": 1.5452508352574224, + "learning_rate": 1.7953216374269006e-05, + "loss": 0.9168737530708313, + "step": 308 + }, + 
{ + "epoch": 0.0903640883170054, + "grad_norm": 1.487069935429652, + "learning_rate": 1.8011695906432747e-05, + "loss": 0.7647944688796997, + "step": 309 + }, + { + "epoch": 0.0906565287322708, + "grad_norm": 1.7447386842901849, + "learning_rate": 1.8070175438596493e-05, + "loss": 0.7836136817932129, + "step": 310 + }, + { + "epoch": 0.0909489691475362, + "grad_norm": 1.2604562921756688, + "learning_rate": 1.8128654970760235e-05, + "loss": 0.6495587825775146, + "step": 311 + }, + { + "epoch": 0.09124140956280158, + "grad_norm": 1.5613577023920442, + "learning_rate": 1.8187134502923976e-05, + "loss": 0.7266290187835693, + "step": 312 + }, + { + "epoch": 0.09153384997806698, + "grad_norm": 1.9984801625992445, + "learning_rate": 1.824561403508772e-05, + "loss": 0.8417587876319885, + "step": 313 + }, + { + "epoch": 0.09182629039333236, + "grad_norm": 1.5767499272635297, + "learning_rate": 1.8304093567251464e-05, + "loss": 0.8431564569473267, + "step": 314 + }, + { + "epoch": 0.09211873080859775, + "grad_norm": 1.4390326104450535, + "learning_rate": 1.8362573099415205e-05, + "loss": 0.7724050283432007, + "step": 315 + }, + { + "epoch": 0.09241117122386314, + "grad_norm": 1.4145032164176374, + "learning_rate": 1.8421052631578947e-05, + "loss": 0.6687352657318115, + "step": 316 + }, + { + "epoch": 0.09270361163912853, + "grad_norm": 1.3696816256616517, + "learning_rate": 1.847953216374269e-05, + "loss": 0.7465454339981079, + "step": 317 + }, + { + "epoch": 0.09299605205439392, + "grad_norm": 1.507661205433782, + "learning_rate": 1.8538011695906434e-05, + "loss": 0.6944088935852051, + "step": 318 + }, + { + "epoch": 0.09328849246965931, + "grad_norm": 1.2922205760098913, + "learning_rate": 1.8596491228070176e-05, + "loss": 0.6692598462104797, + "step": 319 + }, + { + "epoch": 0.0935809328849247, + "grad_norm": 1.4345621362788812, + "learning_rate": 1.8654970760233918e-05, + "loss": 0.7287981510162354, + "step": 320 + }, + { + "epoch": 0.09387337330019009, + "grad_norm": 
1.426362426046858, + "learning_rate": 1.871345029239766e-05, + "loss": 0.704437255859375, + "step": 321 + }, + { + "epoch": 0.09416581371545547, + "grad_norm": 1.2757141813139592, + "learning_rate": 1.8771929824561405e-05, + "loss": 0.6425009965896606, + "step": 322 + }, + { + "epoch": 0.09445825413072087, + "grad_norm": 1.4929466314279891, + "learning_rate": 1.8830409356725147e-05, + "loss": 0.765799880027771, + "step": 323 + }, + { + "epoch": 0.09475069454598625, + "grad_norm": 1.482293870539422, + "learning_rate": 1.888888888888889e-05, + "loss": 0.9151520133018494, + "step": 324 + }, + { + "epoch": 0.09504313496125165, + "grad_norm": 1.5087468194478204, + "learning_rate": 1.894736842105263e-05, + "loss": 0.8753486275672913, + "step": 325 + }, + { + "epoch": 0.09533557537651703, + "grad_norm": 1.649363404228967, + "learning_rate": 1.9005847953216376e-05, + "loss": 0.7652826309204102, + "step": 326 + }, + { + "epoch": 0.09562801579178243, + "grad_norm": 1.405975419146797, + "learning_rate": 1.9064327485380118e-05, + "loss": 0.7309015393257141, + "step": 327 + }, + { + "epoch": 0.09592045620704781, + "grad_norm": 1.6766609888433524, + "learning_rate": 1.912280701754386e-05, + "loss": 0.7656553983688354, + "step": 328 + }, + { + "epoch": 0.09621289662231321, + "grad_norm": 1.4942542074310006, + "learning_rate": 1.9181286549707602e-05, + "loss": 0.7400631904602051, + "step": 329 + }, + { + "epoch": 0.09650533703757859, + "grad_norm": 1.4740815055784118, + "learning_rate": 1.9239766081871347e-05, + "loss": 0.6812465190887451, + "step": 330 + }, + { + "epoch": 0.09679777745284399, + "grad_norm": 1.4394939888427052, + "learning_rate": 1.929824561403509e-05, + "loss": 0.6820628046989441, + "step": 331 + }, + { + "epoch": 0.09709021786810937, + "grad_norm": 1.9824484648298863, + "learning_rate": 1.935672514619883e-05, + "loss": 0.7437758445739746, + "step": 332 + }, + { + "epoch": 0.09738265828337477, + "grad_norm": 1.4755288186056683, + "learning_rate": 
1.9415204678362573e-05, + "loss": 0.8011504411697388, + "step": 333 + }, + { + "epoch": 0.09767509869864015, + "grad_norm": 1.3829561395962537, + "learning_rate": 1.9473684210526318e-05, + "loss": 0.7437810301780701, + "step": 334 + }, + { + "epoch": 0.09796753911390554, + "grad_norm": 1.328838303483977, + "learning_rate": 1.953216374269006e-05, + "loss": 0.7419568300247192, + "step": 335 + }, + { + "epoch": 0.09825997952917093, + "grad_norm": 1.4291436246188844, + "learning_rate": 1.9590643274853802e-05, + "loss": 0.7805042266845703, + "step": 336 + }, + { + "epoch": 0.09855241994443632, + "grad_norm": 1.3104711543583085, + "learning_rate": 1.9649122807017544e-05, + "loss": 0.6952530145645142, + "step": 337 + }, + { + "epoch": 0.0988448603597017, + "grad_norm": 1.313224719465845, + "learning_rate": 1.970760233918129e-05, + "loss": 0.7669289112091064, + "step": 338 + }, + { + "epoch": 0.0991373007749671, + "grad_norm": 1.4101609769639065, + "learning_rate": 1.976608187134503e-05, + "loss": 0.8033919930458069, + "step": 339 + }, + { + "epoch": 0.09942974119023248, + "grad_norm": 1.2883543538345825, + "learning_rate": 1.9824561403508773e-05, + "loss": 0.6523177623748779, + "step": 340 + }, + { + "epoch": 0.09972218160549788, + "grad_norm": 1.3960808628411998, + "learning_rate": 1.9883040935672515e-05, + "loss": 0.7221896648406982, + "step": 341 + }, + { + "epoch": 0.10001462202076326, + "grad_norm": 1.2255647850534943, + "learning_rate": 1.994152046783626e-05, + "loss": 0.6054700016975403, + "step": 342 + }, + { + "epoch": 0.10030706243602866, + "grad_norm": 1.6303566611100393, + "learning_rate": 2e-05, + "loss": 0.8368290662765503, + "step": 343 + }, + { + "epoch": 0.10059950285129404, + "grad_norm": 1.4276425594743465, + "learning_rate": 1.99999988312804e-05, + "loss": 0.9075677990913391, + "step": 344 + }, + { + "epoch": 0.10089194326655944, + "grad_norm": 1.4517524210925274, + "learning_rate": 1.999999532512188e-05, + "loss": 0.7202495336532593, + "step": 345 + 
}, + { + "epoch": 0.10118438368182482, + "grad_norm": 1.5340311782896001, + "learning_rate": 1.9999989481525245e-05, + "loss": 0.7373536229133606, + "step": 346 + }, + { + "epoch": 0.10147682409709022, + "grad_norm": 1.3128585037330316, + "learning_rate": 1.9999981300491873e-05, + "loss": 0.7292035222053528, + "step": 347 + }, + { + "epoch": 0.10176926451235561, + "grad_norm": 1.2681362139682877, + "learning_rate": 1.9999970782023673e-05, + "loss": 0.8970675468444824, + "step": 348 + }, + { + "epoch": 0.102061704927621, + "grad_norm": 1.384714606589521, + "learning_rate": 1.9999957926123104e-05, + "loss": 0.7909846305847168, + "step": 349 + }, + { + "epoch": 0.1023541453428864, + "grad_norm": 1.3537270396362884, + "learning_rate": 1.999994273279317e-05, + "loss": 0.7784097790718079, + "step": 350 + }, + { + "epoch": 0.10264658575815178, + "grad_norm": 1.4008631296209513, + "learning_rate": 1.9999925202037422e-05, + "loss": 0.7129874229431152, + "step": 351 + }, + { + "epoch": 0.10293902617341717, + "grad_norm": 1.3322666039831734, + "learning_rate": 1.999990533385996e-05, + "loss": 0.7185519337654114, + "step": 352 + }, + { + "epoch": 0.10323146658868255, + "grad_norm": 1.379111892126872, + "learning_rate": 1.9999883128265428e-05, + "loss": 0.812228798866272, + "step": 353 + }, + { + "epoch": 0.10352390700394795, + "grad_norm": 1.2831139743741589, + "learning_rate": 1.999985858525901e-05, + "loss": 0.7187886238098145, + "step": 354 + }, + { + "epoch": 0.10381634741921333, + "grad_norm": 1.133776070922858, + "learning_rate": 1.9999831704846452e-05, + "loss": 0.6618789434432983, + "step": 355 + }, + { + "epoch": 0.10410878783447873, + "grad_norm": 1.5601168208020613, + "learning_rate": 1.999980248703403e-05, + "loss": 0.9226458072662354, + "step": 356 + }, + { + "epoch": 0.10440122824974411, + "grad_norm": 1.3702611517072447, + "learning_rate": 1.9999770931828578e-05, + "loss": 0.7326352596282959, + "step": 357 + }, + { + "epoch": 0.10469366866500951, + "grad_norm": 
1.4755549813416367, + "learning_rate": 1.9999737039237472e-05, + "loss": 0.719240128993988, + "step": 358 + }, + { + "epoch": 0.10498610908027489, + "grad_norm": 1.2914576093532248, + "learning_rate": 1.999970080926863e-05, + "loss": 0.7380290031433105, + "step": 359 + }, + { + "epoch": 0.10527854949554029, + "grad_norm": 1.6255135036531254, + "learning_rate": 1.9999662241930523e-05, + "loss": 0.736219048500061, + "step": 360 + }, + { + "epoch": 0.10557098991080567, + "grad_norm": 1.381933387611508, + "learning_rate": 1.999962133723217e-05, + "loss": 0.8160735368728638, + "step": 361 + }, + { + "epoch": 0.10586343032607107, + "grad_norm": 1.4607575491849774, + "learning_rate": 1.9999578095183126e-05, + "loss": 0.6679781675338745, + "step": 362 + }, + { + "epoch": 0.10615587074133645, + "grad_norm": 1.551414308388604, + "learning_rate": 1.9999532515793498e-05, + "loss": 0.7670542001724243, + "step": 363 + }, + { + "epoch": 0.10644831115660185, + "grad_norm": 1.2802491712211252, + "learning_rate": 1.9999484599073945e-05, + "loss": 0.6395057439804077, + "step": 364 + }, + { + "epoch": 0.10674075157186723, + "grad_norm": 1.571289013739176, + "learning_rate": 1.9999434345035666e-05, + "loss": 0.7226368188858032, + "step": 365 + }, + { + "epoch": 0.10703319198713263, + "grad_norm": 1.4755023089198305, + "learning_rate": 1.9999381753690403e-05, + "loss": 0.6236128211021423, + "step": 366 + }, + { + "epoch": 0.10732563240239801, + "grad_norm": 1.2507526885979663, + "learning_rate": 1.9999326825050455e-05, + "loss": 0.5937299132347107, + "step": 367 + }, + { + "epoch": 0.1076180728176634, + "grad_norm": 1.294239826855842, + "learning_rate": 1.999926955912866e-05, + "loss": 0.6014857292175293, + "step": 368 + }, + { + "epoch": 0.10791051323292879, + "grad_norm": 1.1031323946933334, + "learning_rate": 1.9999209955938394e-05, + "loss": 0.5898704528808594, + "step": 369 + }, + { + "epoch": 0.10820295364819418, + "grad_norm": 1.475520460275832, + "learning_rate": 
1.9999148015493602e-05, + "loss": 0.6879048943519592, + "step": 370 + }, + { + "epoch": 0.10849539406345957, + "grad_norm": 1.5235484717330832, + "learning_rate": 1.999908373780876e-05, + "loss": 0.781298041343689, + "step": 371 + }, + { + "epoch": 0.10878783447872496, + "grad_norm": 1.2913472995661532, + "learning_rate": 1.9999017122898886e-05, + "loss": 0.6997531652450562, + "step": 372 + }, + { + "epoch": 0.10908027489399034, + "grad_norm": 1.2104967688689228, + "learning_rate": 1.9998948170779556e-05, + "loss": 0.6979694366455078, + "step": 373 + }, + { + "epoch": 0.10937271530925574, + "grad_norm": 1.6154905149339498, + "learning_rate": 1.999887688146689e-05, + "loss": 0.8069214820861816, + "step": 374 + }, + { + "epoch": 0.10966515572452112, + "grad_norm": 1.4534879205249425, + "learning_rate": 1.9998803254977538e-05, + "loss": 0.875137448310852, + "step": 375 + }, + { + "epoch": 0.10995759613978652, + "grad_norm": 1.4252221781216903, + "learning_rate": 1.9998727291328725e-05, + "loss": 0.8267173767089844, + "step": 376 + }, + { + "epoch": 0.1102500365550519, + "grad_norm": 1.3704709368430794, + "learning_rate": 1.99986489905382e-05, + "loss": 0.7589337825775146, + "step": 377 + }, + { + "epoch": 0.1105424769703173, + "grad_norm": 1.7248131297126135, + "learning_rate": 1.999856835262427e-05, + "loss": 0.7479992508888245, + "step": 378 + }, + { + "epoch": 0.11083491738558268, + "grad_norm": 1.2827951417341936, + "learning_rate": 1.999848537760577e-05, + "loss": 0.7315084934234619, + "step": 379 + }, + { + "epoch": 0.11112735780084808, + "grad_norm": 1.2954297558049002, + "learning_rate": 1.9998400065502113e-05, + "loss": 0.6256793737411499, + "step": 380 + }, + { + "epoch": 0.11141979821611346, + "grad_norm": 1.3569633064170001, + "learning_rate": 1.999831241633323e-05, + "loss": 0.7521710395812988, + "step": 381 + }, + { + "epoch": 0.11171223863137886, + "grad_norm": 1.0851029845548303, + "learning_rate": 1.999822243011961e-05, + "loss": 0.6824651956558228, + 
"step": 382 + }, + { + "epoch": 0.11200467904664424, + "grad_norm": 1.4206429861314096, + "learning_rate": 1.9998130106882286e-05, + "loss": 0.7254977226257324, + "step": 383 + }, + { + "epoch": 0.11229711946190964, + "grad_norm": 1.4795080730717471, + "learning_rate": 1.999803544664284e-05, + "loss": 0.8263741731643677, + "step": 384 + }, + { + "epoch": 0.11258955987717502, + "grad_norm": 1.3096519492267191, + "learning_rate": 1.9997938449423397e-05, + "loss": 0.6829507350921631, + "step": 385 + }, + { + "epoch": 0.11288200029244042, + "grad_norm": 1.2970935037264724, + "learning_rate": 1.9997839115246632e-05, + "loss": 0.7452428340911865, + "step": 386 + }, + { + "epoch": 0.11317444070770581, + "grad_norm": 1.322513824449788, + "learning_rate": 1.999773744413576e-05, + "loss": 0.7900702953338623, + "step": 387 + }, + { + "epoch": 0.1134668811229712, + "grad_norm": 1.288312120065537, + "learning_rate": 1.9997633436114547e-05, + "loss": 0.6215303540229797, + "step": 388 + }, + { + "epoch": 0.11375932153823659, + "grad_norm": 1.3132613017546322, + "learning_rate": 1.999752709120731e-05, + "loss": 0.798041820526123, + "step": 389 + }, + { + "epoch": 0.11405176195350197, + "grad_norm": 1.1590478323977431, + "learning_rate": 1.9997418409438893e-05, + "loss": 0.6033064126968384, + "step": 390 + }, + { + "epoch": 0.11434420236876737, + "grad_norm": 1.0686988063553795, + "learning_rate": 1.9997307390834712e-05, + "loss": 0.6358453631401062, + "step": 391 + }, + { + "epoch": 0.11463664278403275, + "grad_norm": 1.2775095189945147, + "learning_rate": 1.999719403542071e-05, + "loss": 0.6544308662414551, + "step": 392 + }, + { + "epoch": 0.11492908319929815, + "grad_norm": 1.3305771925144483, + "learning_rate": 1.9997078343223393e-05, + "loss": 0.73077392578125, + "step": 393 + }, + { + "epoch": 0.11522152361456353, + "grad_norm": 1.1914838503287841, + "learning_rate": 1.9996960314269792e-05, + "loss": 0.5874192118644714, + "step": 394 + }, + { + "epoch": 0.11551396402982893, 
+ "grad_norm": 1.420658082184349, + "learning_rate": 1.9996839948587503e-05, + "loss": 0.8242438435554504, + "step": 395 + }, + { + "epoch": 0.11580640444509431, + "grad_norm": 1.705790457884444, + "learning_rate": 1.9996717246204655e-05, + "loss": 0.9496668577194214, + "step": 396 + }, + { + "epoch": 0.1160988448603597, + "grad_norm": 1.2258839048083405, + "learning_rate": 1.9996592207149933e-05, + "loss": 0.6940287351608276, + "step": 397 + }, + { + "epoch": 0.11639128527562509, + "grad_norm": 1.4226760671412086, + "learning_rate": 1.999646483145256e-05, + "loss": 0.7403827905654907, + "step": 398 + }, + { + "epoch": 0.11668372569089049, + "grad_norm": 1.441557495225195, + "learning_rate": 1.9996335119142315e-05, + "loss": 0.7493172287940979, + "step": 399 + }, + { + "epoch": 0.11697616610615587, + "grad_norm": 1.1233068749163333, + "learning_rate": 1.9996203070249516e-05, + "loss": 0.6048015356063843, + "step": 400 + }, + { + "epoch": 0.11726860652142126, + "grad_norm": 1.218449987518831, + "learning_rate": 1.9996068684805025e-05, + "loss": 0.7220426797866821, + "step": 401 + }, + { + "epoch": 0.11756104693668665, + "grad_norm": 1.4820269559236292, + "learning_rate": 1.9995931962840255e-05, + "loss": 0.7294620275497437, + "step": 402 + }, + { + "epoch": 0.11785348735195204, + "grad_norm": 1.2693334480850886, + "learning_rate": 1.999579290438717e-05, + "loss": 0.7075647115707397, + "step": 403 + }, + { + "epoch": 0.11814592776721743, + "grad_norm": 1.4353448940274405, + "learning_rate": 1.9995651509478264e-05, + "loss": 0.7396657466888428, + "step": 404 + }, + { + "epoch": 0.11843836818248282, + "grad_norm": 1.5214596029668779, + "learning_rate": 1.999550777814659e-05, + "loss": 0.8240506649017334, + "step": 405 + }, + { + "epoch": 0.1187308085977482, + "grad_norm": 1.3463253886040645, + "learning_rate": 1.9995361710425752e-05, + "loss": 0.7518147826194763, + "step": 406 + }, + { + "epoch": 0.1190232490130136, + "grad_norm": 1.3938258800517485, + "learning_rate": 
1.9995213306349886e-05, + "loss": 0.6998933553695679, + "step": 407 + }, + { + "epoch": 0.11931568942827898, + "grad_norm": 2.8811625928277134, + "learning_rate": 1.999506256595368e-05, + "loss": 0.659205973148346, + "step": 408 + }, + { + "epoch": 0.11960812984354438, + "grad_norm": 1.6815673603725616, + "learning_rate": 1.9994909489272372e-05, + "loss": 0.7826964259147644, + "step": 409 + }, + { + "epoch": 0.11990057025880976, + "grad_norm": 1.4225942370637599, + "learning_rate": 1.999475407634174e-05, + "loss": 0.770768404006958, + "step": 410 + }, + { + "epoch": 0.12019301067407516, + "grad_norm": 1.4031411556955713, + "learning_rate": 1.9994596327198113e-05, + "loss": 0.7390692234039307, + "step": 411 + }, + { + "epoch": 0.12048545108934054, + "grad_norm": 1.238945633280151, + "learning_rate": 1.999443624187836e-05, + "loss": 0.7092628479003906, + "step": 412 + }, + { + "epoch": 0.12077789150460594, + "grad_norm": 1.2795019723948553, + "learning_rate": 1.9994273820419903e-05, + "loss": 0.5252765417098999, + "step": 413 + }, + { + "epoch": 0.12107033191987132, + "grad_norm": 1.389583747663469, + "learning_rate": 1.9994109062860707e-05, + "loss": 0.8131704330444336, + "step": 414 + }, + { + "epoch": 0.12136277233513672, + "grad_norm": 1.490804798338551, + "learning_rate": 1.9993941969239284e-05, + "loss": 0.8257562518119812, + "step": 415 + }, + { + "epoch": 0.1216552127504021, + "grad_norm": 1.5541597255876767, + "learning_rate": 1.999377253959469e-05, + "loss": 0.7163048982620239, + "step": 416 + }, + { + "epoch": 0.1219476531656675, + "grad_norm": 1.590877283394053, + "learning_rate": 1.9993600773966528e-05, + "loss": 0.7216504812240601, + "step": 417 + }, + { + "epoch": 0.12224009358093288, + "grad_norm": 1.6748981575800963, + "learning_rate": 1.9993426672394945e-05, + "loss": 0.7831340432167053, + "step": 418 + }, + { + "epoch": 0.12253253399619828, + "grad_norm": 1.3976993960000088, + "learning_rate": 1.9993250234920638e-05, + "loss": 0.7675709128379822, + 
"step": 419 + }, + { + "epoch": 0.12282497441146366, + "grad_norm": 1.454911379398845, + "learning_rate": 1.999307146158485e-05, + "loss": 0.8085238337516785, + "step": 420 + }, + { + "epoch": 0.12311741482672905, + "grad_norm": 1.2979608734451222, + "learning_rate": 1.9992890352429368e-05, + "loss": 0.735150933265686, + "step": 421 + }, + { + "epoch": 0.12340985524199444, + "grad_norm": 1.2046206432187132, + "learning_rate": 1.9992706907496523e-05, + "loss": 0.612186074256897, + "step": 422 + }, + { + "epoch": 0.12370229565725983, + "grad_norm": 1.364838486847665, + "learning_rate": 1.9992521126829194e-05, + "loss": 0.6636590957641602, + "step": 423 + }, + { + "epoch": 0.12399473607252522, + "grad_norm": 1.4068215451581474, + "learning_rate": 1.9992333010470806e-05, + "loss": 0.6814526319503784, + "step": 424 + }, + { + "epoch": 0.12428717648779061, + "grad_norm": 1.3620595505436823, + "learning_rate": 1.9992142558465335e-05, + "loss": 0.6940894722938538, + "step": 425 + }, + { + "epoch": 0.12457961690305601, + "grad_norm": 1.3427645949787534, + "learning_rate": 1.9991949770857294e-05, + "loss": 0.7485121488571167, + "step": 426 + }, + { + "epoch": 0.12487205731832139, + "grad_norm": 1.266832638558228, + "learning_rate": 1.9991754647691744e-05, + "loss": 0.5315885543823242, + "step": 427 + }, + { + "epoch": 0.1251644977335868, + "grad_norm": 1.2511757429133081, + "learning_rate": 1.9991557189014297e-05, + "loss": 0.7416529655456543, + "step": 428 + }, + { + "epoch": 0.12545693814885217, + "grad_norm": 1.4031357379707678, + "learning_rate": 1.9991357394871106e-05, + "loss": 0.7937026023864746, + "step": 429 + }, + { + "epoch": 0.12574937856411755, + "grad_norm": 1.3448962462478107, + "learning_rate": 1.9991155265308872e-05, + "loss": 0.7009662389755249, + "step": 430 + }, + { + "epoch": 0.12604181897938296, + "grad_norm": 1.3042132277590721, + "learning_rate": 1.999095080037484e-05, + "loss": 0.6577681303024292, + "step": 431 + }, + { + "epoch": 
0.12633425939464835, + "grad_norm": 1.4036627734956777, + "learning_rate": 1.9990744000116808e-05, + "loss": 0.7372399568557739, + "step": 432 + }, + { + "epoch": 0.12662669980991373, + "grad_norm": 1.3819832545517663, + "learning_rate": 1.999053486458311e-05, + "loss": 0.5959814190864563, + "step": 433 + }, + { + "epoch": 0.1269191402251791, + "grad_norm": 1.424207998116027, + "learning_rate": 1.999032339382263e-05, + "loss": 0.6684107780456543, + "step": 434 + }, + { + "epoch": 0.12721158064044452, + "grad_norm": 1.7048493578408517, + "learning_rate": 1.99901095878848e-05, + "loss": 0.8837687373161316, + "step": 435 + }, + { + "epoch": 0.1275040210557099, + "grad_norm": 3.7468635382669717, + "learning_rate": 1.9989893446819594e-05, + "loss": 0.7128579616546631, + "step": 436 + }, + { + "epoch": 0.1277964614709753, + "grad_norm": 1.2617709714670788, + "learning_rate": 1.9989674970677533e-05, + "loss": 0.6634687185287476, + "step": 437 + }, + { + "epoch": 0.12808890188624067, + "grad_norm": 1.626814629507008, + "learning_rate": 1.998945415950969e-05, + "loss": 0.7866299152374268, + "step": 438 + }, + { + "epoch": 0.12838134230150608, + "grad_norm": 1.6912246432889755, + "learning_rate": 1.998923101336767e-05, + "loss": 0.8104820251464844, + "step": 439 + }, + { + "epoch": 0.12867378271677146, + "grad_norm": 1.3163679319076276, + "learning_rate": 1.9989005532303637e-05, + "loss": 0.6643097400665283, + "step": 440 + }, + { + "epoch": 0.12896622313203684, + "grad_norm": 1.304280975921877, + "learning_rate": 1.9988777716370293e-05, + "loss": 0.7663843631744385, + "step": 441 + }, + { + "epoch": 0.12925866354730223, + "grad_norm": 1.4275530439491644, + "learning_rate": 1.9988547565620896e-05, + "loss": 0.8831629753112793, + "step": 442 + }, + { + "epoch": 0.12955110396256764, + "grad_norm": 1.2581390355141424, + "learning_rate": 1.9988315080109233e-05, + "loss": 0.6889798045158386, + "step": 443 + }, + { + "epoch": 0.12984354437783302, + "grad_norm": 1.2589816711321935, 
+ "learning_rate": 1.9988080259889652e-05, + "loss": 0.8173589706420898, + "step": 444 + }, + { + "epoch": 0.1301359847930984, + "grad_norm": 1.437216407920067, + "learning_rate": 1.998784310501704e-05, + "loss": 0.7444369196891785, + "step": 445 + }, + { + "epoch": 0.13042842520836379, + "grad_norm": 1.2527388287385341, + "learning_rate": 1.998760361554682e-05, + "loss": 0.6728573441505432, + "step": 446 + }, + { + "epoch": 0.1307208656236292, + "grad_norm": 1.4620149588082576, + "learning_rate": 1.998736179153499e-05, + "loss": 0.6398168802261353, + "step": 447 + }, + { + "epoch": 0.13101330603889458, + "grad_norm": 1.3925962417611275, + "learning_rate": 1.9987117633038063e-05, + "loss": 0.7367146015167236, + "step": 448 + }, + { + "epoch": 0.13130574645415996, + "grad_norm": 1.3497781950543108, + "learning_rate": 1.998687114011311e-05, + "loss": 0.7072159051895142, + "step": 449 + }, + { + "epoch": 0.13159818686942534, + "grad_norm": 1.402234544131691, + "learning_rate": 1.998662231281775e-05, + "loss": 0.7899993062019348, + "step": 450 + }, + { + "epoch": 0.13189062728469075, + "grad_norm": 1.4376114251018388, + "learning_rate": 1.9986371151210146e-05, + "loss": 0.7668592929840088, + "step": 451 + }, + { + "epoch": 0.13218306769995614, + "grad_norm": 1.3943197925338484, + "learning_rate": 1.9986117655349003e-05, + "loss": 0.7222825288772583, + "step": 452 + }, + { + "epoch": 0.13247550811522152, + "grad_norm": 1.2939952744587226, + "learning_rate": 1.9985861825293577e-05, + "loss": 0.7301540374755859, + "step": 453 + }, + { + "epoch": 0.1327679485304869, + "grad_norm": 1.174339392511722, + "learning_rate": 1.998560366110366e-05, + "loss": 0.6517907381057739, + "step": 454 + }, + { + "epoch": 0.1330603889457523, + "grad_norm": 1.5763167634786863, + "learning_rate": 1.99853431628396e-05, + "loss": 0.6889342069625854, + "step": 455 + }, + { + "epoch": 0.1333528293610177, + "grad_norm": 1.525770213874127, + "learning_rate": 1.9985080330562293e-05, + "loss": 
0.6804303526878357, + "step": 456 + }, + { + "epoch": 0.13364526977628308, + "grad_norm": 1.3944930335298842, + "learning_rate": 1.9984815164333163e-05, + "loss": 0.7699184417724609, + "step": 457 + }, + { + "epoch": 0.13393771019154846, + "grad_norm": 1.4886205672815649, + "learning_rate": 1.99845476642142e-05, + "loss": 0.7470533847808838, + "step": 458 + }, + { + "epoch": 0.13423015060681387, + "grad_norm": 1.251305257809984, + "learning_rate": 1.9984277830267927e-05, + "loss": 0.6689419746398926, + "step": 459 + }, + { + "epoch": 0.13452259102207925, + "grad_norm": 1.5088252817247363, + "learning_rate": 1.998400566255742e-05, + "loss": 0.6395387649536133, + "step": 460 + }, + { + "epoch": 0.13481503143734463, + "grad_norm": 1.3414013526988133, + "learning_rate": 1.9983731161146288e-05, + "loss": 0.7785208225250244, + "step": 461 + }, + { + "epoch": 0.13510747185261002, + "grad_norm": 1.2995640327613904, + "learning_rate": 1.9983454326098703e-05, + "loss": 0.6864018440246582, + "step": 462 + }, + { + "epoch": 0.13539991226787543, + "grad_norm": 1.424075352019454, + "learning_rate": 1.9983175157479366e-05, + "loss": 0.7201317548751831, + "step": 463 + }, + { + "epoch": 0.1356923526831408, + "grad_norm": 1.4977322356937255, + "learning_rate": 1.9982893655353534e-05, + "loss": 0.7128555774688721, + "step": 464 + }, + { + "epoch": 0.1359847930984062, + "grad_norm": 1.2421635772982216, + "learning_rate": 1.998260981978701e-05, + "loss": 0.7252457141876221, + "step": 465 + }, + { + "epoch": 0.1362772335136716, + "grad_norm": 1.472555101507684, + "learning_rate": 1.9982323650846137e-05, + "loss": 0.7453348636627197, + "step": 466 + }, + { + "epoch": 0.13656967392893699, + "grad_norm": 1.153602031844393, + "learning_rate": 1.9982035148597804e-05, + "loss": 0.6643078923225403, + "step": 467 + }, + { + "epoch": 0.13686211434420237, + "grad_norm": 1.280273878296217, + "learning_rate": 1.9981744313109445e-05, + "loss": 0.7249360084533691, + "step": 468 + }, + { + "epoch": 
0.13715455475946775, + "grad_norm": 1.2363385614561972, + "learning_rate": 1.9981451144449042e-05, + "loss": 0.8179303407669067, + "step": 469 + }, + { + "epoch": 0.13744699517473316, + "grad_norm": 1.1335812448130365, + "learning_rate": 1.9981155642685125e-05, + "loss": 0.6763637661933899, + "step": 470 + }, + { + "epoch": 0.13773943558999854, + "grad_norm": 1.4603088026603306, + "learning_rate": 1.998085780788676e-05, + "loss": 0.6684300303459167, + "step": 471 + }, + { + "epoch": 0.13803187600526393, + "grad_norm": 1.2670786265894947, + "learning_rate": 1.9980557640123566e-05, + "loss": 0.7251675128936768, + "step": 472 + }, + { + "epoch": 0.1383243164205293, + "grad_norm": 1.5269819113708596, + "learning_rate": 1.998025513946571e-05, + "loss": 0.7146456241607666, + "step": 473 + }, + { + "epoch": 0.13861675683579472, + "grad_norm": 1.2263952606430522, + "learning_rate": 1.9979950305983895e-05, + "loss": 0.7067978382110596, + "step": 474 + }, + { + "epoch": 0.1389091972510601, + "grad_norm": 1.2396761565289731, + "learning_rate": 1.9979643139749373e-05, + "loss": 0.7017637491226196, + "step": 475 + }, + { + "epoch": 0.13920163766632548, + "grad_norm": 1.397663972134979, + "learning_rate": 1.9979333640833947e-05, + "loss": 0.7511367201805115, + "step": 476 + }, + { + "epoch": 0.13949407808159087, + "grad_norm": 1.5675722536579784, + "learning_rate": 1.997902180930996e-05, + "loss": 0.8129127025604248, + "step": 477 + }, + { + "epoch": 0.13978651849685628, + "grad_norm": 1.3801608404871573, + "learning_rate": 1.9978707645250293e-05, + "loss": 0.7760868072509766, + "step": 478 + }, + { + "epoch": 0.14007895891212166, + "grad_norm": 1.2722362515735255, + "learning_rate": 1.9978391148728388e-05, + "loss": 0.5190733671188354, + "step": 479 + }, + { + "epoch": 0.14037139932738704, + "grad_norm": 1.4267690174722667, + "learning_rate": 1.9978072319818222e-05, + "loss": 0.759798526763916, + "step": 480 + }, + { + "epoch": 0.14066383974265242, + "grad_norm": 
1.3594087764036291, + "learning_rate": 1.997775115859432e-05, + "loss": 0.5750235319137573, + "step": 481 + }, + { + "epoch": 0.14095628015791783, + "grad_norm": 1.5288357817907694, + "learning_rate": 1.9977427665131748e-05, + "loss": 0.6837687492370605, + "step": 482 + }, + { + "epoch": 0.14124872057318322, + "grad_norm": 1.4085455647433316, + "learning_rate": 1.9977101839506123e-05, + "loss": 0.8774302005767822, + "step": 483 + }, + { + "epoch": 0.1415411609884486, + "grad_norm": 1.3951237263634118, + "learning_rate": 1.9976773681793605e-05, + "loss": 0.6447024345397949, + "step": 484 + }, + { + "epoch": 0.14183360140371398, + "grad_norm": 1.3077152366881364, + "learning_rate": 1.99764431920709e-05, + "loss": 0.6212965250015259, + "step": 485 + }, + { + "epoch": 0.1421260418189794, + "grad_norm": 1.7246179492768339, + "learning_rate": 1.9976110370415257e-05, + "loss": 0.7606823444366455, + "step": 486 + }, + { + "epoch": 0.14241848223424478, + "grad_norm": 1.6009360634049956, + "learning_rate": 1.9975775216904468e-05, + "loss": 0.792106032371521, + "step": 487 + }, + { + "epoch": 0.14271092264951016, + "grad_norm": 1.526072177508378, + "learning_rate": 1.997543773161688e-05, + "loss": 0.828373372554779, + "step": 488 + }, + { + "epoch": 0.14300336306477554, + "grad_norm": 1.2193329399673667, + "learning_rate": 1.997509791463137e-05, + "loss": 0.7148743867874146, + "step": 489 + }, + { + "epoch": 0.14329580348004095, + "grad_norm": 1.617921839516307, + "learning_rate": 1.9974755766027372e-05, + "loss": 0.6566554307937622, + "step": 490 + }, + { + "epoch": 0.14358824389530633, + "grad_norm": 1.2041404679997165, + "learning_rate": 1.9974411285884865e-05, + "loss": 0.7833706140518188, + "step": 491 + }, + { + "epoch": 0.14388068431057172, + "grad_norm": 1.3715764541616051, + "learning_rate": 1.997406447428436e-05, + "loss": 0.7661226987838745, + "step": 492 + }, + { + "epoch": 0.1441731247258371, + "grad_norm": 1.2510873907811162, + "learning_rate": 
1.9973715331306935e-05, + "loss": 0.5403884649276733, + "step": 493 + }, + { + "epoch": 0.1444655651411025, + "grad_norm": 1.417853529635827, + "learning_rate": 1.9973363857034183e-05, + "loss": 0.7744722366333008, + "step": 494 + }, + { + "epoch": 0.1447580055563679, + "grad_norm": 1.7245567814035911, + "learning_rate": 1.9973010051548274e-05, + "loss": 0.9036808013916016, + "step": 495 + }, + { + "epoch": 0.14505044597163327, + "grad_norm": 1.2752769917707012, + "learning_rate": 1.9972653914931902e-05, + "loss": 0.6952388286590576, + "step": 496 + }, + { + "epoch": 0.14534288638689866, + "grad_norm": 1.5454177465030166, + "learning_rate": 1.9972295447268312e-05, + "loss": 0.7818677425384521, + "step": 497 + }, + { + "epoch": 0.14563532680216407, + "grad_norm": 1.2104336195623258, + "learning_rate": 1.9971934648641294e-05, + "loss": 0.8197327256202698, + "step": 498 + }, + { + "epoch": 0.14592776721742945, + "grad_norm": 1.1376920899270277, + "learning_rate": 1.997157151913518e-05, + "loss": 0.5898807644844055, + "step": 499 + }, + { + "epoch": 0.14622020763269483, + "grad_norm": 1.6480348319290024, + "learning_rate": 1.9971206058834857e-05, + "loss": 0.7980005741119385, + "step": 500 + }, + { + "epoch": 0.14651264804796021, + "grad_norm": 1.2480430258500308, + "learning_rate": 1.997083826782574e-05, + "loss": 0.7161837816238403, + "step": 501 + }, + { + "epoch": 0.14680508846322562, + "grad_norm": 1.436852590534495, + "learning_rate": 1.99704681461938e-05, + "loss": 0.7657293081283569, + "step": 502 + }, + { + "epoch": 0.147097528878491, + "grad_norm": 1.256627894457605, + "learning_rate": 1.9970095694025553e-05, + "loss": 0.6638028621673584, + "step": 503 + }, + { + "epoch": 0.1473899692937564, + "grad_norm": 1.344090583049545, + "learning_rate": 1.996972091140806e-05, + "loss": 0.8759262561798096, + "step": 504 + }, + { + "epoch": 0.1476824097090218, + "grad_norm": 1.1099770302505587, + "learning_rate": 1.9969343798428916e-05, + "loss": 0.6686065196990967, + 
"step": 505 + }, + { + "epoch": 0.14797485012428718, + "grad_norm": 1.5672815870081807, + "learning_rate": 1.9968964355176276e-05, + "loss": 0.7900313138961792, + "step": 506 + }, + { + "epoch": 0.14826729053955257, + "grad_norm": 1.3116088800480374, + "learning_rate": 1.996858258173883e-05, + "loss": 0.699286937713623, + "step": 507 + }, + { + "epoch": 0.14855973095481795, + "grad_norm": 1.149004701425465, + "learning_rate": 1.9968198478205817e-05, + "loss": 0.6613560914993286, + "step": 508 + }, + { + "epoch": 0.14885217137008336, + "grad_norm": 1.471579106109443, + "learning_rate": 1.9967812044667014e-05, + "loss": 0.8586459755897522, + "step": 509 + }, + { + "epoch": 0.14914461178534874, + "grad_norm": 1.5307049334622256, + "learning_rate": 1.9967423281212754e-05, + "loss": 0.6620850563049316, + "step": 510 + }, + { + "epoch": 0.14943705220061412, + "grad_norm": 1.6192191406380994, + "learning_rate": 1.9967032187933905e-05, + "loss": 0.7991048097610474, + "step": 511 + }, + { + "epoch": 0.1497294926158795, + "grad_norm": 1.2792732447271702, + "learning_rate": 1.9966638764921882e-05, + "loss": 0.7301167845726013, + "step": 512 + }, + { + "epoch": 0.15002193303114492, + "grad_norm": 1.244527824938295, + "learning_rate": 1.9966243012268645e-05, + "loss": 0.6470698118209839, + "step": 513 + }, + { + "epoch": 0.1503143734464103, + "grad_norm": 1.3436689137677134, + "learning_rate": 1.99658449300667e-05, + "loss": 0.5766996145248413, + "step": 514 + }, + { + "epoch": 0.15060681386167568, + "grad_norm": 1.2104018154852028, + "learning_rate": 1.9965444518409098e-05, + "loss": 0.6365845203399658, + "step": 515 + }, + { + "epoch": 0.15089925427694106, + "grad_norm": 1.6995742833660814, + "learning_rate": 1.9965041777389426e-05, + "loss": 0.6945745944976807, + "step": 516 + }, + { + "epoch": 0.15119169469220647, + "grad_norm": 1.6841525179657149, + "learning_rate": 1.996463670710183e-05, + "loss": 0.802032470703125, + "step": 517 + }, + { + "epoch": 0.15148413510747186, + 
"grad_norm": 1.4666130226044234, + "learning_rate": 1.996422930764099e-05, + "loss": 0.7429964542388916, + "step": 518 + }, + { + "epoch": 0.15177657552273724, + "grad_norm": 1.5508181233008433, + "learning_rate": 1.9963819579102134e-05, + "loss": 0.6462180614471436, + "step": 519 + }, + { + "epoch": 0.15206901593800262, + "grad_norm": 1.3226128228565077, + "learning_rate": 1.996340752158103e-05, + "loss": 0.888412594795227, + "step": 520 + }, + { + "epoch": 0.15236145635326803, + "grad_norm": 1.386680099002057, + "learning_rate": 1.9962993135173996e-05, + "loss": 0.6734700798988342, + "step": 521 + }, + { + "epoch": 0.15265389676853341, + "grad_norm": 1.385050142293082, + "learning_rate": 1.9962576419977894e-05, + "loss": 0.6951336860656738, + "step": 522 + }, + { + "epoch": 0.1529463371837988, + "grad_norm": 1.26022036147928, + "learning_rate": 1.9962157376090126e-05, + "loss": 0.7130852341651917, + "step": 523 + }, + { + "epoch": 0.15323877759906418, + "grad_norm": 1.4353500802059385, + "learning_rate": 1.9961736003608646e-05, + "loss": 0.8322055339813232, + "step": 524 + }, + { + "epoch": 0.1535312180143296, + "grad_norm": 1.2563635075596429, + "learning_rate": 1.996131230263194e-05, + "loss": 0.7031791806221008, + "step": 525 + }, + { + "epoch": 0.15382365842959497, + "grad_norm": 1.3606474846075662, + "learning_rate": 1.9960886273259052e-05, + "loss": 0.8268769979476929, + "step": 526 + }, + { + "epoch": 0.15411609884486036, + "grad_norm": 1.048782156231717, + "learning_rate": 1.9960457915589557e-05, + "loss": 0.6843237280845642, + "step": 527 + }, + { + "epoch": 0.15440853926012574, + "grad_norm": 1.29845256190474, + "learning_rate": 1.9960027229723585e-05, + "loss": 0.8267906904220581, + "step": 528 + }, + { + "epoch": 0.15470097967539115, + "grad_norm": 1.502232175088585, + "learning_rate": 1.9959594215761807e-05, + "loss": 0.8259629011154175, + "step": 529 + }, + { + "epoch": 0.15499342009065653, + "grad_norm": 1.3618507954167858, + "learning_rate": 
1.9959158873805435e-05, + "loss": 0.654765248298645, + "step": 530 + }, + { + "epoch": 0.1552858605059219, + "grad_norm": 1.3762650099604372, + "learning_rate": 1.9958721203956233e-05, + "loss": 0.7841149568557739, + "step": 531 + }, + { + "epoch": 0.1555783009211873, + "grad_norm": 1.131527995151024, + "learning_rate": 1.9958281206316497e-05, + "loss": 0.7364583015441895, + "step": 532 + }, + { + "epoch": 0.1558707413364527, + "grad_norm": 1.2428392866727909, + "learning_rate": 1.9957838880989076e-05, + "loss": 0.7985796928405762, + "step": 533 + }, + { + "epoch": 0.1561631817517181, + "grad_norm": 1.7674168807742325, + "learning_rate": 1.9957394228077363e-05, + "loss": 0.8432350754737854, + "step": 534 + }, + { + "epoch": 0.15645562216698347, + "grad_norm": 1.409652061557183, + "learning_rate": 1.995694724768529e-05, + "loss": 0.713615894317627, + "step": 535 + }, + { + "epoch": 0.15674806258224885, + "grad_norm": 1.3406073565001748, + "learning_rate": 1.9956497939917336e-05, + "loss": 0.6472936868667603, + "step": 536 + }, + { + "epoch": 0.15704050299751426, + "grad_norm": 1.4828550722777096, + "learning_rate": 1.9956046304878528e-05, + "loss": 0.7963594198226929, + "step": 537 + }, + { + "epoch": 0.15733294341277965, + "grad_norm": 1.3875627998599316, + "learning_rate": 1.9955592342674427e-05, + "loss": 0.8043302893638611, + "step": 538 + }, + { + "epoch": 0.15762538382804503, + "grad_norm": 1.3187786308741334, + "learning_rate": 1.995513605341115e-05, + "loss": 0.6277294754981995, + "step": 539 + }, + { + "epoch": 0.1579178242433104, + "grad_norm": 1.2577326193858611, + "learning_rate": 1.9954677437195345e-05, + "loss": 0.569086492061615, + "step": 540 + }, + { + "epoch": 0.15821026465857582, + "grad_norm": 1.4002846512494251, + "learning_rate": 1.9954216494134217e-05, + "loss": 0.7694308757781982, + "step": 541 + }, + { + "epoch": 0.1585027050738412, + "grad_norm": 1.2602961243105442, + "learning_rate": 1.9953753224335504e-05, + "loss": 0.7782721519470215, + 
"step": 542 + }, + { + "epoch": 0.1587951454891066, + "grad_norm": 1.4115021596903525, + "learning_rate": 1.9953287627907498e-05, + "loss": 0.6231539249420166, + "step": 543 + }, + { + "epoch": 0.159087585904372, + "grad_norm": 1.6469299670076099, + "learning_rate": 1.9952819704959022e-05, + "loss": 0.6431725025177002, + "step": 544 + }, + { + "epoch": 0.15938002631963738, + "grad_norm": 1.3041234892791729, + "learning_rate": 1.9952349455599455e-05, + "loss": 0.7062366008758545, + "step": 545 + }, + { + "epoch": 0.15967246673490276, + "grad_norm": 1.3521393089140767, + "learning_rate": 1.9951876879938716e-05, + "loss": 0.5376520156860352, + "step": 546 + }, + { + "epoch": 0.15996490715016815, + "grad_norm": 1.385911158215194, + "learning_rate": 1.9951401978087267e-05, + "loss": 0.7693386077880859, + "step": 547 + }, + { + "epoch": 0.16025734756543356, + "grad_norm": 1.7168051396485104, + "learning_rate": 1.9950924750156107e-05, + "loss": 0.6735765337944031, + "step": 548 + }, + { + "epoch": 0.16054978798069894, + "grad_norm": 1.1186135901816567, + "learning_rate": 1.995044519625679e-05, + "loss": 0.5333552360534668, + "step": 549 + }, + { + "epoch": 0.16084222839596432, + "grad_norm": 1.2149038323564916, + "learning_rate": 1.994996331650141e-05, + "loss": 0.6694493293762207, + "step": 550 + }, + { + "epoch": 0.1611346688112297, + "grad_norm": 1.3895443962170193, + "learning_rate": 1.9949479111002596e-05, + "loss": 0.6056857109069824, + "step": 551 + }, + { + "epoch": 0.1614271092264951, + "grad_norm": 1.4990214566868623, + "learning_rate": 1.9948992579873538e-05, + "loss": 0.7174896001815796, + "step": 552 + }, + { + "epoch": 0.1617195496417605, + "grad_norm": 1.4417886999069138, + "learning_rate": 1.9948503723227954e-05, + "loss": 0.9150595664978027, + "step": 553 + }, + { + "epoch": 0.16201199005702588, + "grad_norm": 1.475120009674046, + "learning_rate": 1.9948012541180116e-05, + "loss": 0.7418098449707031, + "step": 554 + }, + { + "epoch": 0.16230443047229126, 
+ "grad_norm": 1.3802668140870205, + "learning_rate": 1.9947519033844828e-05, + "loss": 0.6937648057937622, + "step": 555 + }, + { + "epoch": 0.16259687088755667, + "grad_norm": 1.198267913228467, + "learning_rate": 1.9947023201337448e-05, + "loss": 0.628747820854187, + "step": 556 + }, + { + "epoch": 0.16288931130282205, + "grad_norm": 1.3166666693196283, + "learning_rate": 1.9946525043773875e-05, + "loss": 0.6252326965332031, + "step": 557 + }, + { + "epoch": 0.16318175171808744, + "grad_norm": 1.4085830915284543, + "learning_rate": 1.9946024561270547e-05, + "loss": 0.6243278980255127, + "step": 558 + }, + { + "epoch": 0.16347419213335282, + "grad_norm": 1.515326552036181, + "learning_rate": 1.994552175394445e-05, + "loss": 0.7613602876663208, + "step": 559 + }, + { + "epoch": 0.16376663254861823, + "grad_norm": 1.4167210376939137, + "learning_rate": 1.9945016621913115e-05, + "loss": 0.7680152654647827, + "step": 560 + }, + { + "epoch": 0.1640590729638836, + "grad_norm": 1.4413485992010024, + "learning_rate": 1.9944509165294614e-05, + "loss": 0.6926383972167969, + "step": 561 + }, + { + "epoch": 0.164351513379149, + "grad_norm": 1.3901402403092062, + "learning_rate": 1.9943999384207556e-05, + "loss": 0.6822172403335571, + "step": 562 + }, + { + "epoch": 0.16464395379441438, + "grad_norm": 1.1253426305557543, + "learning_rate": 1.99434872787711e-05, + "loss": 0.6533722281455994, + "step": 563 + }, + { + "epoch": 0.1649363942096798, + "grad_norm": 1.3434183662540475, + "learning_rate": 1.9942972849104955e-05, + "loss": 0.6754113435745239, + "step": 564 + }, + { + "epoch": 0.16522883462494517, + "grad_norm": 1.3906070154993262, + "learning_rate": 1.9942456095329357e-05, + "loss": 0.5585163235664368, + "step": 565 + }, + { + "epoch": 0.16552127504021055, + "grad_norm": 1.18702583603665, + "learning_rate": 1.99419370175651e-05, + "loss": 0.6268453598022461, + "step": 566 + }, + { + "epoch": 0.16581371545547594, + "grad_norm": 1.3564219134919553, + "learning_rate": 
1.994141561593351e-05, + "loss": 0.6508245468139648, + "step": 567 + }, + { + "epoch": 0.16610615587074135, + "grad_norm": 1.353057425024783, + "learning_rate": 1.9940891890556468e-05, + "loss": 0.7337379455566406, + "step": 568 + }, + { + "epoch": 0.16639859628600673, + "grad_norm": 1.3764723902611744, + "learning_rate": 1.9940365841556385e-05, + "loss": 0.7888853549957275, + "step": 569 + }, + { + "epoch": 0.1666910367012721, + "grad_norm": 1.5384301744775797, + "learning_rate": 1.993983746905623e-05, + "loss": 0.777199923992157, + "step": 570 + }, + { + "epoch": 0.1669834771165375, + "grad_norm": 1.5194907821323576, + "learning_rate": 1.9939306773179498e-05, + "loss": 0.761531412601471, + "step": 571 + }, + { + "epoch": 0.1672759175318029, + "grad_norm": 1.5063040441270878, + "learning_rate": 1.993877375405024e-05, + "loss": 0.7060664296150208, + "step": 572 + }, + { + "epoch": 0.16756835794706829, + "grad_norm": 1.472994627130685, + "learning_rate": 1.9938238411793045e-05, + "loss": 0.6797431707382202, + "step": 573 + }, + { + "epoch": 0.16786079836233367, + "grad_norm": 1.3131930617818641, + "learning_rate": 1.9937700746533048e-05, + "loss": 0.7202910780906677, + "step": 574 + }, + { + "epoch": 0.16815323877759905, + "grad_norm": 1.198711592546953, + "learning_rate": 1.9937160758395923e-05, + "loss": 0.7241546511650085, + "step": 575 + }, + { + "epoch": 0.16844567919286446, + "grad_norm": 1.3694786109804489, + "learning_rate": 1.993661844750789e-05, + "loss": 0.7055338621139526, + "step": 576 + }, + { + "epoch": 0.16873811960812984, + "grad_norm": 1.4237978283864139, + "learning_rate": 1.993607381399571e-05, + "loss": 0.6973986625671387, + "step": 577 + }, + { + "epoch": 0.16903056002339523, + "grad_norm": 1.1715457050926792, + "learning_rate": 1.993552685798669e-05, + "loss": 0.693436861038208, + "step": 578 + }, + { + "epoch": 0.1693230004386606, + "grad_norm": 1.5585764488361307, + "learning_rate": 1.9934977579608676e-05, + "loss": 0.6687765121459961, + 
"step": 579 + }, + { + "epoch": 0.16961544085392602, + "grad_norm": 1.3798925262407884, + "learning_rate": 1.9934425978990057e-05, + "loss": 0.7776578068733215, + "step": 580 + }, + { + "epoch": 0.1699078812691914, + "grad_norm": 1.3168335454892666, + "learning_rate": 1.9933872056259768e-05, + "loss": 0.6914045810699463, + "step": 581 + }, + { + "epoch": 0.17020032168445678, + "grad_norm": 1.4649859185166105, + "learning_rate": 1.9933315811547283e-05, + "loss": 0.8005306720733643, + "step": 582 + }, + { + "epoch": 0.1704927620997222, + "grad_norm": 1.3952257625848015, + "learning_rate": 1.9932757244982625e-05, + "loss": 0.6936507225036621, + "step": 583 + }, + { + "epoch": 0.17078520251498758, + "grad_norm": 1.157795409448355, + "learning_rate": 1.9932196356696353e-05, + "loss": 0.6915504932403564, + "step": 584 + }, + { + "epoch": 0.17107764293025296, + "grad_norm": 1.4153568154846778, + "learning_rate": 1.9931633146819573e-05, + "loss": 0.7583723664283752, + "step": 585 + }, + { + "epoch": 0.17137008334551834, + "grad_norm": 1.2959976429359619, + "learning_rate": 1.9931067615483927e-05, + "loss": 0.7097266912460327, + "step": 586 + }, + { + "epoch": 0.17166252376078375, + "grad_norm": 1.5238633829769868, + "learning_rate": 1.9930499762821608e-05, + "loss": 0.7586667537689209, + "step": 587 + }, + { + "epoch": 0.17195496417604914, + "grad_norm": 1.3505202775838374, + "learning_rate": 1.9929929588965352e-05, + "loss": 0.7043411731719971, + "step": 588 + }, + { + "epoch": 0.17224740459131452, + "grad_norm": 1.3150009626714483, + "learning_rate": 1.9929357094048425e-05, + "loss": 0.8502261638641357, + "step": 589 + }, + { + "epoch": 0.1725398450065799, + "grad_norm": 1.3901300269374877, + "learning_rate": 1.992878227820465e-05, + "loss": 0.7196993827819824, + "step": 590 + }, + { + "epoch": 0.1728322854218453, + "grad_norm": 1.5475395216492736, + "learning_rate": 1.9928205141568388e-05, + "loss": 0.6783720850944519, + "step": 591 + }, + { + "epoch": 
0.1731247258371107, + "grad_norm": 1.1911883688546063, + "learning_rate": 1.9927625684274534e-05, + "loss": 0.7128307819366455, + "step": 592 + }, + { + "epoch": 0.17341716625237608, + "grad_norm": 1.226507853409212, + "learning_rate": 1.9927043906458538e-05, + "loss": 0.7289423942565918, + "step": 593 + }, + { + "epoch": 0.17370960666764146, + "grad_norm": 1.298942183876381, + "learning_rate": 1.992645980825639e-05, + "loss": 0.6306120157241821, + "step": 594 + }, + { + "epoch": 0.17400204708290687, + "grad_norm": 1.2456494719411173, + "learning_rate": 1.9925873389804614e-05, + "loss": 0.7910655736923218, + "step": 595 + }, + { + "epoch": 0.17429448749817225, + "grad_norm": 1.267940212117298, + "learning_rate": 1.9925284651240282e-05, + "loss": 0.6075282096862793, + "step": 596 + }, + { + "epoch": 0.17458692791343763, + "grad_norm": 1.251937615037275, + "learning_rate": 1.992469359270101e-05, + "loss": 0.6270443201065063, + "step": 597 + }, + { + "epoch": 0.17487936832870302, + "grad_norm": 1.3200413033724028, + "learning_rate": 1.9924100214324955e-05, + "loss": 0.6487830877304077, + "step": 598 + }, + { + "epoch": 0.17517180874396843, + "grad_norm": 1.45237431858529, + "learning_rate": 1.9923504516250814e-05, + "loss": 0.5986843705177307, + "step": 599 + }, + { + "epoch": 0.1754642491592338, + "grad_norm": 1.2191897136056242, + "learning_rate": 1.992290649861783e-05, + "loss": 0.7734183073043823, + "step": 600 + }, + { + "epoch": 0.1757566895744992, + "grad_norm": 1.167414919229407, + "learning_rate": 1.9922306161565782e-05, + "loss": 0.5784964561462402, + "step": 601 + }, + { + "epoch": 0.17604912998976457, + "grad_norm": 1.501564665297397, + "learning_rate": 1.9921703505234995e-05, + "loss": 0.8034321069717407, + "step": 602 + }, + { + "epoch": 0.17634157040502998, + "grad_norm": 1.314622713247698, + "learning_rate": 1.992109852976634e-05, + "loss": 0.8153722882270813, + "step": 603 + }, + { + "epoch": 0.17663401082029537, + "grad_norm": 1.877065501880657, + 
"learning_rate": 1.992049123530123e-05, + "loss": 0.7293002605438232, + "step": 604 + }, + { + "epoch": 0.17692645123556075, + "grad_norm": 1.514670729590329, + "learning_rate": 1.9919881621981606e-05, + "loss": 0.7108439207077026, + "step": 605 + }, + { + "epoch": 0.17721889165082613, + "grad_norm": 1.4748189889445555, + "learning_rate": 1.9919269689949968e-05, + "loss": 0.7581946849822998, + "step": 606 + }, + { + "epoch": 0.17751133206609154, + "grad_norm": 1.2337358872247315, + "learning_rate": 1.991865543934935e-05, + "loss": 0.6821258068084717, + "step": 607 + }, + { + "epoch": 0.17780377248135693, + "grad_norm": 1.2791852908008183, + "learning_rate": 1.991803887032333e-05, + "loss": 0.7116109728813171, + "step": 608 + }, + { + "epoch": 0.1780962128966223, + "grad_norm": 1.2208883706731903, + "learning_rate": 1.9917419983016025e-05, + "loss": 0.6680186986923218, + "step": 609 + }, + { + "epoch": 0.1783886533118877, + "grad_norm": 1.3494621179320938, + "learning_rate": 1.99167987775721e-05, + "loss": 0.6763704419136047, + "step": 610 + }, + { + "epoch": 0.1786810937271531, + "grad_norm": 1.4133729383070797, + "learning_rate": 1.9916175254136755e-05, + "loss": 0.756158709526062, + "step": 611 + }, + { + "epoch": 0.17897353414241848, + "grad_norm": 1.4652489049885558, + "learning_rate": 1.9915549412855734e-05, + "loss": 0.600861132144928, + "step": 612 + }, + { + "epoch": 0.17926597455768387, + "grad_norm": 1.4731466609399737, + "learning_rate": 1.991492125387533e-05, + "loss": 0.6927047967910767, + "step": 613 + }, + { + "epoch": 0.17955841497294925, + "grad_norm": 1.6937006516406405, + "learning_rate": 1.9914290777342362e-05, + "loss": 0.6908516883850098, + "step": 614 + }, + { + "epoch": 0.17985085538821466, + "grad_norm": 1.4155029526585772, + "learning_rate": 1.9913657983404206e-05, + "loss": 0.7968926429748535, + "step": 615 + }, + { + "epoch": 0.18014329580348004, + "grad_norm": 1.1016955037712495, + "learning_rate": 1.9913022872208773e-05, + "loss": 
0.6035164594650269, + "step": 616 + }, + { + "epoch": 0.18043573621874542, + "grad_norm": 1.4061380717551752, + "learning_rate": 1.9912385443904518e-05, + "loss": 0.6733090877532959, + "step": 617 + }, + { + "epoch": 0.1807281766340108, + "grad_norm": 2.2181842231696645, + "learning_rate": 1.9911745698640426e-05, + "loss": 0.6968391537666321, + "step": 618 + }, + { + "epoch": 0.18102061704927622, + "grad_norm": 1.2136657361400474, + "learning_rate": 1.991110363656605e-05, + "loss": 0.7126309871673584, + "step": 619 + }, + { + "epoch": 0.1813130574645416, + "grad_norm": 1.5461052617008268, + "learning_rate": 1.9910459257831455e-05, + "loss": 0.8604997396469116, + "step": 620 + }, + { + "epoch": 0.18160549787980698, + "grad_norm": 1.4378853015325992, + "learning_rate": 1.9909812562587266e-05, + "loss": 0.674797534942627, + "step": 621 + }, + { + "epoch": 0.1818979382950724, + "grad_norm": 1.4538548213207452, + "learning_rate": 1.9909163550984644e-05, + "loss": 0.7439107894897461, + "step": 622 + }, + { + "epoch": 0.18219037871033777, + "grad_norm": 1.4410118469577065, + "learning_rate": 1.9908512223175293e-05, + "loss": 0.7137601971626282, + "step": 623 + }, + { + "epoch": 0.18248281912560316, + "grad_norm": 1.286772355171783, + "learning_rate": 1.9907858579311448e-05, + "loss": 0.6395502090454102, + "step": 624 + }, + { + "epoch": 0.18277525954086854, + "grad_norm": 1.7411485569290241, + "learning_rate": 1.9907202619545905e-05, + "loss": 0.6747852563858032, + "step": 625 + }, + { + "epoch": 0.18306769995613395, + "grad_norm": 1.3891342500470065, + "learning_rate": 1.9906544344031986e-05, + "loss": 0.6995632648468018, + "step": 626 + }, + { + "epoch": 0.18336014037139933, + "grad_norm": 1.3916150531596103, + "learning_rate": 1.9905883752923557e-05, + "loss": 0.7006711363792419, + "step": 627 + }, + { + "epoch": 0.18365258078666472, + "grad_norm": 1.189158109720048, + "learning_rate": 1.990522084637503e-05, + "loss": 0.660778820514679, + "step": 628 + }, + { + 
"epoch": 0.1839450212019301, + "grad_norm": 1.258003733155152, + "learning_rate": 1.9904555624541362e-05, + "loss": 0.5826665163040161, + "step": 629 + }, + { + "epoch": 0.1842374616171955, + "grad_norm": 1.5565251427155322, + "learning_rate": 1.990388808757803e-05, + "loss": 0.8064266443252563, + "step": 630 + }, + { + "epoch": 0.1845299020324609, + "grad_norm": 1.3066621609893527, + "learning_rate": 1.9903218235641078e-05, + "loss": 0.6856451034545898, + "step": 631 + }, + { + "epoch": 0.18482234244772627, + "grad_norm": 1.325447510265949, + "learning_rate": 1.9902546068887076e-05, + "loss": 0.6423801183700562, + "step": 632 + }, + { + "epoch": 0.18511478286299166, + "grad_norm": 1.252931011950935, + "learning_rate": 1.9901871587473135e-05, + "loss": 0.6903005242347717, + "step": 633 + }, + { + "epoch": 0.18540722327825707, + "grad_norm": 1.2981623515351661, + "learning_rate": 1.9901194791556916e-05, + "loss": 0.636742115020752, + "step": 634 + }, + { + "epoch": 0.18569966369352245, + "grad_norm": 1.154196245030106, + "learning_rate": 1.9900515681296614e-05, + "loss": 0.6541105508804321, + "step": 635 + }, + { + "epoch": 0.18599210410878783, + "grad_norm": 1.2463484642096474, + "learning_rate": 1.9899834256850973e-05, + "loss": 0.7026485204696655, + "step": 636 + }, + { + "epoch": 0.1862845445240532, + "grad_norm": 1.2626549460002545, + "learning_rate": 1.989915051837926e-05, + "loss": 0.6232702732086182, + "step": 637 + }, + { + "epoch": 0.18657698493931862, + "grad_norm": 1.222405284140282, + "learning_rate": 1.9898464466041306e-05, + "loss": 0.5971217155456543, + "step": 638 + }, + { + "epoch": 0.186869425354584, + "grad_norm": 1.228365693552395, + "learning_rate": 1.9897776099997463e-05, + "loss": 0.7942230701446533, + "step": 639 + }, + { + "epoch": 0.1871618657698494, + "grad_norm": 1.4547764939553913, + "learning_rate": 1.9897085420408637e-05, + "loss": 0.6578072309494019, + "step": 640 + }, + { + "epoch": 0.18745430618511477, + "grad_norm": 
1.3118111344764942, + "learning_rate": 1.989639242743627e-05, + "loss": 0.6928422451019287, + "step": 641 + }, + { + "epoch": 0.18774674660038018, + "grad_norm": 1.4232777703090678, + "learning_rate": 1.9895697121242346e-05, + "loss": 0.7656213641166687, + "step": 642 + }, + { + "epoch": 0.18803918701564556, + "grad_norm": 1.3841907158773847, + "learning_rate": 1.9894999501989383e-05, + "loss": 0.6540038585662842, + "step": 643 + }, + { + "epoch": 0.18833162743091095, + "grad_norm": 1.5637672668766274, + "learning_rate": 1.989429956984045e-05, + "loss": 0.707741379737854, + "step": 644 + }, + { + "epoch": 0.18862406784617633, + "grad_norm": 1.2389494128425964, + "learning_rate": 1.9893597324959156e-05, + "loss": 0.6191326379776001, + "step": 645 + }, + { + "epoch": 0.18891650826144174, + "grad_norm": 1.2174290538744046, + "learning_rate": 1.9892892767509634e-05, + "loss": 0.616736114025116, + "step": 646 + }, + { + "epoch": 0.18920894867670712, + "grad_norm": 1.4366227278982104, + "learning_rate": 1.989218589765658e-05, + "loss": 0.803301215171814, + "step": 647 + }, + { + "epoch": 0.1895013890919725, + "grad_norm": 1.2775653707157333, + "learning_rate": 1.989147671556522e-05, + "loss": 0.6528021097183228, + "step": 648 + }, + { + "epoch": 0.1897938295072379, + "grad_norm": 1.5463247112798635, + "learning_rate": 1.9890765221401314e-05, + "loss": 0.6966919898986816, + "step": 649 + }, + { + "epoch": 0.1900862699225033, + "grad_norm": 1.2768484224289256, + "learning_rate": 1.9890051415331178e-05, + "loss": 0.7223595380783081, + "step": 650 + }, + { + "epoch": 0.19037871033776868, + "grad_norm": 1.404271714764208, + "learning_rate": 1.9889335297521656e-05, + "loss": 0.6727452278137207, + "step": 651 + }, + { + "epoch": 0.19067115075303406, + "grad_norm": 1.5662163632688932, + "learning_rate": 1.988861686814014e-05, + "loss": 0.7008258104324341, + "step": 652 + }, + { + "epoch": 0.19096359116829945, + "grad_norm": 1.3756400508505757, + "learning_rate": 
1.988789612735455e-05, + "loss": 0.7624703049659729, + "step": 653 + }, + { + "epoch": 0.19125603158356486, + "grad_norm": 1.4133612106119275, + "learning_rate": 1.988717307533336e-05, + "loss": 0.6813088655471802, + "step": 654 + }, + { + "epoch": 0.19154847199883024, + "grad_norm": 1.1919173127519105, + "learning_rate": 1.988644771224558e-05, + "loss": 0.5401284694671631, + "step": 655 + }, + { + "epoch": 0.19184091241409562, + "grad_norm": 1.4613018451006843, + "learning_rate": 1.9885720038260756e-05, + "loss": 0.6805379986763, + "step": 656 + }, + { + "epoch": 0.19213335282936103, + "grad_norm": 1.5412845974712732, + "learning_rate": 1.9884990053548982e-05, + "loss": 0.6449974775314331, + "step": 657 + }, + { + "epoch": 0.19242579324462641, + "grad_norm": 1.3481077932409014, + "learning_rate": 1.988425775828088e-05, + "loss": 0.6940032839775085, + "step": 658 + }, + { + "epoch": 0.1927182336598918, + "grad_norm": 1.3088210596354761, + "learning_rate": 1.9883523152627626e-05, + "loss": 0.7089565396308899, + "step": 659 + }, + { + "epoch": 0.19301067407515718, + "grad_norm": 1.3865316758332553, + "learning_rate": 1.9882786236760932e-05, + "loss": 0.7508438229560852, + "step": 660 + }, + { + "epoch": 0.1933031144904226, + "grad_norm": 1.6156320166139564, + "learning_rate": 1.988204701085304e-05, + "loss": 0.6828616261482239, + "step": 661 + }, + { + "epoch": 0.19359555490568797, + "grad_norm": 1.2372815991073003, + "learning_rate": 1.9881305475076744e-05, + "loss": 0.6652963161468506, + "step": 662 + }, + { + "epoch": 0.19388799532095335, + "grad_norm": 1.2410743539313074, + "learning_rate": 1.988056162960537e-05, + "loss": 0.6859447360038757, + "step": 663 + }, + { + "epoch": 0.19418043573621874, + "grad_norm": 1.4440746421071415, + "learning_rate": 1.9879815474612794e-05, + "loss": 0.693805992603302, + "step": 664 + }, + { + "epoch": 0.19447287615148415, + "grad_norm": 1.359257774367856, + "learning_rate": 1.987906701027342e-05, + "loss": 0.7028747200965881, + 
"step": 665 + }, + { + "epoch": 0.19476531656674953, + "grad_norm": 1.2833261279779522, + "learning_rate": 1.9878316236762195e-05, + "loss": 0.7492112517356873, + "step": 666 + }, + { + "epoch": 0.1950577569820149, + "grad_norm": 1.2065346249489062, + "learning_rate": 1.9877563154254613e-05, + "loss": 0.5394963026046753, + "step": 667 + }, + { + "epoch": 0.1953501973972803, + "grad_norm": 1.1848542596539768, + "learning_rate": 1.98768077629267e-05, + "loss": 0.5185493230819702, + "step": 668 + }, + { + "epoch": 0.1956426378125457, + "grad_norm": 1.2600065416138704, + "learning_rate": 1.9876050062955027e-05, + "loss": 0.7279829382896423, + "step": 669 + }, + { + "epoch": 0.1959350782278111, + "grad_norm": 1.3533145550923509, + "learning_rate": 1.9875290054516692e-05, + "loss": 0.7437206506729126, + "step": 670 + }, + { + "epoch": 0.19622751864307647, + "grad_norm": 1.6022192807514979, + "learning_rate": 1.9874527737789358e-05, + "loss": 0.7294617891311646, + "step": 671 + }, + { + "epoch": 0.19651995905834185, + "grad_norm": 1.3433918645025815, + "learning_rate": 1.9873763112951198e-05, + "loss": 0.7710307240486145, + "step": 672 + }, + { + "epoch": 0.19681239947360726, + "grad_norm": 1.3797998364213817, + "learning_rate": 1.9872996180180947e-05, + "loss": 0.690025806427002, + "step": 673 + }, + { + "epoch": 0.19710483988887265, + "grad_norm": 1.2826936342217614, + "learning_rate": 1.9872226939657867e-05, + "loss": 0.6690589189529419, + "step": 674 + }, + { + "epoch": 0.19739728030413803, + "grad_norm": 1.31971712284742, + "learning_rate": 1.9871455391561764e-05, + "loss": 0.7587239742279053, + "step": 675 + }, + { + "epoch": 0.1976897207194034, + "grad_norm": 1.2583882254944232, + "learning_rate": 1.987068153607298e-05, + "loss": 0.8048006296157837, + "step": 676 + }, + { + "epoch": 0.19798216113466882, + "grad_norm": 1.4904938665104162, + "learning_rate": 1.9869905373372402e-05, + "loss": 0.721023678779602, + "step": 677 + }, + { + "epoch": 0.1982746015499342, + 
"grad_norm": 1.2975987405043754, + "learning_rate": 1.9869126903641457e-05, + "loss": 0.646798849105835, + "step": 678 + }, + { + "epoch": 0.1985670419651996, + "grad_norm": 1.2591898865565592, + "learning_rate": 1.9868346127062098e-05, + "loss": 0.597393274307251, + "step": 679 + }, + { + "epoch": 0.19885948238046497, + "grad_norm": 1.2773189541737207, + "learning_rate": 1.9867563043816836e-05, + "loss": 0.8619129657745361, + "step": 680 + }, + { + "epoch": 0.19915192279573038, + "grad_norm": 1.2343587826225086, + "learning_rate": 1.986677765408871e-05, + "loss": 0.5391764640808105, + "step": 681 + }, + { + "epoch": 0.19944436321099576, + "grad_norm": 1.360221019641669, + "learning_rate": 1.9865989958061297e-05, + "loss": 0.8185729384422302, + "step": 682 + }, + { + "epoch": 0.19973680362626114, + "grad_norm": 1.3798220626145994, + "learning_rate": 1.9865199955918712e-05, + "loss": 0.6629397869110107, + "step": 683 + }, + { + "epoch": 0.20002924404152653, + "grad_norm": 1.2700323386046573, + "learning_rate": 1.9864407647845626e-05, + "loss": 0.6752325296401978, + "step": 684 + }, + { + "epoch": 0.20032168445679194, + "grad_norm": 1.4583632577866723, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.8509782552719116, + "step": 685 + }, + { + "epoch": 0.20061412487205732, + "grad_norm": 1.2832087066986109, + "learning_rate": 1.986281611464925e-05, + "loss": 0.5573478937149048, + "step": 686 + }, + { + "epoch": 0.2009065652873227, + "grad_norm": 1.4672386586086157, + "learning_rate": 1.9862016889897976e-05, + "loss": 0.8152032494544983, + "step": 687 + }, + { + "epoch": 0.20119900570258809, + "grad_norm": 1.2878245307564982, + "learning_rate": 1.9861215359960217e-05, + "loss": 0.6346902847290039, + "step": 688 + }, + { + "epoch": 0.2014914461178535, + "grad_norm": 1.3877152633732261, + "learning_rate": 1.986041152502332e-05, + "loss": 0.6608721017837524, + "step": 689 + }, + { + "epoch": 0.20178388653311888, + "grad_norm": 1.5061562575575014, + "learning_rate": 
1.9859605385275188e-05, + "loss": 0.7753713130950928, + "step": 690 + }, + { + "epoch": 0.20207632694838426, + "grad_norm": 1.2917361787707549, + "learning_rate": 1.9858796940904238e-05, + "loss": 0.6747434139251709, + "step": 691 + }, + { + "epoch": 0.20236876736364964, + "grad_norm": 1.4853341728710303, + "learning_rate": 1.9857986192099446e-05, + "loss": 0.7263737320899963, + "step": 692 + }, + { + "epoch": 0.20266120777891505, + "grad_norm": 1.2072706917482865, + "learning_rate": 1.9857173139050324e-05, + "loss": 0.7910827994346619, + "step": 693 + }, + { + "epoch": 0.20295364819418044, + "grad_norm": 1.479189890111576, + "learning_rate": 1.9856357781946913e-05, + "loss": 0.7245683670043945, + "step": 694 + }, + { + "epoch": 0.20324608860944582, + "grad_norm": 1.146324196354459, + "learning_rate": 1.9855540120979794e-05, + "loss": 0.7440140247344971, + "step": 695 + }, + { + "epoch": 0.20353852902471123, + "grad_norm": 1.823699641073059, + "learning_rate": 1.9854720156340096e-05, + "loss": 0.7485358715057373, + "step": 696 + }, + { + "epoch": 0.2038309694399766, + "grad_norm": 1.3927934028554216, + "learning_rate": 1.985389788821948e-05, + "loss": 0.7658560872077942, + "step": 697 + }, + { + "epoch": 0.204123409855242, + "grad_norm": 1.5269096149843602, + "learning_rate": 1.9853073316810144e-05, + "loss": 0.7366135120391846, + "step": 698 + }, + { + "epoch": 0.20441585027050738, + "grad_norm": 1.2008198015347107, + "learning_rate": 1.985224644230483e-05, + "loss": 0.622355580329895, + "step": 699 + }, + { + "epoch": 0.2047082906857728, + "grad_norm": 1.1924050316279482, + "learning_rate": 1.985141726489681e-05, + "loss": 0.6123125553131104, + "step": 700 + }, + { + "epoch": 0.20500073110103817, + "grad_norm": 1.3537888634275872, + "learning_rate": 1.9850585784779907e-05, + "loss": 0.6768301725387573, + "step": 701 + }, + { + "epoch": 0.20529317151630355, + "grad_norm": 1.2390814549745153, + "learning_rate": 1.9849752002148465e-05, + "loss": 0.6562466621398926, 
+ "step": 702 + }, + { + "epoch": 0.20558561193156893, + "grad_norm": 1.5562868949340583, + "learning_rate": 1.984891591719738e-05, + "loss": 0.7818280458450317, + "step": 703 + }, + { + "epoch": 0.20587805234683434, + "grad_norm": 1.3407102317592055, + "learning_rate": 1.9848077530122083e-05, + "loss": 0.7144001722335815, + "step": 704 + }, + { + "epoch": 0.20617049276209973, + "grad_norm": 1.1671039191657233, + "learning_rate": 1.9847236841118537e-05, + "loss": 0.700564980506897, + "step": 705 + }, + { + "epoch": 0.2064629331773651, + "grad_norm": 1.3051666135645792, + "learning_rate": 1.984639385038326e-05, + "loss": 0.5933517217636108, + "step": 706 + }, + { + "epoch": 0.2067553735926305, + "grad_norm": 1.2749925819283578, + "learning_rate": 1.9845548558113278e-05, + "loss": 0.6174886226654053, + "step": 707 + }, + { + "epoch": 0.2070478140078959, + "grad_norm": 1.3159599421199524, + "learning_rate": 1.9844700964506188e-05, + "loss": 0.7241572141647339, + "step": 708 + }, + { + "epoch": 0.20734025442316129, + "grad_norm": 1.227834334214839, + "learning_rate": 1.9843851069760103e-05, + "loss": 0.6620675325393677, + "step": 709 + }, + { + "epoch": 0.20763269483842667, + "grad_norm": 1.3263327729601424, + "learning_rate": 1.9842998874073682e-05, + "loss": 0.6115273237228394, + "step": 710 + }, + { + "epoch": 0.20792513525369205, + "grad_norm": 1.2961824988419117, + "learning_rate": 1.984214437764612e-05, + "loss": 0.6871848106384277, + "step": 711 + }, + { + "epoch": 0.20821757566895746, + "grad_norm": 1.3134080639211354, + "learning_rate": 1.9841287580677152e-05, + "loss": 0.6887271404266357, + "step": 712 + }, + { + "epoch": 0.20851001608422284, + "grad_norm": 1.4994035488495783, + "learning_rate": 1.9840428483367046e-05, + "loss": 0.8519056439399719, + "step": 713 + }, + { + "epoch": 0.20880245649948823, + "grad_norm": 1.1754556134484295, + "learning_rate": 1.9839567085916617e-05, + "loss": 0.8168978691101074, + "step": 714 + }, + { + "epoch": 
0.2090948969147536, + "grad_norm": 1.3651960767502735, + "learning_rate": 1.98387033885272e-05, + "loss": 0.6565415859222412, + "step": 715 + }, + { + "epoch": 0.20938733733001902, + "grad_norm": 1.3008644261492222, + "learning_rate": 1.9837837391400697e-05, + "loss": 0.7305471897125244, + "step": 716 + }, + { + "epoch": 0.2096797777452844, + "grad_norm": 1.4799180289336367, + "learning_rate": 1.9836969094739512e-05, + "loss": 0.7676819562911987, + "step": 717 + }, + { + "epoch": 0.20997221816054978, + "grad_norm": 1.8463650009400876, + "learning_rate": 1.983609849874661e-05, + "loss": 0.6519052982330322, + "step": 718 + }, + { + "epoch": 0.21026465857581517, + "grad_norm": 1.2876599445155823, + "learning_rate": 1.9835225603625488e-05, + "loss": 0.6298089623451233, + "step": 719 + }, + { + "epoch": 0.21055709899108058, + "grad_norm": 1.3906710149258825, + "learning_rate": 1.9834350409580184e-05, + "loss": 0.6384454369544983, + "step": 720 + }, + { + "epoch": 0.21084953940634596, + "grad_norm": 1.1568343654967514, + "learning_rate": 1.9833472916815264e-05, + "loss": 0.6335986852645874, + "step": 721 + }, + { + "epoch": 0.21114197982161134, + "grad_norm": 1.3831022749264381, + "learning_rate": 1.983259312553584e-05, + "loss": 0.6587867736816406, + "step": 722 + }, + { + "epoch": 0.21143442023687672, + "grad_norm": 1.4202837808347009, + "learning_rate": 1.9831711035947552e-05, + "loss": 0.6884294748306274, + "step": 723 + }, + { + "epoch": 0.21172686065214213, + "grad_norm": 1.3257507653834097, + "learning_rate": 1.983082664825659e-05, + "loss": 0.7094298601150513, + "step": 724 + }, + { + "epoch": 0.21201930106740752, + "grad_norm": 1.2528953355997736, + "learning_rate": 1.982993996266967e-05, + "loss": 0.736876368522644, + "step": 725 + }, + { + "epoch": 0.2123117414826729, + "grad_norm": 1.3690939580337487, + "learning_rate": 1.9829050979394052e-05, + "loss": 0.7802199125289917, + "step": 726 + }, + { + "epoch": 0.21260418189793828, + "grad_norm": 
1.1986325257536081, + "learning_rate": 1.9828159698637527e-05, + "loss": 0.602590799331665, + "step": 727 + }, + { + "epoch": 0.2128966223132037, + "grad_norm": 1.2705657575851783, + "learning_rate": 1.982726612060843e-05, + "loss": 0.6855295896530151, + "step": 728 + }, + { + "epoch": 0.21318906272846908, + "grad_norm": 1.3075577627317818, + "learning_rate": 1.982637024551563e-05, + "loss": 0.7174949645996094, + "step": 729 + }, + { + "epoch": 0.21348150314373446, + "grad_norm": 1.404568014095412, + "learning_rate": 1.9825472073568527e-05, + "loss": 0.7002695798873901, + "step": 730 + }, + { + "epoch": 0.21377394355899984, + "grad_norm": 1.3606210741478622, + "learning_rate": 1.982457160497707e-05, + "loss": 0.7256268262863159, + "step": 731 + }, + { + "epoch": 0.21406638397426525, + "grad_norm": 1.6598974008247112, + "learning_rate": 1.9823668839951732e-05, + "loss": 0.8223557472229004, + "step": 732 + }, + { + "epoch": 0.21435882438953063, + "grad_norm": 1.361285088499868, + "learning_rate": 1.982276377870353e-05, + "loss": 0.760543942451477, + "step": 733 + }, + { + "epoch": 0.21465126480479602, + "grad_norm": 1.1189262427603888, + "learning_rate": 1.982185642144402e-05, + "loss": 0.5587141513824463, + "step": 734 + }, + { + "epoch": 0.21494370522006143, + "grad_norm": 1.5077440828298982, + "learning_rate": 1.9820946768385295e-05, + "loss": 0.5775829553604126, + "step": 735 + }, + { + "epoch": 0.2152361456353268, + "grad_norm": 1.2761529870001347, + "learning_rate": 1.982003481973997e-05, + "loss": 0.6654443144798279, + "step": 736 + }, + { + "epoch": 0.2155285860505922, + "grad_norm": 1.5826837327135188, + "learning_rate": 1.9819120575721212e-05, + "loss": 0.7963466048240662, + "step": 737 + }, + { + "epoch": 0.21582102646585757, + "grad_norm": 1.3788031698645051, + "learning_rate": 1.981820403654272e-05, + "loss": 0.6748678684234619, + "step": 738 + }, + { + "epoch": 0.21611346688112298, + "grad_norm": 1.4155297807006182, + "learning_rate": 
1.9817285202418733e-05, + "loss": 0.7041783928871155, + "step": 739 + }, + { + "epoch": 0.21640590729638837, + "grad_norm": 1.5390789301713295, + "learning_rate": 1.981636407356402e-05, + "loss": 0.8008041381835938, + "step": 740 + }, + { + "epoch": 0.21669834771165375, + "grad_norm": 1.4349473190399622, + "learning_rate": 1.9815440650193887e-05, + "loss": 0.6873682141304016, + "step": 741 + }, + { + "epoch": 0.21699078812691913, + "grad_norm": 1.4041288075629241, + "learning_rate": 1.981451493252418e-05, + "loss": 0.6316831111907959, + "step": 742 + }, + { + "epoch": 0.21728322854218454, + "grad_norm": 1.3377112960270812, + "learning_rate": 1.9813586920771283e-05, + "loss": 0.6481543779373169, + "step": 743 + }, + { + "epoch": 0.21757566895744992, + "grad_norm": 1.2613104485847573, + "learning_rate": 1.9812656615152112e-05, + "loss": 0.6642731428146362, + "step": 744 + }, + { + "epoch": 0.2178681093727153, + "grad_norm": 1.4870873028073741, + "learning_rate": 1.9811724015884115e-05, + "loss": 0.6769483089447021, + "step": 745 + }, + { + "epoch": 0.2181605497879807, + "grad_norm": 1.4050593471281791, + "learning_rate": 1.981078912318529e-05, + "loss": 0.6397525072097778, + "step": 746 + }, + { + "epoch": 0.2184529902032461, + "grad_norm": 1.170420294448055, + "learning_rate": 1.9809851937274154e-05, + "loss": 0.4963756203651428, + "step": 747 + }, + { + "epoch": 0.21874543061851148, + "grad_norm": 1.6049508757911466, + "learning_rate": 1.9808912458369774e-05, + "loss": 0.7352936267852783, + "step": 748 + }, + { + "epoch": 0.21903787103377687, + "grad_norm": 1.3947943752325116, + "learning_rate": 1.980797068669175e-05, + "loss": 0.7177609205245972, + "step": 749 + }, + { + "epoch": 0.21933031144904225, + "grad_norm": 1.2819324457206713, + "learning_rate": 1.980702662246021e-05, + "loss": 0.76703941822052, + "step": 750 + }, + { + "epoch": 0.21962275186430766, + "grad_norm": 1.4885423867402507, + "learning_rate": 1.980608026589582e-05, + "loss": 0.8591324090957642, + 
"step": 751 + }, + { + "epoch": 0.21991519227957304, + "grad_norm": 1.1920075550965599, + "learning_rate": 1.9805131617219792e-05, + "loss": 0.6216185092926025, + "step": 752 + }, + { + "epoch": 0.22020763269483842, + "grad_norm": 1.359972752643247, + "learning_rate": 1.9804180676653867e-05, + "loss": 0.6067323684692383, + "step": 753 + }, + { + "epoch": 0.2205000731101038, + "grad_norm": 1.329886038437426, + "learning_rate": 1.9803227444420316e-05, + "loss": 0.5832521319389343, + "step": 754 + }, + { + "epoch": 0.22079251352536922, + "grad_norm": 1.3701144460168073, + "learning_rate": 1.9802271920741957e-05, + "loss": 0.6181083917617798, + "step": 755 + }, + { + "epoch": 0.2210849539406346, + "grad_norm": 1.6323941211416428, + "learning_rate": 1.9801314105842135e-05, + "loss": 0.614393949508667, + "step": 756 + }, + { + "epoch": 0.22137739435589998, + "grad_norm": 1.4783150089736257, + "learning_rate": 1.980035399994473e-05, + "loss": 0.7598476409912109, + "step": 757 + }, + { + "epoch": 0.22166983477116536, + "grad_norm": 1.3445249209174277, + "learning_rate": 1.979939160327417e-05, + "loss": 0.7185830473899841, + "step": 758 + }, + { + "epoch": 0.22196227518643077, + "grad_norm": 1.2604381133839313, + "learning_rate": 1.9798426916055403e-05, + "loss": 0.6672362089157104, + "step": 759 + }, + { + "epoch": 0.22225471560169616, + "grad_norm": 1.323605486489286, + "learning_rate": 1.9797459938513918e-05, + "loss": 0.60948646068573, + "step": 760 + }, + { + "epoch": 0.22254715601696154, + "grad_norm": 1.376081699980774, + "learning_rate": 1.979649067087574e-05, + "loss": 0.6073893308639526, + "step": 761 + }, + { + "epoch": 0.22283959643222692, + "grad_norm": 2.11374968768554, + "learning_rate": 1.9795519113367434e-05, + "loss": 0.7521525025367737, + "step": 762 + }, + { + "epoch": 0.22313203684749233, + "grad_norm": 1.3631196959673009, + "learning_rate": 1.979454526621609e-05, + "loss": 0.7281486988067627, + "step": 763 + }, + { + "epoch": 0.22342447726275771, + 
"grad_norm": 1.3466801989985047, + "learning_rate": 1.9793569129649345e-05, + "loss": 0.5628652572631836, + "step": 764 + }, + { + "epoch": 0.2237169176780231, + "grad_norm": 1.7030188389110175, + "learning_rate": 1.9792590703895364e-05, + "loss": 0.9115084409713745, + "step": 765 + }, + { + "epoch": 0.22400935809328848, + "grad_norm": 1.1906430527809846, + "learning_rate": 1.9791609989182843e-05, + "loss": 0.5793902277946472, + "step": 766 + }, + { + "epoch": 0.2243017985085539, + "grad_norm": 1.319680929079464, + "learning_rate": 1.979062698574102e-05, + "loss": 0.5811150074005127, + "step": 767 + }, + { + "epoch": 0.22459423892381927, + "grad_norm": 1.8337754364313175, + "learning_rate": 1.978964169379967e-05, + "loss": 0.7450643181800842, + "step": 768 + }, + { + "epoch": 0.22488667933908466, + "grad_norm": 1.2696945630714354, + "learning_rate": 1.9788654113589093e-05, + "loss": 0.6617515087127686, + "step": 769 + }, + { + "epoch": 0.22517911975435004, + "grad_norm": 1.1685310150494228, + "learning_rate": 1.9787664245340137e-05, + "loss": 0.6240406036376953, + "step": 770 + }, + { + "epoch": 0.22547156016961545, + "grad_norm": 1.450209328719988, + "learning_rate": 1.978667208928417e-05, + "loss": 0.694688081741333, + "step": 771 + }, + { + "epoch": 0.22576400058488083, + "grad_norm": 1.274649499261431, + "learning_rate": 1.9785677645653107e-05, + "loss": 0.6855190396308899, + "step": 772 + }, + { + "epoch": 0.2260564410001462, + "grad_norm": 1.5531275718881066, + "learning_rate": 1.978468091467939e-05, + "loss": 0.8132567405700684, + "step": 773 + }, + { + "epoch": 0.22634888141541162, + "grad_norm": 1.2819374084058084, + "learning_rate": 1.9783681896596006e-05, + "loss": 0.7011039853096008, + "step": 774 + }, + { + "epoch": 0.226641321830677, + "grad_norm": 1.2317633693628418, + "learning_rate": 1.9782680591636462e-05, + "loss": 0.5754199028015137, + "step": 775 + }, + { + "epoch": 0.2269337622459424, + "grad_norm": 1.3342396229289735, + "learning_rate": 
1.9781677000034807e-05, + "loss": 0.7518784403800964, + "step": 776 + }, + { + "epoch": 0.22722620266120777, + "grad_norm": 1.4619385156109748, + "learning_rate": 1.978067112202563e-05, + "loss": 0.6802738904953003, + "step": 777 + }, + { + "epoch": 0.22751864307647318, + "grad_norm": 1.2836639966818497, + "learning_rate": 1.9779662957844046e-05, + "loss": 0.7667055726051331, + "step": 778 + }, + { + "epoch": 0.22781108349173856, + "grad_norm": 1.3402387686228199, + "learning_rate": 1.9778652507725704e-05, + "loss": 0.7590975165367126, + "step": 779 + }, + { + "epoch": 0.22810352390700395, + "grad_norm": 1.5322182562597366, + "learning_rate": 1.9777639771906795e-05, + "loss": 0.8009685277938843, + "step": 780 + }, + { + "epoch": 0.22839596432226933, + "grad_norm": 1.2184372022517955, + "learning_rate": 1.977662475062404e-05, + "loss": 0.6094385981559753, + "step": 781 + }, + { + "epoch": 0.22868840473753474, + "grad_norm": 1.2258891813878965, + "learning_rate": 1.977560744411469e-05, + "loss": 0.5919946432113647, + "step": 782 + }, + { + "epoch": 0.22898084515280012, + "grad_norm": 1.3994922066796667, + "learning_rate": 1.9774587852616537e-05, + "loss": 0.7616838216781616, + "step": 783 + }, + { + "epoch": 0.2292732855680655, + "grad_norm": 1.0864449553171927, + "learning_rate": 1.9773565976367903e-05, + "loss": 0.5107603073120117, + "step": 784 + }, + { + "epoch": 0.2295657259833309, + "grad_norm": 1.3785741559157736, + "learning_rate": 1.9772541815607645e-05, + "loss": 0.6819792985916138, + "step": 785 + }, + { + "epoch": 0.2298581663985963, + "grad_norm": 1.3095462010721952, + "learning_rate": 1.977151537057516e-05, + "loss": 0.748264729976654, + "step": 786 + }, + { + "epoch": 0.23015060681386168, + "grad_norm": 1.511078591377817, + "learning_rate": 1.977048664151037e-05, + "loss": 0.7341534495353699, + "step": 787 + }, + { + "epoch": 0.23044304722912706, + "grad_norm": 1.3481462417331131, + "learning_rate": 1.976945562865373e-05, + "loss": 0.569247841835022, + 
"step": 788 + }, + { + "epoch": 0.23073548764439245, + "grad_norm": 1.4792545387125078, + "learning_rate": 1.9768422332246233e-05, + "loss": 0.7003188133239746, + "step": 789 + }, + { + "epoch": 0.23102792805965786, + "grad_norm": 1.222254549739519, + "learning_rate": 1.9767386752529415e-05, + "loss": 0.6484041810035706, + "step": 790 + }, + { + "epoch": 0.23132036847492324, + "grad_norm": 1.2921197831934208, + "learning_rate": 1.9766348889745324e-05, + "loss": 0.6635721921920776, + "step": 791 + }, + { + "epoch": 0.23161280889018862, + "grad_norm": 1.3606759597173597, + "learning_rate": 1.9765308744136568e-05, + "loss": 0.5855914354324341, + "step": 792 + }, + { + "epoch": 0.231905249305454, + "grad_norm": 1.3590534475124305, + "learning_rate": 1.976426631594626e-05, + "loss": 0.7606059312820435, + "step": 793 + }, + { + "epoch": 0.2321976897207194, + "grad_norm": 1.399907486961256, + "learning_rate": 1.976322160541807e-05, + "loss": 0.7080718278884888, + "step": 794 + }, + { + "epoch": 0.2324901301359848, + "grad_norm": 1.6372996876909576, + "learning_rate": 1.9762174612796195e-05, + "loss": 0.8838162422180176, + "step": 795 + }, + { + "epoch": 0.23278257055125018, + "grad_norm": 1.1906217629409164, + "learning_rate": 1.9761125338325357e-05, + "loss": 0.5776950120925903, + "step": 796 + }, + { + "epoch": 0.23307501096651556, + "grad_norm": 1.4075761903811832, + "learning_rate": 1.9760073782250817e-05, + "loss": 0.7455854415893555, + "step": 797 + }, + { + "epoch": 0.23336745138178097, + "grad_norm": 1.4778525028622385, + "learning_rate": 1.9759019944818375e-05, + "loss": 0.7160001993179321, + "step": 798 + }, + { + "epoch": 0.23365989179704635, + "grad_norm": 1.2680712563874137, + "learning_rate": 1.9757963826274357e-05, + "loss": 0.6282311081886292, + "step": 799 + }, + { + "epoch": 0.23395233221231174, + "grad_norm": 1.3617325718771658, + "learning_rate": 1.9756905426865626e-05, + "loss": 0.6479916572570801, + "step": 800 + }, + { + "epoch": 
0.23424477262757712, + "grad_norm": 1.2789508587545713, + "learning_rate": 1.9755844746839573e-05, + "loss": 0.6519639492034912, + "step": 801 + }, + { + "epoch": 0.23453721304284253, + "grad_norm": 1.4374021901805083, + "learning_rate": 1.9754781786444122e-05, + "loss": 0.5591464638710022, + "step": 802 + }, + { + "epoch": 0.2348296534581079, + "grad_norm": 1.6094479116430809, + "learning_rate": 1.9753716545927745e-05, + "loss": 0.6378511190414429, + "step": 803 + }, + { + "epoch": 0.2351220938733733, + "grad_norm": 1.593476138868701, + "learning_rate": 1.9752649025539424e-05, + "loss": 0.7932485342025757, + "step": 804 + }, + { + "epoch": 0.23541453428863868, + "grad_norm": 1.327032855057245, + "learning_rate": 1.9751579225528694e-05, + "loss": 0.7344592809677124, + "step": 805 + }, + { + "epoch": 0.2357069747039041, + "grad_norm": 1.5060138108990804, + "learning_rate": 1.975050714614561e-05, + "loss": 0.7879096269607544, + "step": 806 + }, + { + "epoch": 0.23599941511916947, + "grad_norm": 1.31391427286964, + "learning_rate": 1.9749432787640764e-05, + "loss": 0.6428436040878296, + "step": 807 + }, + { + "epoch": 0.23629185553443485, + "grad_norm": 1.1924129057081494, + "learning_rate": 1.9748356150265283e-05, + "loss": 0.7018194198608398, + "step": 808 + }, + { + "epoch": 0.23658429594970024, + "grad_norm": 1.3487665777693398, + "learning_rate": 1.974727723427082e-05, + "loss": 0.7696131467819214, + "step": 809 + }, + { + "epoch": 0.23687673636496565, + "grad_norm": 1.2806200429683234, + "learning_rate": 1.974619603990957e-05, + "loss": 0.6429424285888672, + "step": 810 + }, + { + "epoch": 0.23716917678023103, + "grad_norm": 1.4197164517856635, + "learning_rate": 1.9745112567434254e-05, + "loss": 0.7205626964569092, + "step": 811 + }, + { + "epoch": 0.2374616171954964, + "grad_norm": 1.206628595880062, + "learning_rate": 1.9744026817098122e-05, + "loss": 0.7018989324569702, + "step": 812 + }, + { + "epoch": 0.23775405761076182, + "grad_norm": 1.4562632106002198, 
+ "learning_rate": 1.974293878915497e-05, + "loss": 0.6861958503723145, + "step": 813 + }, + { + "epoch": 0.2380464980260272, + "grad_norm": 1.8277672251442496, + "learning_rate": 1.9741848483859117e-05, + "loss": 0.687503457069397, + "step": 814 + }, + { + "epoch": 0.23833893844129259, + "grad_norm": 1.6702364448324796, + "learning_rate": 1.9740755901465408e-05, + "loss": 0.7808526754379272, + "step": 815 + }, + { + "epoch": 0.23863137885655797, + "grad_norm": 1.4777579354772585, + "learning_rate": 1.973966104222923e-05, + "loss": 0.7387286424636841, + "step": 816 + }, + { + "epoch": 0.23892381927182338, + "grad_norm": 1.2761337726208828, + "learning_rate": 1.9738563906406508e-05, + "loss": 0.6262110471725464, + "step": 817 + }, + { + "epoch": 0.23921625968708876, + "grad_norm": 1.2308979686961945, + "learning_rate": 1.973746449425368e-05, + "loss": 0.6618830561637878, + "step": 818 + }, + { + "epoch": 0.23950870010235414, + "grad_norm": 1.3525742869997646, + "learning_rate": 1.9736362806027732e-05, + "loss": 0.5866184234619141, + "step": 819 + }, + { + "epoch": 0.23980114051761953, + "grad_norm": 1.1916120410649227, + "learning_rate": 1.9735258841986175e-05, + "loss": 0.6413314342498779, + "step": 820 + }, + { + "epoch": 0.24009358093288494, + "grad_norm": 1.3855684564301443, + "learning_rate": 1.9734152602387054e-05, + "loss": 0.6125906109809875, + "step": 821 + }, + { + "epoch": 0.24038602134815032, + "grad_norm": 1.3708182915073268, + "learning_rate": 1.973304408748895e-05, + "loss": 0.6128122806549072, + "step": 822 + }, + { + "epoch": 0.2406784617634157, + "grad_norm": 1.4552398411515748, + "learning_rate": 1.973193329755097e-05, + "loss": 0.7763051986694336, + "step": 823 + }, + { + "epoch": 0.24097090217868108, + "grad_norm": 1.406068384249821, + "learning_rate": 1.9730820232832747e-05, + "loss": 0.7187550067901611, + "step": 824 + }, + { + "epoch": 0.2412633425939465, + "grad_norm": 1.4089612736012989, + "learning_rate": 1.972970489359446e-05, + "loss": 
0.6564748287200928, + "step": 825 + }, + { + "epoch": 0.24155578300921188, + "grad_norm": 1.2962838731212396, + "learning_rate": 1.9728587280096815e-05, + "loss": 0.6573271751403809, + "step": 826 + }, + { + "epoch": 0.24184822342447726, + "grad_norm": 1.606482466732529, + "learning_rate": 1.9727467392601042e-05, + "loss": 0.8032153844833374, + "step": 827 + }, + { + "epoch": 0.24214066383974264, + "grad_norm": 1.344534982986645, + "learning_rate": 1.972634523136891e-05, + "loss": 0.6781449913978577, + "step": 828 + }, + { + "epoch": 0.24243310425500805, + "grad_norm": 1.3970734980370678, + "learning_rate": 1.972522079666272e-05, + "loss": 0.580757737159729, + "step": 829 + }, + { + "epoch": 0.24272554467027344, + "grad_norm": 1.4569992070347761, + "learning_rate": 1.97240940887453e-05, + "loss": 0.626894474029541, + "step": 830 + }, + { + "epoch": 0.24301798508553882, + "grad_norm": 1.4885978649776115, + "learning_rate": 1.9722965107880005e-05, + "loss": 0.8188163042068481, + "step": 831 + }, + { + "epoch": 0.2433104255008042, + "grad_norm": 1.4514623765445114, + "learning_rate": 1.9721833854330734e-05, + "loss": 0.6943579912185669, + "step": 832 + }, + { + "epoch": 0.2436028659160696, + "grad_norm": 1.3452906489662066, + "learning_rate": 1.972070032836191e-05, + "loss": 0.6177504658699036, + "step": 833 + }, + { + "epoch": 0.243895306331335, + "grad_norm": 1.3249219466208975, + "learning_rate": 1.971956453023849e-05, + "loss": 0.683998703956604, + "step": 834 + }, + { + "epoch": 0.24418774674660038, + "grad_norm": 1.3523687150823345, + "learning_rate": 1.9718426460225952e-05, + "loss": 0.77602219581604, + "step": 835 + }, + { + "epoch": 0.24448018716186576, + "grad_norm": 1.0190390519787025, + "learning_rate": 1.971728611859032e-05, + "loss": 0.4930742383003235, + "step": 836 + }, + { + "epoch": 0.24477262757713117, + "grad_norm": 1.057766741950331, + "learning_rate": 1.971614350559814e-05, + "loss": 0.634628415107727, + "step": 837 + }, + { + "epoch": 
0.24506506799239655, + "grad_norm": 1.4273024070967653, + "learning_rate": 1.971499862151649e-05, + "loss": 0.6439167857170105, + "step": 838 + }, + { + "epoch": 0.24535750840766193, + "grad_norm": 1.1385728991135244, + "learning_rate": 1.9713851466612982e-05, + "loss": 0.701258659362793, + "step": 839 + }, + { + "epoch": 0.24564994882292732, + "grad_norm": 1.4590112387376561, + "learning_rate": 1.9712702041155753e-05, + "loss": 0.6488544344902039, + "step": 840 + }, + { + "epoch": 0.24594238923819273, + "grad_norm": 1.3405708553224296, + "learning_rate": 1.9711550345413476e-05, + "loss": 0.6962910890579224, + "step": 841 + }, + { + "epoch": 0.2462348296534581, + "grad_norm": 1.1939053963741824, + "learning_rate": 1.9710396379655355e-05, + "loss": 0.6617723703384399, + "step": 842 + }, + { + "epoch": 0.2465272700687235, + "grad_norm": 1.2279058278823862, + "learning_rate": 1.970924014415112e-05, + "loss": 0.7152801752090454, + "step": 843 + }, + { + "epoch": 0.24681971048398887, + "grad_norm": 1.2796222731345095, + "learning_rate": 1.9708081639171035e-05, + "loss": 0.6712393760681152, + "step": 844 + }, + { + "epoch": 0.24711215089925428, + "grad_norm": 1.3941735155074029, + "learning_rate": 1.970692086498589e-05, + "loss": 0.8413758277893066, + "step": 845 + }, + { + "epoch": 0.24740459131451967, + "grad_norm": 1.423836225011119, + "learning_rate": 1.9705757821867015e-05, + "loss": 0.6460679769515991, + "step": 846 + }, + { + "epoch": 0.24769703172978505, + "grad_norm": 1.3704721229511874, + "learning_rate": 1.970459251008626e-05, + "loss": 0.759244441986084, + "step": 847 + }, + { + "epoch": 0.24798947214505043, + "grad_norm": 1.2356631241001201, + "learning_rate": 1.970342492991601e-05, + "loss": 0.8148110508918762, + "step": 848 + }, + { + "epoch": 0.24828191256031584, + "grad_norm": 1.2587770996787473, + "learning_rate": 1.970225508162918e-05, + "loss": 0.6620084047317505, + "step": 849 + }, + { + "epoch": 0.24857435297558123, + "grad_norm": 1.451838551232366, 
+ "learning_rate": 1.9701082965499217e-05, + "loss": 0.7090305089950562, + "step": 850 + }, + { + "epoch": 0.2488667933908466, + "grad_norm": 1.2074340737341804, + "learning_rate": 1.9699908581800094e-05, + "loss": 0.6846730709075928, + "step": 851 + }, + { + "epoch": 0.24915923380611202, + "grad_norm": 1.0752757256209107, + "learning_rate": 1.9698731930806315e-05, + "loss": 0.5183212757110596, + "step": 852 + }, + { + "epoch": 0.2494516742213774, + "grad_norm": 1.4176078828661092, + "learning_rate": 1.9697553012792915e-05, + "loss": 0.6913097500801086, + "step": 853 + }, + { + "epoch": 0.24974411463664278, + "grad_norm": 1.4996885245263052, + "learning_rate": 1.9696371828035466e-05, + "loss": 0.7896280884742737, + "step": 854 + }, + { + "epoch": 0.2500365550519082, + "grad_norm": 1.4718644942105623, + "learning_rate": 1.9695188376810055e-05, + "loss": 0.947577714920044, + "step": 855 + }, + { + "epoch": 0.2503289954671736, + "grad_norm": 1.3825164821538705, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.7772419452667236, + "step": 856 + }, + { + "epoch": 0.25062143588243896, + "grad_norm": 1.3624521016930335, + "learning_rate": 1.9692814676062376e-05, + "loss": 0.6255912780761719, + "step": 857 + }, + { + "epoch": 0.25091387629770434, + "grad_norm": 1.3319834146029552, + "learning_rate": 1.969162442709495e-05, + "loss": 0.6572105884552002, + "step": 858 + }, + { + "epoch": 0.2512063167129697, + "grad_norm": 1.3718275193420901, + "learning_rate": 1.969043191276924e-05, + "loss": 0.6387436389923096, + "step": 859 + }, + { + "epoch": 0.2514987571282351, + "grad_norm": 1.1976239787141296, + "learning_rate": 1.968923713336399e-05, + "loss": 0.9180483222007751, + "step": 860 + }, + { + "epoch": 0.2517911975435005, + "grad_norm": 1.211847411431562, + "learning_rate": 1.9688040089158473e-05, + "loss": 0.6830536127090454, + "step": 861 + }, + { + "epoch": 0.2520836379587659, + "grad_norm": 1.6904119232689327, + "learning_rate": 1.9686840780432487e-05, + "loss": 
0.9061588644981384, + "step": 862 + }, + { + "epoch": 0.2523760783740313, + "grad_norm": 1.157670921080695, + "learning_rate": 1.9685639207466365e-05, + "loss": 0.558010458946228, + "step": 863 + }, + { + "epoch": 0.2526685187892967, + "grad_norm": 1.1825470022948923, + "learning_rate": 1.968443537054097e-05, + "loss": 0.6788249611854553, + "step": 864 + }, + { + "epoch": 0.2529609592045621, + "grad_norm": 1.2105730438992965, + "learning_rate": 1.968322926993769e-05, + "loss": 0.576469898223877, + "step": 865 + }, + { + "epoch": 0.25325339961982746, + "grad_norm": 1.2982512656817862, + "learning_rate": 1.9682020905938438e-05, + "loss": 0.6994123458862305, + "step": 866 + }, + { + "epoch": 0.25354584003509284, + "grad_norm": 1.206872992638966, + "learning_rate": 1.9680810278825672e-05, + "loss": 0.6929521560668945, + "step": 867 + }, + { + "epoch": 0.2538382804503582, + "grad_norm": 1.273656030058159, + "learning_rate": 1.9679597388882363e-05, + "loss": 0.7596743106842041, + "step": 868 + }, + { + "epoch": 0.2541307208656236, + "grad_norm": 1.4805809886864818, + "learning_rate": 1.9678382236392013e-05, + "loss": 0.7925904989242554, + "step": 869 + }, + { + "epoch": 0.25442316128088904, + "grad_norm": 1.3335550122348163, + "learning_rate": 1.9677164821638666e-05, + "loss": 0.722467839717865, + "step": 870 + }, + { + "epoch": 0.2547156016961544, + "grad_norm": 1.3131624182400288, + "learning_rate": 1.9675945144906882e-05, + "loss": 0.7165451049804688, + "step": 871 + }, + { + "epoch": 0.2550080421114198, + "grad_norm": 1.1797512350865442, + "learning_rate": 1.9674723206481746e-05, + "loss": 0.5897061824798584, + "step": 872 + }, + { + "epoch": 0.2553004825266852, + "grad_norm": 1.2365962649439657, + "learning_rate": 1.9673499006648885e-05, + "loss": 0.6634531021118164, + "step": 873 + }, + { + "epoch": 0.2555929229419506, + "grad_norm": 1.3214235822507945, + "learning_rate": 1.9672272545694445e-05, + "loss": 0.7237584590911865, + "step": 874 + }, + { + "epoch": 
0.25588536335721596, + "grad_norm": 1.4848759223566366, + "learning_rate": 1.967104382390511e-05, + "loss": 0.6382388472557068, + "step": 875 + }, + { + "epoch": 0.25617780377248134, + "grad_norm": 1.31447030866248, + "learning_rate": 1.966981284156808e-05, + "loss": 0.6788768768310547, + "step": 876 + }, + { + "epoch": 0.2564702441877467, + "grad_norm": 1.3072783419197107, + "learning_rate": 1.966857959897109e-05, + "loss": 0.6347095966339111, + "step": 877 + }, + { + "epoch": 0.25676268460301216, + "grad_norm": 1.4344629064681063, + "learning_rate": 1.9667344096402406e-05, + "loss": 0.8896903991699219, + "step": 878 + }, + { + "epoch": 0.25705512501827754, + "grad_norm": 1.3876445939749689, + "learning_rate": 1.966610633415082e-05, + "loss": 0.71473228931427, + "step": 879 + }, + { + "epoch": 0.2573475654335429, + "grad_norm": 1.3302375445053003, + "learning_rate": 1.9664866312505646e-05, + "loss": 0.7311601638793945, + "step": 880 + }, + { + "epoch": 0.2576400058488083, + "grad_norm": 1.2472942559074918, + "learning_rate": 1.9663624031756737e-05, + "loss": 0.6186199188232422, + "step": 881 + }, + { + "epoch": 0.2579324462640737, + "grad_norm": 1.4896774549089442, + "learning_rate": 1.9662379492194467e-05, + "loss": 0.8059204816818237, + "step": 882 + }, + { + "epoch": 0.25822488667933907, + "grad_norm": 1.4468929069066396, + "learning_rate": 1.9661132694109736e-05, + "loss": 0.6065236330032349, + "step": 883 + }, + { + "epoch": 0.25851732709460445, + "grad_norm": 1.182060018600662, + "learning_rate": 1.965988363779398e-05, + "loss": 0.6491106152534485, + "step": 884 + }, + { + "epoch": 0.25880976750986984, + "grad_norm": 1.197300798410388, + "learning_rate": 1.9658632323539158e-05, + "loss": 0.526267945766449, + "step": 885 + }, + { + "epoch": 0.2591022079251353, + "grad_norm": 1.5008074138248908, + "learning_rate": 1.9657378751637755e-05, + "loss": 0.812760591506958, + "step": 886 + }, + { + "epoch": 0.25939464834040066, + "grad_norm": 1.807239371921464, + 
"learning_rate": 1.9656122922382786e-05, + "loss": 0.7957908511161804, + "step": 887 + }, + { + "epoch": 0.25968708875566604, + "grad_norm": 1.3552357306732934, + "learning_rate": 1.9654864836067796e-05, + "loss": 0.7426323890686035, + "step": 888 + }, + { + "epoch": 0.2599795291709314, + "grad_norm": 1.3206271267013228, + "learning_rate": 1.9653604492986852e-05, + "loss": 0.602961540222168, + "step": 889 + }, + { + "epoch": 0.2602719695861968, + "grad_norm": 1.7789592821205134, + "learning_rate": 1.965234189343455e-05, + "loss": 0.8706510066986084, + "step": 890 + }, + { + "epoch": 0.2605644100014622, + "grad_norm": 1.3042391493572836, + "learning_rate": 1.965107703770602e-05, + "loss": 0.6245810985565186, + "step": 891 + }, + { + "epoch": 0.26085685041672757, + "grad_norm": 1.3389608750174764, + "learning_rate": 1.964980992609691e-05, + "loss": 0.7455421686172485, + "step": 892 + }, + { + "epoch": 0.261149290831993, + "grad_norm": 1.3769047718413097, + "learning_rate": 1.9648540558903404e-05, + "loss": 0.6917043328285217, + "step": 893 + }, + { + "epoch": 0.2614417312472584, + "grad_norm": 1.5543661242785587, + "learning_rate": 1.9647268936422204e-05, + "loss": 0.6488040685653687, + "step": 894 + }, + { + "epoch": 0.2617341716625238, + "grad_norm": 1.4168880936407573, + "learning_rate": 1.964599505895055e-05, + "loss": 0.7416148781776428, + "step": 895 + }, + { + "epoch": 0.26202661207778916, + "grad_norm": 1.2398123962846468, + "learning_rate": 1.9644718926786196e-05, + "loss": 0.7012773156166077, + "step": 896 + }, + { + "epoch": 0.26231905249305454, + "grad_norm": 1.4024640685787384, + "learning_rate": 1.9643440540227438e-05, + "loss": 0.8644432425498962, + "step": 897 + }, + { + "epoch": 0.2626114929083199, + "grad_norm": 1.2155057674795815, + "learning_rate": 1.9642159899573084e-05, + "loss": 0.614842414855957, + "step": 898 + }, + { + "epoch": 0.2629039333235853, + "grad_norm": 1.406064497865486, + "learning_rate": 1.964087700512248e-05, + "loss": 
0.7794508337974548, + "step": 899 + }, + { + "epoch": 0.2631963737388507, + "grad_norm": 1.3041032890013364, + "learning_rate": 1.9639591857175492e-05, + "loss": 0.49217259883880615, + "step": 900 + }, + { + "epoch": 0.2634888141541161, + "grad_norm": 2.5300379427879656, + "learning_rate": 1.9638304456032516e-05, + "loss": 0.6319605708122253, + "step": 901 + }, + { + "epoch": 0.2637812545693815, + "grad_norm": 1.2937854520821135, + "learning_rate": 1.9637014801994478e-05, + "loss": 0.6066744327545166, + "step": 902 + }, + { + "epoch": 0.2640736949846469, + "grad_norm": 1.3364560601793205, + "learning_rate": 1.9635722895362824e-05, + "loss": 0.7529127597808838, + "step": 903 + }, + { + "epoch": 0.26436613539991227, + "grad_norm": 1.1766314649269587, + "learning_rate": 1.9634428736439524e-05, + "loss": 0.6026389598846436, + "step": 904 + }, + { + "epoch": 0.26465857581517765, + "grad_norm": 1.1341480559887087, + "learning_rate": 1.9633132325527092e-05, + "loss": 0.6227229237556458, + "step": 905 + }, + { + "epoch": 0.26495101623044304, + "grad_norm": 1.0934147682033295, + "learning_rate": 1.9631833662928548e-05, + "loss": 0.5959285497665405, + "step": 906 + }, + { + "epoch": 0.2652434566457084, + "grad_norm": 1.5332323248713289, + "learning_rate": 1.9630532748947445e-05, + "loss": 0.8104684352874756, + "step": 907 + }, + { + "epoch": 0.2655358970609738, + "grad_norm": 1.4286964634802555, + "learning_rate": 1.962922958388787e-05, + "loss": 0.6722325682640076, + "step": 908 + }, + { + "epoch": 0.26582833747623924, + "grad_norm": 1.3146328085881052, + "learning_rate": 1.962792416805442e-05, + "loss": 0.5996029376983643, + "step": 909 + }, + { + "epoch": 0.2661207778915046, + "grad_norm": 1.2576705371159294, + "learning_rate": 1.962661650175224e-05, + "loss": 0.7214776873588562, + "step": 910 + }, + { + "epoch": 0.26641321830677, + "grad_norm": 1.3644451050997106, + "learning_rate": 1.9625306585286986e-05, + "loss": 0.6833420991897583, + "step": 911 + }, + { + "epoch": 
0.2667056587220354, + "grad_norm": 1.3539788924921423, + "learning_rate": 1.9623994418964834e-05, + "loss": 0.5571368336677551, + "step": 912 + }, + { + "epoch": 0.26699809913730077, + "grad_norm": 1.3710487138213245, + "learning_rate": 1.9622680003092503e-05, + "loss": 0.6748533248901367, + "step": 913 + }, + { + "epoch": 0.26729053955256615, + "grad_norm": 1.3715994474814863, + "learning_rate": 1.9621363337977232e-05, + "loss": 0.6681679487228394, + "step": 914 + }, + { + "epoch": 0.26758297996783154, + "grad_norm": 1.482670676536411, + "learning_rate": 1.9620044423926775e-05, + "loss": 0.6839786767959595, + "step": 915 + }, + { + "epoch": 0.2678754203830969, + "grad_norm": 1.4250296018843953, + "learning_rate": 1.961872326124943e-05, + "loss": 0.7481753826141357, + "step": 916 + }, + { + "epoch": 0.26816786079836236, + "grad_norm": 1.2167024955211783, + "learning_rate": 1.9617399850254e-05, + "loss": 0.6044093370437622, + "step": 917 + }, + { + "epoch": 0.26846030121362774, + "grad_norm": 1.284073365031053, + "learning_rate": 1.9616074191249833e-05, + "loss": 0.6399786472320557, + "step": 918 + }, + { + "epoch": 0.2687527416288931, + "grad_norm": 1.4810486497659208, + "learning_rate": 1.961474628454679e-05, + "loss": 0.6769053339958191, + "step": 919 + }, + { + "epoch": 0.2690451820441585, + "grad_norm": 1.3650368498715015, + "learning_rate": 1.961341613045526e-05, + "loss": 0.7508189678192139, + "step": 920 + }, + { + "epoch": 0.2693376224594239, + "grad_norm": 1.3260194970823536, + "learning_rate": 1.9612083729286164e-05, + "loss": 0.728675365447998, + "step": 921 + }, + { + "epoch": 0.26963006287468927, + "grad_norm": 1.241243201070507, + "learning_rate": 1.9610749081350934e-05, + "loss": 0.6886277794837952, + "step": 922 + }, + { + "epoch": 0.26992250328995465, + "grad_norm": 1.272552251820391, + "learning_rate": 1.9609412186961542e-05, + "loss": 0.6756877899169922, + "step": 923 + }, + { + "epoch": 0.27021494370522003, + "grad_norm": 1.3464083414999921, + 
"learning_rate": 1.960807304643048e-05, + "loss": 0.6761744022369385, + "step": 924 + }, + { + "epoch": 0.2705073841204855, + "grad_norm": 1.3141872927798783, + "learning_rate": 1.9606731660070758e-05, + "loss": 0.6475736498832703, + "step": 925 + }, + { + "epoch": 0.27079982453575085, + "grad_norm": 1.2576667239396297, + "learning_rate": 1.9605388028195922e-05, + "loss": 0.6169984936714172, + "step": 926 + }, + { + "epoch": 0.27109226495101624, + "grad_norm": 1.36667119537221, + "learning_rate": 1.9604042151120035e-05, + "loss": 0.6411685943603516, + "step": 927 + }, + { + "epoch": 0.2713847053662816, + "grad_norm": 1.203794827188605, + "learning_rate": 1.960269402915769e-05, + "loss": 0.6802625060081482, + "step": 928 + }, + { + "epoch": 0.271677145781547, + "grad_norm": 1.1204382547238934, + "learning_rate": 1.9601343662624e-05, + "loss": 0.6321320533752441, + "step": 929 + }, + { + "epoch": 0.2719695861968124, + "grad_norm": 1.1836254946940896, + "learning_rate": 1.959999105183461e-05, + "loss": 0.6242578029632568, + "step": 930 + }, + { + "epoch": 0.27226202661207777, + "grad_norm": 1.3574626937776866, + "learning_rate": 1.9598636197105672e-05, + "loss": 0.8106271624565125, + "step": 931 + }, + { + "epoch": 0.2725544670273432, + "grad_norm": 1.3336233570386715, + "learning_rate": 1.9597279098753893e-05, + "loss": 0.6810879707336426, + "step": 932 + }, + { + "epoch": 0.2728469074426086, + "grad_norm": 1.4182604377271, + "learning_rate": 1.959591975709647e-05, + "loss": 0.6121781468391418, + "step": 933 + }, + { + "epoch": 0.27313934785787397, + "grad_norm": 1.3855646528211634, + "learning_rate": 1.9594558172451153e-05, + "loss": 0.7347930669784546, + "step": 934 + }, + { + "epoch": 0.27343178827313935, + "grad_norm": 1.7726573891466724, + "learning_rate": 1.9593194345136196e-05, + "loss": 0.8280940651893616, + "step": 935 + }, + { + "epoch": 0.27372422868840474, + "grad_norm": 1.7069126445705718, + "learning_rate": 1.959182827547039e-05, + "loss": 
0.8171218633651733, + "step": 936 + }, + { + "epoch": 0.2740166691036701, + "grad_norm": 1.5519639216005559, + "learning_rate": 1.9590459963773043e-05, + "loss": 0.7350337505340576, + "step": 937 + }, + { + "epoch": 0.2743091095189355, + "grad_norm": 1.2380635233009907, + "learning_rate": 1.9589089410363992e-05, + "loss": 0.5648026466369629, + "step": 938 + }, + { + "epoch": 0.2746015499342009, + "grad_norm": 1.2184482229154892, + "learning_rate": 1.9587716615563592e-05, + "loss": 0.630626916885376, + "step": 939 + }, + { + "epoch": 0.2748939903494663, + "grad_norm": 1.247434869071023, + "learning_rate": 1.9586341579692728e-05, + "loss": 0.658649206161499, + "step": 940 + }, + { + "epoch": 0.2751864307647317, + "grad_norm": 1.3583264773002954, + "learning_rate": 1.9584964303072804e-05, + "loss": 0.6938339471817017, + "step": 941 + }, + { + "epoch": 0.2754788711799971, + "grad_norm": 1.2844871691004516, + "learning_rate": 1.9583584786025755e-05, + "loss": 0.7124238014221191, + "step": 942 + }, + { + "epoch": 0.27577131159526247, + "grad_norm": 1.295461976555009, + "learning_rate": 1.9582203028874027e-05, + "loss": 0.5879669189453125, + "step": 943 + }, + { + "epoch": 0.27606375201052785, + "grad_norm": 1.3092326597229536, + "learning_rate": 1.9580819031940605e-05, + "loss": 0.6169895529747009, + "step": 944 + }, + { + "epoch": 0.27635619242579323, + "grad_norm": 1.3408083006486937, + "learning_rate": 1.9579432795548986e-05, + "loss": 0.6367429494857788, + "step": 945 + }, + { + "epoch": 0.2766486328410586, + "grad_norm": 1.294470969807804, + "learning_rate": 1.9578044320023195e-05, + "loss": 0.6198331117630005, + "step": 946 + }, + { + "epoch": 0.276941073256324, + "grad_norm": 1.2934388501492589, + "learning_rate": 1.9576653605687782e-05, + "loss": 0.6731230616569519, + "step": 947 + }, + { + "epoch": 0.27723351367158944, + "grad_norm": 1.3743119206413423, + "learning_rate": 1.957526065286781e-05, + "loss": 0.7185516953468323, + "step": 948 + }, + { + "epoch": 
0.2775259540868548, + "grad_norm": 1.5124791251983178, + "learning_rate": 1.9573865461888882e-05, + "loss": 0.7362357378005981, + "step": 949 + }, + { + "epoch": 0.2778183945021202, + "grad_norm": 1.481999625276378, + "learning_rate": 1.9572468033077113e-05, + "loss": 0.7051525712013245, + "step": 950 + }, + { + "epoch": 0.2781108349173856, + "grad_norm": 1.3167000079730038, + "learning_rate": 1.9571068366759143e-05, + "loss": 0.6267420053482056, + "step": 951 + }, + { + "epoch": 0.27840327533265097, + "grad_norm": 1.4667668035632615, + "learning_rate": 1.9569666463262136e-05, + "loss": 0.649080753326416, + "step": 952 + }, + { + "epoch": 0.27869571574791635, + "grad_norm": 1.1940294879505342, + "learning_rate": 1.9568262322913777e-05, + "loss": 0.5700061321258545, + "step": 953 + }, + { + "epoch": 0.27898815616318173, + "grad_norm": 1.21562106075719, + "learning_rate": 1.9566855946042274e-05, + "loss": 0.6121870875358582, + "step": 954 + }, + { + "epoch": 0.2792805965784471, + "grad_norm": 1.3828404656512372, + "learning_rate": 1.9565447332976362e-05, + "loss": 0.8294541239738464, + "step": 955 + }, + { + "epoch": 0.27957303699371255, + "grad_norm": 1.2953263908127255, + "learning_rate": 1.9564036484045295e-05, + "loss": 0.6979323625564575, + "step": 956 + }, + { + "epoch": 0.27986547740897794, + "grad_norm": 1.4787353970640398, + "learning_rate": 1.9562623399578853e-05, + "loss": 0.6847009658813477, + "step": 957 + }, + { + "epoch": 0.2801579178242433, + "grad_norm": 1.174633661295302, + "learning_rate": 1.956120807990733e-05, + "loss": 0.6821733713150024, + "step": 958 + }, + { + "epoch": 0.2804503582395087, + "grad_norm": 1.2766608312969014, + "learning_rate": 1.955979052536155e-05, + "loss": 0.6943963766098022, + "step": 959 + }, + { + "epoch": 0.2807427986547741, + "grad_norm": 1.6283703947702834, + "learning_rate": 1.955837073627286e-05, + "loss": 0.5841893553733826, + "step": 960 + }, + { + "epoch": 0.28103523907003947, + "grad_norm": 1.4526296199919857, + 
"learning_rate": 1.955694871297313e-05, + "loss": 0.7196778059005737, + "step": 961 + }, + { + "epoch": 0.28132767948530485, + "grad_norm": 1.3568922084457422, + "learning_rate": 1.9555524455794743e-05, + "loss": 0.697501540184021, + "step": 962 + }, + { + "epoch": 0.28162011990057023, + "grad_norm": 1.3269336256780513, + "learning_rate": 1.9554097965070612e-05, + "loss": 0.7265810966491699, + "step": 963 + }, + { + "epoch": 0.28191256031583567, + "grad_norm": 1.1794879937673313, + "learning_rate": 1.955266924113417e-05, + "loss": 0.5766021013259888, + "step": 964 + }, + { + "epoch": 0.28220500073110105, + "grad_norm": 1.1486001787824904, + "learning_rate": 1.955123828431938e-05, + "loss": 0.6885402202606201, + "step": 965 + }, + { + "epoch": 0.28249744114636643, + "grad_norm": 1.4093622546586522, + "learning_rate": 1.954980509496071e-05, + "loss": 0.719329297542572, + "step": 966 + }, + { + "epoch": 0.2827898815616318, + "grad_norm": 1.1657877260705576, + "learning_rate": 1.954836967339316e-05, + "loss": 0.5621368885040283, + "step": 967 + }, + { + "epoch": 0.2830823219768972, + "grad_norm": 1.4684107409650433, + "learning_rate": 1.954693201995226e-05, + "loss": 0.6323715448379517, + "step": 968 + }, + { + "epoch": 0.2833747623921626, + "grad_norm": 1.1727530946898588, + "learning_rate": 1.954549213497404e-05, + "loss": 0.6265028119087219, + "step": 969 + }, + { + "epoch": 0.28366720280742797, + "grad_norm": 1.2740242277637046, + "learning_rate": 1.9544050018795076e-05, + "loss": 0.6234713792800903, + "step": 970 + }, + { + "epoch": 0.2839596432226934, + "grad_norm": 1.2342517719802, + "learning_rate": 1.9542605671752447e-05, + "loss": 0.6505804657936096, + "step": 971 + }, + { + "epoch": 0.2842520836379588, + "grad_norm": 1.408353713096739, + "learning_rate": 1.954115909418376e-05, + "loss": 0.7756558656692505, + "step": 972 + }, + { + "epoch": 0.28454452405322417, + "grad_norm": 1.4275947350210108, + "learning_rate": 1.953971028642715e-05, + "loss": 
0.767257034778595, + "step": 973 + }, + { + "epoch": 0.28483696446848955, + "grad_norm": 1.5164327383088176, + "learning_rate": 1.9538259248821265e-05, + "loss": 0.6702018976211548, + "step": 974 + }, + { + "epoch": 0.28512940488375493, + "grad_norm": 1.5385088670888984, + "learning_rate": 1.953680598170527e-05, + "loss": 0.7072827816009521, + "step": 975 + }, + { + "epoch": 0.2854218452990203, + "grad_norm": 1.4449259987675327, + "learning_rate": 1.953535048541886e-05, + "loss": 0.6343571543693542, + "step": 976 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 1.2668558478543779, + "learning_rate": 1.953389276030225e-05, + "loss": 0.6361520290374756, + "step": 977 + }, + { + "epoch": 0.2860067261295511, + "grad_norm": 1.144363699587152, + "learning_rate": 1.9532432806696178e-05, + "loss": 0.6757364273071289, + "step": 978 + }, + { + "epoch": 0.2862991665448165, + "grad_norm": 1.2373799950730142, + "learning_rate": 1.9530970624941896e-05, + "loss": 0.6311759948730469, + "step": 979 + }, + { + "epoch": 0.2865916069600819, + "grad_norm": 1.3327233434420644, + "learning_rate": 1.9529506215381176e-05, + "loss": 0.6207036972045898, + "step": 980 + }, + { + "epoch": 0.2868840473753473, + "grad_norm": 1.182706201187961, + "learning_rate": 1.952803957835632e-05, + "loss": 0.5154495239257812, + "step": 981 + }, + { + "epoch": 0.28717648779061267, + "grad_norm": 1.4885508278374788, + "learning_rate": 1.9526570714210146e-05, + "loss": 0.797666072845459, + "step": 982 + }, + { + "epoch": 0.28746892820587805, + "grad_norm": 1.5013519512468485, + "learning_rate": 1.9525099623285983e-05, + "loss": 0.659400224685669, + "step": 983 + }, + { + "epoch": 0.28776136862114343, + "grad_norm": 1.565667149921291, + "learning_rate": 1.9523626305927706e-05, + "loss": 0.7638698816299438, + "step": 984 + }, + { + "epoch": 0.2880538090364088, + "grad_norm": 1.282540952352899, + "learning_rate": 1.952215076247968e-05, + "loss": 0.6656497120857239, + "step": 985 + }, + { + "epoch": 
0.2883462494516742, + "grad_norm": 1.6004320535828411, + "learning_rate": 1.9520672993286807e-05, + "loss": 0.7701614499092102, + "step": 986 + }, + { + "epoch": 0.28863868986693964, + "grad_norm": 1.4907110687279852, + "learning_rate": 1.951919299869451e-05, + "loss": 0.6710221767425537, + "step": 987 + }, + { + "epoch": 0.288931130282205, + "grad_norm": 1.3912460639172692, + "learning_rate": 1.951771077904873e-05, + "loss": 0.6307191848754883, + "step": 988 + }, + { + "epoch": 0.2892235706974704, + "grad_norm": 1.5585350101159294, + "learning_rate": 1.951622633469592e-05, + "loss": 0.8226636648178101, + "step": 989 + }, + { + "epoch": 0.2895160111127358, + "grad_norm": 1.3925257650330547, + "learning_rate": 1.9514739665983065e-05, + "loss": 0.6286089420318604, + "step": 990 + }, + { + "epoch": 0.28980845152800117, + "grad_norm": 1.3766260212895336, + "learning_rate": 1.9513250773257667e-05, + "loss": 0.8167316317558289, + "step": 991 + }, + { + "epoch": 0.29010089194326655, + "grad_norm": 1.3082034964893225, + "learning_rate": 1.9511759656867738e-05, + "loss": 0.6840806603431702, + "step": 992 + }, + { + "epoch": 0.29039333235853193, + "grad_norm": 4.707433700267527, + "learning_rate": 1.9510266317161823e-05, + "loss": 0.5731699466705322, + "step": 993 + }, + { + "epoch": 0.2906857727737973, + "grad_norm": 1.179743170686313, + "learning_rate": 1.950877075448898e-05, + "loss": 0.696578860282898, + "step": 994 + }, + { + "epoch": 0.29097821318906275, + "grad_norm": 1.28092562469002, + "learning_rate": 1.9507272969198787e-05, + "loss": 0.7194398641586304, + "step": 995 + }, + { + "epoch": 0.29127065360432813, + "grad_norm": 1.7406610068492592, + "learning_rate": 1.9505772961641342e-05, + "loss": 0.7041016817092896, + "step": 996 + }, + { + "epoch": 0.2915630940195935, + "grad_norm": 1.2586308004321554, + "learning_rate": 1.9504270732167267e-05, + "loss": 0.7073841691017151, + "step": 997 + }, + { + "epoch": 0.2918555344348589, + "grad_norm": 1.204085782896564, + 
"learning_rate": 1.9502766281127693e-05, + "loss": 0.5097789764404297, + "step": 998 + }, + { + "epoch": 0.2921479748501243, + "grad_norm": 1.1340482101200409, + "learning_rate": 1.9501259608874276e-05, + "loss": 0.6522337198257446, + "step": 999 + }, + { + "epoch": 0.29244041526538966, + "grad_norm": 1.2639457143948831, + "learning_rate": 1.9499750715759197e-05, + "loss": 0.8276036381721497, + "step": 1000 + }, + { + "epoch": 0.29273285568065505, + "grad_norm": 1.3336888124261281, + "learning_rate": 1.9498239602135145e-05, + "loss": 0.7701225876808167, + "step": 1001 + }, + { + "epoch": 0.29302529609592043, + "grad_norm": 1.4216994028606598, + "learning_rate": 1.949672626835534e-05, + "loss": 0.6112316846847534, + "step": 1002 + }, + { + "epoch": 0.29331773651118587, + "grad_norm": 1.5055133598944146, + "learning_rate": 1.9495210714773506e-05, + "loss": 0.7196093201637268, + "step": 1003 + }, + { + "epoch": 0.29361017692645125, + "grad_norm": 1.3102459642638802, + "learning_rate": 1.9493692941743903e-05, + "loss": 0.708210825920105, + "step": 1004 + }, + { + "epoch": 0.29390261734171663, + "grad_norm": 1.1576562552023075, + "learning_rate": 1.9492172949621298e-05, + "loss": 0.6156430244445801, + "step": 1005 + }, + { + "epoch": 0.294195057756982, + "grad_norm": 1.3177580817558727, + "learning_rate": 1.9490650738760977e-05, + "loss": 0.6125216484069824, + "step": 1006 + }, + { + "epoch": 0.2944874981722474, + "grad_norm": 1.5792615772910776, + "learning_rate": 1.9489126309518752e-05, + "loss": 0.5691695213317871, + "step": 1007 + }, + { + "epoch": 0.2947799385875128, + "grad_norm": 1.2458453862912673, + "learning_rate": 1.9487599662250945e-05, + "loss": 0.6733062267303467, + "step": 1008 + }, + { + "epoch": 0.29507237900277816, + "grad_norm": 1.3579820847813902, + "learning_rate": 1.94860707973144e-05, + "loss": 0.6069025993347168, + "step": 1009 + }, + { + "epoch": 0.2953648194180436, + "grad_norm": 1.3771790647505693, + "learning_rate": 1.9484539715066488e-05, + 
"loss": 0.6191028356552124, + "step": 1010 + }, + { + "epoch": 0.295657259833309, + "grad_norm": 1.3927395620788336, + "learning_rate": 1.9483006415865082e-05, + "loss": 0.7423045635223389, + "step": 1011 + }, + { + "epoch": 0.29594970024857437, + "grad_norm": 1.584259935283413, + "learning_rate": 1.9481470900068585e-05, + "loss": 0.854878306388855, + "step": 1012 + }, + { + "epoch": 0.29624214066383975, + "grad_norm": 1.3274147652805814, + "learning_rate": 1.9479933168035914e-05, + "loss": 0.6950500011444092, + "step": 1013 + }, + { + "epoch": 0.29653458107910513, + "grad_norm": 1.2664754529699496, + "learning_rate": 1.9478393220126503e-05, + "loss": 0.6944484710693359, + "step": 1014 + }, + { + "epoch": 0.2968270214943705, + "grad_norm": 1.3385070796010239, + "learning_rate": 1.9476851056700303e-05, + "loss": 0.7120212316513062, + "step": 1015 + }, + { + "epoch": 0.2971194619096359, + "grad_norm": 1.2818173555684258, + "learning_rate": 1.9475306678117792e-05, + "loss": 0.6271052956581116, + "step": 1016 + }, + { + "epoch": 0.2974119023249013, + "grad_norm": 1.386949235285712, + "learning_rate": 1.9473760084739958e-05, + "loss": 0.6398453712463379, + "step": 1017 + }, + { + "epoch": 0.2977043427401667, + "grad_norm": 1.440440679973054, + "learning_rate": 1.94722112769283e-05, + "loss": 0.5563585758209229, + "step": 1018 + }, + { + "epoch": 0.2979967831554321, + "grad_norm": 1.2637928746894573, + "learning_rate": 1.947066025504485e-05, + "loss": 0.7895959615707397, + "step": 1019 + }, + { + "epoch": 0.2982892235706975, + "grad_norm": 1.2684661754258477, + "learning_rate": 1.9469107019452148e-05, + "loss": 0.6304349303245544, + "step": 1020 + }, + { + "epoch": 0.29858166398596286, + "grad_norm": 1.4493096125993807, + "learning_rate": 1.9467551570513257e-05, + "loss": 0.6915549039840698, + "step": 1021 + }, + { + "epoch": 0.29887410440122825, + "grad_norm": 1.2593652754748748, + "learning_rate": 1.9465993908591748e-05, + "loss": 0.6257511377334595, + "step": 1022 + 
}, + { + "epoch": 0.29916654481649363, + "grad_norm": 1.4075585450481771, + "learning_rate": 1.9464434034051716e-05, + "loss": 0.6409085988998413, + "step": 1023 + }, + { + "epoch": 0.299458985231759, + "grad_norm": 1.358442522813864, + "learning_rate": 1.9462871947257772e-05, + "loss": 0.7281351089477539, + "step": 1024 + }, + { + "epoch": 0.2997514256470244, + "grad_norm": 1.441690145181621, + "learning_rate": 1.9461307648575047e-05, + "loss": 0.8016781806945801, + "step": 1025 + }, + { + "epoch": 0.30004386606228983, + "grad_norm": 1.2844064559637345, + "learning_rate": 1.9459741138369186e-05, + "loss": 0.5883209705352783, + "step": 1026 + }, + { + "epoch": 0.3003363064775552, + "grad_norm": 1.674320224055934, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.6414197683334351, + "step": 1027 + }, + { + "epoch": 0.3006287468928206, + "grad_norm": 1.465437904752509, + "learning_rate": 1.9456601484853218e-05, + "loss": 0.7076515555381775, + "step": 1028 + }, + { + "epoch": 0.300921187308086, + "grad_norm": 1.4091861442316225, + "learning_rate": 1.9455028342276984e-05, + "loss": 0.8102637529373169, + "step": 1029 + }, + { + "epoch": 0.30121362772335136, + "grad_norm": 1.3935099692215975, + "learning_rate": 1.9453452989645362e-05, + "loss": 0.6954574584960938, + "step": 1030 + }, + { + "epoch": 0.30150606813861675, + "grad_norm": 1.1912974865854908, + "learning_rate": 1.9451875427326585e-05, + "loss": 0.6647125482559204, + "step": 1031 + }, + { + "epoch": 0.3017985085538821, + "grad_norm": 1.2637381593470247, + "learning_rate": 1.9450295655689392e-05, + "loss": 0.5501933097839355, + "step": 1032 + }, + { + "epoch": 0.3020909489691475, + "grad_norm": 1.1642394496276798, + "learning_rate": 1.944871367510305e-05, + "loss": 0.6561415195465088, + "step": 1033 + }, + { + "epoch": 0.30238338938441295, + "grad_norm": 1.2818557575199787, + "learning_rate": 1.9447129485937335e-05, + "loss": 0.6768229007720947, + "step": 1034 + }, + { + "epoch": 0.30267582979967833, + 
"grad_norm": 1.229414584528048, + "learning_rate": 1.9445543088562543e-05, + "loss": 0.5693868398666382, + "step": 1035 + }, + { + "epoch": 0.3029682702149437, + "grad_norm": 1.197937800783061, + "learning_rate": 1.9443954483349485e-05, + "loss": 0.6165708303451538, + "step": 1036 + }, + { + "epoch": 0.3032607106302091, + "grad_norm": 1.0808504567320436, + "learning_rate": 1.944236367066948e-05, + "loss": 0.6116082668304443, + "step": 1037 + }, + { + "epoch": 0.3035531510454745, + "grad_norm": 1.2481100676234638, + "learning_rate": 1.9440770650894384e-05, + "loss": 0.7027714848518372, + "step": 1038 + }, + { + "epoch": 0.30384559146073986, + "grad_norm": 1.3613368127158991, + "learning_rate": 1.943917542439655e-05, + "loss": 0.7339189052581787, + "step": 1039 + }, + { + "epoch": 0.30413803187600524, + "grad_norm": 1.322856585416547, + "learning_rate": 1.943757799154885e-05, + "loss": 0.7975895404815674, + "step": 1040 + }, + { + "epoch": 0.3044304722912706, + "grad_norm": 1.2603507441667385, + "learning_rate": 1.9435978352724673e-05, + "loss": 0.6421841382980347, + "step": 1041 + }, + { + "epoch": 0.30472291270653606, + "grad_norm": 1.3017046883641064, + "learning_rate": 1.943437650829793e-05, + "loss": 0.6731791496276855, + "step": 1042 + }, + { + "epoch": 0.30501535312180145, + "grad_norm": 1.224211690521448, + "learning_rate": 1.943277245864304e-05, + "loss": 0.7008551359176636, + "step": 1043 + }, + { + "epoch": 0.30530779353706683, + "grad_norm": 1.2549197569852149, + "learning_rate": 1.943116620413494e-05, + "loss": 0.6777141094207764, + "step": 1044 + }, + { + "epoch": 0.3056002339523322, + "grad_norm": 1.258074600817151, + "learning_rate": 1.9429557745149084e-05, + "loss": 0.7649033069610596, + "step": 1045 + }, + { + "epoch": 0.3058926743675976, + "grad_norm": 1.2626508350830759, + "learning_rate": 1.9427947082061432e-05, + "loss": 0.6460477709770203, + "step": 1046 + }, + { + "epoch": 0.306185114782863, + "grad_norm": 1.3748035809258794, + 
"learning_rate": 1.942633421524848e-05, + "loss": 0.5939697623252869, + "step": 1047 + }, + { + "epoch": 0.30647755519812836, + "grad_norm": 1.3696807292374817, + "learning_rate": 1.9424719145087216e-05, + "loss": 0.606407880783081, + "step": 1048 + }, + { + "epoch": 0.3067699956133938, + "grad_norm": 1.2114201905625201, + "learning_rate": 1.9423101871955153e-05, + "loss": 0.5515298843383789, + "step": 1049 + }, + { + "epoch": 0.3070624360286592, + "grad_norm": 1.4449996700249255, + "learning_rate": 1.942148239623032e-05, + "loss": 0.7397217154502869, + "step": 1050 + }, + { + "epoch": 0.30735487644392456, + "grad_norm": 1.708533630902304, + "learning_rate": 1.9419860718291265e-05, + "loss": 0.6397782564163208, + "step": 1051 + }, + { + "epoch": 0.30764731685918995, + "grad_norm": 1.1946031757535738, + "learning_rate": 1.9418236838517036e-05, + "loss": 0.589732825756073, + "step": 1052 + }, + { + "epoch": 0.30793975727445533, + "grad_norm": 1.4196894685331136, + "learning_rate": 1.941661075728721e-05, + "loss": 0.7968351244926453, + "step": 1053 + }, + { + "epoch": 0.3082321976897207, + "grad_norm": 1.35500416476017, + "learning_rate": 1.9414982474981877e-05, + "loss": 0.5740514397621155, + "step": 1054 + }, + { + "epoch": 0.3085246381049861, + "grad_norm": 1.314001411398827, + "learning_rate": 1.9413351991981632e-05, + "loss": 0.656599760055542, + "step": 1055 + }, + { + "epoch": 0.3088170785202515, + "grad_norm": 1.2592244001939052, + "learning_rate": 1.9411719308667593e-05, + "loss": 0.5638262033462524, + "step": 1056 + }, + { + "epoch": 0.3091095189355169, + "grad_norm": 1.3510783569743914, + "learning_rate": 1.9410084425421392e-05, + "loss": 0.6391294002532959, + "step": 1057 + }, + { + "epoch": 0.3094019593507823, + "grad_norm": 1.300451628146748, + "learning_rate": 1.9408447342625167e-05, + "loss": 0.7109906077384949, + "step": 1058 + }, + { + "epoch": 0.3096943997660477, + "grad_norm": 1.35271058872007, + "learning_rate": 1.9406808060661583e-05, + "loss": 
0.6922626495361328, + "step": 1059 + }, + { + "epoch": 0.30998684018131306, + "grad_norm": 1.3729160813047252, + "learning_rate": 1.9405166579913808e-05, + "loss": 0.6708151698112488, + "step": 1060 + }, + { + "epoch": 0.31027928059657844, + "grad_norm": 1.3049592711968918, + "learning_rate": 1.940352290076553e-05, + "loss": 0.6259905099868774, + "step": 1061 + }, + { + "epoch": 0.3105717210118438, + "grad_norm": 1.3047971530530311, + "learning_rate": 1.940187702360095e-05, + "loss": 0.6590703725814819, + "step": 1062 + }, + { + "epoch": 0.3108641614271092, + "grad_norm": 1.5136066296614852, + "learning_rate": 1.9400228948804777e-05, + "loss": 0.7371482849121094, + "step": 1063 + }, + { + "epoch": 0.3111566018423746, + "grad_norm": 1.3637094061000257, + "learning_rate": 1.9398578676762243e-05, + "loss": 0.6954984664916992, + "step": 1064 + }, + { + "epoch": 0.31144904225764003, + "grad_norm": 1.197618668709007, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.604501485824585, + "step": 1065 + }, + { + "epoch": 0.3117414826729054, + "grad_norm": 1.4637648544146704, + "learning_rate": 1.939527154248156e-05, + "loss": 0.7580305337905884, + "step": 1066 + }, + { + "epoch": 0.3120339230881708, + "grad_norm": 1.2774221611024956, + "learning_rate": 1.9393614681016443e-05, + "loss": 0.5996969938278198, + "step": 1067 + }, + { + "epoch": 0.3123263635034362, + "grad_norm": 1.2247945329694363, + "learning_rate": 1.9391955623851e-05, + "loss": 0.5939687490463257, + "step": 1068 + }, + { + "epoch": 0.31261880391870156, + "grad_norm": 1.2833481425507127, + "learning_rate": 1.939029437137304e-05, + "loss": 0.6194947957992554, + "step": 1069 + }, + { + "epoch": 0.31291124433396694, + "grad_norm": 1.406800587144287, + "learning_rate": 1.9388630923970862e-05, + "loss": 0.7419420480728149, + "step": 1070 + }, + { + "epoch": 0.3132036847492323, + "grad_norm": 1.4290715744520364, + "learning_rate": 1.938696528203329e-05, + "loss": 0.6950613856315613, + "step": 1071 + }, + { + 
"epoch": 0.3134961251644977, + "grad_norm": 1.542135386244918, + "learning_rate": 1.9385297445949657e-05, + "loss": 0.7376282215118408, + "step": 1072 + }, + { + "epoch": 0.31378856557976315, + "grad_norm": 1.4197281288148755, + "learning_rate": 1.938362741610981e-05, + "loss": 0.800892174243927, + "step": 1073 + }, + { + "epoch": 0.31408100599502853, + "grad_norm": 1.3238571566647774, + "learning_rate": 1.938195519290411e-05, + "loss": 0.5747013688087463, + "step": 1074 + }, + { + "epoch": 0.3143734464102939, + "grad_norm": 1.3986418419585354, + "learning_rate": 1.9380280776723422e-05, + "loss": 0.7341697216033936, + "step": 1075 + }, + { + "epoch": 0.3146658868255593, + "grad_norm": 1.2703636347468634, + "learning_rate": 1.9378604167959138e-05, + "loss": 0.6229791641235352, + "step": 1076 + }, + { + "epoch": 0.3149583272408247, + "grad_norm": 1.2978515497126824, + "learning_rate": 1.937692536700315e-05, + "loss": 0.7266645431518555, + "step": 1077 + }, + { + "epoch": 0.31525076765609006, + "grad_norm": 1.1885775086050685, + "learning_rate": 1.937524437424787e-05, + "loss": 0.6163127422332764, + "step": 1078 + }, + { + "epoch": 0.31554320807135544, + "grad_norm": 1.3182925237610392, + "learning_rate": 1.9373561190086225e-05, + "loss": 0.6609925031661987, + "step": 1079 + }, + { + "epoch": 0.3158356484866208, + "grad_norm": 1.305005533703013, + "learning_rate": 1.937187581491164e-05, + "loss": 0.7157741785049438, + "step": 1080 + }, + { + "epoch": 0.31612808890188626, + "grad_norm": 1.286980397276467, + "learning_rate": 1.937018824911807e-05, + "loss": 0.6486212015151978, + "step": 1081 + }, + { + "epoch": 0.31642052931715164, + "grad_norm": 1.3383942599839993, + "learning_rate": 1.9368498493099963e-05, + "loss": 0.6931928396224976, + "step": 1082 + }, + { + "epoch": 0.316712969732417, + "grad_norm": 1.3241410530363713, + "learning_rate": 1.9366806547252295e-05, + "loss": 0.9291354417800903, + "step": 1083 + }, + { + "epoch": 0.3170054101476824, + "grad_norm": 
1.247174604159187, + "learning_rate": 1.936511241197055e-05, + "loss": 0.6276642084121704, + "step": 1084 + }, + { + "epoch": 0.3172978505629478, + "grad_norm": 1.5622111951472988, + "learning_rate": 1.936341608765072e-05, + "loss": 0.7836581468582153, + "step": 1085 + }, + { + "epoch": 0.3175902909782132, + "grad_norm": 1.3508415072117352, + "learning_rate": 1.9361717574689308e-05, + "loss": 0.6785084009170532, + "step": 1086 + }, + { + "epoch": 0.31788273139347856, + "grad_norm": 1.2099617708073434, + "learning_rate": 1.936001687348333e-05, + "loss": 0.5715218782424927, + "step": 1087 + }, + { + "epoch": 0.318175171808744, + "grad_norm": 1.4697249093694587, + "learning_rate": 1.9358313984430324e-05, + "loss": 0.8417775630950928, + "step": 1088 + }, + { + "epoch": 0.3184676122240094, + "grad_norm": 1.2670961493626953, + "learning_rate": 1.935660890792832e-05, + "loss": 0.598076343536377, + "step": 1089 + }, + { + "epoch": 0.31876005263927476, + "grad_norm": 1.1923598198798329, + "learning_rate": 1.9354901644375876e-05, + "loss": 0.5830154418945312, + "step": 1090 + }, + { + "epoch": 0.31905249305454014, + "grad_norm": 1.307163759376097, + "learning_rate": 1.935319219417205e-05, + "loss": 0.5746437311172485, + "step": 1091 + }, + { + "epoch": 0.3193449334698055, + "grad_norm": 1.1091668518622428, + "learning_rate": 1.9351480557716414e-05, + "loss": 0.5520191788673401, + "step": 1092 + }, + { + "epoch": 0.3196373738850709, + "grad_norm": 1.218411442512637, + "learning_rate": 1.9349766735409058e-05, + "loss": 0.5847123861312866, + "step": 1093 + }, + { + "epoch": 0.3199298143003363, + "grad_norm": 1.568693203893066, + "learning_rate": 1.9348050727650577e-05, + "loss": 0.7390924692153931, + "step": 1094 + }, + { + "epoch": 0.3202222547156017, + "grad_norm": 1.4427842898109178, + "learning_rate": 1.9346332534842074e-05, + "loss": 0.5812145471572876, + "step": 1095 + }, + { + "epoch": 0.3205146951308671, + "grad_norm": 1.615769461575852, + "learning_rate": 
1.9344612157385166e-05, + "loss": 0.6958816647529602, + "step": 1096 + }, + { + "epoch": 0.3208071355461325, + "grad_norm": 1.3481467288956208, + "learning_rate": 1.9342889595681986e-05, + "loss": 0.5618177652359009, + "step": 1097 + }, + { + "epoch": 0.3210995759613979, + "grad_norm": 1.2846515235734224, + "learning_rate": 1.9341164850135163e-05, + "loss": 0.6099411845207214, + "step": 1098 + }, + { + "epoch": 0.32139201637666326, + "grad_norm": 1.1242331249756639, + "learning_rate": 1.9339437921147854e-05, + "loss": 0.6772094964981079, + "step": 1099 + }, + { + "epoch": 0.32168445679192864, + "grad_norm": 1.4006184046576602, + "learning_rate": 1.9337708809123718e-05, + "loss": 0.6916643381118774, + "step": 1100 + }, + { + "epoch": 0.321976897207194, + "grad_norm": 1.199381767960838, + "learning_rate": 1.933597751446692e-05, + "loss": 0.5716762542724609, + "step": 1101 + }, + { + "epoch": 0.3222693376224594, + "grad_norm": 1.511781401125701, + "learning_rate": 1.9334244037582143e-05, + "loss": 0.68224036693573, + "step": 1102 + }, + { + "epoch": 0.3225617780377248, + "grad_norm": 1.3199204633429549, + "learning_rate": 1.933250837887457e-05, + "loss": 0.6888231635093689, + "step": 1103 + }, + { + "epoch": 0.3228542184529902, + "grad_norm": 1.4809797608653643, + "learning_rate": 1.933077053874991e-05, + "loss": 0.6469036340713501, + "step": 1104 + }, + { + "epoch": 0.3231466588682556, + "grad_norm": 1.5099365665086963, + "learning_rate": 1.932903051761437e-05, + "loss": 0.6202501058578491, + "step": 1105 + }, + { + "epoch": 0.323439099283521, + "grad_norm": 1.454362918518285, + "learning_rate": 1.932728831587467e-05, + "loss": 0.6041314601898193, + "step": 1106 + }, + { + "epoch": 0.3237315396987864, + "grad_norm": 1.3479422917529533, + "learning_rate": 1.9325543933938034e-05, + "loss": 0.7081667184829712, + "step": 1107 + }, + { + "epoch": 0.32402398011405176, + "grad_norm": 1.477559211803618, + "learning_rate": 1.9323797372212204e-05, + "loss": 0.7743494510650635, 
+ "step": 1108 + }, + { + "epoch": 0.32431642052931714, + "grad_norm": 1.3188148010775738, + "learning_rate": 1.9322048631105428e-05, + "loss": 0.6122584342956543, + "step": 1109 + }, + { + "epoch": 0.3246088609445825, + "grad_norm": 1.279178726850882, + "learning_rate": 1.932029771102646e-05, + "loss": 0.6106122732162476, + "step": 1110 + }, + { + "epoch": 0.3249013013598479, + "grad_norm": 1.1897376224269591, + "learning_rate": 1.9318544612384572e-05, + "loss": 0.5082784295082092, + "step": 1111 + }, + { + "epoch": 0.32519374177511334, + "grad_norm": 1.3081590787355515, + "learning_rate": 1.9316789335589542e-05, + "loss": 0.6845188140869141, + "step": 1112 + }, + { + "epoch": 0.3254861821903787, + "grad_norm": 1.343292960468675, + "learning_rate": 1.9315031881051653e-05, + "loss": 0.5972481966018677, + "step": 1113 + }, + { + "epoch": 0.3257786226056441, + "grad_norm": 1.372744387816622, + "learning_rate": 1.931327224918169e-05, + "loss": 0.6312427520751953, + "step": 1114 + }, + { + "epoch": 0.3260710630209095, + "grad_norm": 1.6334469145871557, + "learning_rate": 1.9311510440390973e-05, + "loss": 0.7904551029205322, + "step": 1115 + }, + { + "epoch": 0.3263635034361749, + "grad_norm": 1.4496533611968336, + "learning_rate": 1.9309746455091302e-05, + "loss": 0.6513646841049194, + "step": 1116 + }, + { + "epoch": 0.32665594385144026, + "grad_norm": 1.2559107839078971, + "learning_rate": 1.9307980293694997e-05, + "loss": 0.5349715948104858, + "step": 1117 + }, + { + "epoch": 0.32694838426670564, + "grad_norm": 1.122130050588245, + "learning_rate": 1.93062119566149e-05, + "loss": 0.5815087556838989, + "step": 1118 + }, + { + "epoch": 0.327240824681971, + "grad_norm": 1.3638823451289013, + "learning_rate": 1.9304441444264335e-05, + "loss": 0.6380286812782288, + "step": 1119 + }, + { + "epoch": 0.32753326509723646, + "grad_norm": 1.2646470578382853, + "learning_rate": 1.9302668757057157e-05, + "loss": 0.7222728729248047, + "step": 1120 + }, + { + "epoch": 
0.32782570551250184, + "grad_norm": 1.1611022114208025, + "learning_rate": 1.9300893895407715e-05, + "loss": 0.6262868642807007, + "step": 1121 + }, + { + "epoch": 0.3281181459277672, + "grad_norm": 1.522127361747668, + "learning_rate": 1.929911685973088e-05, + "loss": 0.6387197971343994, + "step": 1122 + }, + { + "epoch": 0.3284105863430326, + "grad_norm": 1.3879760662124887, + "learning_rate": 1.9297337650442015e-05, + "loss": 0.77378249168396, + "step": 1123 + }, + { + "epoch": 0.328703026758298, + "grad_norm": 1.3651260322738243, + "learning_rate": 1.9295556267957004e-05, + "loss": 0.7589142322540283, + "step": 1124 + }, + { + "epoch": 0.32899546717356337, + "grad_norm": 1.481283036614999, + "learning_rate": 1.9293772712692233e-05, + "loss": 0.7153090238571167, + "step": 1125 + }, + { + "epoch": 0.32928790758882875, + "grad_norm": 1.2572705841763243, + "learning_rate": 1.9291986985064595e-05, + "loss": 0.5738104581832886, + "step": 1126 + }, + { + "epoch": 0.3295803480040942, + "grad_norm": 1.2803221849130417, + "learning_rate": 1.92901990854915e-05, + "loss": 0.6530819535255432, + "step": 1127 + }, + { + "epoch": 0.3298727884193596, + "grad_norm": 1.221270033991816, + "learning_rate": 1.9288409014390854e-05, + "loss": 0.59107506275177, + "step": 1128 + }, + { + "epoch": 0.33016522883462496, + "grad_norm": 1.4658806562930384, + "learning_rate": 1.9286616772181072e-05, + "loss": 0.5798863172531128, + "step": 1129 + }, + { + "epoch": 0.33045766924989034, + "grad_norm": 1.689951660615568, + "learning_rate": 1.9284822359281085e-05, + "loss": 0.6957223415374756, + "step": 1130 + }, + { + "epoch": 0.3307501096651557, + "grad_norm": 1.3614959188818774, + "learning_rate": 1.9283025776110326e-05, + "loss": 0.6933379173278809, + "step": 1131 + }, + { + "epoch": 0.3310425500804211, + "grad_norm": 1.2712490538707164, + "learning_rate": 1.928122702308873e-05, + "loss": 0.527482271194458, + "step": 1132 + }, + { + "epoch": 0.3313349904956865, + "grad_norm": 
1.2166131933862214, + "learning_rate": 1.927942610063675e-05, + "loss": 0.7244399785995483, + "step": 1133 + }, + { + "epoch": 0.33162743091095187, + "grad_norm": 1.4636848406157517, + "learning_rate": 1.9277623009175338e-05, + "loss": 0.7881563901901245, + "step": 1134 + }, + { + "epoch": 0.3319198713262173, + "grad_norm": 1.415089568819196, + "learning_rate": 1.9275817749125956e-05, + "loss": 0.7523232698440552, + "step": 1135 + }, + { + "epoch": 0.3322123117414827, + "grad_norm": 1.5537538186729503, + "learning_rate": 1.9274010320910575e-05, + "loss": 0.7226657867431641, + "step": 1136 + }, + { + "epoch": 0.3325047521567481, + "grad_norm": 1.4230293062648038, + "learning_rate": 1.9272200724951666e-05, + "loss": 0.6461686491966248, + "step": 1137 + }, + { + "epoch": 0.33279719257201346, + "grad_norm": 1.1785466753796996, + "learning_rate": 1.9270388961672214e-05, + "loss": 0.6343599557876587, + "step": 1138 + }, + { + "epoch": 0.33308963298727884, + "grad_norm": 1.2762072218920462, + "learning_rate": 1.926857503149571e-05, + "loss": 0.5510993599891663, + "step": 1139 + }, + { + "epoch": 0.3333820734025442, + "grad_norm": 1.3887196408907312, + "learning_rate": 1.9266758934846142e-05, + "loss": 0.6022439002990723, + "step": 1140 + }, + { + "epoch": 0.3336745138178096, + "grad_norm": 1.2716403438701216, + "learning_rate": 1.9264940672148018e-05, + "loss": 0.708207368850708, + "step": 1141 + }, + { + "epoch": 0.333966954233075, + "grad_norm": 1.3137035916667523, + "learning_rate": 1.9263120243826345e-05, + "loss": 0.566935122013092, + "step": 1142 + }, + { + "epoch": 0.3342593946483404, + "grad_norm": 1.634118861015607, + "learning_rate": 1.9261297650306635e-05, + "loss": 0.6848355531692505, + "step": 1143 + }, + { + "epoch": 0.3345518350636058, + "grad_norm": 1.3058715943169161, + "learning_rate": 1.9259472892014907e-05, + "loss": 0.7335090637207031, + "step": 1144 + }, + { + "epoch": 0.3348442754788712, + "grad_norm": 1.425387946547354, + "learning_rate": 
1.925764596937769e-05, + "loss": 0.7323876023292542, + "step": 1145 + }, + { + "epoch": 0.33513671589413657, + "grad_norm": 1.377083094919456, + "learning_rate": 1.9255816882822017e-05, + "loss": 0.5564731955528259, + "step": 1146 + }, + { + "epoch": 0.33542915630940195, + "grad_norm": 1.5323554162589257, + "learning_rate": 1.925398563277542e-05, + "loss": 0.7699049711227417, + "step": 1147 + }, + { + "epoch": 0.33572159672466734, + "grad_norm": 1.4580663324783634, + "learning_rate": 1.925215221966595e-05, + "loss": 0.688602089881897, + "step": 1148 + }, + { + "epoch": 0.3360140371399327, + "grad_norm": 1.4706838569192882, + "learning_rate": 1.9250316643922153e-05, + "loss": 0.7103208899497986, + "step": 1149 + }, + { + "epoch": 0.3363064775551981, + "grad_norm": 1.2723502109555263, + "learning_rate": 1.9248478905973078e-05, + "loss": 0.6313603520393372, + "step": 1150 + }, + { + "epoch": 0.33659891797046354, + "grad_norm": 1.4985289931464978, + "learning_rate": 1.9246639006248294e-05, + "loss": 0.8420118093490601, + "step": 1151 + }, + { + "epoch": 0.3368913583857289, + "grad_norm": 1.4358130705661303, + "learning_rate": 1.9244796945177864e-05, + "loss": 0.6566640734672546, + "step": 1152 + }, + { + "epoch": 0.3371837988009943, + "grad_norm": 1.3408154011751006, + "learning_rate": 1.9242952723192357e-05, + "loss": 0.6455206274986267, + "step": 1153 + }, + { + "epoch": 0.3374762392162597, + "grad_norm": 1.3469873034007918, + "learning_rate": 1.924110634072285e-05, + "loss": 0.7348071336746216, + "step": 1154 + }, + { + "epoch": 0.33776867963152507, + "grad_norm": 1.7471975705727423, + "learning_rate": 1.9239257798200918e-05, + "loss": 0.7187973260879517, + "step": 1155 + }, + { + "epoch": 0.33806112004679045, + "grad_norm": 1.2712100505239146, + "learning_rate": 1.9237407096058655e-05, + "loss": 0.683181643486023, + "step": 1156 + }, + { + "epoch": 0.33835356046205584, + "grad_norm": 1.2445760635583791, + "learning_rate": 1.9235554234728646e-05, + "loss": 
0.7296931743621826, + "step": 1157 + }, + { + "epoch": 0.3386460008773212, + "grad_norm": 1.1890040509691011, + "learning_rate": 1.923369921464399e-05, + "loss": 0.6656480431556702, + "step": 1158 + }, + { + "epoch": 0.33893844129258666, + "grad_norm": 1.3248976863888173, + "learning_rate": 1.923184203623828e-05, + "loss": 0.6284874677658081, + "step": 1159 + }, + { + "epoch": 0.33923088170785204, + "grad_norm": 1.29677623825286, + "learning_rate": 1.922998269994563e-05, + "loss": 0.7065030336380005, + "step": 1160 + }, + { + "epoch": 0.3395233221231174, + "grad_norm": 1.3728212504218815, + "learning_rate": 1.9228121206200637e-05, + "loss": 0.7077580690383911, + "step": 1161 + }, + { + "epoch": 0.3398157625383828, + "grad_norm": 1.4855746260471363, + "learning_rate": 1.9226257555438428e-05, + "loss": 0.6012637615203857, + "step": 1162 + }, + { + "epoch": 0.3401082029536482, + "grad_norm": 1.304745585816947, + "learning_rate": 1.9224391748094607e-05, + "loss": 0.8166115283966064, + "step": 1163 + }, + { + "epoch": 0.34040064336891357, + "grad_norm": 1.502407347484804, + "learning_rate": 1.92225237846053e-05, + "loss": 0.6066576242446899, + "step": 1164 + }, + { + "epoch": 0.34069308378417895, + "grad_norm": 1.2739635711375565, + "learning_rate": 1.922065366540713e-05, + "loss": 0.7226361632347107, + "step": 1165 + }, + { + "epoch": 0.3409855241994444, + "grad_norm": 1.6597939275709621, + "learning_rate": 1.9218781390937233e-05, + "loss": 0.7786005139350891, + "step": 1166 + }, + { + "epoch": 0.3412779646147098, + "grad_norm": 1.4812475412632635, + "learning_rate": 1.9216906961633234e-05, + "loss": 0.6534268856048584, + "step": 1167 + }, + { + "epoch": 0.34157040502997515, + "grad_norm": 1.2208380860350694, + "learning_rate": 1.9215030377933274e-05, + "loss": 0.6048434376716614, + "step": 1168 + }, + { + "epoch": 0.34186284544524054, + "grad_norm": 1.356773734579803, + "learning_rate": 1.921315164027599e-05, + "loss": 0.8321201205253601, + "step": 1169 + }, + { + 
"epoch": 0.3421552858605059, + "grad_norm": 1.5327102242092, + "learning_rate": 1.9211270749100527e-05, + "loss": 0.7142379283905029, + "step": 1170 + }, + { + "epoch": 0.3424477262757713, + "grad_norm": 1.205344060714777, + "learning_rate": 1.9209387704846535e-05, + "loss": 0.6262812614440918, + "step": 1171 + }, + { + "epoch": 0.3427401666910367, + "grad_norm": 1.4345003452190939, + "learning_rate": 1.920750250795416e-05, + "loss": 0.7242386341094971, + "step": 1172 + }, + { + "epoch": 0.34303260710630207, + "grad_norm": 1.3077522067723235, + "learning_rate": 1.9205615158864054e-05, + "loss": 0.6064128875732422, + "step": 1173 + }, + { + "epoch": 0.3433250475215675, + "grad_norm": 1.3868970600691566, + "learning_rate": 1.9203725658017374e-05, + "loss": 0.6720623970031738, + "step": 1174 + }, + { + "epoch": 0.3436174879368329, + "grad_norm": 1.3281016407079367, + "learning_rate": 1.9201834005855785e-05, + "loss": 0.745712161064148, + "step": 1175 + }, + { + "epoch": 0.34390992835209827, + "grad_norm": 1.3336156391355163, + "learning_rate": 1.9199940202821445e-05, + "loss": 0.6387969255447388, + "step": 1176 + }, + { + "epoch": 0.34420236876736365, + "grad_norm": 1.3104807608053473, + "learning_rate": 1.9198044249357018e-05, + "loss": 0.6634984612464905, + "step": 1177 + }, + { + "epoch": 0.34449480918262904, + "grad_norm": 1.0968916991502757, + "learning_rate": 1.919614614590567e-05, + "loss": 0.4732145667076111, + "step": 1178 + }, + { + "epoch": 0.3447872495978944, + "grad_norm": 1.4315145497505135, + "learning_rate": 1.9194245892911077e-05, + "loss": 0.6621897220611572, + "step": 1179 + }, + { + "epoch": 0.3450796900131598, + "grad_norm": 1.3492357768294603, + "learning_rate": 1.9192343490817412e-05, + "loss": 0.5691112279891968, + "step": 1180 + }, + { + "epoch": 0.3453721304284252, + "grad_norm": 1.5009979829344267, + "learning_rate": 1.919043894006934e-05, + "loss": 0.6326683759689331, + "step": 1181 + }, + { + "epoch": 0.3456645708436906, + "grad_norm": 
1.3965991672121214, + "learning_rate": 1.9188532241112047e-05, + "loss": 0.6068567037582397, + "step": 1182 + }, + { + "epoch": 0.345957011258956, + "grad_norm": 1.5425955582670972, + "learning_rate": 1.918662339439121e-05, + "loss": 0.707065761089325, + "step": 1183 + }, + { + "epoch": 0.3462494516742214, + "grad_norm": 1.4506511105853803, + "learning_rate": 1.9184712400353008e-05, + "loss": 0.7821887135505676, + "step": 1184 + }, + { + "epoch": 0.34654189208948677, + "grad_norm": 1.3192596730278041, + "learning_rate": 1.918279925944413e-05, + "loss": 0.6759425401687622, + "step": 1185 + }, + { + "epoch": 0.34683433250475215, + "grad_norm": 1.2819153702423505, + "learning_rate": 1.9180883972111756e-05, + "loss": 0.5660048127174377, + "step": 1186 + }, + { + "epoch": 0.34712677292001753, + "grad_norm": 1.392117573401842, + "learning_rate": 1.9178966538803574e-05, + "loss": 0.708798885345459, + "step": 1187 + }, + { + "epoch": 0.3474192133352829, + "grad_norm": 1.0828895012382165, + "learning_rate": 1.9177046959967774e-05, + "loss": 0.603208065032959, + "step": 1188 + }, + { + "epoch": 0.3477116537505483, + "grad_norm": 1.2856052178527815, + "learning_rate": 1.9175125236053043e-05, + "loss": 0.8259323835372925, + "step": 1189 + }, + { + "epoch": 0.34800409416581374, + "grad_norm": 1.2349901090123199, + "learning_rate": 1.9173201367508572e-05, + "loss": 0.573014497756958, + "step": 1190 + }, + { + "epoch": 0.3482965345810791, + "grad_norm": 1.49130421629148, + "learning_rate": 1.9171275354784062e-05, + "loss": 0.8202974200248718, + "step": 1191 + }, + { + "epoch": 0.3485889749963445, + "grad_norm": 1.313328733803151, + "learning_rate": 1.9169347198329693e-05, + "loss": 0.5352192521095276, + "step": 1192 + }, + { + "epoch": 0.3488814154116099, + "grad_norm": 1.4707600848748155, + "learning_rate": 1.916741689859617e-05, + "loss": 0.7303881645202637, + "step": 1193 + }, + { + "epoch": 0.34917385582687527, + "grad_norm": 1.136402601726834, + "learning_rate": 
1.9165484456034683e-05, + "loss": 0.670224666595459, + "step": 1194 + }, + { + "epoch": 0.34946629624214065, + "grad_norm": 1.213410956274994, + "learning_rate": 1.9163549871096934e-05, + "loss": 0.7311158776283264, + "step": 1195 + }, + { + "epoch": 0.34975873665740603, + "grad_norm": 1.3163563045896416, + "learning_rate": 1.9161613144235117e-05, + "loss": 0.6346032619476318, + "step": 1196 + }, + { + "epoch": 0.3500511770726714, + "grad_norm": 1.3538502473866518, + "learning_rate": 1.9159674275901932e-05, + "loss": 0.66914302110672, + "step": 1197 + }, + { + "epoch": 0.35034361748793685, + "grad_norm": 1.408804907617288, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.6775194406509399, + "step": 1198 + }, + { + "epoch": 0.35063605790320224, + "grad_norm": 1.3219370751555166, + "learning_rate": 1.915579011663475e-05, + "loss": 0.6887085437774658, + "step": 1199 + }, + { + "epoch": 0.3509284983184676, + "grad_norm": 1.4120877262018603, + "learning_rate": 1.9153844826608652e-05, + "loss": 0.7474929690361023, + "step": 1200 + }, + { + "epoch": 0.351220938733733, + "grad_norm": 1.3551417524104399, + "learning_rate": 1.915189739692698e-05, + "loss": 0.5665907859802246, + "step": 1201 + }, + { + "epoch": 0.3515133791489984, + "grad_norm": 1.4582334765772325, + "learning_rate": 1.9149947828044938e-05, + "loss": 0.6044580340385437, + "step": 1202 + }, + { + "epoch": 0.35180581956426377, + "grad_norm": 1.1481279810019642, + "learning_rate": 1.914799612041822e-05, + "loss": 0.6590601205825806, + "step": 1203 + }, + { + "epoch": 0.35209825997952915, + "grad_norm": 1.1796025597233206, + "learning_rate": 1.9146042274503033e-05, + "loss": 0.5204451084136963, + "step": 1204 + }, + { + "epoch": 0.3523907003947946, + "grad_norm": 1.3267878452954167, + "learning_rate": 1.9144086290756077e-05, + "loss": 0.6036473512649536, + "step": 1205 + }, + { + "epoch": 0.35268314081005997, + "grad_norm": 1.438922587418907, + "learning_rate": 1.914212816963454e-05, + "loss": 
0.5652757883071899, + "step": 1206 + }, + { + "epoch": 0.35297558122532535, + "grad_norm": 1.3041918712359999, + "learning_rate": 1.9140167911596133e-05, + "loss": 0.707310676574707, + "step": 1207 + }, + { + "epoch": 0.35326802164059073, + "grad_norm": 1.4881761799215045, + "learning_rate": 1.9138205517099048e-05, + "loss": 0.8539729714393616, + "step": 1208 + }, + { + "epoch": 0.3535604620558561, + "grad_norm": 1.6882139973772572, + "learning_rate": 1.9136240986601986e-05, + "loss": 0.6502546072006226, + "step": 1209 + }, + { + "epoch": 0.3538529024711215, + "grad_norm": 1.3852727639404194, + "learning_rate": 1.9134274320564145e-05, + "loss": 0.7279889583587646, + "step": 1210 + }, + { + "epoch": 0.3541453428863869, + "grad_norm": 1.380784482123245, + "learning_rate": 1.9132305519445215e-05, + "loss": 0.6916895508766174, + "step": 1211 + }, + { + "epoch": 0.35443778330165226, + "grad_norm": 1.3426787464995344, + "learning_rate": 1.9130334583705395e-05, + "loss": 0.6941961050033569, + "step": 1212 + }, + { + "epoch": 0.3547302237169177, + "grad_norm": 1.211958694677935, + "learning_rate": 1.912836151380538e-05, + "loss": 0.6686822175979614, + "step": 1213 + }, + { + "epoch": 0.3550226641321831, + "grad_norm": 1.4335891633323221, + "learning_rate": 1.912638631020636e-05, + "loss": 0.818913459777832, + "step": 1214 + }, + { + "epoch": 0.35531510454744847, + "grad_norm": 1.3902796641746433, + "learning_rate": 1.9124408973370034e-05, + "loss": 0.6461240649223328, + "step": 1215 + }, + { + "epoch": 0.35560754496271385, + "grad_norm": 1.3521177557458626, + "learning_rate": 1.9122429503758586e-05, + "loss": 0.6982225179672241, + "step": 1216 + }, + { + "epoch": 0.35589998537797923, + "grad_norm": 1.2726818955529642, + "learning_rate": 1.9120447901834708e-05, + "loss": 0.6319124698638916, + "step": 1217 + }, + { + "epoch": 0.3561924257932446, + "grad_norm": 1.4379853975185637, + "learning_rate": 1.9118464168061584e-05, + "loss": 0.7092441320419312, + "step": 1218 + }, + { 
+ "epoch": 0.35648486620851, + "grad_norm": 1.4989677994022448, + "learning_rate": 1.9116478302902904e-05, + "loss": 0.7696874141693115, + "step": 1219 + }, + { + "epoch": 0.3567773066237754, + "grad_norm": 1.348418923049424, + "learning_rate": 1.9114490306822846e-05, + "loss": 0.6944275498390198, + "step": 1220 + }, + { + "epoch": 0.3570697470390408, + "grad_norm": 1.4736146352332777, + "learning_rate": 1.9112500180286098e-05, + "loss": 0.6179015636444092, + "step": 1221 + }, + { + "epoch": 0.3573621874543062, + "grad_norm": 1.4832162039625727, + "learning_rate": 1.911050792375784e-05, + "loss": 0.6964149475097656, + "step": 1222 + }, + { + "epoch": 0.3576546278695716, + "grad_norm": 1.314680516503926, + "learning_rate": 1.9108513537703746e-05, + "loss": 0.6923096776008606, + "step": 1223 + }, + { + "epoch": 0.35794706828483697, + "grad_norm": 1.3108509564109556, + "learning_rate": 1.9106517022589993e-05, + "loss": 0.5205660462379456, + "step": 1224 + }, + { + "epoch": 0.35823950870010235, + "grad_norm": 1.604738205722927, + "learning_rate": 1.910451837888325e-05, + "loss": 0.7488006353378296, + "step": 1225 + }, + { + "epoch": 0.35853194911536773, + "grad_norm": 1.1847506052614252, + "learning_rate": 1.91025176070507e-05, + "loss": 0.5414390563964844, + "step": 1226 + }, + { + "epoch": 0.3588243895306331, + "grad_norm": 1.2745914596652235, + "learning_rate": 1.910051470756e-05, + "loss": 0.6891577839851379, + "step": 1227 + }, + { + "epoch": 0.3591168299458985, + "grad_norm": 1.3018823092824294, + "learning_rate": 1.9098509680879318e-05, + "loss": 0.6496376991271973, + "step": 1228 + }, + { + "epoch": 0.35940927036116394, + "grad_norm": 1.386313672695145, + "learning_rate": 1.909650252747732e-05, + "loss": 0.758609414100647, + "step": 1229 + }, + { + "epoch": 0.3597017107764293, + "grad_norm": 1.544442120518355, + "learning_rate": 1.9094493247823164e-05, + "loss": 0.7509145736694336, + "step": 1230 + }, + { + "epoch": 0.3599941511916947, + "grad_norm": 
1.2125512669659357, + "learning_rate": 1.9092481842386506e-05, + "loss": 0.7432405352592468, + "step": 1231 + }, + { + "epoch": 0.3602865916069601, + "grad_norm": 1.4492900887661606, + "learning_rate": 1.90904683116375e-05, + "loss": 0.7208698391914368, + "step": 1232 + }, + { + "epoch": 0.36057903202222547, + "grad_norm": 1.4245050002638069, + "learning_rate": 1.9088452656046798e-05, + "loss": 0.638593852519989, + "step": 1233 + }, + { + "epoch": 0.36087147243749085, + "grad_norm": 1.4428449097608804, + "learning_rate": 1.9086434876085548e-05, + "loss": 0.6663007736206055, + "step": 1234 + }, + { + "epoch": 0.36116391285275623, + "grad_norm": 1.4112526680406456, + "learning_rate": 1.908441497222539e-05, + "loss": 0.7132781744003296, + "step": 1235 + }, + { + "epoch": 0.3614563532680216, + "grad_norm": 1.3024532647304885, + "learning_rate": 1.9082392944938467e-05, + "loss": 0.6545308828353882, + "step": 1236 + }, + { + "epoch": 0.36174879368328705, + "grad_norm": 1.1385624733680002, + "learning_rate": 1.908036879469741e-05, + "loss": 0.7525626420974731, + "step": 1237 + }, + { + "epoch": 0.36204123409855243, + "grad_norm": 1.4286424106237192, + "learning_rate": 1.9078342521975365e-05, + "loss": 0.7336804866790771, + "step": 1238 + }, + { + "epoch": 0.3623336745138178, + "grad_norm": 1.3025101490885231, + "learning_rate": 1.907631412724595e-05, + "loss": 0.5822359323501587, + "step": 1239 + }, + { + "epoch": 0.3626261149290832, + "grad_norm": 1.1928464678887247, + "learning_rate": 1.907428361098329e-05, + "loss": 0.6110040545463562, + "step": 1240 + }, + { + "epoch": 0.3629185553443486, + "grad_norm": 1.3329812952112776, + "learning_rate": 1.9072250973662008e-05, + "loss": 0.5363205671310425, + "step": 1241 + }, + { + "epoch": 0.36321099575961396, + "grad_norm": 1.298737392722519, + "learning_rate": 1.9070216215757225e-05, + "loss": 0.6804911494255066, + "step": 1242 + }, + { + "epoch": 0.36350343617487935, + "grad_norm": 1.306574555012534, + "learning_rate": 
1.906817933774455e-05, + "loss": 0.5670056343078613, + "step": 1243 + }, + { + "epoch": 0.3637958765901448, + "grad_norm": 1.6342501045897717, + "learning_rate": 1.9066140340100086e-05, + "loss": 0.6839423775672913, + "step": 1244 + }, + { + "epoch": 0.36408831700541017, + "grad_norm": 1.3102468000864722, + "learning_rate": 1.906409922330044e-05, + "loss": 0.6512447595596313, + "step": 1245 + }, + { + "epoch": 0.36438075742067555, + "grad_norm": 1.3767881480650324, + "learning_rate": 1.9062055987822713e-05, + "loss": 0.6602088212966919, + "step": 1246 + }, + { + "epoch": 0.36467319783594093, + "grad_norm": 1.3684046563228518, + "learning_rate": 1.9060010634144502e-05, + "loss": 0.6859074831008911, + "step": 1247 + }, + { + "epoch": 0.3649656382512063, + "grad_norm": 1.4029132597681886, + "learning_rate": 1.9057963162743888e-05, + "loss": 0.6871531009674072, + "step": 1248 + }, + { + "epoch": 0.3652580786664717, + "grad_norm": 1.2778457575589584, + "learning_rate": 1.9055913574099454e-05, + "loss": 0.7396048307418823, + "step": 1249 + }, + { + "epoch": 0.3655505190817371, + "grad_norm": 2.313748947770577, + "learning_rate": 1.9053861868690283e-05, + "loss": 0.7013602256774902, + "step": 1250 + }, + { + "epoch": 0.36584295949700246, + "grad_norm": 1.2854553849472183, + "learning_rate": 1.905180804699595e-05, + "loss": 0.6355527639389038, + "step": 1251 + }, + { + "epoch": 0.3661353999122679, + "grad_norm": 1.1923686434429392, + "learning_rate": 1.9049752109496526e-05, + "loss": 0.6869304180145264, + "step": 1252 + }, + { + "epoch": 0.3664278403275333, + "grad_norm": 1.2404032301108463, + "learning_rate": 1.9047694056672566e-05, + "loss": 0.5267671346664429, + "step": 1253 + }, + { + "epoch": 0.36672028074279867, + "grad_norm": 1.2479293372256655, + "learning_rate": 1.9045633889005134e-05, + "loss": 0.6586635112762451, + "step": 1254 + }, + { + "epoch": 0.36701272115806405, + "grad_norm": 1.2783901733768512, + "learning_rate": 1.9043571606975776e-05, + "loss": 
0.6743361949920654, + "step": 1255 + }, + { + "epoch": 0.36730516157332943, + "grad_norm": 1.267912865737822, + "learning_rate": 1.9041507211066543e-05, + "loss": 0.5779668688774109, + "step": 1256 + }, + { + "epoch": 0.3675976019885948, + "grad_norm": 1.240910914837657, + "learning_rate": 1.9039440701759972e-05, + "loss": 0.693313479423523, + "step": 1257 + }, + { + "epoch": 0.3678900424038602, + "grad_norm": 1.2581810913293596, + "learning_rate": 1.9037372079539096e-05, + "loss": 0.6314960718154907, + "step": 1258 + }, + { + "epoch": 0.3681824828191256, + "grad_norm": 1.4026915606466803, + "learning_rate": 1.9035301344887445e-05, + "loss": 0.6483266949653625, + "step": 1259 + }, + { + "epoch": 0.368474923234391, + "grad_norm": 1.1963714897771014, + "learning_rate": 1.903322849828904e-05, + "loss": 0.5896739959716797, + "step": 1260 + }, + { + "epoch": 0.3687673636496564, + "grad_norm": 1.3246139419549132, + "learning_rate": 1.9031153540228398e-05, + "loss": 0.6760983467102051, + "step": 1261 + }, + { + "epoch": 0.3690598040649218, + "grad_norm": 1.409129098147532, + "learning_rate": 1.9029076471190525e-05, + "loss": 0.7453440427780151, + "step": 1262 + }, + { + "epoch": 0.36935224448018716, + "grad_norm": 1.4768395375517958, + "learning_rate": 1.9026997291660926e-05, + "loss": 0.7382408380508423, + "step": 1263 + }, + { + "epoch": 0.36964468489545255, + "grad_norm": 1.3416426687197567, + "learning_rate": 1.9024916002125594e-05, + "loss": 0.6420471668243408, + "step": 1264 + }, + { + "epoch": 0.36993712531071793, + "grad_norm": 1.406350116015231, + "learning_rate": 1.9022832603071017e-05, + "loss": 0.6436389684677124, + "step": 1265 + }, + { + "epoch": 0.3702295657259833, + "grad_norm": 1.3047843220477244, + "learning_rate": 1.9020747094984182e-05, + "loss": 0.689171314239502, + "step": 1266 + }, + { + "epoch": 0.3705220061412487, + "grad_norm": 1.2640328794263636, + "learning_rate": 1.9018659478352556e-05, + "loss": 0.6704196333885193, + "step": 1267 + }, + { + 
"epoch": 0.37081444655651413, + "grad_norm": 2.0690106215423536, + "learning_rate": 1.9016569753664118e-05, + "loss": 0.6598329544067383, + "step": 1268 + }, + { + "epoch": 0.3711068869717795, + "grad_norm": 1.8262603065561684, + "learning_rate": 1.901447792140732e-05, + "loss": 0.7353986501693726, + "step": 1269 + }, + { + "epoch": 0.3713993273870449, + "grad_norm": 1.4285098808767827, + "learning_rate": 1.9012383982071112e-05, + "loss": 0.666167140007019, + "step": 1270 + }, + { + "epoch": 0.3716917678023103, + "grad_norm": 1.2598465904930443, + "learning_rate": 1.9010287936144948e-05, + "loss": 0.6097015738487244, + "step": 1271 + }, + { + "epoch": 0.37198420821757566, + "grad_norm": 1.107025542737965, + "learning_rate": 1.9008189784118764e-05, + "loss": 0.6352437138557434, + "step": 1272 + }, + { + "epoch": 0.37227664863284105, + "grad_norm": 1.5662430122293758, + "learning_rate": 1.9006089526482982e-05, + "loss": 0.6686104536056519, + "step": 1273 + }, + { + "epoch": 0.3725690890481064, + "grad_norm": 1.1719719158143125, + "learning_rate": 1.9003987163728535e-05, + "loss": 0.6504377126693726, + "step": 1274 + }, + { + "epoch": 0.3728615294633718, + "grad_norm": 1.2550627286183815, + "learning_rate": 1.9001882696346835e-05, + "loss": 0.5834585428237915, + "step": 1275 + }, + { + "epoch": 0.37315396987863725, + "grad_norm": 1.5420452194055032, + "learning_rate": 1.8999776124829788e-05, + "loss": 0.665432870388031, + "step": 1276 + }, + { + "epoch": 0.37344641029390263, + "grad_norm": 1.4022956370096276, + "learning_rate": 1.899766744966979e-05, + "loss": 0.659697949886322, + "step": 1277 + }, + { + "epoch": 0.373738850709168, + "grad_norm": 1.194404836566078, + "learning_rate": 1.899555667135973e-05, + "loss": 0.5703476071357727, + "step": 1278 + }, + { + "epoch": 0.3740312911244334, + "grad_norm": 1.5482464090140011, + "learning_rate": 1.8993443790392994e-05, + "loss": 0.809308648109436, + "step": 1279 + }, + { + "epoch": 0.3743237315396988, + "grad_norm": 
1.293354946450912, + "learning_rate": 1.8991328807263455e-05, + "loss": 0.7120508551597595, + "step": 1280 + }, + { + "epoch": 0.37461617195496416, + "grad_norm": 1.2261555314771986, + "learning_rate": 1.898921172246547e-05, + "loss": 0.625985860824585, + "step": 1281 + }, + { + "epoch": 0.37490861237022954, + "grad_norm": 1.3045001966325798, + "learning_rate": 1.898709253649391e-05, + "loss": 0.637261152267456, + "step": 1282 + }, + { + "epoch": 0.375201052785495, + "grad_norm": 1.3550273094265433, + "learning_rate": 1.89849712498441e-05, + "loss": 0.7420133352279663, + "step": 1283 + }, + { + "epoch": 0.37549349320076036, + "grad_norm": 1.2854448504545577, + "learning_rate": 1.8982847863011898e-05, + "loss": 0.6230417490005493, + "step": 1284 + }, + { + "epoch": 0.37578593361602575, + "grad_norm": 1.2127007776565961, + "learning_rate": 1.8980722376493622e-05, + "loss": 0.6896604299545288, + "step": 1285 + }, + { + "epoch": 0.37607837403129113, + "grad_norm": 1.3900367736992565, + "learning_rate": 1.8978594790786092e-05, + "loss": 0.5767710208892822, + "step": 1286 + }, + { + "epoch": 0.3763708144465565, + "grad_norm": 1.3829044432724817, + "learning_rate": 1.8976465106386625e-05, + "loss": 0.6945392489433289, + "step": 1287 + }, + { + "epoch": 0.3766632548618219, + "grad_norm": 1.215943914903153, + "learning_rate": 1.8974333323793014e-05, + "loss": 0.7208314538002014, + "step": 1288 + }, + { + "epoch": 0.3769556952770873, + "grad_norm": 1.5769052361743978, + "learning_rate": 1.8972199443503556e-05, + "loss": 0.7201139330863953, + "step": 1289 + }, + { + "epoch": 0.37724813569235266, + "grad_norm": 1.3366567930451483, + "learning_rate": 1.8970063466017028e-05, + "loss": 0.6791107654571533, + "step": 1290 + }, + { + "epoch": 0.3775405761076181, + "grad_norm": 1.2566261030582595, + "learning_rate": 1.89679253918327e-05, + "loss": 0.6535364389419556, + "step": 1291 + }, + { + "epoch": 0.3778330165228835, + "grad_norm": 1.3498011568256927, + "learning_rate": 
1.8965785221450343e-05, + "loss": 0.5910370349884033, + "step": 1292 + }, + { + "epoch": 0.37812545693814886, + "grad_norm": 1.273886266732073, + "learning_rate": 1.8963642955370203e-05, + "loss": 0.7025415897369385, + "step": 1293 + }, + { + "epoch": 0.37841789735341425, + "grad_norm": 1.3743129752593892, + "learning_rate": 1.8961498594093018e-05, + "loss": 0.8007702827453613, + "step": 1294 + }, + { + "epoch": 0.37871033776867963, + "grad_norm": 1.3170193178053329, + "learning_rate": 1.895935213812003e-05, + "loss": 0.6947172284126282, + "step": 1295 + }, + { + "epoch": 0.379002778183945, + "grad_norm": 1.1178211676030798, + "learning_rate": 1.895720358795295e-05, + "loss": 0.7024818658828735, + "step": 1296 + }, + { + "epoch": 0.3792952185992104, + "grad_norm": 1.3447819598276562, + "learning_rate": 1.895505294409399e-05, + "loss": 0.8202607035636902, + "step": 1297 + }, + { + "epoch": 0.3795876590144758, + "grad_norm": 1.3114070841017331, + "learning_rate": 1.8952900207045853e-05, + "loss": 0.8001795411109924, + "step": 1298 + }, + { + "epoch": 0.3798800994297412, + "grad_norm": 1.4499936638579116, + "learning_rate": 1.895074537731173e-05, + "loss": 0.8068668842315674, + "step": 1299 + }, + { + "epoch": 0.3801725398450066, + "grad_norm": 1.5847094151692727, + "learning_rate": 1.8948588455395294e-05, + "loss": 0.7685220241546631, + "step": 1300 + }, + { + "epoch": 0.380464980260272, + "grad_norm": 1.5870604224504243, + "learning_rate": 1.8946429441800715e-05, + "loss": 0.695665717124939, + "step": 1301 + }, + { + "epoch": 0.38075742067553736, + "grad_norm": 1.2451750201018865, + "learning_rate": 1.894426833703265e-05, + "loss": 0.6073132753372192, + "step": 1302 + }, + { + "epoch": 0.38104986109080274, + "grad_norm": 1.3373381283666117, + "learning_rate": 1.894210514159624e-05, + "loss": 0.6334577798843384, + "step": 1303 + }, + { + "epoch": 0.3813423015060681, + "grad_norm": 1.519675902061051, + "learning_rate": 1.8939939855997125e-05, + "loss": 
0.6448806524276733, + "step": 1304 + }, + { + "epoch": 0.3816347419213335, + "grad_norm": 1.438744535892561, + "learning_rate": 1.8937772480741427e-05, + "loss": 0.7587993144989014, + "step": 1305 + }, + { + "epoch": 0.3819271823365989, + "grad_norm": 1.3270277700231368, + "learning_rate": 1.8935603016335752e-05, + "loss": 0.6924787759780884, + "step": 1306 + }, + { + "epoch": 0.38221962275186433, + "grad_norm": 1.4268553989545638, + "learning_rate": 1.8933431463287197e-05, + "loss": 0.678055465221405, + "step": 1307 + }, + { + "epoch": 0.3825120631671297, + "grad_norm": 1.5391207675187488, + "learning_rate": 1.8931257822103357e-05, + "loss": 0.7519007325172424, + "step": 1308 + }, + { + "epoch": 0.3828045035823951, + "grad_norm": 1.3654471111870499, + "learning_rate": 1.8929082093292306e-05, + "loss": 0.6905468702316284, + "step": 1309 + }, + { + "epoch": 0.3830969439976605, + "grad_norm": 1.2938870443591295, + "learning_rate": 1.8926904277362603e-05, + "loss": 0.6718122363090515, + "step": 1310 + }, + { + "epoch": 0.38338938441292586, + "grad_norm": 1.2471147738993698, + "learning_rate": 1.89247243748233e-05, + "loss": 0.6903961896896362, + "step": 1311 + }, + { + "epoch": 0.38368182482819124, + "grad_norm": 1.2542516264352948, + "learning_rate": 1.8922542386183942e-05, + "loss": 0.6947582960128784, + "step": 1312 + }, + { + "epoch": 0.3839742652434566, + "grad_norm": 1.188134072228004, + "learning_rate": 1.8920358311954548e-05, + "loss": 0.5850759148597717, + "step": 1313 + }, + { + "epoch": 0.38426670565872206, + "grad_norm": 1.3420186334522382, + "learning_rate": 1.891817215264564e-05, + "loss": 0.6512178778648376, + "step": 1314 + }, + { + "epoch": 0.38455914607398745, + "grad_norm": 1.217758250797112, + "learning_rate": 1.891598390876821e-05, + "loss": 0.5910850167274475, + "step": 1315 + }, + { + "epoch": 0.38485158648925283, + "grad_norm": 1.5593571397265127, + "learning_rate": 1.891379358083375e-05, + "loss": 0.7113536596298218, + "step": 1316 + }, + { + 
"epoch": 0.3851440269045182, + "grad_norm": 1.293224731928583, + "learning_rate": 1.891160116935424e-05, + "loss": 0.705318808555603, + "step": 1317 + }, + { + "epoch": 0.3854364673197836, + "grad_norm": 1.3559272013315313, + "learning_rate": 1.890940667484214e-05, + "loss": 0.7524716258049011, + "step": 1318 + }, + { + "epoch": 0.385728907735049, + "grad_norm": 1.4516012043532711, + "learning_rate": 1.89072100978104e-05, + "loss": 0.6130248308181763, + "step": 1319 + }, + { + "epoch": 0.38602134815031436, + "grad_norm": 1.4354959704098622, + "learning_rate": 1.8905011438772455e-05, + "loss": 0.6535071134567261, + "step": 1320 + }, + { + "epoch": 0.38631378856557974, + "grad_norm": 1.3663991139031981, + "learning_rate": 1.890281069824223e-05, + "loss": 0.7027082443237305, + "step": 1321 + }, + { + "epoch": 0.3866062289808452, + "grad_norm": 1.4293858623040305, + "learning_rate": 1.8900607876734133e-05, + "loss": 0.7055719494819641, + "step": 1322 + }, + { + "epoch": 0.38689866939611056, + "grad_norm": 1.3719150653410752, + "learning_rate": 1.8898402974763063e-05, + "loss": 0.7403384447097778, + "step": 1323 + }, + { + "epoch": 0.38719110981137594, + "grad_norm": 1.4816215708629428, + "learning_rate": 1.88961959928444e-05, + "loss": 0.6771470308303833, + "step": 1324 + }, + { + "epoch": 0.3874835502266413, + "grad_norm": 1.552809609148836, + "learning_rate": 1.8893986931494015e-05, + "loss": 0.7258767485618591, + "step": 1325 + }, + { + "epoch": 0.3877759906419067, + "grad_norm": 1.4168779145398758, + "learning_rate": 1.889177579122826e-05, + "loss": 0.7587069869041443, + "step": 1326 + }, + { + "epoch": 0.3880684310571721, + "grad_norm": 1.0432394702021985, + "learning_rate": 1.888956257256398e-05, + "loss": 0.5434668660163879, + "step": 1327 + }, + { + "epoch": 0.3883608714724375, + "grad_norm": 1.2927507112469059, + "learning_rate": 1.8887347276018496e-05, + "loss": 0.5311154127120972, + "step": 1328 + }, + { + "epoch": 0.38865331188770286, + "grad_norm": 
1.2533915775325788, + "learning_rate": 1.888512990210962e-05, + "loss": 0.5651747584342957, + "step": 1329 + }, + { + "epoch": 0.3889457523029683, + "grad_norm": 1.1863162008873491, + "learning_rate": 1.8882910451355654e-05, + "loss": 0.628046989440918, + "step": 1330 + }, + { + "epoch": 0.3892381927182337, + "grad_norm": 1.362511771688155, + "learning_rate": 1.888068892427538e-05, + "loss": 0.644639253616333, + "step": 1331 + }, + { + "epoch": 0.38953063313349906, + "grad_norm": 1.2081658901416763, + "learning_rate": 1.887846532138806e-05, + "loss": 0.6290382742881775, + "step": 1332 + }, + { + "epoch": 0.38982307354876444, + "grad_norm": 1.319310840364395, + "learning_rate": 1.8876239643213456e-05, + "loss": 0.6881425380706787, + "step": 1333 + }, + { + "epoch": 0.3901155139640298, + "grad_norm": 1.398816649776473, + "learning_rate": 1.8874011890271807e-05, + "loss": 0.645643949508667, + "step": 1334 + }, + { + "epoch": 0.3904079543792952, + "grad_norm": 1.3694554603281324, + "learning_rate": 1.887178206308383e-05, + "loss": 0.6965867280960083, + "step": 1335 + }, + { + "epoch": 0.3907003947945606, + "grad_norm": 1.1683610353079796, + "learning_rate": 1.886955016217074e-05, + "loss": 0.7326550483703613, + "step": 1336 + }, + { + "epoch": 0.390992835209826, + "grad_norm": 1.1781300264884254, + "learning_rate": 1.886731618805422e-05, + "loss": 0.6570208668708801, + "step": 1337 + }, + { + "epoch": 0.3912852756250914, + "grad_norm": 1.3563073747469718, + "learning_rate": 1.886508014125646e-05, + "loss": 0.7391610145568848, + "step": 1338 + }, + { + "epoch": 0.3915777160403568, + "grad_norm": 1.2946978380119605, + "learning_rate": 1.8862842022300124e-05, + "loss": 0.651665985584259, + "step": 1339 + }, + { + "epoch": 0.3918701564556222, + "grad_norm": 1.383137909559831, + "learning_rate": 1.8860601831708346e-05, + "loss": 0.695915699005127, + "step": 1340 + }, + { + "epoch": 0.39216259687088756, + "grad_norm": 1.4373340263094165, + "learning_rate": 
1.885835957000476e-05, + "loss": 0.7209347486495972, + "step": 1341 + }, + { + "epoch": 0.39245503728615294, + "grad_norm": 1.3210736597005808, + "learning_rate": 1.885611523771349e-05, + "loss": 0.6083317995071411, + "step": 1342 + }, + { + "epoch": 0.3927474777014183, + "grad_norm": 1.1271235346878163, + "learning_rate": 1.8853868835359127e-05, + "loss": 0.5544713735580444, + "step": 1343 + }, + { + "epoch": 0.3930399181166837, + "grad_norm": 1.5309462253741093, + "learning_rate": 1.8851620363466756e-05, + "loss": 0.6333836317062378, + "step": 1344 + }, + { + "epoch": 0.3933323585319491, + "grad_norm": 1.2454908963117732, + "learning_rate": 1.8849369822561943e-05, + "loss": 0.554995059967041, + "step": 1345 + }, + { + "epoch": 0.3936247989472145, + "grad_norm": 1.1941759610212306, + "learning_rate": 1.884711721317074e-05, + "loss": 0.6696420907974243, + "step": 1346 + }, + { + "epoch": 0.3939172393624799, + "grad_norm": 1.3685512221597254, + "learning_rate": 1.8844862535819682e-05, + "loss": 0.6031695604324341, + "step": 1347 + }, + { + "epoch": 0.3942096797777453, + "grad_norm": 1.2956806020723108, + "learning_rate": 1.884260579103578e-05, + "loss": 0.686814546585083, + "step": 1348 + }, + { + "epoch": 0.3945021201930107, + "grad_norm": 1.3341047094387086, + "learning_rate": 1.884034697934654e-05, + "loss": 0.8113317489624023, + "step": 1349 + }, + { + "epoch": 0.39479456060827606, + "grad_norm": 1.2860694131453334, + "learning_rate": 1.8838086101279946e-05, + "loss": 0.5645952820777893, + "step": 1350 + }, + { + "epoch": 0.39508700102354144, + "grad_norm": 1.2016385972759884, + "learning_rate": 1.883582315736446e-05, + "loss": 0.6431643962860107, + "step": 1351 + }, + { + "epoch": 0.3953794414388068, + "grad_norm": 1.5868415992731069, + "learning_rate": 1.8833558148129034e-05, + "loss": 0.7691985368728638, + "step": 1352 + }, + { + "epoch": 0.39567188185407226, + "grad_norm": 1.2630640231572245, + "learning_rate": 1.88312910741031e-05, + "loss": 
0.5951793789863586, + "step": 1353 + }, + { + "epoch": 0.39596432226933764, + "grad_norm": 1.4300765958854422, + "learning_rate": 1.8829021935816572e-05, + "loss": 0.671844482421875, + "step": 1354 + }, + { + "epoch": 0.396256762684603, + "grad_norm": 1.1270834377200167, + "learning_rate": 1.8826750733799845e-05, + "loss": 0.5290843844413757, + "step": 1355 + }, + { + "epoch": 0.3965492030998684, + "grad_norm": 1.5593842090138308, + "learning_rate": 1.8824477468583806e-05, + "loss": 0.6492103934288025, + "step": 1356 + }, + { + "epoch": 0.3968416435151338, + "grad_norm": 1.5289372619537642, + "learning_rate": 1.882220214069981e-05, + "loss": 0.6111055016517639, + "step": 1357 + }, + { + "epoch": 0.3971340839303992, + "grad_norm": 1.5921417426189186, + "learning_rate": 1.8819924750679702e-05, + "loss": 0.8123398423194885, + "step": 1358 + }, + { + "epoch": 0.39742652434566456, + "grad_norm": 1.1798948792158594, + "learning_rate": 1.8817645299055815e-05, + "loss": 0.6118077039718628, + "step": 1359 + }, + { + "epoch": 0.39771896476092994, + "grad_norm": 1.3941391170101545, + "learning_rate": 1.8815363786360948e-05, + "loss": 0.6916248798370361, + "step": 1360 + }, + { + "epoch": 0.3980114051761954, + "grad_norm": 1.2139550861968382, + "learning_rate": 1.8813080213128394e-05, + "loss": 0.5586028099060059, + "step": 1361 + }, + { + "epoch": 0.39830384559146076, + "grad_norm": 1.4269154081314215, + "learning_rate": 1.8810794579891925e-05, + "loss": 0.6132841110229492, + "step": 1362 + }, + { + "epoch": 0.39859628600672614, + "grad_norm": 1.6891257084549798, + "learning_rate": 1.8808506887185793e-05, + "loss": 0.7329133749008179, + "step": 1363 + }, + { + "epoch": 0.3988887264219915, + "grad_norm": 1.2704606915664418, + "learning_rate": 1.8806217135544736e-05, + "loss": 0.5975138545036316, + "step": 1364 + }, + { + "epoch": 0.3991811668372569, + "grad_norm": 1.315662529163245, + "learning_rate": 1.8803925325503963e-05, + "loss": 0.5790295600891113, + "step": 1365 + }, + 
{ + "epoch": 0.3994736072525223, + "grad_norm": 1.2020939373145192, + "learning_rate": 1.8801631457599173e-05, + "loss": 0.6506124138832092, + "step": 1366 + }, + { + "epoch": 0.39976604766778767, + "grad_norm": 1.1981976421950613, + "learning_rate": 1.8799335532366547e-05, + "loss": 0.5577528476715088, + "step": 1367 + }, + { + "epoch": 0.40005848808305305, + "grad_norm": 1.4216027512167424, + "learning_rate": 1.879703755034274e-05, + "loss": 0.675471305847168, + "step": 1368 + }, + { + "epoch": 0.4003509284983185, + "grad_norm": 1.2857175045016282, + "learning_rate": 1.879473751206489e-05, + "loss": 0.5826357007026672, + "step": 1369 + }, + { + "epoch": 0.4006433689135839, + "grad_norm": 1.31090591162355, + "learning_rate": 1.8792435418070623e-05, + "loss": 0.5146772265434265, + "step": 1370 + }, + { + "epoch": 0.40093580932884926, + "grad_norm": 1.224194477069696, + "learning_rate": 1.879013126889804e-05, + "loss": 0.6049208641052246, + "step": 1371 + }, + { + "epoch": 0.40122824974411464, + "grad_norm": 1.4047657351006413, + "learning_rate": 1.878782506508571e-05, + "loss": 0.7058207392692566, + "step": 1372 + }, + { + "epoch": 0.40152069015938, + "grad_norm": 1.3782924104285919, + "learning_rate": 1.8785516807172704e-05, + "loss": 0.6281940937042236, + "step": 1373 + }, + { + "epoch": 0.4018131305746454, + "grad_norm": 1.5710053658084482, + "learning_rate": 1.878320649569856e-05, + "loss": 0.6318703889846802, + "step": 1374 + }, + { + "epoch": 0.4021055709899108, + "grad_norm": 1.369183615763356, + "learning_rate": 1.87808941312033e-05, + "loss": 0.6595311164855957, + "step": 1375 + }, + { + "epoch": 0.40239801140517617, + "grad_norm": 1.2850685362090555, + "learning_rate": 1.8778579714227433e-05, + "loss": 0.6964930295944214, + "step": 1376 + }, + { + "epoch": 0.4026904518204416, + "grad_norm": 1.338060741631637, + "learning_rate": 1.8776263245311926e-05, + "loss": 0.6093966364860535, + "step": 1377 + }, + { + "epoch": 0.402982892235707, + "grad_norm": 
1.2438362189425571, + "learning_rate": 1.8773944724998248e-05, + "loss": 0.5337893962860107, + "step": 1378 + }, + { + "epoch": 0.4032753326509724, + "grad_norm": 1.3665212711176857, + "learning_rate": 1.8771624153828336e-05, + "loss": 0.5899128317832947, + "step": 1379 + }, + { + "epoch": 0.40356777306623776, + "grad_norm": 1.41983053503157, + "learning_rate": 1.876930153234461e-05, + "loss": 0.7054699659347534, + "step": 1380 + }, + { + "epoch": 0.40386021348150314, + "grad_norm": 1.4240217447880834, + "learning_rate": 1.876697686108997e-05, + "loss": 0.6910602450370789, + "step": 1381 + }, + { + "epoch": 0.4041526538967685, + "grad_norm": 1.383183489617276, + "learning_rate": 1.876465014060779e-05, + "loss": 0.605659008026123, + "step": 1382 + }, + { + "epoch": 0.4044450943120339, + "grad_norm": 1.4798727853043008, + "learning_rate": 1.8762321371441934e-05, + "loss": 0.7159937620162964, + "step": 1383 + }, + { + "epoch": 0.4047375347272993, + "grad_norm": 1.394589338486404, + "learning_rate": 1.8759990554136733e-05, + "loss": 0.7568333148956299, + "step": 1384 + }, + { + "epoch": 0.4050299751425647, + "grad_norm": 1.613153659624872, + "learning_rate": 1.8757657689236998e-05, + "loss": 0.7117356657981873, + "step": 1385 + }, + { + "epoch": 0.4053224155578301, + "grad_norm": 1.4078248670298101, + "learning_rate": 1.8755322777288027e-05, + "loss": 0.6429109573364258, + "step": 1386 + }, + { + "epoch": 0.4056148559730955, + "grad_norm": 1.571804966063755, + "learning_rate": 1.875298581883559e-05, + "loss": 0.643811821937561, + "step": 1387 + }, + { + "epoch": 0.40590729638836087, + "grad_norm": 1.439883937827134, + "learning_rate": 1.875064681442594e-05, + "loss": 0.7143295407295227, + "step": 1388 + }, + { + "epoch": 0.40619973680362625, + "grad_norm": 1.5655377581350667, + "learning_rate": 1.8748305764605798e-05, + "loss": 0.732312023639679, + "step": 1389 + }, + { + "epoch": 0.40649217721889164, + "grad_norm": 1.2430462426936875, + "learning_rate": 
1.8745962669922375e-05, + "loss": 0.6878848075866699, + "step": 1390 + }, + { + "epoch": 0.406784617634157, + "grad_norm": 1.40828278888595, + "learning_rate": 1.8743617530923356e-05, + "loss": 0.6716262698173523, + "step": 1391 + }, + { + "epoch": 0.40707705804942246, + "grad_norm": 1.346254534859124, + "learning_rate": 1.87412703481569e-05, + "loss": 0.4990834593772888, + "step": 1392 + }, + { + "epoch": 0.40736949846468784, + "grad_norm": 1.4289189141042684, + "learning_rate": 1.8738921122171647e-05, + "loss": 0.6541857719421387, + "step": 1393 + }, + { + "epoch": 0.4076619388799532, + "grad_norm": 1.4587922420879296, + "learning_rate": 1.8736569853516715e-05, + "loss": 0.6310811042785645, + "step": 1394 + }, + { + "epoch": 0.4079543792952186, + "grad_norm": 1.5328348699666439, + "learning_rate": 1.8734216542741702e-05, + "loss": 0.6335423588752747, + "step": 1395 + }, + { + "epoch": 0.408246819710484, + "grad_norm": 1.4540701020587141, + "learning_rate": 1.873186119039667e-05, + "loss": 0.6315034627914429, + "step": 1396 + }, + { + "epoch": 0.40853926012574937, + "grad_norm": 1.2591166704430221, + "learning_rate": 1.872950379703218e-05, + "loss": 0.6822362542152405, + "step": 1397 + }, + { + "epoch": 0.40883170054101475, + "grad_norm": 1.3128671260601936, + "learning_rate": 1.8727144363199257e-05, + "loss": 0.699965238571167, + "step": 1398 + }, + { + "epoch": 0.40912414095628014, + "grad_norm": 1.376512615463435, + "learning_rate": 1.8724782889449397e-05, + "loss": 0.6769841313362122, + "step": 1399 + }, + { + "epoch": 0.4094165813715456, + "grad_norm": 1.242831468646962, + "learning_rate": 1.8722419376334584e-05, + "loss": 0.5219473838806152, + "step": 1400 + }, + { + "epoch": 0.40970902178681096, + "grad_norm": 1.1119386554431685, + "learning_rate": 1.872005382440728e-05, + "loss": 0.6091574430465698, + "step": 1401 + }, + { + "epoch": 0.41000146220207634, + "grad_norm": 1.3005524040148213, + "learning_rate": 1.8717686234220406e-05, + "loss": 
0.7589390277862549, + "step": 1402 + }, + { + "epoch": 0.4102939026173417, + "grad_norm": 1.2931582987016021, + "learning_rate": 1.8715316606327384e-05, + "loss": 0.7042895555496216, + "step": 1403 + }, + { + "epoch": 0.4105863430326071, + "grad_norm": 1.2509877991876854, + "learning_rate": 1.8712944941282095e-05, + "loss": 0.6490949988365173, + "step": 1404 + }, + { + "epoch": 0.4108787834478725, + "grad_norm": 1.3726951776657805, + "learning_rate": 1.87105712396389e-05, + "loss": 0.6614132523536682, + "step": 1405 + }, + { + "epoch": 0.41117122386313787, + "grad_norm": 1.3416970895813871, + "learning_rate": 1.8708195501952637e-05, + "loss": 0.666157603263855, + "step": 1406 + }, + { + "epoch": 0.41146366427840325, + "grad_norm": 1.5455429688837699, + "learning_rate": 1.8705817728778626e-05, + "loss": 0.7347884178161621, + "step": 1407 + }, + { + "epoch": 0.4117561046936687, + "grad_norm": 1.6323767587093516, + "learning_rate": 1.8703437920672652e-05, + "loss": 0.8129836320877075, + "step": 1408 + }, + { + "epoch": 0.4120485451089341, + "grad_norm": 1.2690047775005027, + "learning_rate": 1.870105607819098e-05, + "loss": 0.645210862159729, + "step": 1409 + }, + { + "epoch": 0.41234098552419945, + "grad_norm": 1.367879279910813, + "learning_rate": 1.8698672201890355e-05, + "loss": 0.6716916561126709, + "step": 1410 + }, + { + "epoch": 0.41263342593946484, + "grad_norm": 1.66933384894401, + "learning_rate": 1.869628629232799e-05, + "loss": 0.8190855383872986, + "step": 1411 + }, + { + "epoch": 0.4129258663547302, + "grad_norm": 1.3900061091611966, + "learning_rate": 1.8693898350061582e-05, + "loss": 0.7618075609207153, + "step": 1412 + }, + { + "epoch": 0.4132183067699956, + "grad_norm": 1.3486290329442485, + "learning_rate": 1.869150837564929e-05, + "loss": 0.719980001449585, + "step": 1413 + }, + { + "epoch": 0.413510747185261, + "grad_norm": 1.2278158990840933, + "learning_rate": 1.8689116369649763e-05, + "loss": 0.6601548194885254, + "step": 1414 + }, + { + 
"epoch": 0.41380318760052637, + "grad_norm": 1.4157736896401232, + "learning_rate": 1.8686722332622112e-05, + "loss": 0.5991787314414978, + "step": 1415 + }, + { + "epoch": 0.4140956280157918, + "grad_norm": 1.2916299361998576, + "learning_rate": 1.8684326265125935e-05, + "loss": 0.6089641451835632, + "step": 1416 + }, + { + "epoch": 0.4143880684310572, + "grad_norm": 1.4857622706167455, + "learning_rate": 1.8681928167721297e-05, + "loss": 0.8143327236175537, + "step": 1417 + }, + { + "epoch": 0.41468050884632257, + "grad_norm": 1.1137129272750816, + "learning_rate": 1.8679528040968733e-05, + "loss": 0.6127045154571533, + "step": 1418 + }, + { + "epoch": 0.41497294926158795, + "grad_norm": 1.2684856043432204, + "learning_rate": 1.8677125885429262e-05, + "loss": 0.659069299697876, + "step": 1419 + }, + { + "epoch": 0.41526538967685334, + "grad_norm": 1.3122733176612695, + "learning_rate": 1.8674721701664377e-05, + "loss": 0.7277505397796631, + "step": 1420 + }, + { + "epoch": 0.4155578300921187, + "grad_norm": 1.2107555045955465, + "learning_rate": 1.8672315490236034e-05, + "loss": 0.6128710508346558, + "step": 1421 + }, + { + "epoch": 0.4158502705073841, + "grad_norm": 1.7155790773588848, + "learning_rate": 1.866990725170667e-05, + "loss": 0.7439340949058533, + "step": 1422 + }, + { + "epoch": 0.4161427109226495, + "grad_norm": 1.1423179387443951, + "learning_rate": 1.8667496986639206e-05, + "loss": 0.5855459570884705, + "step": 1423 + }, + { + "epoch": 0.4164351513379149, + "grad_norm": 1.3335637335552337, + "learning_rate": 1.866508469559702e-05, + "loss": 0.6865170001983643, + "step": 1424 + }, + { + "epoch": 0.4167275917531803, + "grad_norm": 1.3191415692644766, + "learning_rate": 1.866267037914397e-05, + "loss": 0.6648446917533875, + "step": 1425 + }, + { + "epoch": 0.4170200321684457, + "grad_norm": 1.5198580088053322, + "learning_rate": 1.866025403784439e-05, + "loss": 0.6919275522232056, + "step": 1426 + }, + { + "epoch": 0.41731247258371107, + "grad_norm": 
1.3309988770277923, + "learning_rate": 1.865783567226308e-05, + "loss": 0.7270313501358032, + "step": 1427 + }, + { + "epoch": 0.41760491299897645, + "grad_norm": 1.2814450794742573, + "learning_rate": 1.8655415282965327e-05, + "loss": 0.5938387513160706, + "step": 1428 + }, + { + "epoch": 0.41789735341424183, + "grad_norm": 1.378984312222445, + "learning_rate": 1.8652992870516872e-05, + "loss": 0.6517149209976196, + "step": 1429 + }, + { + "epoch": 0.4181897938295072, + "grad_norm": 1.1862439123900306, + "learning_rate": 1.8650568435483948e-05, + "loss": 0.6688356399536133, + "step": 1430 + }, + { + "epoch": 0.41848223424477266, + "grad_norm": 1.153419964025717, + "learning_rate": 1.864814197843325e-05, + "loss": 0.5300855040550232, + "step": 1431 + }, + { + "epoch": 0.41877467466003804, + "grad_norm": 1.270766161064103, + "learning_rate": 1.8645713499931943e-05, + "loss": 0.6404704451560974, + "step": 1432 + }, + { + "epoch": 0.4190671150753034, + "grad_norm": 1.5430855845367462, + "learning_rate": 1.8643283000547673e-05, + "loss": 0.6758813858032227, + "step": 1433 + }, + { + "epoch": 0.4193595554905688, + "grad_norm": 1.3783205387944717, + "learning_rate": 1.8640850480848552e-05, + "loss": 0.6328250169754028, + "step": 1434 + }, + { + "epoch": 0.4196519959058342, + "grad_norm": 1.26459826517306, + "learning_rate": 1.863841594140317e-05, + "loss": 0.6747157573699951, + "step": 1435 + }, + { + "epoch": 0.41994443632109957, + "grad_norm": 1.3504769695047412, + "learning_rate": 1.8635979382780584e-05, + "loss": 0.5314475893974304, + "step": 1436 + }, + { + "epoch": 0.42023687673636495, + "grad_norm": 1.345420561697831, + "learning_rate": 1.863354080555033e-05, + "loss": 0.478320837020874, + "step": 1437 + }, + { + "epoch": 0.42052931715163033, + "grad_norm": 1.7039787917499718, + "learning_rate": 1.86311002102824e-05, + "loss": 0.7389972805976868, + "step": 1438 + }, + { + "epoch": 0.42082175756689577, + "grad_norm": 1.5752835852867741, + "learning_rate": 
1.8628657597547273e-05, + "loss": 0.5449938178062439, + "step": 1439 + }, + { + "epoch": 0.42111419798216115, + "grad_norm": 1.417913338852298, + "learning_rate": 1.8626212967915897e-05, + "loss": 0.6752811670303345, + "step": 1440 + }, + { + "epoch": 0.42140663839742654, + "grad_norm": 1.3268814915367182, + "learning_rate": 1.862376632195969e-05, + "loss": 0.7750412821769714, + "step": 1441 + }, + { + "epoch": 0.4216990788126919, + "grad_norm": 1.4150998717703018, + "learning_rate": 1.8621317660250535e-05, + "loss": 0.5967680215835571, + "step": 1442 + }, + { + "epoch": 0.4219915192279573, + "grad_norm": 1.2836272802739963, + "learning_rate": 1.86188669833608e-05, + "loss": 0.6781327724456787, + "step": 1443 + }, + { + "epoch": 0.4222839596432227, + "grad_norm": 1.421988457915262, + "learning_rate": 1.8616414291863307e-05, + "loss": 0.7539681196212769, + "step": 1444 + }, + { + "epoch": 0.42257640005848807, + "grad_norm": 1.5265432564271315, + "learning_rate": 1.8613959586331364e-05, + "loss": 0.6976957321166992, + "step": 1445 + }, + { + "epoch": 0.42286884047375345, + "grad_norm": 1.3365892238255053, + "learning_rate": 1.861150286733874e-05, + "loss": 0.6616528034210205, + "step": 1446 + }, + { + "epoch": 0.4231612808890189, + "grad_norm": 1.4482994306877846, + "learning_rate": 1.860904413545968e-05, + "loss": 0.6407957077026367, + "step": 1447 + }, + { + "epoch": 0.42345372130428427, + "grad_norm": 1.4193133822561126, + "learning_rate": 1.86065833912689e-05, + "loss": 0.5918550491333008, + "step": 1448 + }, + { + "epoch": 0.42374616171954965, + "grad_norm": 1.421765780188314, + "learning_rate": 1.8604120635341574e-05, + "loss": 0.6142056584358215, + "step": 1449 + }, + { + "epoch": 0.42403860213481503, + "grad_norm": 1.4371201128611453, + "learning_rate": 1.8601655868253368e-05, + "loss": 0.6359597444534302, + "step": 1450 + }, + { + "epoch": 0.4243310425500804, + "grad_norm": 1.2914617625794835, + "learning_rate": 1.8599189090580402e-05, + "loss": 
0.7149467468261719, + "step": 1451 + }, + { + "epoch": 0.4246234829653458, + "grad_norm": 1.2900964447275098, + "learning_rate": 1.8596720302899272e-05, + "loss": 0.6015822887420654, + "step": 1452 + }, + { + "epoch": 0.4249159233806112, + "grad_norm": 1.1866564154864978, + "learning_rate": 1.8594249505787035e-05, + "loss": 0.6389881372451782, + "step": 1453 + }, + { + "epoch": 0.42520836379587656, + "grad_norm": 1.381321058965008, + "learning_rate": 1.8591776699821235e-05, + "loss": 0.7479783892631531, + "step": 1454 + }, + { + "epoch": 0.425500804211142, + "grad_norm": 1.2271977568055246, + "learning_rate": 1.8589301885579866e-05, + "loss": 0.6574498414993286, + "step": 1455 + }, + { + "epoch": 0.4257932446264074, + "grad_norm": 1.3187836865578064, + "learning_rate": 1.858682506364141e-05, + "loss": 0.6314088702201843, + "step": 1456 + }, + { + "epoch": 0.42608568504167277, + "grad_norm": 1.4747450600155867, + "learning_rate": 1.85843462345848e-05, + "loss": 0.605385959148407, + "step": 1457 + }, + { + "epoch": 0.42637812545693815, + "grad_norm": 1.280849948973879, + "learning_rate": 1.8581865398989452e-05, + "loss": 0.6355551481246948, + "step": 1458 + }, + { + "epoch": 0.42667056587220353, + "grad_norm": 1.3012840164028812, + "learning_rate": 1.8579382557435247e-05, + "loss": 0.6303017139434814, + "step": 1459 + }, + { + "epoch": 0.4269630062874689, + "grad_norm": 1.2629380280411955, + "learning_rate": 1.8576897710502532e-05, + "loss": 0.5916526317596436, + "step": 1460 + }, + { + "epoch": 0.4272554467027343, + "grad_norm": 1.2467440963341316, + "learning_rate": 1.8574410858772126e-05, + "loss": 0.5709279179573059, + "step": 1461 + }, + { + "epoch": 0.4275478871179997, + "grad_norm": 1.2909430743502928, + "learning_rate": 1.8571922002825317e-05, + "loss": 0.571231484413147, + "step": 1462 + }, + { + "epoch": 0.4278403275332651, + "grad_norm": 1.310017395907512, + "learning_rate": 1.8569431143243856e-05, + "loss": 0.6352202892303467, + "step": 1463 + }, + { + 
"epoch": 0.4281327679485305, + "grad_norm": 1.316165374470179, + "learning_rate": 1.8566938280609965e-05, + "loss": 0.553265392780304, + "step": 1464 + }, + { + "epoch": 0.4284252083637959, + "grad_norm": 1.1127868543655046, + "learning_rate": 1.8564443415506343e-05, + "loss": 0.4913727045059204, + "step": 1465 + }, + { + "epoch": 0.42871764877906127, + "grad_norm": 1.4457215110099157, + "learning_rate": 1.8561946548516143e-05, + "loss": 0.542539119720459, + "step": 1466 + }, + { + "epoch": 0.42901008919432665, + "grad_norm": 1.5261496853017646, + "learning_rate": 1.8559447680222994e-05, + "loss": 0.719292163848877, + "step": 1467 + }, + { + "epoch": 0.42930252960959203, + "grad_norm": 1.4842625427656275, + "learning_rate": 1.8556946811210993e-05, + "loss": 0.8443170785903931, + "step": 1468 + }, + { + "epoch": 0.4295949700248574, + "grad_norm": 1.4024545882927506, + "learning_rate": 1.8554443942064705e-05, + "loss": 0.7899821996688843, + "step": 1469 + }, + { + "epoch": 0.42988741044012285, + "grad_norm": 1.3637198474337424, + "learning_rate": 1.8551939073369155e-05, + "loss": 0.617426872253418, + "step": 1470 + }, + { + "epoch": 0.43017985085538823, + "grad_norm": 1.284473833943433, + "learning_rate": 1.8549432205709842e-05, + "loss": 0.5573505163192749, + "step": 1471 + }, + { + "epoch": 0.4304722912706536, + "grad_norm": 1.2050796372555104, + "learning_rate": 1.8546923339672734e-05, + "loss": 0.5571975111961365, + "step": 1472 + }, + { + "epoch": 0.430764731685919, + "grad_norm": 1.2452948917501594, + "learning_rate": 1.854441247584426e-05, + "loss": 0.6411981582641602, + "step": 1473 + }, + { + "epoch": 0.4310571721011844, + "grad_norm": 1.4342124934143161, + "learning_rate": 1.8541899614811323e-05, + "loss": 0.4766804277896881, + "step": 1474 + }, + { + "epoch": 0.43134961251644977, + "grad_norm": 1.5114551227786939, + "learning_rate": 1.8539384757161285e-05, + "loss": 0.7479405403137207, + "step": 1475 + }, + { + "epoch": 0.43164205293171515, + "grad_norm": 
1.3476436799817348, + "learning_rate": 1.8536867903481983e-05, + "loss": 0.6848211288452148, + "step": 1476 + }, + { + "epoch": 0.43193449334698053, + "grad_norm": 1.2973665530504777, + "learning_rate": 1.8534349054361708e-05, + "loss": 0.7413634061813354, + "step": 1477 + }, + { + "epoch": 0.43222693376224597, + "grad_norm": 1.1870657052305638, + "learning_rate": 1.8531828210389236e-05, + "loss": 0.5880843997001648, + "step": 1478 + }, + { + "epoch": 0.43251937417751135, + "grad_norm": 1.16075786792099, + "learning_rate": 1.852930537215379e-05, + "loss": 0.5885627269744873, + "step": 1479 + }, + { + "epoch": 0.43281181459277673, + "grad_norm": 1.3270242768891243, + "learning_rate": 1.8526780540245077e-05, + "loss": 0.706636905670166, + "step": 1480 + }, + { + "epoch": 0.4331042550080421, + "grad_norm": 1.3793959384028218, + "learning_rate": 1.8524253715253255e-05, + "loss": 0.6521843075752258, + "step": 1481 + }, + { + "epoch": 0.4333966954233075, + "grad_norm": 1.3825746336646279, + "learning_rate": 1.8521724897768955e-05, + "loss": 0.6231021881103516, + "step": 1482 + }, + { + "epoch": 0.4336891358385729, + "grad_norm": 1.4460679872410762, + "learning_rate": 1.851919408838327e-05, + "loss": 0.6859451532363892, + "step": 1483 + }, + { + "epoch": 0.43398157625383826, + "grad_norm": 1.2507527028404273, + "learning_rate": 1.851666128768777e-05, + "loss": 0.7948323488235474, + "step": 1484 + }, + { + "epoch": 0.43427401666910365, + "grad_norm": 1.3631419376990976, + "learning_rate": 1.8514126496274473e-05, + "loss": 0.7815203070640564, + "step": 1485 + }, + { + "epoch": 0.4345664570843691, + "grad_norm": 1.2904619284943133, + "learning_rate": 1.8511589714735875e-05, + "loss": 0.6941452622413635, + "step": 1486 + }, + { + "epoch": 0.43485889749963447, + "grad_norm": 1.41567858231915, + "learning_rate": 1.850905094366493e-05, + "loss": 0.5500549674034119, + "step": 1487 + }, + { + "epoch": 0.43515133791489985, + "grad_norm": 1.2918667262960315, + "learning_rate": 
1.8506510183655066e-05, + "loss": 0.6616400480270386, + "step": 1488 + }, + { + "epoch": 0.43544377833016523, + "grad_norm": 1.2491627898498192, + "learning_rate": 1.8503967435300166e-05, + "loss": 0.6920043230056763, + "step": 1489 + }, + { + "epoch": 0.4357362187454306, + "grad_norm": 1.215912086863742, + "learning_rate": 1.8501422699194584e-05, + "loss": 0.6080813407897949, + "step": 1490 + }, + { + "epoch": 0.436028659160696, + "grad_norm": 1.2215283867587456, + "learning_rate": 1.8498875975933135e-05, + "loss": 0.576184868812561, + "step": 1491 + }, + { + "epoch": 0.4363210995759614, + "grad_norm": 1.3544983329172053, + "learning_rate": 1.84963272661111e-05, + "loss": 0.6647310256958008, + "step": 1492 + }, + { + "epoch": 0.43661353999122676, + "grad_norm": 1.5126248587795905, + "learning_rate": 1.8493776570324224e-05, + "loss": 0.6738306283950806, + "step": 1493 + }, + { + "epoch": 0.4369059804064922, + "grad_norm": 1.306695091605799, + "learning_rate": 1.849122388916872e-05, + "loss": 0.681056022644043, + "step": 1494 + }, + { + "epoch": 0.4371984208217576, + "grad_norm": 1.2802492616875505, + "learning_rate": 1.848866922324126e-05, + "loss": 0.7844547033309937, + "step": 1495 + }, + { + "epoch": 0.43749086123702297, + "grad_norm": 1.278338668380481, + "learning_rate": 1.8486112573138977e-05, + "loss": 0.6478928327560425, + "step": 1496 + }, + { + "epoch": 0.43778330165228835, + "grad_norm": 1.1565510309984284, + "learning_rate": 1.8483553939459477e-05, + "loss": 0.6035341024398804, + "step": 1497 + }, + { + "epoch": 0.43807574206755373, + "grad_norm": 1.5407821231530743, + "learning_rate": 1.8480993322800826e-05, + "loss": 0.6664912700653076, + "step": 1498 + }, + { + "epoch": 0.4383681824828191, + "grad_norm": 1.2757017491830842, + "learning_rate": 1.847843072376155e-05, + "loss": 0.7171953916549683, + "step": 1499 + }, + { + "epoch": 0.4386606228980845, + "grad_norm": 1.6930649567828897, + "learning_rate": 1.8475866142940646e-05, + "loss": 
0.8400344848632812, + "step": 1500 + }, + { + "epoch": 0.4389530633133499, + "grad_norm": 1.4411024776302432, + "learning_rate": 1.8473299580937563e-05, + "loss": 0.5119056701660156, + "step": 1501 + }, + { + "epoch": 0.4392455037286153, + "grad_norm": 1.2781692932924433, + "learning_rate": 1.847073103835222e-05, + "loss": 0.5864866375923157, + "step": 1502 + }, + { + "epoch": 0.4395379441438807, + "grad_norm": 1.1391351003013295, + "learning_rate": 1.8468160515785e-05, + "loss": 0.6389576196670532, + "step": 1503 + }, + { + "epoch": 0.4398303845591461, + "grad_norm": 1.3447539998849671, + "learning_rate": 1.846558801383675e-05, + "loss": 0.6745110750198364, + "step": 1504 + }, + { + "epoch": 0.44012282497441146, + "grad_norm": 1.4359844129069297, + "learning_rate": 1.846301353310877e-05, + "loss": 0.6207559704780579, + "step": 1505 + }, + { + "epoch": 0.44041526538967685, + "grad_norm": 1.4143769366285628, + "learning_rate": 1.8460437074202832e-05, + "loss": 0.6818139553070068, + "step": 1506 + }, + { + "epoch": 0.44070770580494223, + "grad_norm": 1.4877202307925406, + "learning_rate": 1.845785863772117e-05, + "loss": 0.652062714099884, + "step": 1507 + }, + { + "epoch": 0.4410001462202076, + "grad_norm": 1.340284980688535, + "learning_rate": 1.8455278224266476e-05, + "loss": 0.6842166185379028, + "step": 1508 + }, + { + "epoch": 0.44129258663547305, + "grad_norm": 1.3899905625699573, + "learning_rate": 1.8452695834441904e-05, + "loss": 0.6459342837333679, + "step": 1509 + }, + { + "epoch": 0.44158502705073843, + "grad_norm": 1.3677235686172902, + "learning_rate": 1.8450111468851078e-05, + "loss": 0.6036739349365234, + "step": 1510 + }, + { + "epoch": 0.4418774674660038, + "grad_norm": 1.401326082704981, + "learning_rate": 1.844752512809807e-05, + "loss": 0.7530199289321899, + "step": 1511 + }, + { + "epoch": 0.4421699078812692, + "grad_norm": 1.249585374389202, + "learning_rate": 1.8444936812787428e-05, + "loss": 0.6098290085792542, + "step": 1512 + }, + { + 
"epoch": 0.4424623482965346, + "grad_norm": 1.6252323705163014, + "learning_rate": 1.844234652352415e-05, + "loss": 0.7142464518547058, + "step": 1513 + }, + { + "epoch": 0.44275478871179996, + "grad_norm": 1.3215155589821708, + "learning_rate": 1.8439754260913703e-05, + "loss": 0.4895970821380615, + "step": 1514 + }, + { + "epoch": 0.44304722912706535, + "grad_norm": 1.2855871920553614, + "learning_rate": 1.8437160025562012e-05, + "loss": 0.6166520118713379, + "step": 1515 + }, + { + "epoch": 0.4433396695423307, + "grad_norm": 1.3621423468696194, + "learning_rate": 1.8434563818075462e-05, + "loss": 0.6020585894584656, + "step": 1516 + }, + { + "epoch": 0.44363210995759617, + "grad_norm": 1.3215872914676274, + "learning_rate": 1.8431965639060904e-05, + "loss": 0.6879030466079712, + "step": 1517 + }, + { + "epoch": 0.44392455037286155, + "grad_norm": 1.2000763930073624, + "learning_rate": 1.8429365489125644e-05, + "loss": 0.5753897428512573, + "step": 1518 + }, + { + "epoch": 0.44421699078812693, + "grad_norm": 1.2916902596192155, + "learning_rate": 1.8426763368877455e-05, + "loss": 0.5165301561355591, + "step": 1519 + }, + { + "epoch": 0.4445094312033923, + "grad_norm": 1.630208225804633, + "learning_rate": 1.842415927892456e-05, + "loss": 0.6377310752868652, + "step": 1520 + }, + { + "epoch": 0.4448018716186577, + "grad_norm": 1.4221002668397775, + "learning_rate": 1.842155321987566e-05, + "loss": 0.7429912090301514, + "step": 1521 + }, + { + "epoch": 0.4450943120339231, + "grad_norm": 1.5079395076396265, + "learning_rate": 1.8418945192339892e-05, + "loss": 0.6177542209625244, + "step": 1522 + }, + { + "epoch": 0.44538675244918846, + "grad_norm": 1.2784904022569494, + "learning_rate": 1.8416335196926877e-05, + "loss": 0.662541389465332, + "step": 1523 + }, + { + "epoch": 0.44567919286445384, + "grad_norm": 1.2782173083325044, + "learning_rate": 1.841372323424668e-05, + "loss": 0.6026759743690491, + "step": 1524 + }, + { + "epoch": 0.4459716332797193, + 
"grad_norm": 1.5759742604234355, + "learning_rate": 1.8411109304909837e-05, + "loss": 0.7902384400367737, + "step": 1525 + }, + { + "epoch": 0.44626407369498466, + "grad_norm": 1.4904175669631523, + "learning_rate": 1.840849340952733e-05, + "loss": 0.6588590145111084, + "step": 1526 + }, + { + "epoch": 0.44655651411025005, + "grad_norm": 1.1682358413615135, + "learning_rate": 1.8405875548710614e-05, + "loss": 0.49133825302124023, + "step": 1527 + }, + { + "epoch": 0.44684895452551543, + "grad_norm": 1.4464174570347765, + "learning_rate": 1.8403255723071597e-05, + "loss": 0.6644654273986816, + "step": 1528 + }, + { + "epoch": 0.4471413949407808, + "grad_norm": 1.2325053536943291, + "learning_rate": 1.8400633933222647e-05, + "loss": 0.6257454752922058, + "step": 1529 + }, + { + "epoch": 0.4474338353560462, + "grad_norm": 1.4100106920950097, + "learning_rate": 1.8398010179776597e-05, + "loss": 0.6671919226646423, + "step": 1530 + }, + { + "epoch": 0.4477262757713116, + "grad_norm": 1.1625081058782702, + "learning_rate": 1.839538446334672e-05, + "loss": 0.6001447439193726, + "step": 1531 + }, + { + "epoch": 0.44801871618657696, + "grad_norm": 1.6509081383772402, + "learning_rate": 1.8392756784546775e-05, + "loss": 0.8103213310241699, + "step": 1532 + }, + { + "epoch": 0.4483111566018424, + "grad_norm": 1.1675484766628168, + "learning_rate": 1.839012714399096e-05, + "loss": 0.7010835409164429, + "step": 1533 + }, + { + "epoch": 0.4486035970171078, + "grad_norm": 1.0773967688725017, + "learning_rate": 1.8387495542293935e-05, + "loss": 0.5709215402603149, + "step": 1534 + }, + { + "epoch": 0.44889603743237316, + "grad_norm": 1.3558935245332375, + "learning_rate": 1.8384861980070826e-05, + "loss": 0.6410949230194092, + "step": 1535 + }, + { + "epoch": 0.44918847784763855, + "grad_norm": 1.358963272892771, + "learning_rate": 1.838222645793721e-05, + "loss": 0.8036839962005615, + "step": 1536 + }, + { + "epoch": 0.44948091826290393, + "grad_norm": 1.1470889977158967, + 
"learning_rate": 1.8379588976509123e-05, + "loss": 0.49213099479675293, + "step": 1537 + }, + { + "epoch": 0.4497733586781693, + "grad_norm": 1.5829843161961048, + "learning_rate": 1.8376949536403063e-05, + "loss": 0.7111018896102905, + "step": 1538 + }, + { + "epoch": 0.4500657990934347, + "grad_norm": 1.313995907545699, + "learning_rate": 1.837430813823598e-05, + "loss": 0.8506999015808105, + "step": 1539 + }, + { + "epoch": 0.4503582395087001, + "grad_norm": 1.2175571229137518, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.7369798421859741, + "step": 1540 + }, + { + "epoch": 0.4506506799239655, + "grad_norm": 1.3435168892785054, + "learning_rate": 1.8369019470188855e-05, + "loss": 0.5982831120491028, + "step": 1541 + }, + { + "epoch": 0.4509431203392309, + "grad_norm": 1.2303590063922416, + "learning_rate": 1.8366372201545002e-05, + "loss": 0.6129144430160522, + "step": 1542 + }, + { + "epoch": 0.4512355607544963, + "grad_norm": 1.5191607059455674, + "learning_rate": 1.8363722977312512e-05, + "loss": 0.7142921686172485, + "step": 1543 + }, + { + "epoch": 0.45152800116976166, + "grad_norm": 1.1545455601160404, + "learning_rate": 1.8361071798110635e-05, + "loss": 0.515651524066925, + "step": 1544 + }, + { + "epoch": 0.45182044158502704, + "grad_norm": 1.3144713138844157, + "learning_rate": 1.8358418664559058e-05, + "loss": 0.5544168949127197, + "step": 1545 + }, + { + "epoch": 0.4521128820002924, + "grad_norm": 1.2540637765053078, + "learning_rate": 1.8355763577277938e-05, + "loss": 0.6801918745040894, + "step": 1546 + }, + { + "epoch": 0.4524053224155578, + "grad_norm": 1.3664850716479517, + "learning_rate": 1.835310653688789e-05, + "loss": 0.683785080909729, + "step": 1547 + }, + { + "epoch": 0.45269776283082325, + "grad_norm": 1.363558169999723, + "learning_rate": 1.835044754400997e-05, + "loss": 0.5689892172813416, + "step": 1548 + }, + { + "epoch": 0.45299020324608863, + "grad_norm": 1.1621305276584806, + "learning_rate": 1.8347786599265713e-05, + 
"loss": 0.5260726809501648, + "step": 1549 + }, + { + "epoch": 0.453282643661354, + "grad_norm": 1.2201116845769602, + "learning_rate": 1.834512370327709e-05, + "loss": 0.6792432069778442, + "step": 1550 + }, + { + "epoch": 0.4535750840766194, + "grad_norm": 1.198643016289117, + "learning_rate": 1.8342458856666545e-05, + "loss": 0.6336524486541748, + "step": 1551 + }, + { + "epoch": 0.4538675244918848, + "grad_norm": 1.3472994421503108, + "learning_rate": 1.8339792060056965e-05, + "loss": 0.5929614901542664, + "step": 1552 + }, + { + "epoch": 0.45415996490715016, + "grad_norm": 1.2599505430948363, + "learning_rate": 1.8337123314071696e-05, + "loss": 0.6683382391929626, + "step": 1553 + }, + { + "epoch": 0.45445240532241554, + "grad_norm": 1.2860246628200298, + "learning_rate": 1.833445261933454e-05, + "loss": 0.6256811618804932, + "step": 1554 + }, + { + "epoch": 0.4547448457376809, + "grad_norm": 1.3499468606960694, + "learning_rate": 1.8331779976469765e-05, + "loss": 0.5974653959274292, + "step": 1555 + }, + { + "epoch": 0.45503728615294636, + "grad_norm": 1.2078321854850618, + "learning_rate": 1.8329105386102074e-05, + "loss": 0.5471535325050354, + "step": 1556 + }, + { + "epoch": 0.45532972656821175, + "grad_norm": 1.284169615938693, + "learning_rate": 1.832642884885664e-05, + "loss": 0.5751267075538635, + "step": 1557 + }, + { + "epoch": 0.45562216698347713, + "grad_norm": 1.3234326952626145, + "learning_rate": 1.8323750365359092e-05, + "loss": 0.7003380060195923, + "step": 1558 + }, + { + "epoch": 0.4559146073987425, + "grad_norm": 1.3333099062603002, + "learning_rate": 1.8321069936235503e-05, + "loss": 0.6351351737976074, + "step": 1559 + }, + { + "epoch": 0.4562070478140079, + "grad_norm": 1.4452410048586575, + "learning_rate": 1.8318387562112407e-05, + "loss": 0.6083345413208008, + "step": 1560 + }, + { + "epoch": 0.4564994882292733, + "grad_norm": 1.230127453588353, + "learning_rate": 1.83157032436168e-05, + "loss": 0.589935302734375, + "step": 1561 + }, 
+ { + "epoch": 0.45679192864453866, + "grad_norm": 1.3491229847821233, + "learning_rate": 1.8313016981376116e-05, + "loss": 0.7648014426231384, + "step": 1562 + }, + { + "epoch": 0.45708436905980404, + "grad_norm": 1.2461686063365083, + "learning_rate": 1.831032877601826e-05, + "loss": 0.7309973239898682, + "step": 1563 + }, + { + "epoch": 0.4573768094750695, + "grad_norm": 1.4691097869713072, + "learning_rate": 1.8307638628171575e-05, + "loss": 0.7231593728065491, + "step": 1564 + }, + { + "epoch": 0.45766924989033486, + "grad_norm": 1.4770239307253334, + "learning_rate": 1.8304946538464876e-05, + "loss": 0.7321262359619141, + "step": 1565 + }, + { + "epoch": 0.45796169030560024, + "grad_norm": 1.1157038717428966, + "learning_rate": 1.830225250752742e-05, + "loss": 0.5866271257400513, + "step": 1566 + }, + { + "epoch": 0.4582541307208656, + "grad_norm": 1.4899327841327124, + "learning_rate": 1.8299556535988917e-05, + "loss": 0.7146202325820923, + "step": 1567 + }, + { + "epoch": 0.458546571136131, + "grad_norm": 1.0989226716242009, + "learning_rate": 1.8296858624479536e-05, + "loss": 0.4600168466567993, + "step": 1568 + }, + { + "epoch": 0.4588390115513964, + "grad_norm": 1.5647421342147445, + "learning_rate": 1.8294158773629896e-05, + "loss": 0.5710705518722534, + "step": 1569 + }, + { + "epoch": 0.4591314519666618, + "grad_norm": 1.4737029572986353, + "learning_rate": 1.8291456984071073e-05, + "loss": 0.7075216770172119, + "step": 1570 + }, + { + "epoch": 0.45942389238192716, + "grad_norm": 1.2087048615463696, + "learning_rate": 1.828875325643459e-05, + "loss": 0.5262739062309265, + "step": 1571 + }, + { + "epoch": 0.4597163327971926, + "grad_norm": 1.2732843462549814, + "learning_rate": 1.8286047591352436e-05, + "loss": 0.724657416343689, + "step": 1572 + }, + { + "epoch": 0.460008773212458, + "grad_norm": 1.2778614004914874, + "learning_rate": 1.8283339989457033e-05, + "loss": 0.6047587394714355, + "step": 1573 + }, + { + "epoch": 0.46030121362772336, + 
"grad_norm": 1.481028950467352, + "learning_rate": 1.828063045138127e-05, + "loss": 0.6647980213165283, + "step": 1574 + }, + { + "epoch": 0.46059365404298874, + "grad_norm": 1.3031844151965102, + "learning_rate": 1.827791897775849e-05, + "loss": 0.6081969738006592, + "step": 1575 + }, + { + "epoch": 0.4608860944582541, + "grad_norm": 1.2574668609577524, + "learning_rate": 1.827520556922248e-05, + "loss": 0.6815003156661987, + "step": 1576 + }, + { + "epoch": 0.4611785348735195, + "grad_norm": 1.25588669780601, + "learning_rate": 1.8272490226407476e-05, + "loss": 0.5571715235710144, + "step": 1577 + }, + { + "epoch": 0.4614709752887849, + "grad_norm": 1.241115553107667, + "learning_rate": 1.8269772949948185e-05, + "loss": 0.7562757730484009, + "step": 1578 + }, + { + "epoch": 0.4617634157040503, + "grad_norm": 1.3753582703744767, + "learning_rate": 1.8267053740479745e-05, + "loss": 0.6330382227897644, + "step": 1579 + }, + { + "epoch": 0.4620558561193157, + "grad_norm": 1.5331426598457012, + "learning_rate": 1.826433259863776e-05, + "loss": 0.7696597576141357, + "step": 1580 + }, + { + "epoch": 0.4623482965345811, + "grad_norm": 1.3594821877317964, + "learning_rate": 1.8261609525058275e-05, + "loss": 0.6953772306442261, + "step": 1581 + }, + { + "epoch": 0.4626407369498465, + "grad_norm": 1.3957443557298115, + "learning_rate": 1.8258884520377797e-05, + "loss": 0.5856037735939026, + "step": 1582 + }, + { + "epoch": 0.46293317736511186, + "grad_norm": 1.3245931479550002, + "learning_rate": 1.8256157585233277e-05, + "loss": 0.5988172888755798, + "step": 1583 + }, + { + "epoch": 0.46322561778037724, + "grad_norm": 1.3153037118046438, + "learning_rate": 1.8253428720262117e-05, + "loss": 0.6320241689682007, + "step": 1584 + }, + { + "epoch": 0.4635180581956426, + "grad_norm": 1.1680775814478943, + "learning_rate": 1.8250697926102182e-05, + "loss": 0.5758935213088989, + "step": 1585 + }, + { + "epoch": 0.463810498610908, + "grad_norm": 1.4295465315991271, + 
"learning_rate": 1.8247965203391763e-05, + "loss": 0.7104986906051636, + "step": 1586 + }, + { + "epoch": 0.46410293902617344, + "grad_norm": 1.4739846709331708, + "learning_rate": 1.8245230552769634e-05, + "loss": 0.6322015523910522, + "step": 1587 + }, + { + "epoch": 0.4643953794414388, + "grad_norm": 1.4263760736603013, + "learning_rate": 1.824249397487499e-05, + "loss": 0.5881235003471375, + "step": 1588 + }, + { + "epoch": 0.4646878198567042, + "grad_norm": 1.5652864190332019, + "learning_rate": 1.8239755470347497e-05, + "loss": 0.8097240924835205, + "step": 1589 + }, + { + "epoch": 0.4649802602719696, + "grad_norm": 1.4192861983980027, + "learning_rate": 1.823701503982726e-05, + "loss": 0.6538649201393127, + "step": 1590 + }, + { + "epoch": 0.465272700687235, + "grad_norm": 1.2329012857349442, + "learning_rate": 1.8234272683954842e-05, + "loss": 0.5868922472000122, + "step": 1591 + }, + { + "epoch": 0.46556514110250036, + "grad_norm": 1.3076575357637654, + "learning_rate": 1.8231528403371248e-05, + "loss": 0.6747265458106995, + "step": 1592 + }, + { + "epoch": 0.46585758151776574, + "grad_norm": 1.2961728564371904, + "learning_rate": 1.8228782198717936e-05, + "loss": 0.6519996523857117, + "step": 1593 + }, + { + "epoch": 0.4661500219330311, + "grad_norm": 1.2212124627082057, + "learning_rate": 1.822603407063682e-05, + "loss": 0.7268975973129272, + "step": 1594 + }, + { + "epoch": 0.46644246234829656, + "grad_norm": 1.1603454255193932, + "learning_rate": 1.8223284019770252e-05, + "loss": 0.6554980278015137, + "step": 1595 + }, + { + "epoch": 0.46673490276356194, + "grad_norm": 1.350233636463654, + "learning_rate": 1.8220532046761047e-05, + "loss": 0.7014105319976807, + "step": 1596 + }, + { + "epoch": 0.4670273431788273, + "grad_norm": 1.4228663397014873, + "learning_rate": 1.821777815225245e-05, + "loss": 0.5766602158546448, + "step": 1597 + }, + { + "epoch": 0.4673197835940927, + "grad_norm": 1.304159292005938, + "learning_rate": 1.8215022336888182e-05, + 
"loss": 0.5106521844863892, + "step": 1598 + }, + { + "epoch": 0.4676122240093581, + "grad_norm": 1.419250792414019, + "learning_rate": 1.821226460131239e-05, + "loss": 0.801375150680542, + "step": 1599 + }, + { + "epoch": 0.4679046644246235, + "grad_norm": 1.2845833863087142, + "learning_rate": 1.8209504946169677e-05, + "loss": 0.6189062595367432, + "step": 1600 + }, + { + "epoch": 0.46819710483988886, + "grad_norm": 1.2468379881228138, + "learning_rate": 1.8206743372105098e-05, + "loss": 0.6719359159469604, + "step": 1601 + }, + { + "epoch": 0.46848954525515424, + "grad_norm": 1.4660205035921348, + "learning_rate": 1.8203979879764153e-05, + "loss": 0.7437123656272888, + "step": 1602 + }, + { + "epoch": 0.4687819856704197, + "grad_norm": 1.3639008290802046, + "learning_rate": 1.8201214469792793e-05, + "loss": 0.7273217439651489, + "step": 1603 + }, + { + "epoch": 0.46907442608568506, + "grad_norm": 1.332814377531963, + "learning_rate": 1.8198447142837416e-05, + "loss": 0.6467087268829346, + "step": 1604 + }, + { + "epoch": 0.46936686650095044, + "grad_norm": 1.1167815102053054, + "learning_rate": 1.8195677899544866e-05, + "loss": 0.5764428973197937, + "step": 1605 + }, + { + "epoch": 0.4696593069162158, + "grad_norm": 1.4761144768835275, + "learning_rate": 1.8192906740562437e-05, + "loss": 0.5969977378845215, + "step": 1606 + }, + { + "epoch": 0.4699517473314812, + "grad_norm": 1.3424638711815577, + "learning_rate": 1.819013366653787e-05, + "loss": 0.7237746119499207, + "step": 1607 + }, + { + "epoch": 0.4702441877467466, + "grad_norm": 1.4494789457227795, + "learning_rate": 1.8187358678119355e-05, + "loss": 0.6289568543434143, + "step": 1608 + }, + { + "epoch": 0.47053662816201197, + "grad_norm": 1.1494676131886132, + "learning_rate": 1.8184581775955533e-05, + "loss": 0.5773013234138489, + "step": 1609 + }, + { + "epoch": 0.47082906857727735, + "grad_norm": 1.3055308518970814, + "learning_rate": 1.818180296069548e-05, + "loss": 0.5940284729003906, + "step": 1610 
+ }, + { + "epoch": 0.4711215089925428, + "grad_norm": 1.506634303312927, + "learning_rate": 1.8179022232988735e-05, + "loss": 0.7051881551742554, + "step": 1611 + }, + { + "epoch": 0.4714139494078082, + "grad_norm": 1.2817274142705404, + "learning_rate": 1.8176239593485267e-05, + "loss": 0.6427813768386841, + "step": 1612 + }, + { + "epoch": 0.47170638982307356, + "grad_norm": 1.3150009445137423, + "learning_rate": 1.817345504283551e-05, + "loss": 0.7041782736778259, + "step": 1613 + }, + { + "epoch": 0.47199883023833894, + "grad_norm": 1.1960422316530261, + "learning_rate": 1.817066858169033e-05, + "loss": 0.6568688154220581, + "step": 1614 + }, + { + "epoch": 0.4722912706536043, + "grad_norm": 1.1082706297141673, + "learning_rate": 1.816788021070105e-05, + "loss": 0.4784452021121979, + "step": 1615 + }, + { + "epoch": 0.4725837110688697, + "grad_norm": 1.403652579196444, + "learning_rate": 1.816508993051943e-05, + "loss": 0.6012705564498901, + "step": 1616 + }, + { + "epoch": 0.4728761514841351, + "grad_norm": 1.441258763214559, + "learning_rate": 1.8162297741797685e-05, + "loss": 0.6414428949356079, + "step": 1617 + }, + { + "epoch": 0.47316859189940047, + "grad_norm": 1.4131643644174843, + "learning_rate": 1.815950364518847e-05, + "loss": 0.6446187496185303, + "step": 1618 + }, + { + "epoch": 0.4734610323146659, + "grad_norm": 1.2552495046018781, + "learning_rate": 1.8156707641344885e-05, + "loss": 0.5153034329414368, + "step": 1619 + }, + { + "epoch": 0.4737534727299313, + "grad_norm": 1.5159052607593526, + "learning_rate": 1.8153909730920485e-05, + "loss": 0.7209463715553284, + "step": 1620 + }, + { + "epoch": 0.4740459131451967, + "grad_norm": 1.2933785450044248, + "learning_rate": 1.8151109914569267e-05, + "loss": 0.5990744829177856, + "step": 1621 + }, + { + "epoch": 0.47433835356046206, + "grad_norm": 1.3033668993107679, + "learning_rate": 1.814830819294566e-05, + "loss": 0.5706672668457031, + "step": 1622 + }, + { + "epoch": 0.47463079397572744, + 
"grad_norm": 1.1946317041445573, + "learning_rate": 1.814550456670456e-05, + "loss": 0.538548469543457, + "step": 1623 + }, + { + "epoch": 0.4749232343909928, + "grad_norm": 1.3282078081285205, + "learning_rate": 1.8142699036501288e-05, + "loss": 0.6450623273849487, + "step": 1624 + }, + { + "epoch": 0.4752156748062582, + "grad_norm": 1.336508209824809, + "learning_rate": 1.813989160299163e-05, + "loss": 0.6537624597549438, + "step": 1625 + }, + { + "epoch": 0.47550811522152364, + "grad_norm": 1.2777879020397362, + "learning_rate": 1.8137082266831794e-05, + "loss": 0.7126362323760986, + "step": 1626 + }, + { + "epoch": 0.475800555636789, + "grad_norm": 1.4542616967071014, + "learning_rate": 1.813427102867846e-05, + "loss": 0.6686921119689941, + "step": 1627 + }, + { + "epoch": 0.4760929960520544, + "grad_norm": 1.4231643377055359, + "learning_rate": 1.8131457889188723e-05, + "loss": 0.5925619602203369, + "step": 1628 + }, + { + "epoch": 0.4763854364673198, + "grad_norm": 1.2702390975554385, + "learning_rate": 1.8128642849020147e-05, + "loss": 0.7251017689704895, + "step": 1629 + }, + { + "epoch": 0.47667787688258517, + "grad_norm": 1.5675645867645378, + "learning_rate": 1.8125825908830733e-05, + "loss": 0.7524283528327942, + "step": 1630 + }, + { + "epoch": 0.47697031729785055, + "grad_norm": 1.2843975237623166, + "learning_rate": 1.8123007069278914e-05, + "loss": 0.7593197226524353, + "step": 1631 + }, + { + "epoch": 0.47726275771311594, + "grad_norm": 1.2304771008785658, + "learning_rate": 1.812018633102358e-05, + "loss": 0.43353578448295593, + "step": 1632 + }, + { + "epoch": 0.4775551981283813, + "grad_norm": 1.1488804965894268, + "learning_rate": 1.8117363694724063e-05, + "loss": 0.6254708766937256, + "step": 1633 + }, + { + "epoch": 0.47784763854364676, + "grad_norm": 1.2467231401784862, + "learning_rate": 1.811453916104014e-05, + "loss": 0.5970091223716736, + "step": 1634 + }, + { + "epoch": 0.47814007895891214, + "grad_norm": 1.2798152763028137, + 
"learning_rate": 1.8111712730632024e-05, + "loss": 0.6299331188201904, + "step": 1635 + }, + { + "epoch": 0.4784325193741775, + "grad_norm": 1.4325282365212126, + "learning_rate": 1.810888440416038e-05, + "loss": 0.7461789846420288, + "step": 1636 + }, + { + "epoch": 0.4787249597894429, + "grad_norm": 1.2539146793136515, + "learning_rate": 1.8106054182286305e-05, + "loss": 0.5053290724754333, + "step": 1637 + }, + { + "epoch": 0.4790174002047083, + "grad_norm": 1.2809048918941985, + "learning_rate": 1.810322206567135e-05, + "loss": 0.6853327751159668, + "step": 1638 + }, + { + "epoch": 0.47930984061997367, + "grad_norm": 1.4027979186429358, + "learning_rate": 1.8100388054977508e-05, + "loss": 0.5337134599685669, + "step": 1639 + }, + { + "epoch": 0.47960228103523905, + "grad_norm": 1.357622845311743, + "learning_rate": 1.809755215086721e-05, + "loss": 0.7082560062408447, + "step": 1640 + }, + { + "epoch": 0.47989472145050444, + "grad_norm": 1.3590974916852807, + "learning_rate": 1.8094714354003325e-05, + "loss": 0.680424153804779, + "step": 1641 + }, + { + "epoch": 0.4801871618657699, + "grad_norm": 1.1398057291819046, + "learning_rate": 1.8091874665049183e-05, + "loss": 0.5235139727592468, + "step": 1642 + }, + { + "epoch": 0.48047960228103526, + "grad_norm": 1.3822416905178454, + "learning_rate": 1.8089033084668535e-05, + "loss": 0.7843992114067078, + "step": 1643 + }, + { + "epoch": 0.48077204269630064, + "grad_norm": 1.4941957252025324, + "learning_rate": 1.8086189613525587e-05, + "loss": 0.6736497282981873, + "step": 1644 + }, + { + "epoch": 0.481064483111566, + "grad_norm": 1.3326594399820286, + "learning_rate": 1.808334425228498e-05, + "loss": 0.6898948550224304, + "step": 1645 + }, + { + "epoch": 0.4813569235268314, + "grad_norm": 1.3419429940100798, + "learning_rate": 1.80804970016118e-05, + "loss": 0.6719726324081421, + "step": 1646 + }, + { + "epoch": 0.4816493639420968, + "grad_norm": 1.316270232362313, + "learning_rate": 1.807764786217158e-05, + 
"loss": 0.6904356479644775, + "step": 1647 + }, + { + "epoch": 0.48194180435736217, + "grad_norm": 1.3009257254922486, + "learning_rate": 1.8074796834630285e-05, + "loss": 0.5956645011901855, + "step": 1648 + }, + { + "epoch": 0.48223424477262755, + "grad_norm": 1.162557710559535, + "learning_rate": 1.8071943919654323e-05, + "loss": 0.5676499009132385, + "step": 1649 + }, + { + "epoch": 0.482526685187893, + "grad_norm": 1.3145895725362904, + "learning_rate": 1.8069089117910547e-05, + "loss": 0.6006937026977539, + "step": 1650 + }, + { + "epoch": 0.48281912560315837, + "grad_norm": 1.3694341047830378, + "learning_rate": 1.806623243006625e-05, + "loss": 0.6241977214813232, + "step": 1651 + }, + { + "epoch": 0.48311156601842375, + "grad_norm": 1.4152304986784254, + "learning_rate": 1.806337385678917e-05, + "loss": 0.7359870672225952, + "step": 1652 + }, + { + "epoch": 0.48340400643368914, + "grad_norm": 1.155725074088707, + "learning_rate": 1.806051339874748e-05, + "loss": 0.6113119125366211, + "step": 1653 + }, + { + "epoch": 0.4836964468489545, + "grad_norm": 1.3288798785197886, + "learning_rate": 1.8057651056609784e-05, + "loss": 0.642951488494873, + "step": 1654 + }, + { + "epoch": 0.4839888872642199, + "grad_norm": 1.3081605749498326, + "learning_rate": 1.8054786831045147e-05, + "loss": 0.7020113468170166, + "step": 1655 + }, + { + "epoch": 0.4842813276794853, + "grad_norm": 1.355302216036822, + "learning_rate": 1.8051920722723063e-05, + "loss": 0.678231418132782, + "step": 1656 + }, + { + "epoch": 0.48457376809475067, + "grad_norm": 1.2407750790627203, + "learning_rate": 1.8049052732313466e-05, + "loss": 0.604765772819519, + "step": 1657 + }, + { + "epoch": 0.4848662085100161, + "grad_norm": 1.501775861517808, + "learning_rate": 1.8046182860486735e-05, + "loss": 0.6812270879745483, + "step": 1658 + }, + { + "epoch": 0.4851586489252815, + "grad_norm": 1.329019452940817, + "learning_rate": 1.8043311107913675e-05, + "loss": 0.6284930109977722, + "step": 1659 + }, + 
{ + "epoch": 0.48545108934054687, + "grad_norm": 1.4460160298748268, + "learning_rate": 1.8040437475265554e-05, + "loss": 0.665177583694458, + "step": 1660 + }, + { + "epoch": 0.48574352975581225, + "grad_norm": 1.365611165893268, + "learning_rate": 1.8037561963214058e-05, + "loss": 0.7628738284111023, + "step": 1661 + }, + { + "epoch": 0.48603597017107764, + "grad_norm": 1.4917601408905583, + "learning_rate": 1.8034684572431322e-05, + "loss": 0.6372654438018799, + "step": 1662 + }, + { + "epoch": 0.486328410586343, + "grad_norm": 1.2986927468884095, + "learning_rate": 1.803180530358992e-05, + "loss": 0.5915756225585938, + "step": 1663 + }, + { + "epoch": 0.4866208510016084, + "grad_norm": 1.3509164579114188, + "learning_rate": 1.802892415736286e-05, + "loss": 0.6821908950805664, + "step": 1664 + }, + { + "epoch": 0.48691329141687384, + "grad_norm": 1.3857679722145793, + "learning_rate": 1.80260411344236e-05, + "loss": 0.6418279409408569, + "step": 1665 + }, + { + "epoch": 0.4872057318321392, + "grad_norm": 1.154306591574384, + "learning_rate": 1.802315623544602e-05, + "loss": 0.5582526922225952, + "step": 1666 + }, + { + "epoch": 0.4874981722474046, + "grad_norm": 1.3431793608397968, + "learning_rate": 1.8020269461104448e-05, + "loss": 0.7145007848739624, + "step": 1667 + }, + { + "epoch": 0.48779061266267, + "grad_norm": 1.2110741699326812, + "learning_rate": 1.8017380812073658e-05, + "loss": 0.5415871739387512, + "step": 1668 + }, + { + "epoch": 0.48808305307793537, + "grad_norm": 1.488356994545647, + "learning_rate": 1.801449028902885e-05, + "loss": 0.728327751159668, + "step": 1669 + }, + { + "epoch": 0.48837549349320075, + "grad_norm": 1.3273378299589804, + "learning_rate": 1.8011597892645665e-05, + "loss": 0.6469160914421082, + "step": 1670 + }, + { + "epoch": 0.48866793390846613, + "grad_norm": 1.3096259850876997, + "learning_rate": 1.8008703623600185e-05, + "loss": 0.7107353210449219, + "step": 1671 + }, + { + "epoch": 0.4889603743237315, + "grad_norm": 
1.4201847213896843, + "learning_rate": 1.8005807482568926e-05, + "loss": 0.6918982267379761, + "step": 1672 + }, + { + "epoch": 0.48925281473899696, + "grad_norm": 1.4096024584844806, + "learning_rate": 1.800290947022884e-05, + "loss": 0.661738932132721, + "step": 1673 + }, + { + "epoch": 0.48954525515426234, + "grad_norm": 1.4938181766281158, + "learning_rate": 1.800000958725733e-05, + "loss": 0.6816283464431763, + "step": 1674 + }, + { + "epoch": 0.4898376955695277, + "grad_norm": 1.348689926804817, + "learning_rate": 1.7997107834332217e-05, + "loss": 0.6988941431045532, + "step": 1675 + }, + { + "epoch": 0.4901301359847931, + "grad_norm": 1.5696470599370025, + "learning_rate": 1.799420421213177e-05, + "loss": 0.7997519969940186, + "step": 1676 + }, + { + "epoch": 0.4904225764000585, + "grad_norm": 1.3512394042939826, + "learning_rate": 1.7991298721334697e-05, + "loss": 0.6552794575691223, + "step": 1677 + }, + { + "epoch": 0.49071501681532387, + "grad_norm": 1.2446219807906005, + "learning_rate": 1.7988391362620135e-05, + "loss": 0.6144021153450012, + "step": 1678 + }, + { + "epoch": 0.49100745723058925, + "grad_norm": 1.2086851376188177, + "learning_rate": 1.798548213666766e-05, + "loss": 0.5036276578903198, + "step": 1679 + }, + { + "epoch": 0.49129989764585463, + "grad_norm": 1.1620444251602322, + "learning_rate": 1.7982571044157288e-05, + "loss": 0.5152162313461304, + "step": 1680 + }, + { + "epoch": 0.49159233806112007, + "grad_norm": 1.4266855366652862, + "learning_rate": 1.797965808576947e-05, + "loss": 0.7249797582626343, + "step": 1681 + }, + { + "epoch": 0.49188477847638545, + "grad_norm": 1.138885414798186, + "learning_rate": 1.7976743262185094e-05, + "loss": 0.5769079923629761, + "step": 1682 + }, + { + "epoch": 0.49217721889165084, + "grad_norm": 1.2523240509929359, + "learning_rate": 1.797382657408548e-05, + "loss": 0.7017331123352051, + "step": 1683 + }, + { + "epoch": 0.4924696593069162, + "grad_norm": 1.3095438640742119, + "learning_rate": 
1.797090802215238e-05, + "loss": 0.788599967956543, + "step": 1684 + }, + { + "epoch": 0.4927620997221816, + "grad_norm": 1.3652642181905799, + "learning_rate": 1.7967987607067997e-05, + "loss": 0.5716612935066223, + "step": 1685 + }, + { + "epoch": 0.493054540137447, + "grad_norm": 1.396592202891807, + "learning_rate": 1.796506532951496e-05, + "loss": 0.6808345913887024, + "step": 1686 + }, + { + "epoch": 0.49334698055271237, + "grad_norm": 1.421363062787346, + "learning_rate": 1.7962141190176326e-05, + "loss": 0.6540817022323608, + "step": 1687 + }, + { + "epoch": 0.49363942096797775, + "grad_norm": 1.3162774070898267, + "learning_rate": 1.7959215189735604e-05, + "loss": 0.6522870063781738, + "step": 1688 + }, + { + "epoch": 0.4939318613832432, + "grad_norm": 1.2120992084575881, + "learning_rate": 1.7956287328876724e-05, + "loss": 0.5217882990837097, + "step": 1689 + }, + { + "epoch": 0.49422430179850857, + "grad_norm": 1.1456971313507769, + "learning_rate": 1.795335760828405e-05, + "loss": 0.6985372304916382, + "step": 1690 + }, + { + "epoch": 0.49451674221377395, + "grad_norm": 1.6308222645679713, + "learning_rate": 1.7950426028642397e-05, + "loss": 0.7199063301086426, + "step": 1691 + }, + { + "epoch": 0.49480918262903933, + "grad_norm": 1.2503132677681021, + "learning_rate": 1.7947492590636998e-05, + "loss": 0.5810575485229492, + "step": 1692 + }, + { + "epoch": 0.4951016230443047, + "grad_norm": 1.5393913616038981, + "learning_rate": 1.7944557294953528e-05, + "loss": 0.7443726658821106, + "step": 1693 + }, + { + "epoch": 0.4953940634595701, + "grad_norm": 1.4257690332105803, + "learning_rate": 1.7941620142278092e-05, + "loss": 0.6774560213088989, + "step": 1694 + }, + { + "epoch": 0.4956865038748355, + "grad_norm": 1.4876883296800856, + "learning_rate": 1.793868113329724e-05, + "loss": 0.6983137726783752, + "step": 1695 + }, + { + "epoch": 0.49597894429010086, + "grad_norm": 1.500775887710686, + "learning_rate": 1.793574026869793e-05, + "loss": 
0.6481274366378784, + "step": 1696 + }, + { + "epoch": 0.4962713847053663, + "grad_norm": 1.5261372345633493, + "learning_rate": 1.793279754916759e-05, + "loss": 0.6489002704620361, + "step": 1697 + }, + { + "epoch": 0.4965638251206317, + "grad_norm": 1.200851338265551, + "learning_rate": 1.7929852975394056e-05, + "loss": 0.7054505348205566, + "step": 1698 + }, + { + "epoch": 0.49685626553589707, + "grad_norm": 1.1948769153228862, + "learning_rate": 1.79269065480656e-05, + "loss": 0.5257681608200073, + "step": 1699 + }, + { + "epoch": 0.49714870595116245, + "grad_norm": 1.2760885846913066, + "learning_rate": 1.7923958267870936e-05, + "loss": 0.8625251054763794, + "step": 1700 + }, + { + "epoch": 0.49744114636642783, + "grad_norm": 1.223950331700182, + "learning_rate": 1.7921008135499205e-05, + "loss": 0.6736147999763489, + "step": 1701 + }, + { + "epoch": 0.4977335867816932, + "grad_norm": 1.351351583663473, + "learning_rate": 1.7918056151639985e-05, + "loss": 0.5079643130302429, + "step": 1702 + }, + { + "epoch": 0.4980260271969586, + "grad_norm": 1.2324398794203584, + "learning_rate": 1.791510231698328e-05, + "loss": 0.597242534160614, + "step": 1703 + }, + { + "epoch": 0.49831846761222404, + "grad_norm": 1.3776511171825507, + "learning_rate": 1.791214663221953e-05, + "loss": 0.6695376038551331, + "step": 1704 + }, + { + "epoch": 0.4986109080274894, + "grad_norm": 1.2400454845090276, + "learning_rate": 1.7909189098039616e-05, + "loss": 0.6411684155464172, + "step": 1705 + }, + { + "epoch": 0.4989033484427548, + "grad_norm": 1.3917271277458743, + "learning_rate": 1.790622971513484e-05, + "loss": 0.6671754121780396, + "step": 1706 + }, + { + "epoch": 0.4991957888580202, + "grad_norm": 1.1384272276613905, + "learning_rate": 1.7903268484196936e-05, + "loss": 0.5312573909759521, + "step": 1707 + }, + { + "epoch": 0.49948822927328557, + "grad_norm": 1.3626241120949947, + "learning_rate": 1.7900305405918076e-05, + "loss": 0.643236517906189, + "step": 1708 + }, + { + 
"epoch": 0.49978066968855095, + "grad_norm": 1.4093385837144417, + "learning_rate": 1.7897340480990863e-05, + "loss": 0.7942951321601868, + "step": 1709 + }, + { + "epoch": 0.5000731101038164, + "grad_norm": 1.3198251548980515, + "learning_rate": 1.789437371010833e-05, + "loss": 0.701362133026123, + "step": 1710 + }, + { + "epoch": 0.5003655505190817, + "grad_norm": 1.3304955567316399, + "learning_rate": 1.789140509396394e-05, + "loss": 0.6993157863616943, + "step": 1711 + }, + { + "epoch": 0.5006579909343472, + "grad_norm": 1.0719148279657758, + "learning_rate": 1.788843463325159e-05, + "loss": 0.568405270576477, + "step": 1712 + }, + { + "epoch": 0.5009504313496125, + "grad_norm": 0.976150644308567, + "learning_rate": 1.7885462328665605e-05, + "loss": 0.4948374032974243, + "step": 1713 + }, + { + "epoch": 0.5012428717648779, + "grad_norm": 1.4692514127239873, + "learning_rate": 1.7882488180900743e-05, + "loss": 0.6679480671882629, + "step": 1714 + }, + { + "epoch": 0.5015353121801432, + "grad_norm": 1.5018221461401142, + "learning_rate": 1.78795121906522e-05, + "loss": 0.706131100654602, + "step": 1715 + }, + { + "epoch": 0.5018277525954087, + "grad_norm": 1.207740414795638, + "learning_rate": 1.787653435861559e-05, + "loss": 0.6691830158233643, + "step": 1716 + }, + { + "epoch": 0.5021201930106741, + "grad_norm": 1.163150990025552, + "learning_rate": 1.787355468548696e-05, + "loss": 0.5624213218688965, + "step": 1717 + }, + { + "epoch": 0.5024126334259394, + "grad_norm": 1.3394004970303723, + "learning_rate": 1.78705731719628e-05, + "loss": 0.4589618444442749, + "step": 1718 + }, + { + "epoch": 0.5027050738412049, + "grad_norm": 1.384883869852314, + "learning_rate": 1.7867589818740012e-05, + "loss": 0.571403980255127, + "step": 1719 + }, + { + "epoch": 0.5029975142564702, + "grad_norm": 1.0668853872947273, + "learning_rate": 1.786460462651594e-05, + "loss": 0.5395561456680298, + "step": 1720 + }, + { + "epoch": 0.5032899546717357, + "grad_norm": 
1.243223907233259, + "learning_rate": 1.7861617595988355e-05, + "loss": 0.6166945695877075, + "step": 1721 + }, + { + "epoch": 0.503582395087001, + "grad_norm": 1.4857752879775032, + "learning_rate": 1.7858628727855458e-05, + "loss": 0.6812523603439331, + "step": 1722 + }, + { + "epoch": 0.5038748355022664, + "grad_norm": 1.2390654420633957, + "learning_rate": 1.7855638022815872e-05, + "loss": 0.6602752208709717, + "step": 1723 + }, + { + "epoch": 0.5041672759175319, + "grad_norm": 1.0873682718880517, + "learning_rate": 1.7852645481568665e-05, + "loss": 0.49925822019577026, + "step": 1724 + }, + { + "epoch": 0.5044597163327972, + "grad_norm": 1.3265310908908576, + "learning_rate": 1.784965110481332e-05, + "loss": 0.5557682514190674, + "step": 1725 + }, + { + "epoch": 0.5047521567480626, + "grad_norm": 1.2775644185514514, + "learning_rate": 1.7846654893249756e-05, + "loss": 0.6576372981071472, + "step": 1726 + }, + { + "epoch": 0.505044597163328, + "grad_norm": 2.047704943438843, + "learning_rate": 1.7843656847578317e-05, + "loss": 0.5266367197036743, + "step": 1727 + }, + { + "epoch": 0.5053370375785934, + "grad_norm": 1.6086224094226402, + "learning_rate": 1.7840656968499782e-05, + "loss": 0.7368261218070984, + "step": 1728 + }, + { + "epoch": 0.5056294779938587, + "grad_norm": 1.2755318597370908, + "learning_rate": 1.7837655256715355e-05, + "loss": 0.6583619117736816, + "step": 1729 + }, + { + "epoch": 0.5059219184091241, + "grad_norm": 1.4196511617190575, + "learning_rate": 1.7834651712926662e-05, + "loss": 0.7323073148727417, + "step": 1730 + }, + { + "epoch": 0.5062143588243895, + "grad_norm": 1.540686270234863, + "learning_rate": 1.783164633783577e-05, + "loss": 0.6059812307357788, + "step": 1731 + }, + { + "epoch": 0.5065067992396549, + "grad_norm": 1.451028079648097, + "learning_rate": 1.782863913214516e-05, + "loss": 0.5992608070373535, + "step": 1732 + }, + { + "epoch": 0.5067992396549204, + "grad_norm": 1.3452146161553644, + "learning_rate": 
1.7825630096557754e-05, + "loss": 0.5729147791862488, + "step": 1733 + }, + { + "epoch": 0.5070916800701857, + "grad_norm": 1.4383912240083958, + "learning_rate": 1.782261923177689e-05, + "loss": 0.6708269119262695, + "step": 1734 + }, + { + "epoch": 0.5073841204854511, + "grad_norm": 1.0922943221428454, + "learning_rate": 1.7819606538506347e-05, + "loss": 0.5377235412597656, + "step": 1735 + }, + { + "epoch": 0.5076765609007164, + "grad_norm": 1.3060450837457043, + "learning_rate": 1.781659201745032e-05, + "loss": 0.6899171471595764, + "step": 1736 + }, + { + "epoch": 0.5079690013159819, + "grad_norm": 1.2574262616785272, + "learning_rate": 1.7813575669313434e-05, + "loss": 0.6712576150894165, + "step": 1737 + }, + { + "epoch": 0.5082614417312472, + "grad_norm": 1.3797290531865334, + "learning_rate": 1.781055749480074e-05, + "loss": 0.6989667415618896, + "step": 1738 + }, + { + "epoch": 0.5085538821465126, + "grad_norm": 1.4976341004458755, + "learning_rate": 1.7807537494617723e-05, + "loss": 0.6103490591049194, + "step": 1739 + }, + { + "epoch": 0.5088463225617781, + "grad_norm": 1.2059878229475702, + "learning_rate": 1.7804515669470287e-05, + "loss": 0.4882289171218872, + "step": 1740 + }, + { + "epoch": 0.5091387629770434, + "grad_norm": 1.3963253268337052, + "learning_rate": 1.7801492020064764e-05, + "loss": 0.7244713306427002, + "step": 1741 + }, + { + "epoch": 0.5094312033923089, + "grad_norm": 1.2588544303384788, + "learning_rate": 1.7798466547107918e-05, + "loss": 0.6055952310562134, + "step": 1742 + }, + { + "epoch": 0.5097236438075742, + "grad_norm": 1.3449125705801426, + "learning_rate": 1.779543925130693e-05, + "loss": 0.5893995761871338, + "step": 1743 + }, + { + "epoch": 0.5100160842228396, + "grad_norm": 1.4169541262971606, + "learning_rate": 1.7792410133369413e-05, + "loss": 0.6154330968856812, + "step": 1744 + }, + { + "epoch": 0.5103085246381049, + "grad_norm": 1.294650393818464, + "learning_rate": 1.778937919400341e-05, + "loss": 
0.6227806806564331, + "step": 1745 + }, + { + "epoch": 0.5106009650533704, + "grad_norm": 1.563882907776874, + "learning_rate": 1.7786346433917376e-05, + "loss": 0.6192313432693481, + "step": 1746 + }, + { + "epoch": 0.5108934054686358, + "grad_norm": 1.324638073205218, + "learning_rate": 1.7783311853820205e-05, + "loss": 0.6175359487533569, + "step": 1747 + }, + { + "epoch": 0.5111858458839011, + "grad_norm": 1.17912928754983, + "learning_rate": 1.7780275454421218e-05, + "loss": 0.5588991641998291, + "step": 1748 + }, + { + "epoch": 0.5114782862991666, + "grad_norm": 1.0201894222615457, + "learning_rate": 1.777723723643014e-05, + "loss": 0.637115478515625, + "step": 1749 + }, + { + "epoch": 0.5117707267144319, + "grad_norm": 1.5101308062255179, + "learning_rate": 1.777419720055715e-05, + "loss": 0.6762860417366028, + "step": 1750 + }, + { + "epoch": 0.5120631671296973, + "grad_norm": 1.5211239881114056, + "learning_rate": 1.7771155347512828e-05, + "loss": 0.6980293989181519, + "step": 1751 + }, + { + "epoch": 0.5123556075449627, + "grad_norm": 1.3145597239587745, + "learning_rate": 1.7768111678008194e-05, + "loss": 0.6587250232696533, + "step": 1752 + }, + { + "epoch": 0.5126480479602281, + "grad_norm": 1.4750219793579704, + "learning_rate": 1.776506619275469e-05, + "loss": 0.6571120619773865, + "step": 1753 + }, + { + "epoch": 0.5129404883754934, + "grad_norm": 1.705487520120489, + "learning_rate": 1.7762018892464172e-05, + "loss": 0.8127633333206177, + "step": 1754 + }, + { + "epoch": 0.5132329287907589, + "grad_norm": 1.4136977790679228, + "learning_rate": 1.7758969777848935e-05, + "loss": 0.6585550308227539, + "step": 1755 + }, + { + "epoch": 0.5135253692060243, + "grad_norm": 1.5019600327645424, + "learning_rate": 1.7755918849621686e-05, + "loss": 0.6347511410713196, + "step": 1756 + }, + { + "epoch": 0.5138178096212896, + "grad_norm": 1.4489353235186164, + "learning_rate": 1.775286610849556e-05, + "loss": 0.5918457508087158, + "step": 1757 + }, + { + 
"epoch": 0.5141102500365551, + "grad_norm": 1.2541802522573693, + "learning_rate": 1.774981155518412e-05, + "loss": 0.7042769193649292, + "step": 1758 + }, + { + "epoch": 0.5144026904518204, + "grad_norm": 1.4327318826910254, + "learning_rate": 1.7746755190401353e-05, + "loss": 0.8014250993728638, + "step": 1759 + }, + { + "epoch": 0.5146951308670858, + "grad_norm": 1.339232110324459, + "learning_rate": 1.774369701486166e-05, + "loss": 0.6703939437866211, + "step": 1760 + }, + { + "epoch": 0.5149875712823512, + "grad_norm": 1.1710558248660605, + "learning_rate": 1.774063702927987e-05, + "loss": 0.6189682483673096, + "step": 1761 + }, + { + "epoch": 0.5152800116976166, + "grad_norm": 1.4110546220906648, + "learning_rate": 1.7737575234371238e-05, + "loss": 0.5386991500854492, + "step": 1762 + }, + { + "epoch": 0.515572452112882, + "grad_norm": 1.4204019461155708, + "learning_rate": 1.773451163085144e-05, + "loss": 0.6389357447624207, + "step": 1763 + }, + { + "epoch": 0.5158648925281474, + "grad_norm": 1.1798787279597898, + "learning_rate": 1.7731446219436577e-05, + "loss": 0.7247746586799622, + "step": 1764 + }, + { + "epoch": 0.5161573329434128, + "grad_norm": 1.2114702713778023, + "learning_rate": 1.7728379000843164e-05, + "loss": 0.5538983941078186, + "step": 1765 + }, + { + "epoch": 0.5164497733586781, + "grad_norm": 1.155329008927324, + "learning_rate": 1.7725309975788155e-05, + "loss": 0.6003320813179016, + "step": 1766 + }, + { + "epoch": 0.5167422137739436, + "grad_norm": 1.4065479816352848, + "learning_rate": 1.7722239144988908e-05, + "loss": 0.603177011013031, + "step": 1767 + }, + { + "epoch": 0.5170346541892089, + "grad_norm": 1.1699743536266287, + "learning_rate": 1.771916650916321e-05, + "loss": 0.6071338653564453, + "step": 1768 + }, + { + "epoch": 0.5173270946044743, + "grad_norm": 1.4268603398797357, + "learning_rate": 1.7716092069029275e-05, + "loss": 0.6148535013198853, + "step": 1769 + }, + { + "epoch": 0.5176195350197397, + "grad_norm": 
1.3460628970570976, + "learning_rate": 1.7713015825305735e-05, + "loss": 0.6236969828605652, + "step": 1770 + }, + { + "epoch": 0.5179119754350051, + "grad_norm": 1.4613715991480511, + "learning_rate": 1.770993777871164e-05, + "loss": 0.5439775586128235, + "step": 1771 + }, + { + "epoch": 0.5182044158502705, + "grad_norm": 1.3246469866549868, + "learning_rate": 1.770685792996647e-05, + "loss": 0.6498249769210815, + "step": 1772 + }, + { + "epoch": 0.5184968562655359, + "grad_norm": 1.307598965769502, + "learning_rate": 1.7703776279790113e-05, + "loss": 0.5838749408721924, + "step": 1773 + }, + { + "epoch": 0.5187892966808013, + "grad_norm": 1.44861400348765, + "learning_rate": 1.770069282890289e-05, + "loss": 0.6467812657356262, + "step": 1774 + }, + { + "epoch": 0.5190817370960666, + "grad_norm": 1.3332181124442455, + "learning_rate": 1.7697607578025543e-05, + "loss": 0.5878627896308899, + "step": 1775 + }, + { + "epoch": 0.5193741775113321, + "grad_norm": 1.2905348700615993, + "learning_rate": 1.7694520527879223e-05, + "loss": 0.6252161264419556, + "step": 1776 + }, + { + "epoch": 0.5196666179265974, + "grad_norm": 1.2071686484495499, + "learning_rate": 1.7691431679185518e-05, + "loss": 0.6098401546478271, + "step": 1777 + }, + { + "epoch": 0.5199590583418628, + "grad_norm": 1.4529959736387221, + "learning_rate": 1.7688341032666415e-05, + "loss": 0.7401748299598694, + "step": 1778 + }, + { + "epoch": 0.5202514987571283, + "grad_norm": 1.278188059333223, + "learning_rate": 1.768524858904435e-05, + "loss": 0.5398571491241455, + "step": 1779 + }, + { + "epoch": 0.5205439391723936, + "grad_norm": 1.211971903081478, + "learning_rate": 1.768215434904215e-05, + "loss": 0.5565935969352722, + "step": 1780 + }, + { + "epoch": 0.520836379587659, + "grad_norm": 1.3982258941889667, + "learning_rate": 1.7679058313383078e-05, + "loss": 0.5510461926460266, + "step": 1781 + }, + { + "epoch": 0.5211288200029244, + "grad_norm": 1.5839871959956162, + "learning_rate": 
1.7675960482790818e-05, + "loss": 0.670242428779602, + "step": 1782 + }, + { + "epoch": 0.5214212604181898, + "grad_norm": 1.309838763427276, + "learning_rate": 1.7672860857989463e-05, + "loss": 0.6556246280670166, + "step": 1783 + }, + { + "epoch": 0.5217137008334551, + "grad_norm": 1.3555406156984307, + "learning_rate": 1.7669759439703537e-05, + "loss": 0.7133421897888184, + "step": 1784 + }, + { + "epoch": 0.5220061412487206, + "grad_norm": 1.340410804208978, + "learning_rate": 1.766665622865797e-05, + "loss": 0.5520647168159485, + "step": 1785 + }, + { + "epoch": 0.522298581663986, + "grad_norm": 1.2754706768801123, + "learning_rate": 1.766355122557813e-05, + "loss": 0.6906430125236511, + "step": 1786 + }, + { + "epoch": 0.5225910220792513, + "grad_norm": 1.331418831759662, + "learning_rate": 1.766044443118978e-05, + "loss": 0.6847748756408691, + "step": 1787 + }, + { + "epoch": 0.5228834624945168, + "grad_norm": 1.6656678493050783, + "learning_rate": 1.7657335846219125e-05, + "loss": 0.6690354347229004, + "step": 1788 + }, + { + "epoch": 0.5231759029097821, + "grad_norm": 1.5097667681145126, + "learning_rate": 1.765422547139277e-05, + "loss": 0.6508032083511353, + "step": 1789 + }, + { + "epoch": 0.5234683433250475, + "grad_norm": 1.3545274700404182, + "learning_rate": 1.7651113307437754e-05, + "loss": 0.7686585187911987, + "step": 1790 + }, + { + "epoch": 0.5237607837403129, + "grad_norm": 1.5694388106807053, + "learning_rate": 1.764799935508152e-05, + "loss": 0.7669490575790405, + "step": 1791 + }, + { + "epoch": 0.5240532241555783, + "grad_norm": 1.3694245126086426, + "learning_rate": 1.7644883615051936e-05, + "loss": 0.6630266308784485, + "step": 1792 + }, + { + "epoch": 0.5243456645708436, + "grad_norm": 1.350854180871217, + "learning_rate": 1.764176608807729e-05, + "loss": 0.6054951548576355, + "step": 1793 + }, + { + "epoch": 0.5246381049861091, + "grad_norm": 1.3573271710882402, + "learning_rate": 1.7638646774886282e-05, + "loss": 0.6519330739974976, + 
"step": 1794 + }, + { + "epoch": 0.5249305454013745, + "grad_norm": 1.3013890836364408, + "learning_rate": 1.7635525676208034e-05, + "loss": 0.6797915101051331, + "step": 1795 + }, + { + "epoch": 0.5252229858166398, + "grad_norm": 1.4138018427804997, + "learning_rate": 1.7632402792772084e-05, + "loss": 0.7296736240386963, + "step": 1796 + }, + { + "epoch": 0.5255154262319053, + "grad_norm": 1.4894816204298726, + "learning_rate": 1.7629278125308388e-05, + "loss": 0.6371006965637207, + "step": 1797 + }, + { + "epoch": 0.5258078666471706, + "grad_norm": 1.1913157227609021, + "learning_rate": 1.762615167454732e-05, + "loss": 0.5315746068954468, + "step": 1798 + }, + { + "epoch": 0.526100307062436, + "grad_norm": 1.115665172593258, + "learning_rate": 1.762302344121966e-05, + "loss": 0.5285685062408447, + "step": 1799 + }, + { + "epoch": 0.5263927474777014, + "grad_norm": 1.269936179033053, + "learning_rate": 1.7619893426056622e-05, + "loss": 0.623146653175354, + "step": 1800 + }, + { + "epoch": 0.5266851878929668, + "grad_norm": 1.3314922698636598, + "learning_rate": 1.7616761629789824e-05, + "loss": 0.5433363318443298, + "step": 1801 + }, + { + "epoch": 0.5269776283082322, + "grad_norm": 1.422200045831386, + "learning_rate": 1.7613628053151307e-05, + "loss": 0.5035480260848999, + "step": 1802 + }, + { + "epoch": 0.5272700687234976, + "grad_norm": 1.3947936859584276, + "learning_rate": 1.7610492696873523e-05, + "loss": 0.678544819355011, + "step": 1803 + }, + { + "epoch": 0.527562509138763, + "grad_norm": 1.2973841494755158, + "learning_rate": 1.7607355561689347e-05, + "loss": 0.6237714290618896, + "step": 1804 + }, + { + "epoch": 0.5278549495540283, + "grad_norm": 1.8411758190439966, + "learning_rate": 1.760421664833206e-05, + "loss": 0.6943943500518799, + "step": 1805 + }, + { + "epoch": 0.5281473899692938, + "grad_norm": 1.1545458109151105, + "learning_rate": 1.7601075957535366e-05, + "loss": 0.5477268695831299, + "step": 1806 + }, + { + "epoch": 0.5284398303845591, 
+ "grad_norm": 1.5589440207416567, + "learning_rate": 1.759793349003338e-05, + "loss": 0.6627641320228577, + "step": 1807 + }, + { + "epoch": 0.5287322707998245, + "grad_norm": 1.169894530317387, + "learning_rate": 1.7594789246560638e-05, + "loss": 0.5394496917724609, + "step": 1808 + }, + { + "epoch": 0.5290247112150899, + "grad_norm": 1.5989109343746286, + "learning_rate": 1.759164322785209e-05, + "loss": 0.7824013233184814, + "step": 1809 + }, + { + "epoch": 0.5293171516303553, + "grad_norm": 1.5859531867022811, + "learning_rate": 1.7588495434643094e-05, + "loss": 0.6959671974182129, + "step": 1810 + }, + { + "epoch": 0.5296095920456207, + "grad_norm": 1.256097179377318, + "learning_rate": 1.7585345867669427e-05, + "loss": 0.7036902904510498, + "step": 1811 + }, + { + "epoch": 0.5299020324608861, + "grad_norm": 1.2520265115718123, + "learning_rate": 1.7582194527667285e-05, + "loss": 0.6700775623321533, + "step": 1812 + }, + { + "epoch": 0.5301944728761515, + "grad_norm": 1.4077714911889505, + "learning_rate": 1.7579041415373273e-05, + "loss": 0.648280918598175, + "step": 1813 + }, + { + "epoch": 0.5304869132914168, + "grad_norm": 1.3424741441047479, + "learning_rate": 1.757588653152441e-05, + "loss": 0.688485324382782, + "step": 1814 + }, + { + "epoch": 0.5307793537066823, + "grad_norm": 1.4718330240816029, + "learning_rate": 1.757272987685813e-05, + "loss": 0.6743370890617371, + "step": 1815 + }, + { + "epoch": 0.5310717941219476, + "grad_norm": 1.2524252340987996, + "learning_rate": 1.7569571452112288e-05, + "loss": 0.5597015619277954, + "step": 1816 + }, + { + "epoch": 0.531364234537213, + "grad_norm": 1.0387462800714626, + "learning_rate": 1.756641125802514e-05, + "loss": 0.48607051372528076, + "step": 1817 + }, + { + "epoch": 0.5316566749524785, + "grad_norm": 1.3375496888713005, + "learning_rate": 1.7563249295335366e-05, + "loss": 0.6712289452552795, + "step": 1818 + }, + { + "epoch": 0.5319491153677438, + "grad_norm": 1.4037646661677698, + 
"learning_rate": 1.7560085564782057e-05, + "loss": 0.5937772989273071, + "step": 1819 + }, + { + "epoch": 0.5322415557830092, + "grad_norm": 1.5529497860681427, + "learning_rate": 1.7556920067104714e-05, + "loss": 0.7416468262672424, + "step": 1820 + }, + { + "epoch": 0.5325339961982746, + "grad_norm": 1.1975217725231788, + "learning_rate": 1.7553752803043247e-05, + "loss": 0.6302096247673035, + "step": 1821 + }, + { + "epoch": 0.53282643661354, + "grad_norm": 1.268842982106158, + "learning_rate": 1.7550583773337992e-05, + "loss": 0.5576045513153076, + "step": 1822 + }, + { + "epoch": 0.5331188770288053, + "grad_norm": 1.3076658324014316, + "learning_rate": 1.7547412978729688e-05, + "loss": 0.5436257123947144, + "step": 1823 + }, + { + "epoch": 0.5334113174440708, + "grad_norm": 1.2387778464918946, + "learning_rate": 1.754424041995949e-05, + "loss": 0.5674831867218018, + "step": 1824 + }, + { + "epoch": 0.5337037578593362, + "grad_norm": 1.3729116406743342, + "learning_rate": 1.7541066097768965e-05, + "loss": 0.7254515290260315, + "step": 1825 + }, + { + "epoch": 0.5339961982746015, + "grad_norm": 1.1721694105309242, + "learning_rate": 1.7537890012900088e-05, + "loss": 0.5706701278686523, + "step": 1826 + }, + { + "epoch": 0.534288638689867, + "grad_norm": 1.4929452380767032, + "learning_rate": 1.7534712166095253e-05, + "loss": 0.6801357269287109, + "step": 1827 + }, + { + "epoch": 0.5345810791051323, + "grad_norm": 1.115878861059579, + "learning_rate": 1.753153255809726e-05, + "loss": 0.6851463317871094, + "step": 1828 + }, + { + "epoch": 0.5348735195203977, + "grad_norm": 1.3277835192492438, + "learning_rate": 1.7528351189649324e-05, + "loss": 0.6475861072540283, + "step": 1829 + }, + { + "epoch": 0.5351659599356631, + "grad_norm": 1.462925601634232, + "learning_rate": 1.752516806149507e-05, + "loss": 0.6953648924827576, + "step": 1830 + }, + { + "epoch": 0.5354584003509285, + "grad_norm": 1.5314952476377168, + "learning_rate": 1.7521983174378537e-05, + "loss": 
0.5128777623176575, + "step": 1831 + }, + { + "epoch": 0.5357508407661938, + "grad_norm": 1.3754167803768682, + "learning_rate": 1.751879652904417e-05, + "loss": 0.5780255198478699, + "step": 1832 + }, + { + "epoch": 0.5360432811814593, + "grad_norm": 1.1326334157819233, + "learning_rate": 1.751560812623683e-05, + "loss": 0.581814169883728, + "step": 1833 + }, + { + "epoch": 0.5363357215967247, + "grad_norm": 1.2244339664502468, + "learning_rate": 1.7512417966701788e-05, + "loss": 0.5609169006347656, + "step": 1834 + }, + { + "epoch": 0.53662816201199, + "grad_norm": 1.2348222464159622, + "learning_rate": 1.7509226051184716e-05, + "loss": 0.6029868125915527, + "step": 1835 + }, + { + "epoch": 0.5369206024272555, + "grad_norm": 1.5575658935823142, + "learning_rate": 1.7506032380431718e-05, + "loss": 0.6749545335769653, + "step": 1836 + }, + { + "epoch": 0.5372130428425208, + "grad_norm": 1.4261868258477342, + "learning_rate": 1.750283695518929e-05, + "loss": 0.7710991501808167, + "step": 1837 + }, + { + "epoch": 0.5375054832577862, + "grad_norm": 1.2797893583505542, + "learning_rate": 1.7499639776204334e-05, + "loss": 0.6330907940864563, + "step": 1838 + }, + { + "epoch": 0.5377979236730516, + "grad_norm": 1.3697405221939354, + "learning_rate": 1.7496440844224186e-05, + "loss": 0.655827522277832, + "step": 1839 + }, + { + "epoch": 0.538090364088317, + "grad_norm": 1.3640883815652403, + "learning_rate": 1.7493240159996565e-05, + "loss": 0.723412275314331, + "step": 1840 + }, + { + "epoch": 0.5383828045035824, + "grad_norm": 1.273855459734962, + "learning_rate": 1.7490037724269618e-05, + "loss": 0.5504157543182373, + "step": 1841 + }, + { + "epoch": 0.5386752449188478, + "grad_norm": 1.3867652356352673, + "learning_rate": 1.7486833537791895e-05, + "loss": 0.6258282661437988, + "step": 1842 + }, + { + "epoch": 0.5389676853341132, + "grad_norm": 1.3063024833172743, + "learning_rate": 1.748362760131235e-05, + "loss": 0.7044231295585632, + "step": 1843 + }, + { + "epoch": 
0.5392601257493785, + "grad_norm": 1.329844005030904, + "learning_rate": 1.7480419915580357e-05, + "loss": 0.5979568362236023, + "step": 1844 + }, + { + "epoch": 0.539552566164644, + "grad_norm": 1.2396904419147898, + "learning_rate": 1.7477210481345686e-05, + "loss": 0.558562159538269, + "step": 1845 + }, + { + "epoch": 0.5398450065799093, + "grad_norm": 1.5914882070233294, + "learning_rate": 1.747399929935853e-05, + "loss": 0.5965149402618408, + "step": 1846 + }, + { + "epoch": 0.5401374469951747, + "grad_norm": 1.2286076413347484, + "learning_rate": 1.7470786370369483e-05, + "loss": 0.6202878355979919, + "step": 1847 + }, + { + "epoch": 0.5404298874104401, + "grad_norm": 1.4696847585462156, + "learning_rate": 1.746757169512954e-05, + "loss": 0.652141273021698, + "step": 1848 + }, + { + "epoch": 0.5407223278257055, + "grad_norm": 1.3491880900702233, + "learning_rate": 1.746435527439012e-05, + "loss": 0.5713402628898621, + "step": 1849 + }, + { + "epoch": 0.541014768240971, + "grad_norm": 1.1036198614058235, + "learning_rate": 1.7461137108903042e-05, + "loss": 0.49776554107666016, + "step": 1850 + }, + { + "epoch": 0.5413072086562363, + "grad_norm": 1.3593053008733638, + "learning_rate": 1.7457917199420525e-05, + "loss": 0.7047991752624512, + "step": 1851 + }, + { + "epoch": 0.5415996490715017, + "grad_norm": 1.249302868601747, + "learning_rate": 1.7454695546695207e-05, + "loss": 0.7019875049591064, + "step": 1852 + }, + { + "epoch": 0.541892089486767, + "grad_norm": 1.1395410254023401, + "learning_rate": 1.745147215148013e-05, + "loss": 0.5448435544967651, + "step": 1853 + }, + { + "epoch": 0.5421845299020325, + "grad_norm": 1.3392616230054089, + "learning_rate": 1.7448247014528745e-05, + "loss": 0.6042202711105347, + "step": 1854 + }, + { + "epoch": 0.5424769703172978, + "grad_norm": 1.6632726033150385, + "learning_rate": 1.744502013659491e-05, + "loss": 0.8448539972305298, + "step": 1855 + }, + { + "epoch": 0.5427694107325632, + "grad_norm": 1.5168637416823716, 
+ "learning_rate": 1.7441791518432877e-05, + "loss": 0.6541755795478821, + "step": 1856 + }, + { + "epoch": 0.5430618511478287, + "grad_norm": 1.3214742528031191, + "learning_rate": 1.7438561160797326e-05, + "loss": 0.6700184345245361, + "step": 1857 + }, + { + "epoch": 0.543354291563094, + "grad_norm": 1.5975598198717695, + "learning_rate": 1.7435329064443335e-05, + "loss": 0.6407896280288696, + "step": 1858 + }, + { + "epoch": 0.5436467319783594, + "grad_norm": 1.1007084555597737, + "learning_rate": 1.7432095230126382e-05, + "loss": 0.5380120277404785, + "step": 1859 + }, + { + "epoch": 0.5439391723936248, + "grad_norm": 1.4184366915429367, + "learning_rate": 1.7428859658602353e-05, + "loss": 0.6561373472213745, + "step": 1860 + }, + { + "epoch": 0.5442316128088902, + "grad_norm": 1.7211281199225186, + "learning_rate": 1.7425622350627545e-05, + "loss": 0.724541962146759, + "step": 1861 + }, + { + "epoch": 0.5445240532241555, + "grad_norm": 1.3361773700031112, + "learning_rate": 1.7422383306958666e-05, + "loss": 0.6258946657180786, + "step": 1862 + }, + { + "epoch": 0.544816493639421, + "grad_norm": 1.4343211647036773, + "learning_rate": 1.7419142528352815e-05, + "loss": 0.560769259929657, + "step": 1863 + }, + { + "epoch": 0.5451089340546864, + "grad_norm": 1.3199774156859019, + "learning_rate": 1.741590001556751e-05, + "loss": 0.7782202363014221, + "step": 1864 + }, + { + "epoch": 0.5454013744699517, + "grad_norm": 1.1330260111547463, + "learning_rate": 1.7412655769360663e-05, + "loss": 0.5956888198852539, + "step": 1865 + }, + { + "epoch": 0.5456938148852172, + "grad_norm": 1.2304180375361309, + "learning_rate": 1.7409409790490602e-05, + "loss": 0.6251999139785767, + "step": 1866 + }, + { + "epoch": 0.5459862553004825, + "grad_norm": 1.201828702533108, + "learning_rate": 1.740616207971605e-05, + "loss": 0.5864061713218689, + "step": 1867 + }, + { + "epoch": 0.5462786957157479, + "grad_norm": 1.1335552643310969, + "learning_rate": 1.7402912637796146e-05, + 
"loss": 0.6241225004196167, + "step": 1868 + }, + { + "epoch": 0.5465711361310133, + "grad_norm": 1.4457655679285375, + "learning_rate": 1.739966146549042e-05, + "loss": 0.7190053462982178, + "step": 1869 + }, + { + "epoch": 0.5468635765462787, + "grad_norm": 1.3107442552185273, + "learning_rate": 1.739640856355882e-05, + "loss": 0.6771985292434692, + "step": 1870 + }, + { + "epoch": 0.547156016961544, + "grad_norm": 1.3163112428890422, + "learning_rate": 1.7393153932761687e-05, + "loss": 0.5480636954307556, + "step": 1871 + }, + { + "epoch": 0.5474484573768095, + "grad_norm": 1.5272520029044583, + "learning_rate": 1.7389897573859773e-05, + "loss": 0.7362977862358093, + "step": 1872 + }, + { + "epoch": 0.5477408977920749, + "grad_norm": 1.3701377425052599, + "learning_rate": 1.7386639487614232e-05, + "loss": 0.6483198404312134, + "step": 1873 + }, + { + "epoch": 0.5480333382073402, + "grad_norm": 1.137215399363759, + "learning_rate": 1.7383379674786622e-05, + "loss": 0.479977548122406, + "step": 1874 + }, + { + "epoch": 0.5483257786226057, + "grad_norm": 1.2815568792711947, + "learning_rate": 1.738011813613891e-05, + "loss": 0.6824718117713928, + "step": 1875 + }, + { + "epoch": 0.548618219037871, + "grad_norm": 1.4252738341228008, + "learning_rate": 1.737685487243345e-05, + "loss": 0.564873218536377, + "step": 1876 + }, + { + "epoch": 0.5489106594531364, + "grad_norm": 1.208162951014484, + "learning_rate": 1.7373589884433015e-05, + "loss": 0.5748772621154785, + "step": 1877 + }, + { + "epoch": 0.5492030998684018, + "grad_norm": 1.329038884364766, + "learning_rate": 1.7370323172900778e-05, + "loss": 0.6403437852859497, + "step": 1878 + }, + { + "epoch": 0.5494955402836672, + "grad_norm": 1.7288162586927747, + "learning_rate": 1.7367054738600312e-05, + "loss": 0.8253078460693359, + "step": 1879 + }, + { + "epoch": 0.5497879806989326, + "grad_norm": 1.204164217164209, + "learning_rate": 1.7363784582295596e-05, + "loss": 0.6823058128356934, + "step": 1880 + }, + { + 
"epoch": 0.550080421114198, + "grad_norm": 1.0289811643005782, + "learning_rate": 1.7360512704751003e-05, + "loss": 0.48659563064575195, + "step": 1881 + }, + { + "epoch": 0.5503728615294634, + "grad_norm": 1.5395158772607802, + "learning_rate": 1.735723910673132e-05, + "loss": 0.6380710601806641, + "step": 1882 + }, + { + "epoch": 0.5506653019447287, + "grad_norm": 1.512121712639047, + "learning_rate": 1.7353963789001723e-05, + "loss": 0.6956683397293091, + "step": 1883 + }, + { + "epoch": 0.5509577423599942, + "grad_norm": 1.2992852551955654, + "learning_rate": 1.735068675232781e-05, + "loss": 0.5751473903656006, + "step": 1884 + }, + { + "epoch": 0.5512501827752595, + "grad_norm": 1.3297689987083825, + "learning_rate": 1.734740799747556e-05, + "loss": 0.7265490293502808, + "step": 1885 + }, + { + "epoch": 0.5515426231905249, + "grad_norm": 1.3317519459591716, + "learning_rate": 1.734412752521136e-05, + "loss": 0.7419843673706055, + "step": 1886 + }, + { + "epoch": 0.5518350636057903, + "grad_norm": 1.3385317545855182, + "learning_rate": 1.734084533630201e-05, + "loss": 0.7381073236465454, + "step": 1887 + }, + { + "epoch": 0.5521275040210557, + "grad_norm": 1.4687535531628584, + "learning_rate": 1.7337561431514692e-05, + "loss": 0.6542054414749146, + "step": 1888 + }, + { + "epoch": 0.5524199444363211, + "grad_norm": 1.3144027889366288, + "learning_rate": 1.7334275811617e-05, + "loss": 0.6283866167068481, + "step": 1889 + }, + { + "epoch": 0.5527123848515865, + "grad_norm": 1.34879443340803, + "learning_rate": 1.7330988477376935e-05, + "loss": 0.6171330809593201, + "step": 1890 + }, + { + "epoch": 0.5530048252668519, + "grad_norm": 1.309075407888037, + "learning_rate": 1.7327699429562887e-05, + "loss": 0.5181430578231812, + "step": 1891 + }, + { + "epoch": 0.5532972656821172, + "grad_norm": 1.4382455208413174, + "learning_rate": 1.7324408668943645e-05, + "loss": 0.7337771058082581, + "step": 1892 + }, + { + "epoch": 0.5535897060973827, + "grad_norm": 
1.3677542553778577, + "learning_rate": 1.7321116196288413e-05, + "loss": 0.5193721055984497, + "step": 1893 + }, + { + "epoch": 0.553882146512648, + "grad_norm": 1.2912522952038028, + "learning_rate": 1.731782201236678e-05, + "loss": 0.7743211388587952, + "step": 1894 + }, + { + "epoch": 0.5541745869279134, + "grad_norm": 1.5457463678190766, + "learning_rate": 1.731452611794875e-05, + "loss": 0.8244242072105408, + "step": 1895 + }, + { + "epoch": 0.5544670273431789, + "grad_norm": 1.3153817051947132, + "learning_rate": 1.7311228513804712e-05, + "loss": 0.6276153326034546, + "step": 1896 + }, + { + "epoch": 0.5547594677584442, + "grad_norm": 1.4741498614217154, + "learning_rate": 1.7307929200705463e-05, + "loss": 0.7919771671295166, + "step": 1897 + }, + { + "epoch": 0.5550519081737096, + "grad_norm": 1.527110359994231, + "learning_rate": 1.7304628179422192e-05, + "loss": 0.6187459230422974, + "step": 1898 + }, + { + "epoch": 0.555344348588975, + "grad_norm": 1.1766146767977552, + "learning_rate": 1.7301325450726497e-05, + "loss": 0.6190363764762878, + "step": 1899 + }, + { + "epoch": 0.5556367890042404, + "grad_norm": 1.209178127119406, + "learning_rate": 1.7298021015390375e-05, + "loss": 0.5537956953048706, + "step": 1900 + }, + { + "epoch": 0.5559292294195057, + "grad_norm": 1.434637926231007, + "learning_rate": 1.729471487418621e-05, + "loss": 0.7164788246154785, + "step": 1901 + }, + { + "epoch": 0.5562216698347712, + "grad_norm": 1.2878374944552806, + "learning_rate": 1.7291407027886796e-05, + "loss": 0.6101689338684082, + "step": 1902 + }, + { + "epoch": 0.5565141102500366, + "grad_norm": 1.4102535348815881, + "learning_rate": 1.7288097477265322e-05, + "loss": 0.7112093567848206, + "step": 1903 + }, + { + "epoch": 0.5568065506653019, + "grad_norm": 1.698804519808014, + "learning_rate": 1.7284786223095376e-05, + "loss": 0.7807149291038513, + "step": 1904 + }, + { + "epoch": 0.5570989910805674, + "grad_norm": 1.3150296925108194, + "learning_rate": 
1.7281473266150942e-05, + "loss": 0.5723121166229248, + "step": 1905 + }, + { + "epoch": 0.5573914314958327, + "grad_norm": 1.4287078485940368, + "learning_rate": 1.7278158607206402e-05, + "loss": 0.6901307106018066, + "step": 1906 + }, + { + "epoch": 0.5576838719110981, + "grad_norm": 1.3895105915390893, + "learning_rate": 1.7274842247036547e-05, + "loss": 0.8247314095497131, + "step": 1907 + }, + { + "epoch": 0.5579763123263635, + "grad_norm": 1.2902939634670878, + "learning_rate": 1.727152418641654e-05, + "loss": 0.758405327796936, + "step": 1908 + }, + { + "epoch": 0.5582687527416289, + "grad_norm": 1.1507745861737273, + "learning_rate": 1.7268204426121967e-05, + "loss": 0.6448276042938232, + "step": 1909 + }, + { + "epoch": 0.5585611931568942, + "grad_norm": 1.4597983603763345, + "learning_rate": 1.7264882966928803e-05, + "loss": 0.6846790313720703, + "step": 1910 + }, + { + "epoch": 0.5588536335721597, + "grad_norm": 1.494960410585431, + "learning_rate": 1.726155980961342e-05, + "loss": 0.6427637338638306, + "step": 1911 + }, + { + "epoch": 0.5591460739874251, + "grad_norm": 1.6049335332675108, + "learning_rate": 1.7258234954952578e-05, + "loss": 0.7105496525764465, + "step": 1912 + }, + { + "epoch": 0.5594385144026904, + "grad_norm": 1.247874236176648, + "learning_rate": 1.7254908403723446e-05, + "loss": 0.6307404041290283, + "step": 1913 + }, + { + "epoch": 0.5597309548179559, + "grad_norm": 1.3460021193743466, + "learning_rate": 1.7251580156703587e-05, + "loss": 0.7194197177886963, + "step": 1914 + }, + { + "epoch": 0.5600233952332212, + "grad_norm": 1.4541814827650097, + "learning_rate": 1.7248250214670955e-05, + "loss": 0.676772952079773, + "step": 1915 + }, + { + "epoch": 0.5603158356484866, + "grad_norm": 1.4231220185819522, + "learning_rate": 1.724491857840391e-05, + "loss": 0.6047924160957336, + "step": 1916 + }, + { + "epoch": 0.560608276063752, + "grad_norm": 1.4639689581400968, + "learning_rate": 1.7241585248681192e-05, + "loss": 
0.7412474155426025, + "step": 1917 + }, + { + "epoch": 0.5609007164790174, + "grad_norm": 1.3634846491128696, + "learning_rate": 1.7238250226281952e-05, + "loss": 0.6337922215461731, + "step": 1918 + }, + { + "epoch": 0.5611931568942828, + "grad_norm": 1.325394488194612, + "learning_rate": 1.7234913511985733e-05, + "loss": 0.7192416787147522, + "step": 1919 + }, + { + "epoch": 0.5614855973095482, + "grad_norm": 1.5807591545293311, + "learning_rate": 1.723157510657247e-05, + "loss": 0.6576168537139893, + "step": 1920 + }, + { + "epoch": 0.5617780377248136, + "grad_norm": 1.2677184116479052, + "learning_rate": 1.722823501082249e-05, + "loss": 0.6592451333999634, + "step": 1921 + }, + { + "epoch": 0.5620704781400789, + "grad_norm": 1.3384834377307993, + "learning_rate": 1.722489322551653e-05, + "loss": 0.8042774796485901, + "step": 1922 + }, + { + "epoch": 0.5623629185553444, + "grad_norm": 1.4566017039283872, + "learning_rate": 1.7221549751435706e-05, + "loss": 0.727135181427002, + "step": 1923 + }, + { + "epoch": 0.5626553589706097, + "grad_norm": 1.3099994778880142, + "learning_rate": 1.7218204589361535e-05, + "loss": 0.5641134977340698, + "step": 1924 + }, + { + "epoch": 0.5629477993858751, + "grad_norm": 1.5113194940037022, + "learning_rate": 1.7214857740075924e-05, + "loss": 0.6354084610939026, + "step": 1925 + }, + { + "epoch": 0.5632402398011405, + "grad_norm": 1.3038206210364904, + "learning_rate": 1.7211509204361187e-05, + "loss": 0.6044377088546753, + "step": 1926 + }, + { + "epoch": 0.5635326802164059, + "grad_norm": 1.2045011077136063, + "learning_rate": 1.7208158983000022e-05, + "loss": 0.5519559383392334, + "step": 1927 + }, + { + "epoch": 0.5638251206316713, + "grad_norm": 1.162061868190052, + "learning_rate": 1.7204807076775514e-05, + "loss": 0.4480612277984619, + "step": 1928 + }, + { + "epoch": 0.5641175610469367, + "grad_norm": 1.3899173129631617, + "learning_rate": 1.7201453486471167e-05, + "loss": 0.5929607152938843, + "step": 1929 + }, + { + 
"epoch": 0.5644100014622021, + "grad_norm": 2.021763483016241, + "learning_rate": 1.7198098212870847e-05, + "loss": 0.6863572001457214, + "step": 1930 + }, + { + "epoch": 0.5647024418774674, + "grad_norm": 1.0938398450209694, + "learning_rate": 1.719474125675884e-05, + "loss": 0.5551834106445312, + "step": 1931 + }, + { + "epoch": 0.5649948822927329, + "grad_norm": 1.3644128319132816, + "learning_rate": 1.7191382618919802e-05, + "loss": 0.6113166809082031, + "step": 1932 + }, + { + "epoch": 0.5652873227079982, + "grad_norm": 1.419009993473521, + "learning_rate": 1.7188022300138805e-05, + "loss": 0.7833362817764282, + "step": 1933 + }, + { + "epoch": 0.5655797631232636, + "grad_norm": 1.3899666208681147, + "learning_rate": 1.71846603012013e-05, + "loss": 0.5981882810592651, + "step": 1934 + }, + { + "epoch": 0.5658722035385291, + "grad_norm": 1.3211180154101085, + "learning_rate": 1.7181296622893132e-05, + "loss": 0.6009912490844727, + "step": 1935 + }, + { + "epoch": 0.5661646439537944, + "grad_norm": 1.5201002205446237, + "learning_rate": 1.717793126600054e-05, + "loss": 0.5605272054672241, + "step": 1936 + }, + { + "epoch": 0.5664570843690598, + "grad_norm": 1.9581129231236365, + "learning_rate": 1.717456423131016e-05, + "loss": 0.6310821771621704, + "step": 1937 + }, + { + "epoch": 0.5667495247843252, + "grad_norm": 1.3290964241159713, + "learning_rate": 1.7171195519609013e-05, + "loss": 0.6776266694068909, + "step": 1938 + }, + { + "epoch": 0.5670419651995906, + "grad_norm": 1.5744599660597636, + "learning_rate": 1.7167825131684516e-05, + "loss": 0.6369091868400574, + "step": 1939 + }, + { + "epoch": 0.5673344056148559, + "grad_norm": 1.5708596771950396, + "learning_rate": 1.7164453068324472e-05, + "loss": 0.6241647005081177, + "step": 1940 + }, + { + "epoch": 0.5676268460301214, + "grad_norm": 1.1863544042032323, + "learning_rate": 1.7161079330317086e-05, + "loss": 0.6411961317062378, + "step": 1941 + }, + { + "epoch": 0.5679192864453868, + "grad_norm": 
1.4635134179889109, + "learning_rate": 1.7157703918450942e-05, + "loss": 0.6148936152458191, + "step": 1942 + }, + { + "epoch": 0.5682117268606521, + "grad_norm": 1.3183225060577142, + "learning_rate": 1.7154326833515034e-05, + "loss": 0.5006934404373169, + "step": 1943 + }, + { + "epoch": 0.5685041672759176, + "grad_norm": 1.462356689812602, + "learning_rate": 1.7150948076298722e-05, + "loss": 0.7446701526641846, + "step": 1944 + }, + { + "epoch": 0.5687966076911829, + "grad_norm": 1.2052848826016378, + "learning_rate": 1.7147567647591777e-05, + "loss": 0.6159533262252808, + "step": 1945 + }, + { + "epoch": 0.5690890481064483, + "grad_norm": 1.4298530885651661, + "learning_rate": 1.7144185548184355e-05, + "loss": 0.6437554359436035, + "step": 1946 + }, + { + "epoch": 0.5693814885217137, + "grad_norm": 1.3361469734250542, + "learning_rate": 1.7140801778866995e-05, + "loss": 0.6229397654533386, + "step": 1947 + }, + { + "epoch": 0.5696739289369791, + "grad_norm": 1.4197238006731758, + "learning_rate": 1.7137416340430636e-05, + "loss": 0.5777184963226318, + "step": 1948 + }, + { + "epoch": 0.5699663693522444, + "grad_norm": 1.543436374887725, + "learning_rate": 1.7134029233666603e-05, + "loss": 0.7817827463150024, + "step": 1949 + }, + { + "epoch": 0.5702588097675099, + "grad_norm": 1.3527927450904613, + "learning_rate": 1.713064045936662e-05, + "loss": 0.6784861087799072, + "step": 1950 + }, + { + "epoch": 0.5705512501827753, + "grad_norm": 1.2839254399050724, + "learning_rate": 1.7127250018322777e-05, + "loss": 0.6883150339126587, + "step": 1951 + }, + { + "epoch": 0.5708436905980406, + "grad_norm": 1.093202890209594, + "learning_rate": 1.712385791132758e-05, + "loss": 0.5464504957199097, + "step": 1952 + }, + { + "epoch": 0.5711361310133061, + "grad_norm": 1.2617859237604026, + "learning_rate": 1.7120464139173908e-05, + "loss": 0.5950040817260742, + "step": 1953 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 1.374864335037442, + "learning_rate": 
1.7117068702655034e-05, + "loss": 0.6381576061248779, + "step": 1954 + }, + { + "epoch": 0.5717210118438368, + "grad_norm": 1.2624571465966312, + "learning_rate": 1.7113671602564628e-05, + "loss": 0.6611777544021606, + "step": 1955 + }, + { + "epoch": 0.5720134522591022, + "grad_norm": 1.2625162580462326, + "learning_rate": 1.7110272839696735e-05, + "loss": 0.5057446956634521, + "step": 1956 + }, + { + "epoch": 0.5723058926743676, + "grad_norm": 1.3802970727547992, + "learning_rate": 1.7106872414845798e-05, + "loss": 0.6095671653747559, + "step": 1957 + }, + { + "epoch": 0.572598333089633, + "grad_norm": 1.4171107803407814, + "learning_rate": 1.710347032880664e-05, + "loss": 0.5514808893203735, + "step": 1958 + }, + { + "epoch": 0.5728907735048984, + "grad_norm": 2.1059044775107516, + "learning_rate": 1.7100066582374487e-05, + "loss": 0.6491304039955139, + "step": 1959 + }, + { + "epoch": 0.5731832139201638, + "grad_norm": 1.2887931231971388, + "learning_rate": 1.7096661176344936e-05, + "loss": 0.6759692430496216, + "step": 1960 + }, + { + "epoch": 0.5734756543354291, + "grad_norm": 1.4738884192318065, + "learning_rate": 1.709325411151399e-05, + "loss": 0.5897858142852783, + "step": 1961 + }, + { + "epoch": 0.5737680947506946, + "grad_norm": 1.537196415964603, + "learning_rate": 1.7089845388678015e-05, + "loss": 0.6822922229766846, + "step": 1962 + }, + { + "epoch": 0.5740605351659599, + "grad_norm": 1.2963583337618676, + "learning_rate": 1.7086435008633792e-05, + "loss": 0.7694820165634155, + "step": 1963 + }, + { + "epoch": 0.5743529755812253, + "grad_norm": 1.5109651591265172, + "learning_rate": 1.7083022972178473e-05, + "loss": 0.702151358127594, + "step": 1964 + }, + { + "epoch": 0.5746454159964907, + "grad_norm": 1.564445011536072, + "learning_rate": 1.7079609280109597e-05, + "loss": 0.768844485282898, + "step": 1965 + }, + { + "epoch": 0.5749378564117561, + "grad_norm": 1.4251497195478635, + "learning_rate": 1.7076193933225097e-05, + "loss": 
0.6641331911087036, + "step": 1966 + }, + { + "epoch": 0.5752302968270215, + "grad_norm": 1.3577479649866828, + "learning_rate": 1.707277693232329e-05, + "loss": 0.7176777124404907, + "step": 1967 + }, + { + "epoch": 0.5755227372422869, + "grad_norm": 1.4539026175393464, + "learning_rate": 1.7069358278202877e-05, + "loss": 0.6543929576873779, + "step": 1968 + }, + { + "epoch": 0.5758151776575523, + "grad_norm": 1.422676342883674, + "learning_rate": 1.7065937971662953e-05, + "loss": 0.7501214742660522, + "step": 1969 + }, + { + "epoch": 0.5761076180728176, + "grad_norm": 1.1830543705848042, + "learning_rate": 1.7062516013502984e-05, + "loss": 0.6013212203979492, + "step": 1970 + }, + { + "epoch": 0.5764000584880831, + "grad_norm": 1.489892931502725, + "learning_rate": 1.7059092404522843e-05, + "loss": 0.5920547246932983, + "step": 1971 + }, + { + "epoch": 0.5766924989033484, + "grad_norm": 1.1082983109051399, + "learning_rate": 1.7055667145522767e-05, + "loss": 0.6720744371414185, + "step": 1972 + }, + { + "epoch": 0.5769849393186138, + "grad_norm": 1.3476214386922525, + "learning_rate": 1.70522402373034e-05, + "loss": 0.6938234567642212, + "step": 1973 + }, + { + "epoch": 0.5772773797338793, + "grad_norm": 1.321699429936501, + "learning_rate": 1.704881168066575e-05, + "loss": 0.6430555582046509, + "step": 1974 + }, + { + "epoch": 0.5775698201491446, + "grad_norm": 1.331724408429167, + "learning_rate": 1.7045381476411234e-05, + "loss": 0.7738221883773804, + "step": 1975 + }, + { + "epoch": 0.57786226056441, + "grad_norm": 1.2033511527827634, + "learning_rate": 1.704194962534163e-05, + "loss": 0.5335453748703003, + "step": 1976 + }, + { + "epoch": 0.5781547009796754, + "grad_norm": 1.4123366931040846, + "learning_rate": 1.7038516128259118e-05, + "loss": 0.691404402256012, + "step": 1977 + }, + { + "epoch": 0.5784471413949408, + "grad_norm": 1.6032589522393152, + "learning_rate": 1.7035080985966253e-05, + "loss": 0.7371880412101746, + "step": 1978 + }, + { + "epoch": 
0.5787395818102061, + "grad_norm": 1.356558066648364, + "learning_rate": 1.7031644199265987e-05, + "loss": 0.5661574602127075, + "step": 1979 + }, + { + "epoch": 0.5790320222254716, + "grad_norm": 1.069750621474732, + "learning_rate": 1.702820576896164e-05, + "loss": 0.5823863744735718, + "step": 1980 + }, + { + "epoch": 0.579324462640737, + "grad_norm": 1.608685609966537, + "learning_rate": 1.7024765695856924e-05, + "loss": 0.6228796243667603, + "step": 1981 + }, + { + "epoch": 0.5796169030560023, + "grad_norm": 1.3395261062815815, + "learning_rate": 1.702132398075594e-05, + "loss": 0.5788040161132812, + "step": 1982 + }, + { + "epoch": 0.5799093434712678, + "grad_norm": 1.1540676629937416, + "learning_rate": 1.701788062446317e-05, + "loss": 0.5950253009796143, + "step": 1983 + }, + { + "epoch": 0.5802017838865331, + "grad_norm": 1.2446098890682338, + "learning_rate": 1.7014435627783466e-05, + "loss": 0.5672034025192261, + "step": 1984 + }, + { + "epoch": 0.5804942243017985, + "grad_norm": 1.329055336569987, + "learning_rate": 1.7010988991522085e-05, + "loss": 0.6646316051483154, + "step": 1985 + }, + { + "epoch": 0.5807866647170639, + "grad_norm": 1.2423480846022465, + "learning_rate": 1.7007540716484657e-05, + "loss": 0.6430097818374634, + "step": 1986 + }, + { + "epoch": 0.5810791051323293, + "grad_norm": 1.2889752174339557, + "learning_rate": 1.700409080347719e-05, + "loss": 0.5803329348564148, + "step": 1987 + }, + { + "epoch": 0.5813715455475946, + "grad_norm": 1.613226747300198, + "learning_rate": 1.7000639253306085e-05, + "loss": 0.7526525259017944, + "step": 1988 + }, + { + "epoch": 0.5816639859628601, + "grad_norm": 1.329271357875936, + "learning_rate": 1.6997186066778118e-05, + "loss": 0.6679468750953674, + "step": 1989 + }, + { + "epoch": 0.5819564263781255, + "grad_norm": 1.5773364597040387, + "learning_rate": 1.6993731244700454e-05, + "loss": 0.7233256101608276, + "step": 1990 + }, + { + "epoch": 0.5822488667933908, + "grad_norm": 1.3632345541871926, 
+ "learning_rate": 1.6990274787880633e-05, + "loss": 0.5986290574073792, + "step": 1991 + }, + { + "epoch": 0.5825413072086563, + "grad_norm": 1.3136772281139917, + "learning_rate": 1.6986816697126583e-05, + "loss": 0.6898672580718994, + "step": 1992 + }, + { + "epoch": 0.5828337476239216, + "grad_norm": 1.6057802032529045, + "learning_rate": 1.698335697324661e-05, + "loss": 0.6888613104820251, + "step": 1993 + }, + { + "epoch": 0.583126188039187, + "grad_norm": 1.3469913891844598, + "learning_rate": 1.6979895617049404e-05, + "loss": 0.6002428531646729, + "step": 1994 + }, + { + "epoch": 0.5834186284544524, + "grad_norm": 1.3517104173069454, + "learning_rate": 1.6976432629344036e-05, + "loss": 0.6372438669204712, + "step": 1995 + }, + { + "epoch": 0.5837110688697178, + "grad_norm": 1.0868680846473084, + "learning_rate": 1.6972968010939953e-05, + "loss": 0.529569149017334, + "step": 1996 + }, + { + "epoch": 0.5840035092849832, + "grad_norm": 1.415626330345063, + "learning_rate": 1.6969501762647002e-05, + "loss": 0.5534025430679321, + "step": 1997 + }, + { + "epoch": 0.5842959497002486, + "grad_norm": 1.5855609078257513, + "learning_rate": 1.6966033885275384e-05, + "loss": 0.8105937242507935, + "step": 1998 + }, + { + "epoch": 0.584588390115514, + "grad_norm": 1.4927698791899027, + "learning_rate": 1.6962564379635702e-05, + "loss": 0.7657530903816223, + "step": 1999 + }, + { + "epoch": 0.5848808305307793, + "grad_norm": 1.2186213815751603, + "learning_rate": 1.6959093246538927e-05, + "loss": 0.5941641330718994, + "step": 2000 + }, + { + "epoch": 0.5851732709460448, + "grad_norm": 1.2310851342087676, + "learning_rate": 1.695562048679642e-05, + "loss": 0.6130149364471436, + "step": 2001 + }, + { + "epoch": 0.5854657113613101, + "grad_norm": 1.4904324383349616, + "learning_rate": 1.6952146101219914e-05, + "loss": 0.7078043222427368, + "step": 2002 + }, + { + "epoch": 0.5857581517765755, + "grad_norm": 1.4412882425814895, + "learning_rate": 1.6948670090621528e-05, + 
"loss": 0.6330863237380981, + "step": 2003 + }, + { + "epoch": 0.5860505921918409, + "grad_norm": 1.2835823957491164, + "learning_rate": 1.6945192455813755e-05, + "loss": 0.6631220579147339, + "step": 2004 + }, + { + "epoch": 0.5863430326071063, + "grad_norm": 1.411600977622384, + "learning_rate": 1.6941713197609476e-05, + "loss": 0.6669473648071289, + "step": 2005 + }, + { + "epoch": 0.5866354730223717, + "grad_norm": 1.370088328820007, + "learning_rate": 1.6938232316821938e-05, + "loss": 0.608252763748169, + "step": 2006 + }, + { + "epoch": 0.5869279134376371, + "grad_norm": 1.3777699704962545, + "learning_rate": 1.6934749814264786e-05, + "loss": 0.5979427695274353, + "step": 2007 + }, + { + "epoch": 0.5872203538529025, + "grad_norm": 1.541200433158731, + "learning_rate": 1.6931265690752027e-05, + "loss": 0.5653454661369324, + "step": 2008 + }, + { + "epoch": 0.5875127942681678, + "grad_norm": 1.1212005773159774, + "learning_rate": 1.6927779947098052e-05, + "loss": 0.6399147510528564, + "step": 2009 + }, + { + "epoch": 0.5878052346834333, + "grad_norm": 1.1797468758477498, + "learning_rate": 1.6924292584117642e-05, + "loss": 0.41824793815612793, + "step": 2010 + }, + { + "epoch": 0.5880976750986986, + "grad_norm": 1.917297128854583, + "learning_rate": 1.6920803602625938e-05, + "loss": 0.8881042003631592, + "step": 2011 + }, + { + "epoch": 0.588390115513964, + "grad_norm": 1.331713386917835, + "learning_rate": 1.6917313003438473e-05, + "loss": 0.636030912399292, + "step": 2012 + }, + { + "epoch": 0.5886825559292295, + "grad_norm": 1.4002891525649699, + "learning_rate": 1.6913820787371147e-05, + "loss": 0.6038305759429932, + "step": 2013 + }, + { + "epoch": 0.5889749963444948, + "grad_norm": 1.1435051787090085, + "learning_rate": 1.6910326955240252e-05, + "loss": 0.7073840498924255, + "step": 2014 + }, + { + "epoch": 0.5892674367597602, + "grad_norm": 1.4386346426993692, + "learning_rate": 1.6906831507862446e-05, + "loss": 0.5804994106292725, + "step": 2015 + }, + 
{ + "epoch": 0.5895598771750256, + "grad_norm": 1.5741785374654678, + "learning_rate": 1.6903334446054768e-05, + "loss": 0.8194780349731445, + "step": 2016 + }, + { + "epoch": 0.589852317590291, + "grad_norm": 1.812303850133564, + "learning_rate": 1.689983577063464e-05, + "loss": 0.7348685264587402, + "step": 2017 + }, + { + "epoch": 0.5901447580055563, + "grad_norm": 1.1971589423872142, + "learning_rate": 1.689633548241985e-05, + "loss": 0.5855007171630859, + "step": 2018 + }, + { + "epoch": 0.5904371984208218, + "grad_norm": 1.3707253561652837, + "learning_rate": 1.689283358222857e-05, + "loss": 0.7387616634368896, + "step": 2019 + }, + { + "epoch": 0.5907296388360872, + "grad_norm": 1.1680954205847025, + "learning_rate": 1.688933007087935e-05, + "loss": 0.688759446144104, + "step": 2020 + }, + { + "epoch": 0.5910220792513525, + "grad_norm": 1.5341989172452428, + "learning_rate": 1.6885824949191117e-05, + "loss": 0.7203953266143799, + "step": 2021 + }, + { + "epoch": 0.591314519666618, + "grad_norm": 1.2850552689542662, + "learning_rate": 1.6882318217983165e-05, + "loss": 0.6465663909912109, + "step": 2022 + }, + { + "epoch": 0.5916069600818833, + "grad_norm": 1.1725524993946357, + "learning_rate": 1.6878809878075176e-05, + "loss": 0.6625394821166992, + "step": 2023 + }, + { + "epoch": 0.5918994004971487, + "grad_norm": 1.1518619162929866, + "learning_rate": 1.68752999302872e-05, + "loss": 0.6577074527740479, + "step": 2024 + }, + { + "epoch": 0.5921918409124141, + "grad_norm": 1.2660442226503865, + "learning_rate": 1.6871788375439667e-05, + "loss": 0.50509113073349, + "step": 2025 + }, + { + "epoch": 0.5924842813276795, + "grad_norm": 1.1506732126554624, + "learning_rate": 1.6868275214353387e-05, + "loss": 0.5723974704742432, + "step": 2026 + }, + { + "epoch": 0.5927767217429448, + "grad_norm": 1.5630741195611901, + "learning_rate": 1.6864760447849533e-05, + "loss": 0.6383459568023682, + "step": 2027 + }, + { + "epoch": 0.5930691621582103, + "grad_norm": 
1.5937791400894217, + "learning_rate": 1.6861244076749663e-05, + "loss": 0.5307388305664062, + "step": 2028 + }, + { + "epoch": 0.5933616025734757, + "grad_norm": 1.3756662975981515, + "learning_rate": 1.6857726101875706e-05, + "loss": 0.8009265661239624, + "step": 2029 + }, + { + "epoch": 0.593654042988741, + "grad_norm": 1.3635510886639874, + "learning_rate": 1.685420652404997e-05, + "loss": 0.5505321025848389, + "step": 2030 + }, + { + "epoch": 0.5939464834040065, + "grad_norm": 1.2645625310092812, + "learning_rate": 1.6850685344095134e-05, + "loss": 0.680927038192749, + "step": 2031 + }, + { + "epoch": 0.5942389238192718, + "grad_norm": 1.419624052256642, + "learning_rate": 1.684716256283425e-05, + "loss": 0.7357309460639954, + "step": 2032 + }, + { + "epoch": 0.5945313642345372, + "grad_norm": 1.2277919560967578, + "learning_rate": 1.6843638181090748e-05, + "loss": 0.5896620750427246, + "step": 2033 + }, + { + "epoch": 0.5948238046498026, + "grad_norm": 1.261982037348603, + "learning_rate": 1.6840112199688432e-05, + "loss": 0.5567387342453003, + "step": 2034 + }, + { + "epoch": 0.595116245065068, + "grad_norm": 1.2606984508496513, + "learning_rate": 1.6836584619451478e-05, + "loss": 0.6428712606430054, + "step": 2035 + }, + { + "epoch": 0.5954086854803334, + "grad_norm": 1.3387753764851709, + "learning_rate": 1.6833055441204436e-05, + "loss": 0.7430459260940552, + "step": 2036 + }, + { + "epoch": 0.5957011258955988, + "grad_norm": 1.250181817593343, + "learning_rate": 1.682952466577223e-05, + "loss": 0.5982654690742493, + "step": 2037 + }, + { + "epoch": 0.5959935663108642, + "grad_norm": 1.2721973260460164, + "learning_rate": 1.6825992293980158e-05, + "loss": 0.5807450413703918, + "step": 2038 + }, + { + "epoch": 0.5962860067261295, + "grad_norm": 1.4202543697420538, + "learning_rate": 1.6822458326653888e-05, + "loss": 0.7667814493179321, + "step": 2039 + }, + { + "epoch": 0.596578447141395, + "grad_norm": 1.4555539952275451, + "learning_rate": 
1.6818922764619467e-05, + "loss": 0.8192781805992126, + "step": 2040 + }, + { + "epoch": 0.5968708875566603, + "grad_norm": 1.3146767820144227, + "learning_rate": 1.681538560870331e-05, + "loss": 0.6652504205703735, + "step": 2041 + }, + { + "epoch": 0.5971633279719257, + "grad_norm": 1.4465108366403951, + "learning_rate": 1.6811846859732207e-05, + "loss": 0.6227332353591919, + "step": 2042 + }, + { + "epoch": 0.597455768387191, + "grad_norm": 1.1394575473936808, + "learning_rate": 1.6808306518533315e-05, + "loss": 0.5459558963775635, + "step": 2043 + }, + { + "epoch": 0.5977482088024565, + "grad_norm": 1.3498516241816683, + "learning_rate": 1.6804764585934167e-05, + "loss": 0.5176202058792114, + "step": 2044 + }, + { + "epoch": 0.5980406492177219, + "grad_norm": 1.5025501377940633, + "learning_rate": 1.6801221062762677e-05, + "loss": 0.5818016529083252, + "step": 2045 + }, + { + "epoch": 0.5983330896329873, + "grad_norm": 1.3397658451047565, + "learning_rate": 1.679767594984711e-05, + "loss": 0.622256875038147, + "step": 2046 + }, + { + "epoch": 0.5986255300482527, + "grad_norm": 1.2198859984633783, + "learning_rate": 1.6794129248016124e-05, + "loss": 0.5538911819458008, + "step": 2047 + }, + { + "epoch": 0.598917970463518, + "grad_norm": 1.1939205886096602, + "learning_rate": 1.6790580958098733e-05, + "loss": 0.4934890568256378, + "step": 2048 + }, + { + "epoch": 0.5992104108787835, + "grad_norm": 1.5628453531282531, + "learning_rate": 1.678703108092433e-05, + "loss": 0.6754223108291626, + "step": 2049 + }, + { + "epoch": 0.5995028512940488, + "grad_norm": 1.3047429440272302, + "learning_rate": 1.678347961732268e-05, + "loss": 0.48618268966674805, + "step": 2050 + }, + { + "epoch": 0.5997952917093142, + "grad_norm": 2.239352665042965, + "learning_rate": 1.6779926568123913e-05, + "loss": 0.6844758987426758, + "step": 2051 + }, + { + "epoch": 0.6000877321245797, + "grad_norm": 1.222439693123936, + "learning_rate": 1.677637193415853e-05, + "loss": 
0.5258621573448181, + "step": 2052 + }, + { + "epoch": 0.600380172539845, + "grad_norm": 1.5856950316684058, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.5571128129959106, + "step": 2053 + }, + { + "epoch": 0.6006726129551104, + "grad_norm": 1.514916071293939, + "learning_rate": 1.67692579152518e-05, + "loss": 0.5881344079971313, + "step": 2054 + }, + { + "epoch": 0.6009650533703758, + "grad_norm": 1.5804701546241575, + "learning_rate": 1.6765698531973305e-05, + "loss": 0.7162419557571411, + "step": 2055 + }, + { + "epoch": 0.6012574937856412, + "grad_norm": 1.487082432347586, + "learning_rate": 1.6762137567253917e-05, + "loss": 0.7470849752426147, + "step": 2056 + }, + { + "epoch": 0.6015499342009065, + "grad_norm": 1.4154424289161787, + "learning_rate": 1.6758575021925987e-05, + "loss": 0.6043628454208374, + "step": 2057 + }, + { + "epoch": 0.601842374616172, + "grad_norm": 1.4033964052969388, + "learning_rate": 1.6755010896822237e-05, + "loss": 0.6574143171310425, + "step": 2058 + }, + { + "epoch": 0.6021348150314374, + "grad_norm": 1.3508419478610747, + "learning_rate": 1.675144519277576e-05, + "loss": 0.605838418006897, + "step": 2059 + }, + { + "epoch": 0.6024272554467027, + "grad_norm": 1.2890691190480261, + "learning_rate": 1.6747877910620022e-05, + "loss": 0.5859218239784241, + "step": 2060 + }, + { + "epoch": 0.6027196958619682, + "grad_norm": 1.3985794655421304, + "learning_rate": 1.674430905118885e-05, + "loss": 0.7272971868515015, + "step": 2061 + }, + { + "epoch": 0.6030121362772335, + "grad_norm": 1.2870566467248659, + "learning_rate": 1.674073861531644e-05, + "loss": 0.606023907661438, + "step": 2062 + }, + { + "epoch": 0.6033045766924989, + "grad_norm": 1.345090429761192, + "learning_rate": 1.6737166603837364e-05, + "loss": 0.6029521822929382, + "step": 2063 + }, + { + "epoch": 0.6035970171077643, + "grad_norm": 1.1860277395685632, + "learning_rate": 1.673359301758656e-05, + "loss": 0.7544999122619629, + "step": 2064 + }, + { + "epoch": 
0.6038894575230297, + "grad_norm": 1.3953376279645262, + "learning_rate": 1.6730017857399327e-05, + "loss": 0.7487601637840271, + "step": 2065 + }, + { + "epoch": 0.604181897938295, + "grad_norm": 1.3904468062872732, + "learning_rate": 1.672644112411134e-05, + "loss": 0.6429200172424316, + "step": 2066 + }, + { + "epoch": 0.6044743383535605, + "grad_norm": 1.4246263416975375, + "learning_rate": 1.6722862818558635e-05, + "loss": 0.7337179183959961, + "step": 2067 + }, + { + "epoch": 0.6047667787688259, + "grad_norm": 1.432290850861675, + "learning_rate": 1.671928294157762e-05, + "loss": 0.6644014120101929, + "step": 2068 + }, + { + "epoch": 0.6050592191840912, + "grad_norm": 1.3048966935224826, + "learning_rate": 1.6715701494005078e-05, + "loss": 0.5987672805786133, + "step": 2069 + }, + { + "epoch": 0.6053516595993567, + "grad_norm": 1.5176113056744007, + "learning_rate": 1.671211847667814e-05, + "loss": 0.5878695845603943, + "step": 2070 + }, + { + "epoch": 0.605644100014622, + "grad_norm": 1.3348485026555847, + "learning_rate": 1.670853389043432e-05, + "loss": 0.540128231048584, + "step": 2071 + }, + { + "epoch": 0.6059365404298874, + "grad_norm": 1.3888450119982874, + "learning_rate": 1.670494773611149e-05, + "loss": 0.667206346988678, + "step": 2072 + }, + { + "epoch": 0.6062289808451528, + "grad_norm": 1.5911825658421195, + "learning_rate": 1.6701360014547896e-05, + "loss": 0.6433641910552979, + "step": 2073 + }, + { + "epoch": 0.6065214212604182, + "grad_norm": 1.447981653333928, + "learning_rate": 1.669777072658214e-05, + "loss": 0.5803529024124146, + "step": 2074 + }, + { + "epoch": 0.6068138616756836, + "grad_norm": 1.125005009009719, + "learning_rate": 1.6694179873053202e-05, + "loss": 0.6203820705413818, + "step": 2075 + }, + { + "epoch": 0.607106302090949, + "grad_norm": 1.3092542979615172, + "learning_rate": 1.669058745480042e-05, + "loss": 0.6194918155670166, + "step": 2076 + }, + { + "epoch": 0.6073987425062144, + "grad_norm": 1.593480689755987, + 
"learning_rate": 1.66869934726635e-05, + "loss": 0.6797547936439514, + "step": 2077 + }, + { + "epoch": 0.6076911829214797, + "grad_norm": 1.3923211889522802, + "learning_rate": 1.6683397927482512e-05, + "loss": 0.6076459884643555, + "step": 2078 + }, + { + "epoch": 0.6079836233367452, + "grad_norm": 1.3874225830336557, + "learning_rate": 1.6679800820097895e-05, + "loss": 0.6958068609237671, + "step": 2079 + }, + { + "epoch": 0.6082760637520105, + "grad_norm": 1.3355509335032223, + "learning_rate": 1.6676202151350453e-05, + "loss": 0.5819929242134094, + "step": 2080 + }, + { + "epoch": 0.6085685041672759, + "grad_norm": 1.3476445996808082, + "learning_rate": 1.6672601922081347e-05, + "loss": 0.7125047445297241, + "step": 2081 + }, + { + "epoch": 0.6088609445825413, + "grad_norm": 1.4432332437479862, + "learning_rate": 1.6669000133132108e-05, + "loss": 0.8046560287475586, + "step": 2082 + }, + { + "epoch": 0.6091533849978067, + "grad_norm": 1.192025927247586, + "learning_rate": 1.666539678534464e-05, + "loss": 0.5468478202819824, + "step": 2083 + }, + { + "epoch": 0.6094458254130721, + "grad_norm": 1.3403719695971306, + "learning_rate": 1.6661791879561204e-05, + "loss": 0.6387852430343628, + "step": 2084 + }, + { + "epoch": 0.6097382658283375, + "grad_norm": 1.3327872578740647, + "learning_rate": 1.6658185416624415e-05, + "loss": 0.643539547920227, + "step": 2085 + }, + { + "epoch": 0.6100307062436029, + "grad_norm": 1.2236148701775094, + "learning_rate": 1.6654577397377266e-05, + "loss": 0.5031965374946594, + "step": 2086 + }, + { + "epoch": 0.6103231466588682, + "grad_norm": 1.507439246425782, + "learning_rate": 1.6650967822663115e-05, + "loss": 0.6690273284912109, + "step": 2087 + }, + { + "epoch": 0.6106155870741337, + "grad_norm": 1.2924449065282086, + "learning_rate": 1.6647356693325672e-05, + "loss": 0.6396887302398682, + "step": 2088 + }, + { + "epoch": 0.610908027489399, + "grad_norm": 1.4444361497865652, + "learning_rate": 1.664374401020902e-05, + "loss": 
0.6306549310684204, + "step": 2089 + }, + { + "epoch": 0.6112004679046644, + "grad_norm": 1.3565777173208147, + "learning_rate": 1.66401297741576e-05, + "loss": 0.5936366319656372, + "step": 2090 + }, + { + "epoch": 0.6114929083199299, + "grad_norm": 1.1669567203268514, + "learning_rate": 1.6636513986016215e-05, + "loss": 0.6153277158737183, + "step": 2091 + }, + { + "epoch": 0.6117853487351952, + "grad_norm": 1.2085146124175858, + "learning_rate": 1.663289664663004e-05, + "loss": 0.6361621618270874, + "step": 2092 + }, + { + "epoch": 0.6120777891504606, + "grad_norm": 1.2163858440552462, + "learning_rate": 1.6629277756844603e-05, + "loss": 0.6511524319648743, + "step": 2093 + }, + { + "epoch": 0.612370229565726, + "grad_norm": 1.2219001757495958, + "learning_rate": 1.6625657317505792e-05, + "loss": 0.5811333656311035, + "step": 2094 + }, + { + "epoch": 0.6126626699809914, + "grad_norm": 1.4531007944498606, + "learning_rate": 1.6622035329459872e-05, + "loss": 0.6935377717018127, + "step": 2095 + }, + { + "epoch": 0.6129551103962567, + "grad_norm": 1.3697721797296887, + "learning_rate": 1.6618411793553455e-05, + "loss": 0.6363199949264526, + "step": 2096 + }, + { + "epoch": 0.6132475508115222, + "grad_norm": 1.6107434013725794, + "learning_rate": 1.6614786710633525e-05, + "loss": 0.7325713634490967, + "step": 2097 + }, + { + "epoch": 0.6135399912267876, + "grad_norm": 1.3944095356365322, + "learning_rate": 1.6611160081547414e-05, + "loss": 0.5739182829856873, + "step": 2098 + }, + { + "epoch": 0.6138324316420529, + "grad_norm": 1.4193388816384238, + "learning_rate": 1.6607531907142835e-05, + "loss": 0.611133873462677, + "step": 2099 + }, + { + "epoch": 0.6141248720573184, + "grad_norm": 1.579788361702439, + "learning_rate": 1.6603902188267842e-05, + "loss": 0.6419532299041748, + "step": 2100 + }, + { + "epoch": 0.6144173124725837, + "grad_norm": 1.482873128334509, + "learning_rate": 1.660027092577087e-05, + "loss": 0.7736743688583374, + "step": 2101 + }, + { + 
"epoch": 0.6147097528878491, + "grad_norm": 1.199857125427724, + "learning_rate": 1.6596638120500696e-05, + "loss": 0.5249119400978088, + "step": 2102 + }, + { + "epoch": 0.6150021933031145, + "grad_norm": 2.505852142425954, + "learning_rate": 1.6593003773306475e-05, + "loss": 0.7145636081695557, + "step": 2103 + }, + { + "epoch": 0.6152946337183799, + "grad_norm": 1.3335089477583737, + "learning_rate": 1.65893678850377e-05, + "loss": 0.5807666182518005, + "step": 2104 + }, + { + "epoch": 0.6155870741336452, + "grad_norm": 1.2437068513912055, + "learning_rate": 1.6585730456544255e-05, + "loss": 0.5049663782119751, + "step": 2105 + }, + { + "epoch": 0.6158795145489107, + "grad_norm": 1.4826397888996732, + "learning_rate": 1.658209148867635e-05, + "loss": 0.6744092702865601, + "step": 2106 + }, + { + "epoch": 0.6161719549641761, + "grad_norm": 1.4821897923446594, + "learning_rate": 1.6578450982284584e-05, + "loss": 0.605404794216156, + "step": 2107 + }, + { + "epoch": 0.6164643953794414, + "grad_norm": 1.1917544416711534, + "learning_rate": 1.6574808938219894e-05, + "loss": 0.6074866056442261, + "step": 2108 + }, + { + "epoch": 0.6167568357947069, + "grad_norm": 1.284543555588908, + "learning_rate": 1.6571165357333594e-05, + "loss": 0.6758207082748413, + "step": 2109 + }, + { + "epoch": 0.6170492762099722, + "grad_norm": 1.580962080275822, + "learning_rate": 1.6567520240477344e-05, + "loss": 0.7669274806976318, + "step": 2110 + }, + { + "epoch": 0.6173417166252376, + "grad_norm": 1.3997913559025885, + "learning_rate": 1.6563873588503173e-05, + "loss": 0.497562050819397, + "step": 2111 + }, + { + "epoch": 0.617634157040503, + "grad_norm": 1.6655652024231358, + "learning_rate": 1.656022540226345e-05, + "loss": 0.6398104429244995, + "step": 2112 + }, + { + "epoch": 0.6179265974557684, + "grad_norm": 1.4155810596985208, + "learning_rate": 1.6556575682610935e-05, + "loss": 0.6739988327026367, + "step": 2113 + }, + { + "epoch": 0.6182190378710338, + "grad_norm": 
1.3164921836609038, + "learning_rate": 1.6552924430398716e-05, + "loss": 0.5710165500640869, + "step": 2114 + }, + { + "epoch": 0.6185114782862992, + "grad_norm": 1.1567442833736337, + "learning_rate": 1.6549271646480253e-05, + "loss": 0.6087738871574402, + "step": 2115 + }, + { + "epoch": 0.6188039187015646, + "grad_norm": 1.1877649418617353, + "learning_rate": 1.6545617331709364e-05, + "loss": 0.5300824642181396, + "step": 2116 + }, + { + "epoch": 0.6190963591168299, + "grad_norm": 1.3759503189909044, + "learning_rate": 1.6541961486940222e-05, + "loss": 0.7384774684906006, + "step": 2117 + }, + { + "epoch": 0.6193887995320954, + "grad_norm": 1.1608035895573054, + "learning_rate": 1.6538304113027356e-05, + "loss": 0.5867838263511658, + "step": 2118 + }, + { + "epoch": 0.6196812399473607, + "grad_norm": 1.4435135524238625, + "learning_rate": 1.653464521082566e-05, + "loss": 0.617068886756897, + "step": 2119 + }, + { + "epoch": 0.6199736803626261, + "grad_norm": 1.2420433862943483, + "learning_rate": 1.6530984781190374e-05, + "loss": 0.7316439151763916, + "step": 2120 + }, + { + "epoch": 0.6202661207778914, + "grad_norm": 1.3153827472233475, + "learning_rate": 1.6527322824977104e-05, + "loss": 0.5469995737075806, + "step": 2121 + }, + { + "epoch": 0.6205585611931569, + "grad_norm": 1.4608354678316708, + "learning_rate": 1.6523659343041815e-05, + "loss": 0.6577411890029907, + "step": 2122 + }, + { + "epoch": 0.6208510016084223, + "grad_norm": 1.5130442860821829, + "learning_rate": 1.6519994336240816e-05, + "loss": 0.7425049543380737, + "step": 2123 + }, + { + "epoch": 0.6211434420236877, + "grad_norm": 1.7408354143028393, + "learning_rate": 1.6516327805430785e-05, + "loss": 0.7894090414047241, + "step": 2124 + }, + { + "epoch": 0.6214358824389531, + "grad_norm": 1.2267269656084083, + "learning_rate": 1.651265975146875e-05, + "loss": 0.5739543437957764, + "step": 2125 + }, + { + "epoch": 0.6217283228542184, + "grad_norm": 1.2973694692382243, + "learning_rate": 
1.6508990175212092e-05, + "loss": 0.6987308263778687, + "step": 2126 + }, + { + "epoch": 0.6220207632694839, + "grad_norm": 1.237403110571432, + "learning_rate": 1.650531907751856e-05, + "loss": 0.5956544280052185, + "step": 2127 + }, + { + "epoch": 0.6223132036847492, + "grad_norm": 1.3646659152675398, + "learning_rate": 1.6501646459246245e-05, + "loss": 0.582348108291626, + "step": 2128 + }, + { + "epoch": 0.6226056441000146, + "grad_norm": 1.327256978138479, + "learning_rate": 1.64979723212536e-05, + "loss": 0.8057917356491089, + "step": 2129 + }, + { + "epoch": 0.6228980845152801, + "grad_norm": 1.1623408864017983, + "learning_rate": 1.6494296664399428e-05, + "loss": 0.6237305402755737, + "step": 2130 + }, + { + "epoch": 0.6231905249305454, + "grad_norm": 1.3152067943219485, + "learning_rate": 1.6490619489542905e-05, + "loss": 0.6445767879486084, + "step": 2131 + }, + { + "epoch": 0.6234829653458108, + "grad_norm": 1.4611569228302668, + "learning_rate": 1.648694079754354e-05, + "loss": 0.6397994160652161, + "step": 2132 + }, + { + "epoch": 0.6237754057610762, + "grad_norm": 1.3955823025243248, + "learning_rate": 1.64832605892612e-05, + "loss": 0.8216533660888672, + "step": 2133 + }, + { + "epoch": 0.6240678461763416, + "grad_norm": 1.3134524569329014, + "learning_rate": 1.6479578865556115e-05, + "loss": 0.6894406080245972, + "step": 2134 + }, + { + "epoch": 0.6243602865916069, + "grad_norm": 1.2940264658828888, + "learning_rate": 1.6475895627288873e-05, + "loss": 0.6608946323394775, + "step": 2135 + }, + { + "epoch": 0.6246527270068724, + "grad_norm": 1.4094544295935185, + "learning_rate": 1.6472210875320397e-05, + "loss": 0.6070076823234558, + "step": 2136 + }, + { + "epoch": 0.6249451674221378, + "grad_norm": 1.4359082412623407, + "learning_rate": 1.6468524610511982e-05, + "loss": 0.7357348799705505, + "step": 2137 + }, + { + "epoch": 0.6252376078374031, + "grad_norm": 1.201965871501085, + "learning_rate": 1.6464836833725267e-05, + "loss": 0.5959880352020264, 
+ "step": 2138 + }, + { + "epoch": 0.6255300482526686, + "grad_norm": 1.3046810888024383, + "learning_rate": 1.646114754582225e-05, + "loss": 0.7812649011611938, + "step": 2139 + }, + { + "epoch": 0.6258224886679339, + "grad_norm": 1.6609760293820528, + "learning_rate": 1.6457456747665282e-05, + "loss": 0.5985091924667358, + "step": 2140 + }, + { + "epoch": 0.6261149290831993, + "grad_norm": 1.5609316045902142, + "learning_rate": 1.645376444011706e-05, + "loss": 0.6610564589500427, + "step": 2141 + }, + { + "epoch": 0.6264073694984647, + "grad_norm": 1.3917319855245425, + "learning_rate": 1.6450070624040636e-05, + "loss": 0.6876299381256104, + "step": 2142 + }, + { + "epoch": 0.6266998099137301, + "grad_norm": 1.3567193814213938, + "learning_rate": 1.6446375300299425e-05, + "loss": 0.6715782284736633, + "step": 2143 + }, + { + "epoch": 0.6269922503289954, + "grad_norm": 1.6061237563072754, + "learning_rate": 1.644267846975718e-05, + "loss": 0.6066923141479492, + "step": 2144 + }, + { + "epoch": 0.6272846907442609, + "grad_norm": 1.2493532553829008, + "learning_rate": 1.6438980133278017e-05, + "loss": 0.5642968416213989, + "step": 2145 + }, + { + "epoch": 0.6275771311595263, + "grad_norm": 1.0703284322753808, + "learning_rate": 1.6435280291726394e-05, + "loss": 0.604590654373169, + "step": 2146 + }, + { + "epoch": 0.6278695715747916, + "grad_norm": 1.3292746736885825, + "learning_rate": 1.643157894596713e-05, + "loss": 0.6313889026641846, + "step": 2147 + }, + { + "epoch": 0.6281620119900571, + "grad_norm": 1.0767305616181233, + "learning_rate": 1.6427876096865394e-05, + "loss": 0.5084092617034912, + "step": 2148 + }, + { + "epoch": 0.6284544524053224, + "grad_norm": 1.250433663172197, + "learning_rate": 1.6424171745286704e-05, + "loss": 0.5191931128501892, + "step": 2149 + }, + { + "epoch": 0.6287468928205878, + "grad_norm": 1.3567625810681667, + "learning_rate": 1.6420465892096924e-05, + "loss": 0.7397615909576416, + "step": 2150 + }, + { + "epoch": 
0.6290393332358531, + "grad_norm": 1.1359315638082286, + "learning_rate": 1.641675853816228e-05, + "loss": 0.622586727142334, + "step": 2151 + }, + { + "epoch": 0.6293317736511186, + "grad_norm": 1.433028642480203, + "learning_rate": 1.6413049684349344e-05, + "loss": 0.7894928455352783, + "step": 2152 + }, + { + "epoch": 0.629624214066384, + "grad_norm": 1.4395392231763253, + "learning_rate": 1.640933933152504e-05, + "loss": 0.5752773284912109, + "step": 2153 + }, + { + "epoch": 0.6299166544816494, + "grad_norm": 1.3952520818076775, + "learning_rate": 1.640562748055663e-05, + "loss": 0.6738473176956177, + "step": 2154 + }, + { + "epoch": 0.6302090948969148, + "grad_norm": 1.2597002399242925, + "learning_rate": 1.6401914132311745e-05, + "loss": 0.5789517164230347, + "step": 2155 + }, + { + "epoch": 0.6305015353121801, + "grad_norm": 1.2840904364476742, + "learning_rate": 1.6398199287658358e-05, + "loss": 0.5925524830818176, + "step": 2156 + }, + { + "epoch": 0.6307939757274456, + "grad_norm": 1.4374336859820211, + "learning_rate": 1.6394482947464784e-05, + "loss": 0.6949414610862732, + "step": 2157 + }, + { + "epoch": 0.6310864161427109, + "grad_norm": 1.3617313094593515, + "learning_rate": 1.6390765112599705e-05, + "loss": 0.7435301542282104, + "step": 2158 + }, + { + "epoch": 0.6313788565579763, + "grad_norm": 1.5109256996682827, + "learning_rate": 1.6387045783932137e-05, + "loss": 0.6931856274604797, + "step": 2159 + }, + { + "epoch": 0.6316712969732416, + "grad_norm": 1.4369843702380298, + "learning_rate": 1.638332496233145e-05, + "loss": 0.7856471538543701, + "step": 2160 + }, + { + "epoch": 0.6319637373885071, + "grad_norm": 1.460850634730034, + "learning_rate": 1.6379602648667362e-05, + "loss": 0.6299946308135986, + "step": 2161 + }, + { + "epoch": 0.6322561778037725, + "grad_norm": 1.5299113211206812, + "learning_rate": 1.6375878843809946e-05, + "loss": 0.6209328174591064, + "step": 2162 + }, + { + "epoch": 0.6325486182190379, + "grad_norm": 
1.4269696757613273, + "learning_rate": 1.6372153548629617e-05, + "loss": 0.6498390436172485, + "step": 2163 + }, + { + "epoch": 0.6328410586343033, + "grad_norm": 2.1028833494160573, + "learning_rate": 1.6368426763997137e-05, + "loss": 0.6757122278213501, + "step": 2164 + }, + { + "epoch": 0.6331334990495686, + "grad_norm": 1.289589419762841, + "learning_rate": 1.6364698490783623e-05, + "loss": 0.5137026906013489, + "step": 2165 + }, + { + "epoch": 0.633425939464834, + "grad_norm": 1.3914324771074273, + "learning_rate": 1.6360968729860536e-05, + "loss": 0.5876519680023193, + "step": 2166 + }, + { + "epoch": 0.6337183798800994, + "grad_norm": 1.2533286000898018, + "learning_rate": 1.6357237482099682e-05, + "loss": 0.5804057717323303, + "step": 2167 + }, + { + "epoch": 0.6340108202953648, + "grad_norm": 1.361440329822907, + "learning_rate": 1.635350474837322e-05, + "loss": 0.6186444759368896, + "step": 2168 + }, + { + "epoch": 0.6343032607106303, + "grad_norm": 1.4479908785794617, + "learning_rate": 1.6349770529553654e-05, + "loss": 0.6358560919761658, + "step": 2169 + }, + { + "epoch": 0.6345957011258956, + "grad_norm": 1.2507636068938528, + "learning_rate": 1.6346034826513834e-05, + "loss": 0.64283686876297, + "step": 2170 + }, + { + "epoch": 0.634888141541161, + "grad_norm": 1.3854516647796151, + "learning_rate": 1.6342297640126955e-05, + "loss": 0.5269169807434082, + "step": 2171 + }, + { + "epoch": 0.6351805819564263, + "grad_norm": 1.3233372829927026, + "learning_rate": 1.6338558971266563e-05, + "loss": 0.5338561534881592, + "step": 2172 + }, + { + "epoch": 0.6354730223716918, + "grad_norm": 1.365606957045604, + "learning_rate": 1.6334818820806555e-05, + "loss": 0.5587184429168701, + "step": 2173 + }, + { + "epoch": 0.6357654627869571, + "grad_norm": 1.2288709810094502, + "learning_rate": 1.633107718962116e-05, + "loss": 0.6468764543533325, + "step": 2174 + }, + { + "epoch": 0.6360579032022226, + "grad_norm": 1.4431243955955453, + "learning_rate": 
1.6327334078584967e-05, + "loss": 0.7305203676223755, + "step": 2175 + }, + { + "epoch": 0.636350343617488, + "grad_norm": 1.3207763162749322, + "learning_rate": 1.6323589488572908e-05, + "loss": 0.6226189136505127, + "step": 2176 + }, + { + "epoch": 0.6366427840327533, + "grad_norm": 1.4828987038724675, + "learning_rate": 1.631984342046025e-05, + "loss": 0.6552053093910217, + "step": 2177 + }, + { + "epoch": 0.6369352244480188, + "grad_norm": 1.6836072588979352, + "learning_rate": 1.6316095875122617e-05, + "loss": 0.8121978044509888, + "step": 2178 + }, + { + "epoch": 0.6372276648632841, + "grad_norm": 1.3359221660901908, + "learning_rate": 1.6312346853435976e-05, + "loss": 0.5826296806335449, + "step": 2179 + }, + { + "epoch": 0.6375201052785495, + "grad_norm": 1.3567795832303162, + "learning_rate": 1.630859635627664e-05, + "loss": 0.5862709283828735, + "step": 2180 + }, + { + "epoch": 0.6378125456938148, + "grad_norm": 1.2132204868801326, + "learning_rate": 1.6304844384521263e-05, + "loss": 0.7081524133682251, + "step": 2181 + }, + { + "epoch": 0.6381049861090803, + "grad_norm": 1.2359384159808198, + "learning_rate": 1.6301090939046843e-05, + "loss": 0.6394449472427368, + "step": 2182 + }, + { + "epoch": 0.6383974265243456, + "grad_norm": 1.25131780401235, + "learning_rate": 1.6297336020730727e-05, + "loss": 0.6184799075126648, + "step": 2183 + }, + { + "epoch": 0.638689866939611, + "grad_norm": 1.3090426226978378, + "learning_rate": 1.6293579630450606e-05, + "loss": 0.6877666711807251, + "step": 2184 + }, + { + "epoch": 0.6389823073548765, + "grad_norm": 1.3648594367613462, + "learning_rate": 1.6289821769084512e-05, + "loss": 0.5596371293067932, + "step": 2185 + }, + { + "epoch": 0.6392747477701418, + "grad_norm": 1.1779148594123119, + "learning_rate": 1.6286062437510823e-05, + "loss": 0.5378291010856628, + "step": 2186 + }, + { + "epoch": 0.6395671881854073, + "grad_norm": 1.2132664638530417, + "learning_rate": 1.6282301636608256e-05, + "loss": 
0.6965627670288086, + "step": 2187 + }, + { + "epoch": 0.6398596286006726, + "grad_norm": 1.3017112466193883, + "learning_rate": 1.6278539367255885e-05, + "loss": 0.5939220190048218, + "step": 2188 + }, + { + "epoch": 0.640152069015938, + "grad_norm": 1.3743138396251577, + "learning_rate": 1.6274775630333104e-05, + "loss": 0.6225341558456421, + "step": 2189 + }, + { + "epoch": 0.6404445094312033, + "grad_norm": 1.103061387587319, + "learning_rate": 1.6271010426719672e-05, + "loss": 0.471333384513855, + "step": 2190 + }, + { + "epoch": 0.6407369498464688, + "grad_norm": 1.3505910885858836, + "learning_rate": 1.626724375729568e-05, + "loss": 0.6066263914108276, + "step": 2191 + }, + { + "epoch": 0.6410293902617342, + "grad_norm": 1.2842885881869934, + "learning_rate": 1.626347562294157e-05, + "loss": 0.6525982618331909, + "step": 2192 + }, + { + "epoch": 0.6413218306769995, + "grad_norm": 1.375624970339684, + "learning_rate": 1.6259706024538113e-05, + "loss": 0.7395817041397095, + "step": 2193 + }, + { + "epoch": 0.641614271092265, + "grad_norm": 1.326045982489242, + "learning_rate": 1.6255934962966432e-05, + "loss": 0.720014214515686, + "step": 2194 + }, + { + "epoch": 0.6419067115075303, + "grad_norm": 1.4102074363113735, + "learning_rate": 1.625216243910799e-05, + "loss": 0.6905295252799988, + "step": 2195 + }, + { + "epoch": 0.6421991519227958, + "grad_norm": 1.3533501829991437, + "learning_rate": 1.6248388453844596e-05, + "loss": 0.6877295970916748, + "step": 2196 + }, + { + "epoch": 0.6424915923380611, + "grad_norm": 1.414790050061214, + "learning_rate": 1.6244613008058386e-05, + "loss": 0.5782181024551392, + "step": 2197 + }, + { + "epoch": 0.6427840327533265, + "grad_norm": 1.2129092557671588, + "learning_rate": 1.6240836102631856e-05, + "loss": 0.5253425240516663, + "step": 2198 + }, + { + "epoch": 0.6430764731685918, + "grad_norm": 1.2461747547364295, + "learning_rate": 1.623705773844783e-05, + "loss": 0.6631319522857666, + "step": 2199 + }, + { + "epoch": 
0.6433689135838573, + "grad_norm": 1.6130890971192966, + "learning_rate": 1.6233277916389482e-05, + "loss": 0.6458526849746704, + "step": 2200 + }, + { + "epoch": 0.6436613539991227, + "grad_norm": 1.5712729506149452, + "learning_rate": 1.622949663734032e-05, + "loss": 0.5723023414611816, + "step": 2201 + }, + { + "epoch": 0.643953794414388, + "grad_norm": 1.4119455791937807, + "learning_rate": 1.6225713902184193e-05, + "loss": 0.6852096319198608, + "step": 2202 + }, + { + "epoch": 0.6442462348296535, + "grad_norm": 1.460558869527006, + "learning_rate": 1.6221929711805297e-05, + "loss": 0.6343507170677185, + "step": 2203 + }, + { + "epoch": 0.6445386752449188, + "grad_norm": 1.217897103510346, + "learning_rate": 1.6218144067088157e-05, + "loss": 0.6378631591796875, + "step": 2204 + }, + { + "epoch": 0.6448311156601843, + "grad_norm": 1.1203441428966674, + "learning_rate": 1.621435696891765e-05, + "loss": 0.6550023555755615, + "step": 2205 + }, + { + "epoch": 0.6451235560754496, + "grad_norm": 1.3522778560223117, + "learning_rate": 1.6210568418178983e-05, + "loss": 0.5555052757263184, + "step": 2206 + }, + { + "epoch": 0.645415996490715, + "grad_norm": 1.330819772406298, + "learning_rate": 1.6206778415757715e-05, + "loss": 0.7171934247016907, + "step": 2207 + }, + { + "epoch": 0.6457084369059805, + "grad_norm": 1.2953726655501339, + "learning_rate": 1.6202986962539726e-05, + "loss": 0.6464889049530029, + "step": 2208 + }, + { + "epoch": 0.6460008773212458, + "grad_norm": 1.5324773487302452, + "learning_rate": 1.619919405941125e-05, + "loss": 0.6316033601760864, + "step": 2209 + }, + { + "epoch": 0.6462933177365112, + "grad_norm": 1.2083095479015487, + "learning_rate": 1.6195399707258855e-05, + "loss": 0.5548732876777649, + "step": 2210 + }, + { + "epoch": 0.6465857581517765, + "grad_norm": 1.088879983740594, + "learning_rate": 1.6191603906969447e-05, + "loss": 0.5055203437805176, + "step": 2211 + }, + { + "epoch": 0.646878198567042, + "grad_norm": 
1.3416079726495937, + "learning_rate": 1.6187806659430268e-05, + "loss": 0.7010073661804199, + "step": 2212 + }, + { + "epoch": 0.6471706389823073, + "grad_norm": 1.39696751963916, + "learning_rate": 1.6184007965528908e-05, + "loss": 0.6188487410545349, + "step": 2213 + }, + { + "epoch": 0.6474630793975727, + "grad_norm": 1.1122504211535682, + "learning_rate": 1.6180207826153284e-05, + "loss": 0.46920153498649597, + "step": 2214 + }, + { + "epoch": 0.6477555198128382, + "grad_norm": 1.1420938414191775, + "learning_rate": 1.617640624219166e-05, + "loss": 0.6811172962188721, + "step": 2215 + }, + { + "epoch": 0.6480479602281035, + "grad_norm": 1.456471656413964, + "learning_rate": 1.617260321453263e-05, + "loss": 0.6425800323486328, + "step": 2216 + }, + { + "epoch": 0.648340400643369, + "grad_norm": 1.5968265799871777, + "learning_rate": 1.6168798744065123e-05, + "loss": 0.7020897269248962, + "step": 2217 + }, + { + "epoch": 0.6486328410586343, + "grad_norm": 1.1227944263783516, + "learning_rate": 1.6164992831678422e-05, + "loss": 0.5872179865837097, + "step": 2218 + }, + { + "epoch": 0.6489252814738997, + "grad_norm": 1.6374275819992907, + "learning_rate": 1.6161185478262127e-05, + "loss": 0.7414118647575378, + "step": 2219 + }, + { + "epoch": 0.649217721889165, + "grad_norm": 1.2707285395428818, + "learning_rate": 1.615737668470619e-05, + "loss": 0.5408385396003723, + "step": 2220 + }, + { + "epoch": 0.6495101623044305, + "grad_norm": 1.2587309097221344, + "learning_rate": 1.6153566451900887e-05, + "loss": 0.6145513653755188, + "step": 2221 + }, + { + "epoch": 0.6498026027196958, + "grad_norm": 1.1746181148032837, + "learning_rate": 1.6149754780736847e-05, + "loss": 0.556422233581543, + "step": 2222 + }, + { + "epoch": 0.6500950431349612, + "grad_norm": 1.4903419319059785, + "learning_rate": 1.614594167210501e-05, + "loss": 0.7155405282974243, + "step": 2223 + }, + { + "epoch": 0.6503874835502267, + "grad_norm": 1.2945043385192228, + "learning_rate": 
1.6142127126896682e-05, + "loss": 0.4988427758216858, + "step": 2224 + }, + { + "epoch": 0.650679923965492, + "grad_norm": 1.3962995233264988, + "learning_rate": 1.6138311146003477e-05, + "loss": 0.6187007427215576, + "step": 2225 + }, + { + "epoch": 0.6509723643807575, + "grad_norm": 1.329312474096709, + "learning_rate": 1.6134493730317364e-05, + "loss": 0.5668798685073853, + "step": 2226 + }, + { + "epoch": 0.6512648047960228, + "grad_norm": 1.2528148742640925, + "learning_rate": 1.6130674880730642e-05, + "loss": 0.6354215145111084, + "step": 2227 + }, + { + "epoch": 0.6515572452112882, + "grad_norm": 1.3738601794334195, + "learning_rate": 1.612685459813594e-05, + "loss": 0.5409573912620544, + "step": 2228 + }, + { + "epoch": 0.6518496856265535, + "grad_norm": 1.24582725943008, + "learning_rate": 1.612303288342623e-05, + "loss": 0.5622435808181763, + "step": 2229 + }, + { + "epoch": 0.652142126041819, + "grad_norm": 1.3303126336426627, + "learning_rate": 1.6119209737494814e-05, + "loss": 0.786159873008728, + "step": 2230 + }, + { + "epoch": 0.6524345664570844, + "grad_norm": 1.3038971892359654, + "learning_rate": 1.611538516123532e-05, + "loss": 0.6359272003173828, + "step": 2231 + }, + { + "epoch": 0.6527270068723497, + "grad_norm": 1.2508619512631416, + "learning_rate": 1.6111559155541732e-05, + "loss": 0.5688974261283875, + "step": 2232 + }, + { + "epoch": 0.6530194472876152, + "grad_norm": 1.1877745994435736, + "learning_rate": 1.610773172130835e-05, + "loss": 0.581497311592102, + "step": 2233 + }, + { + "epoch": 0.6533118877028805, + "grad_norm": 1.6577687870030173, + "learning_rate": 1.6103902859429812e-05, + "loss": 0.674004316329956, + "step": 2234 + }, + { + "epoch": 0.653604328118146, + "grad_norm": 1.4167456148188138, + "learning_rate": 1.6100072570801092e-05, + "loss": 0.6798728108406067, + "step": 2235 + }, + { + "epoch": 0.6538967685334113, + "grad_norm": 1.245467514643811, + "learning_rate": 1.60962408563175e-05, + "loss": 0.5742023587226868, + 
"step": 2236 + }, + { + "epoch": 0.6541892089486767, + "grad_norm": 1.1993067492933944, + "learning_rate": 1.6092407716874674e-05, + "loss": 0.470009446144104, + "step": 2237 + }, + { + "epoch": 0.654481649363942, + "grad_norm": 1.3725626324774514, + "learning_rate": 1.6088573153368586e-05, + "loss": 0.8113270998001099, + "step": 2238 + }, + { + "epoch": 0.6547740897792075, + "grad_norm": 1.4825942391015299, + "learning_rate": 1.6084737166695542e-05, + "loss": 0.7737559676170349, + "step": 2239 + }, + { + "epoch": 0.6550665301944729, + "grad_norm": 1.5932921988768602, + "learning_rate": 1.6080899757752183e-05, + "loss": 0.6499667167663574, + "step": 2240 + }, + { + "epoch": 0.6553589706097382, + "grad_norm": 1.5295213411109583, + "learning_rate": 1.6077060927435476e-05, + "loss": 0.6898500323295593, + "step": 2241 + }, + { + "epoch": 0.6556514110250037, + "grad_norm": 1.264521733401818, + "learning_rate": 1.6073220676642724e-05, + "loss": 0.5933262705802917, + "step": 2242 + }, + { + "epoch": 0.655943851440269, + "grad_norm": 1.6150723182894215, + "learning_rate": 1.606937900627157e-05, + "loss": 0.6566172242164612, + "step": 2243 + }, + { + "epoch": 0.6562362918555344, + "grad_norm": 1.5267009306631556, + "learning_rate": 1.606553591721997e-05, + "loss": 0.6955286264419556, + "step": 2244 + }, + { + "epoch": 0.6565287322707998, + "grad_norm": 1.2904648803296817, + "learning_rate": 1.6061691410386234e-05, + "loss": 0.6905182600021362, + "step": 2245 + }, + { + "epoch": 0.6568211726860652, + "grad_norm": 1.3780634556903595, + "learning_rate": 1.6057845486668984e-05, + "loss": 0.6733677387237549, + "step": 2246 + }, + { + "epoch": 0.6571136131013307, + "grad_norm": 1.2340466884298544, + "learning_rate": 1.6053998146967186e-05, + "loss": 0.5368545055389404, + "step": 2247 + }, + { + "epoch": 0.657406053516596, + "grad_norm": 1.4627351725055429, + "learning_rate": 1.6050149392180125e-05, + "loss": 0.6995619535446167, + "step": 2248 + }, + { + "epoch": 
0.6576984939318614, + "grad_norm": 1.2552392614352392, + "learning_rate": 1.6046299223207432e-05, + "loss": 0.6637085676193237, + "step": 2249 + }, + { + "epoch": 0.6579909343471267, + "grad_norm": 1.3894808498189977, + "learning_rate": 1.6042447640949058e-05, + "loss": 0.5834380388259888, + "step": 2250 + }, + { + "epoch": 0.6582833747623922, + "grad_norm": 1.1700440243092598, + "learning_rate": 1.6038594646305285e-05, + "loss": 0.5735288858413696, + "step": 2251 + }, + { + "epoch": 0.6585758151776575, + "grad_norm": 1.274727070163542, + "learning_rate": 1.6034740240176728e-05, + "loss": 0.6227413415908813, + "step": 2252 + }, + { + "epoch": 0.658868255592923, + "grad_norm": 1.5091805441488135, + "learning_rate": 1.6030884423464336e-05, + "loss": 0.6881246566772461, + "step": 2253 + }, + { + "epoch": 0.6591606960081884, + "grad_norm": 1.3237201049051734, + "learning_rate": 1.6027027197069376e-05, + "loss": 0.6059132814407349, + "step": 2254 + }, + { + "epoch": 0.6594531364234537, + "grad_norm": 1.5070949945133527, + "learning_rate": 1.6023168561893453e-05, + "loss": 0.5829097032546997, + "step": 2255 + }, + { + "epoch": 0.6597455768387191, + "grad_norm": 1.1821076640408643, + "learning_rate": 1.60193085188385e-05, + "loss": 0.5173588991165161, + "step": 2256 + }, + { + "epoch": 0.6600380172539845, + "grad_norm": 1.0404057140160172, + "learning_rate": 1.601544706880678e-05, + "loss": 0.5128534436225891, + "step": 2257 + }, + { + "epoch": 0.6603304576692499, + "grad_norm": 1.4274902732235735, + "learning_rate": 1.601158421270088e-05, + "loss": 0.5472848415374756, + "step": 2258 + }, + { + "epoch": 0.6606228980845152, + "grad_norm": 1.2505155913554076, + "learning_rate": 1.6007719951423725e-05, + "loss": 0.5775434970855713, + "step": 2259 + }, + { + "epoch": 0.6609153384997807, + "grad_norm": 1.2760490287043558, + "learning_rate": 1.6003854285878558e-05, + "loss": 0.5529654622077942, + "step": 2260 + }, + { + "epoch": 0.661207778915046, + "grad_norm": 
1.2950239037035343, + "learning_rate": 1.5999987216968954e-05, + "loss": 0.5295222997665405, + "step": 2261 + }, + { + "epoch": 0.6615002193303114, + "grad_norm": 1.42880093351922, + "learning_rate": 1.5996118745598817e-05, + "loss": 0.6782759428024292, + "step": 2262 + }, + { + "epoch": 0.6617926597455769, + "grad_norm": 1.5123560217291456, + "learning_rate": 1.5992248872672384e-05, + "loss": 0.7698723077774048, + "step": 2263 + }, + { + "epoch": 0.6620851001608422, + "grad_norm": 1.224014553870767, + "learning_rate": 1.5988377599094208e-05, + "loss": 0.5056325793266296, + "step": 2264 + }, + { + "epoch": 0.6623775405761076, + "grad_norm": 1.2811286417806291, + "learning_rate": 1.598450492576918e-05, + "loss": 0.6748740673065186, + "step": 2265 + }, + { + "epoch": 0.662669980991373, + "grad_norm": 1.4413699029522251, + "learning_rate": 1.598063085360251e-05, + "loss": 0.6594111919403076, + "step": 2266 + }, + { + "epoch": 0.6629624214066384, + "grad_norm": 1.490546706478741, + "learning_rate": 1.5976755383499743e-05, + "loss": 0.5942472815513611, + "step": 2267 + }, + { + "epoch": 0.6632548618219037, + "grad_norm": 1.4166382340274284, + "learning_rate": 1.5972878516366742e-05, + "loss": 0.6956725120544434, + "step": 2268 + }, + { + "epoch": 0.6635473022371692, + "grad_norm": 1.5479108671282409, + "learning_rate": 1.5969000253109707e-05, + "loss": 0.6743103265762329, + "step": 2269 + }, + { + "epoch": 0.6638397426524346, + "grad_norm": 1.2415014970437994, + "learning_rate": 1.596512059463515e-05, + "loss": 0.5452187061309814, + "step": 2270 + }, + { + "epoch": 0.6641321830676999, + "grad_norm": 1.305856048148522, + "learning_rate": 1.5961239541849923e-05, + "loss": 0.6064754128456116, + "step": 2271 + }, + { + "epoch": 0.6644246234829654, + "grad_norm": 1.1672873660489786, + "learning_rate": 1.59573570956612e-05, + "loss": 0.5879498720169067, + "step": 2272 + }, + { + "epoch": 0.6647170638982307, + "grad_norm": 1.2464190562799757, + "learning_rate": 
1.595347325697648e-05, + "loss": 0.6610721945762634, + "step": 2273 + }, + { + "epoch": 0.6650095043134961, + "grad_norm": 1.5001752360693776, + "learning_rate": 1.594958802670358e-05, + "loss": 0.6674839854240417, + "step": 2274 + }, + { + "epoch": 0.6653019447287615, + "grad_norm": 1.2669024802691538, + "learning_rate": 1.5945701405750654e-05, + "loss": 0.5189186334609985, + "step": 2275 + }, + { + "epoch": 0.6655943851440269, + "grad_norm": 1.096047033017533, + "learning_rate": 1.5941813395026174e-05, + "loss": 0.5225304365158081, + "step": 2276 + }, + { + "epoch": 0.6658868255592922, + "grad_norm": 1.1982797539630743, + "learning_rate": 1.5937923995438942e-05, + "loss": 0.5426747798919678, + "step": 2277 + }, + { + "epoch": 0.6661792659745577, + "grad_norm": 1.1331316680397499, + "learning_rate": 1.593403320789808e-05, + "loss": 0.6408158540725708, + "step": 2278 + }, + { + "epoch": 0.6664717063898231, + "grad_norm": 1.2777185085969938, + "learning_rate": 1.5930141033313034e-05, + "loss": 0.6213311553001404, + "step": 2279 + }, + { + "epoch": 0.6667641468050884, + "grad_norm": 1.2938845863415658, + "learning_rate": 1.5926247472593575e-05, + "loss": 0.6538233757019043, + "step": 2280 + }, + { + "epoch": 0.6670565872203539, + "grad_norm": 1.4396815547692279, + "learning_rate": 1.5922352526649803e-05, + "loss": 0.6714701056480408, + "step": 2281 + }, + { + "epoch": 0.6673490276356192, + "grad_norm": 1.2875131974555427, + "learning_rate": 1.5918456196392137e-05, + "loss": 0.501068115234375, + "step": 2282 + }, + { + "epoch": 0.6676414680508846, + "grad_norm": 1.483722651200639, + "learning_rate": 1.5914558482731317e-05, + "loss": 0.6551339626312256, + "step": 2283 + }, + { + "epoch": 0.66793390846615, + "grad_norm": 1.575561891265528, + "learning_rate": 1.5910659386578415e-05, + "loss": 0.666611909866333, + "step": 2284 + }, + { + "epoch": 0.6682263488814154, + "grad_norm": 1.3058077151253007, + "learning_rate": 1.590675890884482e-05, + "loss": 0.6612483859062195, 
+ "step": 2285 + }, + { + "epoch": 0.6685187892966808, + "grad_norm": 1.535602248808955, + "learning_rate": 1.590285705044224e-05, + "loss": 0.5299272537231445, + "step": 2286 + }, + { + "epoch": 0.6688112297119462, + "grad_norm": 1.5209550044520355, + "learning_rate": 1.589895381228272e-05, + "loss": 0.6873815655708313, + "step": 2287 + }, + { + "epoch": 0.6691036701272116, + "grad_norm": 1.333463107294571, + "learning_rate": 1.5895049195278608e-05, + "loss": 0.6473613977432251, + "step": 2288 + }, + { + "epoch": 0.6693961105424769, + "grad_norm": 1.4389212790848083, + "learning_rate": 1.589114320034259e-05, + "loss": 0.6600902080535889, + "step": 2289 + }, + { + "epoch": 0.6696885509577424, + "grad_norm": 1.7581559017014303, + "learning_rate": 1.5887235828387667e-05, + "loss": 0.6066039800643921, + "step": 2290 + }, + { + "epoch": 0.6699809913730077, + "grad_norm": 1.2475073124572584, + "learning_rate": 1.5883327080327165e-05, + "loss": 0.5411461591720581, + "step": 2291 + }, + { + "epoch": 0.6702734317882731, + "grad_norm": 1.3264098990068387, + "learning_rate": 1.587941695707473e-05, + "loss": 0.5678138136863708, + "step": 2292 + }, + { + "epoch": 0.6705658722035386, + "grad_norm": 1.2017893940389541, + "learning_rate": 1.5875505459544327e-05, + "loss": 0.6175323724746704, + "step": 2293 + }, + { + "epoch": 0.6708583126188039, + "grad_norm": 1.2255154092981597, + "learning_rate": 1.587159258865025e-05, + "loss": 0.5790976285934448, + "step": 2294 + }, + { + "epoch": 0.6711507530340693, + "grad_norm": 1.4070059880127774, + "learning_rate": 1.58676783453071e-05, + "loss": 0.5891247391700745, + "step": 2295 + }, + { + "epoch": 0.6714431934493347, + "grad_norm": 1.3680740765730994, + "learning_rate": 1.5863762730429817e-05, + "loss": 0.5604299902915955, + "step": 2296 + }, + { + "epoch": 0.6717356338646001, + "grad_norm": 1.156075846793115, + "learning_rate": 1.585984574493365e-05, + "loss": 0.5402317047119141, + "step": 2297 + }, + { + "epoch": 0.6720280742798654, 
+ "grad_norm": 1.2729484704762741, + "learning_rate": 1.5855927389734163e-05, + "loss": 0.5569097995758057, + "step": 2298 + }, + { + "epoch": 0.6723205146951309, + "grad_norm": 1.792109537125727, + "learning_rate": 1.5852007665747255e-05, + "loss": 0.6754734516143799, + "step": 2299 + }, + { + "epoch": 0.6726129551103962, + "grad_norm": 1.2015482502693244, + "learning_rate": 1.584808657388914e-05, + "loss": 0.5555064678192139, + "step": 2300 + }, + { + "epoch": 0.6729053955256616, + "grad_norm": 1.2978798977032824, + "learning_rate": 1.584416411507634e-05, + "loss": 0.5735480785369873, + "step": 2301 + }, + { + "epoch": 0.6731978359409271, + "grad_norm": 1.3948021707686127, + "learning_rate": 1.5840240290225713e-05, + "loss": 0.6084697842597961, + "step": 2302 + }, + { + "epoch": 0.6734902763561924, + "grad_norm": 1.3972987341637648, + "learning_rate": 1.5836315100254427e-05, + "loss": 0.5747361779212952, + "step": 2303 + }, + { + "epoch": 0.6737827167714578, + "grad_norm": 1.3042539657521541, + "learning_rate": 1.583238854607997e-05, + "loss": 0.6597394943237305, + "step": 2304 + }, + { + "epoch": 0.6740751571867232, + "grad_norm": 1.2885200657030746, + "learning_rate": 1.582846062862016e-05, + "loss": 0.6054418087005615, + "step": 2305 + }, + { + "epoch": 0.6743675976019886, + "grad_norm": 1.4670353156004656, + "learning_rate": 1.5824531348793106e-05, + "loss": 0.6897715330123901, + "step": 2306 + }, + { + "epoch": 0.6746600380172539, + "grad_norm": 1.2379672312585208, + "learning_rate": 1.5820600707517265e-05, + "loss": 0.5438888072967529, + "step": 2307 + }, + { + "epoch": 0.6749524784325194, + "grad_norm": 1.3511076823584265, + "learning_rate": 1.5816668705711402e-05, + "loss": 0.5139850378036499, + "step": 2308 + }, + { + "epoch": 0.6752449188477848, + "grad_norm": 1.3878243291723096, + "learning_rate": 1.5812735344294594e-05, + "loss": 0.5970615744590759, + "step": 2309 + }, + { + "epoch": 0.6755373592630501, + "grad_norm": 1.5290136714699685, + 
"learning_rate": 1.580880062418624e-05, + "loss": 0.6206730604171753, + "step": 2310 + }, + { + "epoch": 0.6758297996783156, + "grad_norm": 1.5283867982171593, + "learning_rate": 1.580486454630606e-05, + "loss": 0.6545864939689636, + "step": 2311 + }, + { + "epoch": 0.6761222400935809, + "grad_norm": 1.6726831788405112, + "learning_rate": 1.5800927111574084e-05, + "loss": 0.6284571290016174, + "step": 2312 + }, + { + "epoch": 0.6764146805088463, + "grad_norm": 1.3062366838416066, + "learning_rate": 1.5796988320910665e-05, + "loss": 0.6662822365760803, + "step": 2313 + }, + { + "epoch": 0.6767071209241117, + "grad_norm": 1.4857961720461585, + "learning_rate": 1.5793048175236477e-05, + "loss": 0.6952080130577087, + "step": 2314 + }, + { + "epoch": 0.6769995613393771, + "grad_norm": 1.1527122349254486, + "learning_rate": 1.5789106675472496e-05, + "loss": 0.55562424659729, + "step": 2315 + }, + { + "epoch": 0.6772920017546424, + "grad_norm": 1.417075363017466, + "learning_rate": 1.578516382254003e-05, + "loss": 0.696354866027832, + "step": 2316 + }, + { + "epoch": 0.6775844421699079, + "grad_norm": 1.2481046919985836, + "learning_rate": 1.5781219617360695e-05, + "loss": 0.5764954686164856, + "step": 2317 + }, + { + "epoch": 0.6778768825851733, + "grad_norm": 1.5617477082955222, + "learning_rate": 1.577727406085642e-05, + "loss": 0.6944533586502075, + "step": 2318 + }, + { + "epoch": 0.6781693230004386, + "grad_norm": 1.5273473613933928, + "learning_rate": 1.5773327153949465e-05, + "loss": 0.5517882704734802, + "step": 2319 + }, + { + "epoch": 0.6784617634157041, + "grad_norm": 1.3495609581159556, + "learning_rate": 1.576937889756239e-05, + "loss": 0.6151533126831055, + "step": 2320 + }, + { + "epoch": 0.6787542038309694, + "grad_norm": 1.3729348393231853, + "learning_rate": 1.5765429292618075e-05, + "loss": 0.6221417784690857, + "step": 2321 + }, + { + "epoch": 0.6790466442462348, + "grad_norm": 1.5561656408525308, + "learning_rate": 1.576147834003972e-05, + "loss": 
0.6218827962875366, + "step": 2322 + }, + { + "epoch": 0.6793390846615002, + "grad_norm": 1.2844085482190328, + "learning_rate": 1.575752604075083e-05, + "loss": 0.689696192741394, + "step": 2323 + }, + { + "epoch": 0.6796315250767656, + "grad_norm": 1.459910366351317, + "learning_rate": 1.5753572395675234e-05, + "loss": 0.6457825899124146, + "step": 2324 + }, + { + "epoch": 0.679923965492031, + "grad_norm": 1.660980107305809, + "learning_rate": 1.5749617405737075e-05, + "loss": 0.6261845827102661, + "step": 2325 + }, + { + "epoch": 0.6802164059072964, + "grad_norm": 1.5113706854166593, + "learning_rate": 1.5745661071860802e-05, + "loss": 0.6631760597229004, + "step": 2326 + }, + { + "epoch": 0.6805088463225618, + "grad_norm": 1.4700703601826162, + "learning_rate": 1.574170339497119e-05, + "loss": 0.6223125457763672, + "step": 2327 + }, + { + "epoch": 0.6808012867378271, + "grad_norm": 1.4289384563362724, + "learning_rate": 1.5737744375993318e-05, + "loss": 0.5649152398109436, + "step": 2328 + }, + { + "epoch": 0.6810937271530926, + "grad_norm": 1.3637036537520066, + "learning_rate": 1.573378401585259e-05, + "loss": 0.6822011470794678, + "step": 2329 + }, + { + "epoch": 0.6813861675683579, + "grad_norm": 1.243454490323945, + "learning_rate": 1.5729822315474704e-05, + "loss": 0.4853206276893616, + "step": 2330 + }, + { + "epoch": 0.6816786079836233, + "grad_norm": 1.3491879449563893, + "learning_rate": 1.572585927578569e-05, + "loss": 0.6410783529281616, + "step": 2331 + }, + { + "epoch": 0.6819710483988888, + "grad_norm": 1.2349335330440738, + "learning_rate": 1.572189489771189e-05, + "loss": 0.607154369354248, + "step": 2332 + }, + { + "epoch": 0.6822634888141541, + "grad_norm": 1.2303800918258645, + "learning_rate": 1.571792918217994e-05, + "loss": 0.5079061388969421, + "step": 2333 + }, + { + "epoch": 0.6825559292294195, + "grad_norm": 1.355109139858454, + "learning_rate": 1.5713962130116812e-05, + "loss": 0.534178614616394, + "step": 2334 + }, + { + "epoch": 
0.6828483696446849, + "grad_norm": 1.099124567807314, + "learning_rate": 1.5709993742449777e-05, + "loss": 0.6172807812690735, + "step": 2335 + }, + { + "epoch": 0.6831408100599503, + "grad_norm": 1.468863618054796, + "learning_rate": 1.5706024020106425e-05, + "loss": 0.6863975524902344, + "step": 2336 + }, + { + "epoch": 0.6834332504752156, + "grad_norm": 1.3542187494807805, + "learning_rate": 1.570205296401465e-05, + "loss": 0.6314880847930908, + "step": 2337 + }, + { + "epoch": 0.6837256908904811, + "grad_norm": 1.4888474767820694, + "learning_rate": 1.5698080575102662e-05, + "loss": 0.5420910120010376, + "step": 2338 + }, + { + "epoch": 0.6840181313057464, + "grad_norm": 1.545548665208996, + "learning_rate": 1.5694106854298988e-05, + "loss": 0.6598352789878845, + "step": 2339 + }, + { + "epoch": 0.6843105717210118, + "grad_norm": 1.1855737189309028, + "learning_rate": 1.5690131802532454e-05, + "loss": 0.49957770109176636, + "step": 2340 + }, + { + "epoch": 0.6846030121362773, + "grad_norm": 1.3910703437631544, + "learning_rate": 1.568615542073221e-05, + "loss": 0.7217017412185669, + "step": 2341 + }, + { + "epoch": 0.6848954525515426, + "grad_norm": 1.383168011584397, + "learning_rate": 1.5682177709827705e-05, + "loss": 0.5824606418609619, + "step": 2342 + }, + { + "epoch": 0.685187892966808, + "grad_norm": 1.4861418668417947, + "learning_rate": 1.567819867074871e-05, + "loss": 0.5932704210281372, + "step": 2343 + }, + { + "epoch": 0.6854803333820734, + "grad_norm": 1.1927307747773088, + "learning_rate": 1.5674218304425304e-05, + "loss": 0.6098836660385132, + "step": 2344 + }, + { + "epoch": 0.6857727737973388, + "grad_norm": 1.3302018518433079, + "learning_rate": 1.5670236611787865e-05, + "loss": 0.5158270597457886, + "step": 2345 + }, + { + "epoch": 0.6860652142126041, + "grad_norm": 1.431950758183516, + "learning_rate": 1.5666253593767095e-05, + "loss": 0.7840174436569214, + "step": 2346 + }, + { + "epoch": 0.6863576546278696, + "grad_norm": 
1.3462478651155303, + "learning_rate": 1.5662269251294e-05, + "loss": 0.5665150880813599, + "step": 2347 + }, + { + "epoch": 0.686650095043135, + "grad_norm": 1.2308130347699304, + "learning_rate": 1.5658283585299894e-05, + "loss": 0.5801588296890259, + "step": 2348 + }, + { + "epoch": 0.6869425354584003, + "grad_norm": 1.487298330014143, + "learning_rate": 1.56542965967164e-05, + "loss": 0.759188175201416, + "step": 2349 + }, + { + "epoch": 0.6872349758736658, + "grad_norm": 1.5717076197736846, + "learning_rate": 1.565030828647546e-05, + "loss": 0.7182703018188477, + "step": 2350 + }, + { + "epoch": 0.6875274162889311, + "grad_norm": 1.3681215378392677, + "learning_rate": 1.564631865550931e-05, + "loss": 0.7172018885612488, + "step": 2351 + }, + { + "epoch": 0.6878198567041965, + "grad_norm": 1.3897042930637002, + "learning_rate": 1.5642327704750502e-05, + "loss": 0.5959519743919373, + "step": 2352 + }, + { + "epoch": 0.6881122971194619, + "grad_norm": 1.3686338632915553, + "learning_rate": 1.5638335435131902e-05, + "loss": 0.5531836748123169, + "step": 2353 + }, + { + "epoch": 0.6884047375347273, + "grad_norm": 1.2097339017222586, + "learning_rate": 1.5634341847586676e-05, + "loss": 0.672225296497345, + "step": 2354 + }, + { + "epoch": 0.6886971779499926, + "grad_norm": 1.3740176007353215, + "learning_rate": 1.5630346943048297e-05, + "loss": 0.5721465349197388, + "step": 2355 + }, + { + "epoch": 0.6889896183652581, + "grad_norm": 1.2416767467837069, + "learning_rate": 1.5626350722450555e-05, + "loss": 0.6357900500297546, + "step": 2356 + }, + { + "epoch": 0.6892820587805235, + "grad_norm": 1.241847883566859, + "learning_rate": 1.5622353186727542e-05, + "loss": 0.6348878145217896, + "step": 2357 + }, + { + "epoch": 0.6895744991957888, + "grad_norm": 1.390537638221337, + "learning_rate": 1.5618354336813656e-05, + "loss": 0.5473623275756836, + "step": 2358 + }, + { + "epoch": 0.6898669396110543, + "grad_norm": 1.4299851255948683, + "learning_rate": 
1.5614354173643606e-05, + "loss": 0.8284158706665039, + "step": 2359 + }, + { + "epoch": 0.6901593800263196, + "grad_norm": 1.3561063303885135, + "learning_rate": 1.5610352698152396e-05, + "loss": 0.5915359854698181, + "step": 2360 + }, + { + "epoch": 0.690451820441585, + "grad_norm": 1.434488423567872, + "learning_rate": 1.560634991127536e-05, + "loss": 0.6173555254936218, + "step": 2361 + }, + { + "epoch": 0.6907442608568504, + "grad_norm": 1.2348756002421877, + "learning_rate": 1.560234581394812e-05, + "loss": 0.5551577806472778, + "step": 2362 + }, + { + "epoch": 0.6910367012721158, + "grad_norm": 1.6912535037446208, + "learning_rate": 1.559834040710661e-05, + "loss": 0.7160264253616333, + "step": 2363 + }, + { + "epoch": 0.6913291416873812, + "grad_norm": 1.4348139771874249, + "learning_rate": 1.5594333691687062e-05, + "loss": 0.5986248850822449, + "step": 2364 + }, + { + "epoch": 0.6916215821026466, + "grad_norm": 1.6827348555719241, + "learning_rate": 1.559032566862603e-05, + "loss": 0.7347019910812378, + "step": 2365 + }, + { + "epoch": 0.691914022517912, + "grad_norm": 1.1496166027771255, + "learning_rate": 1.5586316338860363e-05, + "loss": 0.502663791179657, + "step": 2366 + }, + { + "epoch": 0.6922064629331773, + "grad_norm": 1.1610976211375774, + "learning_rate": 1.558230570332722e-05, + "loss": 0.5026617050170898, + "step": 2367 + }, + { + "epoch": 0.6924989033484428, + "grad_norm": 1.3196703072069724, + "learning_rate": 1.5578293762964057e-05, + "loss": 0.6091101169586182, + "step": 2368 + }, + { + "epoch": 0.6927913437637081, + "grad_norm": 1.1607138049044183, + "learning_rate": 1.5574280518708645e-05, + "loss": 0.6202579736709595, + "step": 2369 + }, + { + "epoch": 0.6930837841789735, + "grad_norm": 1.3867301068189375, + "learning_rate": 1.557026597149905e-05, + "loss": 0.6532948017120361, + "step": 2370 + }, + { + "epoch": 0.693376224594239, + "grad_norm": 1.2799465632685962, + "learning_rate": 1.5566250122273658e-05, + "loss": 0.6197448372840881, 
+ "step": 2371 + }, + { + "epoch": 0.6936686650095043, + "grad_norm": 1.330123548058068, + "learning_rate": 1.556223297197114e-05, + "loss": 0.6181553602218628, + "step": 2372 + }, + { + "epoch": 0.6939611054247697, + "grad_norm": 1.3757625130132767, + "learning_rate": 1.5558214521530482e-05, + "loss": 0.6015427112579346, + "step": 2373 + }, + { + "epoch": 0.6942535458400351, + "grad_norm": 1.4511778478720454, + "learning_rate": 1.555419477189098e-05, + "loss": 0.6204534769058228, + "step": 2374 + }, + { + "epoch": 0.6945459862553005, + "grad_norm": 1.2237746404921626, + "learning_rate": 1.5550173723992218e-05, + "loss": 0.5914584994316101, + "step": 2375 + }, + { + "epoch": 0.6948384266705658, + "grad_norm": 1.2633817911858796, + "learning_rate": 1.554615137877409e-05, + "loss": 0.5077188611030579, + "step": 2376 + }, + { + "epoch": 0.6951308670858313, + "grad_norm": 1.1523903505061626, + "learning_rate": 1.55421277371768e-05, + "loss": 0.5560270547866821, + "step": 2377 + }, + { + "epoch": 0.6954233075010966, + "grad_norm": 1.6214020445600121, + "learning_rate": 1.553810280014085e-05, + "loss": 0.7064549922943115, + "step": 2378 + }, + { + "epoch": 0.695715747916362, + "grad_norm": 1.4249847873824701, + "learning_rate": 1.5534076568607043e-05, + "loss": 0.7433110475540161, + "step": 2379 + }, + { + "epoch": 0.6960081883316275, + "grad_norm": 1.4661372034410074, + "learning_rate": 1.553004904351648e-05, + "loss": 0.6061110496520996, + "step": 2380 + }, + { + "epoch": 0.6963006287468928, + "grad_norm": 1.3530915937691412, + "learning_rate": 1.5526020225810583e-05, + "loss": 0.604006290435791, + "step": 2381 + }, + { + "epoch": 0.6965930691621582, + "grad_norm": 1.3193058416919141, + "learning_rate": 1.5521990116431052e-05, + "loss": 0.6221635341644287, + "step": 2382 + }, + { + "epoch": 0.6968855095774236, + "grad_norm": 1.17260855579956, + "learning_rate": 1.551795871631991e-05, + "loss": 0.5848093032836914, + "step": 2383 + }, + { + "epoch": 0.697177949992689, + 
"grad_norm": 1.3909866883805502, + "learning_rate": 1.5513926026419464e-05, + "loss": 0.6451606154441833, + "step": 2384 + }, + { + "epoch": 0.6974703904079543, + "grad_norm": 1.2515682694896817, + "learning_rate": 1.5509892047672336e-05, + "loss": 0.7922245264053345, + "step": 2385 + }, + { + "epoch": 0.6977628308232198, + "grad_norm": 1.501698757307051, + "learning_rate": 1.5505856781021443e-05, + "loss": 0.6458885073661804, + "step": 2386 + }, + { + "epoch": 0.6980552712384852, + "grad_norm": 1.3253141303151825, + "learning_rate": 1.5501820227410002e-05, + "loss": 0.5989570617675781, + "step": 2387 + }, + { + "epoch": 0.6983477116537505, + "grad_norm": 1.4240123629840666, + "learning_rate": 1.5497782387781536e-05, + "loss": 0.740998387336731, + "step": 2388 + }, + { + "epoch": 0.698640152069016, + "grad_norm": 1.4547948512453808, + "learning_rate": 1.5493743263079866e-05, + "loss": 0.63981032371521, + "step": 2389 + }, + { + "epoch": 0.6989325924842813, + "grad_norm": 1.325001348454028, + "learning_rate": 1.5489702854249106e-05, + "loss": 0.766716480255127, + "step": 2390 + }, + { + "epoch": 0.6992250328995467, + "grad_norm": 1.541044208915787, + "learning_rate": 1.5485661162233684e-05, + "loss": 0.7879365086555481, + "step": 2391 + }, + { + "epoch": 0.6995174733148121, + "grad_norm": 1.3532949065271656, + "learning_rate": 1.5481618187978322e-05, + "loss": 0.6005786657333374, + "step": 2392 + }, + { + "epoch": 0.6998099137300775, + "grad_norm": 1.2952910023515818, + "learning_rate": 1.5477573932428033e-05, + "loss": 0.6207927465438843, + "step": 2393 + }, + { + "epoch": 0.7001023541453428, + "grad_norm": 1.4490674696543298, + "learning_rate": 1.5473528396528144e-05, + "loss": 0.5582053661346436, + "step": 2394 + }, + { + "epoch": 0.7003947945606083, + "grad_norm": 1.6315416515790502, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.5701307058334351, + "step": 2395 + }, + { + "epoch": 0.7006872349758737, + "grad_norm": 1.3804181292115258, + "learning_rate": 
1.546543348746233e-05, + "loss": 0.6201068162918091, + "step": 2396 + }, + { + "epoch": 0.700979675391139, + "grad_norm": 1.3282086716914991, + "learning_rate": 1.5461384116188546e-05, + "loss": 0.6102321147918701, + "step": 2397 + }, + { + "epoch": 0.7012721158064045, + "grad_norm": 1.361382387889105, + "learning_rate": 1.545733346834943e-05, + "loss": 0.5445820093154907, + "step": 2398 + }, + { + "epoch": 0.7015645562216698, + "grad_norm": 1.3134018034606705, + "learning_rate": 1.5453281544891797e-05, + "loss": 0.5278012752532959, + "step": 2399 + }, + { + "epoch": 0.7018569966369352, + "grad_norm": 1.6159840401286016, + "learning_rate": 1.544922834676276e-05, + "loss": 0.7051252126693726, + "step": 2400 + }, + { + "epoch": 0.7021494370522006, + "grad_norm": 1.3552623655435003, + "learning_rate": 1.544517387490973e-05, + "loss": 0.6024646759033203, + "step": 2401 + }, + { + "epoch": 0.702441877467466, + "grad_norm": 1.3323978020414873, + "learning_rate": 1.5441118130280406e-05, + "loss": 0.5563746094703674, + "step": 2402 + }, + { + "epoch": 0.7027343178827314, + "grad_norm": 1.3671297363224464, + "learning_rate": 1.5437061113822805e-05, + "loss": 0.5971669554710388, + "step": 2403 + }, + { + "epoch": 0.7030267582979968, + "grad_norm": 1.5082475685517047, + "learning_rate": 1.5433002826485234e-05, + "loss": 0.5846019983291626, + "step": 2404 + }, + { + "epoch": 0.7033191987132622, + "grad_norm": 1.2921876796744827, + "learning_rate": 1.5428943269216278e-05, + "loss": 0.5571885108947754, + "step": 2405 + }, + { + "epoch": 0.7036116391285275, + "grad_norm": 1.15652993390593, + "learning_rate": 1.542488244296484e-05, + "loss": 0.4770846962928772, + "step": 2406 + }, + { + "epoch": 0.703904079543793, + "grad_norm": 1.6398352091801953, + "learning_rate": 1.542082034868012e-05, + "loss": 0.636760950088501, + "step": 2407 + }, + { + "epoch": 0.7041965199590583, + "grad_norm": 1.6877906333209267, + "learning_rate": 1.5416756987311603e-05, + "loss": 0.7264662981033325, + 
"step": 2408 + }, + { + "epoch": 0.7044889603743237, + "grad_norm": 1.372256728403267, + "learning_rate": 1.5412692359809073e-05, + "loss": 0.6723978519439697, + "step": 2409 + }, + { + "epoch": 0.7047814007895892, + "grad_norm": 1.4362583031777838, + "learning_rate": 1.5408626467122612e-05, + "loss": 0.6205083727836609, + "step": 2410 + }, + { + "epoch": 0.7050738412048545, + "grad_norm": 1.4495567778043355, + "learning_rate": 1.54045593102026e-05, + "loss": 0.5980903506278992, + "step": 2411 + }, + { + "epoch": 0.7053662816201199, + "grad_norm": 1.4897959908790472, + "learning_rate": 1.540049088999971e-05, + "loss": 0.6311691999435425, + "step": 2412 + }, + { + "epoch": 0.7056587220353853, + "grad_norm": 1.428243709143454, + "learning_rate": 1.539642120746491e-05, + "loss": 0.5872593522071838, + "step": 2413 + }, + { + "epoch": 0.7059511624506507, + "grad_norm": 1.351001450570791, + "learning_rate": 1.5392350263549462e-05, + "loss": 0.5037539005279541, + "step": 2414 + }, + { + "epoch": 0.706243602865916, + "grad_norm": 1.4775045660401276, + "learning_rate": 1.538827805920493e-05, + "loss": 0.5917855501174927, + "step": 2415 + }, + { + "epoch": 0.7065360432811815, + "grad_norm": 1.3687769613569196, + "learning_rate": 1.538420459538316e-05, + "loss": 0.6350749731063843, + "step": 2416 + }, + { + "epoch": 0.7068284836964468, + "grad_norm": 1.330110483636511, + "learning_rate": 1.53801298730363e-05, + "loss": 0.6828908920288086, + "step": 2417 + }, + { + "epoch": 0.7071209241117122, + "grad_norm": 1.5864329436081315, + "learning_rate": 1.5376053893116796e-05, + "loss": 0.6307995319366455, + "step": 2418 + }, + { + "epoch": 0.7074133645269777, + "grad_norm": 1.3609756396375527, + "learning_rate": 1.5371976656577385e-05, + "loss": 0.5305014252662659, + "step": 2419 + }, + { + "epoch": 0.707705804942243, + "grad_norm": 1.2953614031977334, + "learning_rate": 1.536789816437109e-05, + "loss": 0.560103178024292, + "step": 2420 + }, + { + "epoch": 0.7079982453575084, + 
"grad_norm": 1.4823675619867462, + "learning_rate": 1.5363818417451236e-05, + "loss": 0.5449249148368835, + "step": 2421 + }, + { + "epoch": 0.7082906857727738, + "grad_norm": 1.575423149049035, + "learning_rate": 1.5359737416771438e-05, + "loss": 0.7456427812576294, + "step": 2422 + }, + { + "epoch": 0.7085831261880392, + "grad_norm": 1.4606336998212586, + "learning_rate": 1.5355655163285607e-05, + "loss": 0.5401932597160339, + "step": 2423 + }, + { + "epoch": 0.7088755666033045, + "grad_norm": 1.4384817217494414, + "learning_rate": 1.5351571657947947e-05, + "loss": 0.6215255856513977, + "step": 2424 + }, + { + "epoch": 0.70916800701857, + "grad_norm": 1.454238489435378, + "learning_rate": 1.5347486901712946e-05, + "loss": 0.724073052406311, + "step": 2425 + }, + { + "epoch": 0.7094604474338354, + "grad_norm": 1.280381472439187, + "learning_rate": 1.5343400895535402e-05, + "loss": 0.6375223398208618, + "step": 2426 + }, + { + "epoch": 0.7097528878491007, + "grad_norm": 1.4740965908748953, + "learning_rate": 1.533931364037038e-05, + "loss": 0.6087045669555664, + "step": 2427 + }, + { + "epoch": 0.7100453282643662, + "grad_norm": 1.4709664710326, + "learning_rate": 1.5335225137173262e-05, + "loss": 0.7927658557891846, + "step": 2428 + }, + { + "epoch": 0.7103377686796315, + "grad_norm": 1.4583904783773962, + "learning_rate": 1.5331135386899702e-05, + "loss": 0.6312417387962341, + "step": 2429 + }, + { + "epoch": 0.7106302090948969, + "grad_norm": 1.3514647325044575, + "learning_rate": 1.5327044390505666e-05, + "loss": 0.6856948137283325, + "step": 2430 + }, + { + "epoch": 0.7109226495101623, + "grad_norm": 1.3486136616110067, + "learning_rate": 1.532295214894739e-05, + "loss": 0.5683865547180176, + "step": 2431 + }, + { + "epoch": 0.7112150899254277, + "grad_norm": 1.5290242403967753, + "learning_rate": 1.5318858663181412e-05, + "loss": 0.6208291053771973, + "step": 2432 + }, + { + "epoch": 0.711507530340693, + "grad_norm": 1.5265891330435364, + "learning_rate": 
1.531476393416456e-05, + "loss": 0.6751389503479004, + "step": 2433 + }, + { + "epoch": 0.7117999707559585, + "grad_norm": 1.1685210774635664, + "learning_rate": 1.5310667962853954e-05, + "loss": 0.422024667263031, + "step": 2434 + }, + { + "epoch": 0.7120924111712239, + "grad_norm": 1.377587949543332, + "learning_rate": 1.5306570750207003e-05, + "loss": 0.6714169979095459, + "step": 2435 + }, + { + "epoch": 0.7123848515864892, + "grad_norm": 1.4808127088080212, + "learning_rate": 1.53024722971814e-05, + "loss": 0.5757386088371277, + "step": 2436 + }, + { + "epoch": 0.7126772920017547, + "grad_norm": 1.4790386820456973, + "learning_rate": 1.529837260473514e-05, + "loss": 0.5686037540435791, + "step": 2437 + }, + { + "epoch": 0.71296973241702, + "grad_norm": 1.570681384959534, + "learning_rate": 1.5294271673826498e-05, + "loss": 0.7601959705352783, + "step": 2438 + }, + { + "epoch": 0.7132621728322854, + "grad_norm": 1.44814607189911, + "learning_rate": 1.529016950541404e-05, + "loss": 0.5654840469360352, + "step": 2439 + }, + { + "epoch": 0.7135546132475508, + "grad_norm": 1.5844428110219366, + "learning_rate": 1.5286066100456623e-05, + "loss": 0.7009234428405762, + "step": 2440 + }, + { + "epoch": 0.7138470536628162, + "grad_norm": 1.166961279939158, + "learning_rate": 1.52819614599134e-05, + "loss": 0.4856370687484741, + "step": 2441 + }, + { + "epoch": 0.7141394940780816, + "grad_norm": 1.2283639685035557, + "learning_rate": 1.52778555847438e-05, + "loss": 0.5135019421577454, + "step": 2442 + }, + { + "epoch": 0.714431934493347, + "grad_norm": 1.3437978900697465, + "learning_rate": 1.5273748475907542e-05, + "loss": 0.7350283861160278, + "step": 2443 + }, + { + "epoch": 0.7147243749086124, + "grad_norm": 1.1274424242274286, + "learning_rate": 1.5269640134364646e-05, + "loss": 0.5985803604125977, + "step": 2444 + }, + { + "epoch": 0.7150168153238777, + "grad_norm": 1.2982732418248375, + "learning_rate": 1.5265530561075407e-05, + "loss": 0.6840892434120178, + 
"step": 2445 + }, + { + "epoch": 0.7153092557391432, + "grad_norm": 1.2979743249484705, + "learning_rate": 1.5261419757000417e-05, + "loss": 0.6921327114105225, + "step": 2446 + }, + { + "epoch": 0.7156016961544085, + "grad_norm": 1.429595570109343, + "learning_rate": 1.525730772310055e-05, + "loss": 0.6428500413894653, + "step": 2447 + }, + { + "epoch": 0.7158941365696739, + "grad_norm": 1.3812578358040712, + "learning_rate": 1.5253194460336964e-05, + "loss": 0.645559549331665, + "step": 2448 + }, + { + "epoch": 0.7161865769849394, + "grad_norm": 1.7222193716043204, + "learning_rate": 1.5249079969671114e-05, + "loss": 0.6211013793945312, + "step": 2449 + }, + { + "epoch": 0.7164790174002047, + "grad_norm": 1.2302035745629583, + "learning_rate": 1.5244964252064737e-05, + "loss": 0.5709721446037292, + "step": 2450 + }, + { + "epoch": 0.7167714578154701, + "grad_norm": 1.4516717315033434, + "learning_rate": 1.5240847308479855e-05, + "loss": 0.6781377196311951, + "step": 2451 + }, + { + "epoch": 0.7170638982307355, + "grad_norm": 1.5070563114338018, + "learning_rate": 1.523672913987878e-05, + "loss": 0.6476876735687256, + "step": 2452 + }, + { + "epoch": 0.7173563386460009, + "grad_norm": 1.5653544129198373, + "learning_rate": 1.523260974722411e-05, + "loss": 0.6564218997955322, + "step": 2453 + }, + { + "epoch": 0.7176487790612662, + "grad_norm": 1.3630096136191563, + "learning_rate": 1.5228489131478722e-05, + "loss": 0.6455773711204529, + "step": 2454 + }, + { + "epoch": 0.7179412194765317, + "grad_norm": 1.393672298684458, + "learning_rate": 1.5224367293605791e-05, + "loss": 0.6039570569992065, + "step": 2455 + }, + { + "epoch": 0.718233659891797, + "grad_norm": 1.337144764968105, + "learning_rate": 1.522024423456877e-05, + "loss": 0.7060747146606445, + "step": 2456 + }, + { + "epoch": 0.7185261003070624, + "grad_norm": 1.3843662041380984, + "learning_rate": 1.52161199553314e-05, + "loss": 0.561469316482544, + "step": 2457 + }, + { + "epoch": 0.7188185407223279, + 
"grad_norm": 1.4419398084710615, + "learning_rate": 1.5211994456857706e-05, + "loss": 0.6682697534561157, + "step": 2458 + }, + { + "epoch": 0.7191109811375932, + "grad_norm": 1.5477256567407798, + "learning_rate": 1.5207867740111994e-05, + "loss": 0.7893983125686646, + "step": 2459 + }, + { + "epoch": 0.7194034215528586, + "grad_norm": 1.3454706918314496, + "learning_rate": 1.5203739806058863e-05, + "loss": 0.617809534072876, + "step": 2460 + }, + { + "epoch": 0.719695861968124, + "grad_norm": 1.2866429351470308, + "learning_rate": 1.5199610655663193e-05, + "loss": 0.5444413423538208, + "step": 2461 + }, + { + "epoch": 0.7199883023833894, + "grad_norm": 1.342633438363169, + "learning_rate": 1.5195480289890146e-05, + "loss": 0.615330696105957, + "step": 2462 + }, + { + "epoch": 0.7202807427986547, + "grad_norm": 1.7658118623485195, + "learning_rate": 1.5191348709705169e-05, + "loss": 0.6811497211456299, + "step": 2463 + }, + { + "epoch": 0.7205731832139202, + "grad_norm": 1.3224396770739022, + "learning_rate": 1.5187215916073997e-05, + "loss": 0.612322211265564, + "step": 2464 + }, + { + "epoch": 0.7208656236291856, + "grad_norm": 1.2201627110269677, + "learning_rate": 1.518308190996264e-05, + "loss": 0.6106880903244019, + "step": 2465 + }, + { + "epoch": 0.7211580640444509, + "grad_norm": 1.2431923365136468, + "learning_rate": 1.5178946692337405e-05, + "loss": 0.4901464581489563, + "step": 2466 + }, + { + "epoch": 0.7214505044597164, + "grad_norm": 2.053814058775723, + "learning_rate": 1.5174810264164865e-05, + "loss": 0.6777167320251465, + "step": 2467 + }, + { + "epoch": 0.7217429448749817, + "grad_norm": 1.4212256530727148, + "learning_rate": 1.5170672626411888e-05, + "loss": 0.6353746056556702, + "step": 2468 + }, + { + "epoch": 0.7220353852902471, + "grad_norm": 1.4867453474426244, + "learning_rate": 1.516653378004563e-05, + "loss": 0.6218847632408142, + "step": 2469 + }, + { + "epoch": 0.7223278257055125, + "grad_norm": 1.2225434595050702, + "learning_rate": 
1.5162393726033508e-05, + "loss": 0.5001585483551025, + "step": 2470 + }, + { + "epoch": 0.7226202661207779, + "grad_norm": 1.5129628743171017, + "learning_rate": 1.5158252465343242e-05, + "loss": 0.6801280975341797, + "step": 2471 + }, + { + "epoch": 0.7229127065360432, + "grad_norm": 1.208746836224967, + "learning_rate": 1.5154109998942823e-05, + "loss": 0.6739565134048462, + "step": 2472 + }, + { + "epoch": 0.7232051469513087, + "grad_norm": 1.1414220178862078, + "learning_rate": 1.5149966327800532e-05, + "loss": 0.5970213413238525, + "step": 2473 + }, + { + "epoch": 0.7234975873665741, + "grad_norm": 1.295455082889375, + "learning_rate": 1.5145821452884923e-05, + "loss": 0.7367317080497742, + "step": 2474 + }, + { + "epoch": 0.7237900277818394, + "grad_norm": 1.3877158266331615, + "learning_rate": 1.5141675375164839e-05, + "loss": 0.6332153677940369, + "step": 2475 + }, + { + "epoch": 0.7240824681971049, + "grad_norm": 1.223786080062607, + "learning_rate": 1.5137528095609395e-05, + "loss": 0.6185739636421204, + "step": 2476 + }, + { + "epoch": 0.7243749086123702, + "grad_norm": 1.436341367228992, + "learning_rate": 1.5133379615187996e-05, + "loss": 0.5982746481895447, + "step": 2477 + }, + { + "epoch": 0.7246673490276356, + "grad_norm": 1.32306496712973, + "learning_rate": 1.512922993487032e-05, + "loss": 0.5946815013885498, + "step": 2478 + }, + { + "epoch": 0.724959789442901, + "grad_norm": 1.2916301226572995, + "learning_rate": 1.5125079055626337e-05, + "loss": 0.5645624399185181, + "step": 2479 + }, + { + "epoch": 0.7252522298581664, + "grad_norm": 1.0689440382368105, + "learning_rate": 1.5120926978426288e-05, + "loss": 0.43329858779907227, + "step": 2480 + }, + { + "epoch": 0.7255446702734318, + "grad_norm": 1.420557871943188, + "learning_rate": 1.5116773704240689e-05, + "loss": 0.64244544506073, + "step": 2481 + }, + { + "epoch": 0.7258371106886972, + "grad_norm": 1.3002221181867923, + "learning_rate": 1.5112619234040348e-05, + "loss": 0.6640222072601318, 
+ "step": 2482 + }, + { + "epoch": 0.7261295511039626, + "grad_norm": 1.4810661665547034, + "learning_rate": 1.5108463568796346e-05, + "loss": 0.6346921324729919, + "step": 2483 + }, + { + "epoch": 0.7264219915192279, + "grad_norm": 1.4101536258246594, + "learning_rate": 1.5104306709480045e-05, + "loss": 0.5891947746276855, + "step": 2484 + }, + { + "epoch": 0.7267144319344934, + "grad_norm": 1.2478330500785222, + "learning_rate": 1.5100148657063089e-05, + "loss": 0.616216242313385, + "step": 2485 + }, + { + "epoch": 0.7270068723497587, + "grad_norm": 1.3541911638943873, + "learning_rate": 1.5095989412517389e-05, + "loss": 0.5961766242980957, + "step": 2486 + }, + { + "epoch": 0.7272993127650241, + "grad_norm": 1.27681624299837, + "learning_rate": 1.509182897681515e-05, + "loss": 0.5629050731658936, + "step": 2487 + }, + { + "epoch": 0.7275917531802896, + "grad_norm": 1.3918382252124497, + "learning_rate": 1.5087667350928844e-05, + "loss": 0.6640661954879761, + "step": 2488 + }, + { + "epoch": 0.7278841935955549, + "grad_norm": 1.1741006713729014, + "learning_rate": 1.5083504535831233e-05, + "loss": 0.5884503126144409, + "step": 2489 + }, + { + "epoch": 0.7281766340108203, + "grad_norm": 1.2387841976936662, + "learning_rate": 1.5079340532495344e-05, + "loss": 0.5395207405090332, + "step": 2490 + }, + { + "epoch": 0.7284690744260857, + "grad_norm": 1.5570127298934886, + "learning_rate": 1.5075175341894487e-05, + "loss": 0.5713212490081787, + "step": 2491 + }, + { + "epoch": 0.7287615148413511, + "grad_norm": 1.8811783299638292, + "learning_rate": 1.5071008965002252e-05, + "loss": 0.5732176303863525, + "step": 2492 + }, + { + "epoch": 0.7290539552566164, + "grad_norm": 1.3314823409610355, + "learning_rate": 1.50668414027925e-05, + "loss": 0.6381006240844727, + "step": 2493 + }, + { + "epoch": 0.7293463956718819, + "grad_norm": 1.5687830928425197, + "learning_rate": 1.5062672656239381e-05, + "loss": 0.6533833742141724, + "step": 2494 + }, + { + "epoch": 
0.7296388360871472, + "grad_norm": 1.213698756503139, + "learning_rate": 1.5058502726317309e-05, + "loss": 0.5919456481933594, + "step": 2495 + }, + { + "epoch": 0.7299312765024126, + "grad_norm": 1.3954865057419796, + "learning_rate": 1.5054331614000984e-05, + "loss": 0.6128921508789062, + "step": 2496 + }, + { + "epoch": 0.7302237169176781, + "grad_norm": 1.3910630571139424, + "learning_rate": 1.5050159320265371e-05, + "loss": 0.5949394702911377, + "step": 2497 + }, + { + "epoch": 0.7305161573329434, + "grad_norm": 1.5386167534502115, + "learning_rate": 1.5045985846085724e-05, + "loss": 0.6262483596801758, + "step": 2498 + }, + { + "epoch": 0.7308085977482088, + "grad_norm": 1.4477928134421267, + "learning_rate": 1.5041811192437563e-05, + "loss": 0.5032243728637695, + "step": 2499 + }, + { + "epoch": 0.7311010381634742, + "grad_norm": 1.31776348667592, + "learning_rate": 1.5037635360296695e-05, + "loss": 0.6721810102462769, + "step": 2500 + }, + { + "epoch": 0.7313934785787396, + "grad_norm": 1.3556666925406757, + "learning_rate": 1.5033458350639185e-05, + "loss": 0.7091001272201538, + "step": 2501 + }, + { + "epoch": 0.7316859189940049, + "grad_norm": 3.8317594491760163, + "learning_rate": 1.5029280164441395e-05, + "loss": 0.5414971113204956, + "step": 2502 + }, + { + "epoch": 0.7319783594092704, + "grad_norm": 1.364096425695391, + "learning_rate": 1.5025100802679944e-05, + "loss": 0.6714789271354675, + "step": 2503 + }, + { + "epoch": 0.7322707998245358, + "grad_norm": 1.3566105024089323, + "learning_rate": 1.5020920266331733e-05, + "loss": 0.5008493065834045, + "step": 2504 + }, + { + "epoch": 0.7325632402398011, + "grad_norm": 1.652155025588763, + "learning_rate": 1.5016738556373936e-05, + "loss": 0.563892126083374, + "step": 2505 + }, + { + "epoch": 0.7328556806550666, + "grad_norm": 1.3313159442091285, + "learning_rate": 1.5012555673784004e-05, + "loss": 0.6371973752975464, + "step": 2506 + }, + { + "epoch": 0.7331481210703319, + "grad_norm": 
1.5289698261558242, + "learning_rate": 1.5008371619539661e-05, + "loss": 0.7365365624427795, + "step": 2507 + }, + { + "epoch": 0.7334405614855973, + "grad_norm": 1.357001447635837, + "learning_rate": 1.5004186394618906e-05, + "loss": 0.5401967763900757, + "step": 2508 + }, + { + "epoch": 0.7337330019008627, + "grad_norm": 1.4338213701683389, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.5827134847640991, + "step": 2509 + }, + { + "epoch": 0.7340254423161281, + "grad_norm": 1.5173171956884226, + "learning_rate": 1.49958124366615e-05, + "loss": 0.7655869126319885, + "step": 2510 + }, + { + "epoch": 0.7343178827313934, + "grad_norm": 1.3360976464033478, + "learning_rate": 1.4991623705582216e-05, + "loss": 0.5410823822021484, + "step": 2511 + }, + { + "epoch": 0.7346103231466589, + "grad_norm": 1.6453007873220271, + "learning_rate": 1.4987433807741242e-05, + "loss": 0.6831178665161133, + "step": 2512 + }, + { + "epoch": 0.7349027635619243, + "grad_norm": 1.2152056235269613, + "learning_rate": 1.498324274411794e-05, + "loss": 0.4952821731567383, + "step": 2513 + }, + { + "epoch": 0.7351952039771896, + "grad_norm": 1.4425254779779118, + "learning_rate": 1.4979050515691944e-05, + "loss": 0.6973339319229126, + "step": 2514 + }, + { + "epoch": 0.7354876443924551, + "grad_norm": 1.428400853551732, + "learning_rate": 1.4974857123443163e-05, + "loss": 0.6604373455047607, + "step": 2515 + }, + { + "epoch": 0.7357800848077204, + "grad_norm": 1.3355207439959806, + "learning_rate": 1.4970662568351776e-05, + "loss": 0.6523034572601318, + "step": 2516 + }, + { + "epoch": 0.7360725252229858, + "grad_norm": 1.2739776061453822, + "learning_rate": 1.4966466851398238e-05, + "loss": 0.6557538509368896, + "step": 2517 + }, + { + "epoch": 0.7363649656382512, + "grad_norm": 1.3243836594251046, + "learning_rate": 1.4962269973563269e-05, + "loss": 0.6993967294692993, + "step": 2518 + }, + { + "epoch": 0.7366574060535166, + "grad_norm": 1.3043008466806634, + "learning_rate": 
1.4958071935827862e-05, + "loss": 0.611979067325592, + "step": 2519 + }, + { + "epoch": 0.736949846468782, + "grad_norm": 1.5837280682600245, + "learning_rate": 1.4953872739173289e-05, + "loss": 0.9108786582946777, + "step": 2520 + }, + { + "epoch": 0.7372422868840474, + "grad_norm": 1.5471791396278156, + "learning_rate": 1.4949672384581082e-05, + "loss": 0.7086392045021057, + "step": 2521 + }, + { + "epoch": 0.7375347272993128, + "grad_norm": 1.341070279173996, + "learning_rate": 1.494547087303305e-05, + "loss": 0.6103025674819946, + "step": 2522 + }, + { + "epoch": 0.7378271677145781, + "grad_norm": 1.223930383405044, + "learning_rate": 1.4941268205511272e-05, + "loss": 0.5597528219223022, + "step": 2523 + }, + { + "epoch": 0.7381196081298436, + "grad_norm": 1.4817126292023657, + "learning_rate": 1.4937064382998091e-05, + "loss": 0.6222598552703857, + "step": 2524 + }, + { + "epoch": 0.7384120485451089, + "grad_norm": 1.4738198225513357, + "learning_rate": 1.4932859406476131e-05, + "loss": 0.6083353757858276, + "step": 2525 + }, + { + "epoch": 0.7387044889603743, + "grad_norm": 1.2716230350108357, + "learning_rate": 1.4928653276928275e-05, + "loss": 0.47920671105384827, + "step": 2526 + }, + { + "epoch": 0.7389969293756398, + "grad_norm": 1.2356122713189879, + "learning_rate": 1.4924445995337685e-05, + "loss": 0.5752983093261719, + "step": 2527 + }, + { + "epoch": 0.7392893697909051, + "grad_norm": 1.3500870063925003, + "learning_rate": 1.4920237562687784e-05, + "loss": 0.6275333762168884, + "step": 2528 + }, + { + "epoch": 0.7395818102061705, + "grad_norm": 1.3423023519178945, + "learning_rate": 1.4916027979962266e-05, + "loss": 0.6362103223800659, + "step": 2529 + }, + { + "epoch": 0.7398742506214359, + "grad_norm": 1.4246415171584412, + "learning_rate": 1.49118172481451e-05, + "loss": 0.5902664661407471, + "step": 2530 + }, + { + "epoch": 0.7401666910367013, + "grad_norm": 1.3036213595476636, + "learning_rate": 1.4907605368220514e-05, + "loss": 
0.5293874740600586, + "step": 2531 + }, + { + "epoch": 0.7404591314519666, + "grad_norm": 1.3590290047464213, + "learning_rate": 1.4903392341173013e-05, + "loss": 0.7298746109008789, + "step": 2532 + }, + { + "epoch": 0.7407515718672321, + "grad_norm": 1.3755489549876734, + "learning_rate": 1.4899178167987367e-05, + "loss": 0.6428382396697998, + "step": 2533 + }, + { + "epoch": 0.7410440122824974, + "grad_norm": 1.3444422145970576, + "learning_rate": 1.489496284964861e-05, + "loss": 0.6204425096511841, + "step": 2534 + }, + { + "epoch": 0.7413364526977628, + "grad_norm": 1.2627663029943075, + "learning_rate": 1.4890746387142052e-05, + "loss": 0.6025601625442505, + "step": 2535 + }, + { + "epoch": 0.7416288931130283, + "grad_norm": 1.212213289149315, + "learning_rate": 1.4886528781453258e-05, + "loss": 0.5570085644721985, + "step": 2536 + }, + { + "epoch": 0.7419213335282936, + "grad_norm": 1.387517207017057, + "learning_rate": 1.4882310033568072e-05, + "loss": 0.6816439628601074, + "step": 2537 + }, + { + "epoch": 0.742213773943559, + "grad_norm": 1.341130650337267, + "learning_rate": 1.4878090144472603e-05, + "loss": 0.5424396991729736, + "step": 2538 + }, + { + "epoch": 0.7425062143588244, + "grad_norm": 1.583973779595893, + "learning_rate": 1.4873869115153223e-05, + "loss": 0.58860182762146, + "step": 2539 + }, + { + "epoch": 0.7427986547740898, + "grad_norm": 1.227937032120959, + "learning_rate": 1.4869646946596568e-05, + "loss": 0.513140857219696, + "step": 2540 + }, + { + "epoch": 0.7430910951893551, + "grad_norm": 1.3321578929704418, + "learning_rate": 1.486542363978955e-05, + "loss": 0.5967035293579102, + "step": 2541 + }, + { + "epoch": 0.7433835356046206, + "grad_norm": 1.2958174333377406, + "learning_rate": 1.4861199195719334e-05, + "loss": 0.6988440752029419, + "step": 2542 + }, + { + "epoch": 0.743675976019886, + "grad_norm": 1.3279731889181368, + "learning_rate": 1.4856973615373366e-05, + "loss": 0.6176164746284485, + "step": 2543 + }, + { + "epoch": 
0.7439684164351513, + "grad_norm": 1.394214331783624, + "learning_rate": 1.4852746899739346e-05, + "loss": 0.5616505742073059, + "step": 2544 + }, + { + "epoch": 0.7442608568504168, + "grad_norm": 1.199172810090394, + "learning_rate": 1.4848519049805243e-05, + "loss": 0.5470465421676636, + "step": 2545 + }, + { + "epoch": 0.7445532972656821, + "grad_norm": 1.393649724579279, + "learning_rate": 1.4844290066559292e-05, + "loss": 0.6362754106521606, + "step": 2546 + }, + { + "epoch": 0.7448457376809475, + "grad_norm": 1.2298975206172837, + "learning_rate": 1.4840059950989992e-05, + "loss": 0.6290515661239624, + "step": 2547 + }, + { + "epoch": 0.7451381780962129, + "grad_norm": 1.4356832247939193, + "learning_rate": 1.4835828704086105e-05, + "loss": 0.7225647568702698, + "step": 2548 + }, + { + "epoch": 0.7454306185114783, + "grad_norm": 1.4603777863967904, + "learning_rate": 1.483159632683666e-05, + "loss": 0.6993023157119751, + "step": 2549 + }, + { + "epoch": 0.7457230589267436, + "grad_norm": 1.5062925776475273, + "learning_rate": 1.482736282023095e-05, + "loss": 0.6960086226463318, + "step": 2550 + }, + { + "epoch": 0.7460154993420091, + "grad_norm": 1.4783046017210701, + "learning_rate": 1.4823128185258535e-05, + "loss": 0.627712607383728, + "step": 2551 + }, + { + "epoch": 0.7463079397572745, + "grad_norm": 1.3756379084869055, + "learning_rate": 1.481889242290923e-05, + "loss": 0.6314729452133179, + "step": 2552 + }, + { + "epoch": 0.7466003801725398, + "grad_norm": 1.293029687195421, + "learning_rate": 1.4814655534173121e-05, + "loss": 0.5948070287704468, + "step": 2553 + }, + { + "epoch": 0.7468928205878053, + "grad_norm": 1.28283626174806, + "learning_rate": 1.4810417520040551e-05, + "loss": 0.6227586269378662, + "step": 2554 + }, + { + "epoch": 0.7471852610030706, + "grad_norm": 1.156874509923564, + "learning_rate": 1.4806178381502139e-05, + "loss": 0.589213490486145, + "step": 2555 + }, + { + "epoch": 0.747477701418336, + "grad_norm": 1.3920763104069633, + 
"learning_rate": 1.4801938119548748e-05, + "loss": 0.6748968362808228, + "step": 2556 + }, + { + "epoch": 0.7477701418336014, + "grad_norm": 1.5278244850962377, + "learning_rate": 1.4797696735171521e-05, + "loss": 0.627450704574585, + "step": 2557 + }, + { + "epoch": 0.7480625822488668, + "grad_norm": 1.3979513679962843, + "learning_rate": 1.479345422936185e-05, + "loss": 0.5816184878349304, + "step": 2558 + }, + { + "epoch": 0.7483550226641322, + "grad_norm": 1.3403975244231432, + "learning_rate": 1.4789210603111399e-05, + "loss": 0.5184855461120605, + "step": 2559 + }, + { + "epoch": 0.7486474630793976, + "grad_norm": 1.3184163367774433, + "learning_rate": 1.4784965857412088e-05, + "loss": 0.5747300982475281, + "step": 2560 + }, + { + "epoch": 0.748939903494663, + "grad_norm": 1.5154750654158269, + "learning_rate": 1.4780719993256104e-05, + "loss": 0.6957682371139526, + "step": 2561 + }, + { + "epoch": 0.7492323439099283, + "grad_norm": 1.3790848349629903, + "learning_rate": 1.4776473011635886e-05, + "loss": 0.5711330771446228, + "step": 2562 + }, + { + "epoch": 0.7495247843251938, + "grad_norm": 1.260228471581513, + "learning_rate": 1.4772224913544142e-05, + "loss": 0.687350869178772, + "step": 2563 + }, + { + "epoch": 0.7498172247404591, + "grad_norm": 1.549796921470129, + "learning_rate": 1.476797569997384e-05, + "loss": 0.71396803855896, + "step": 2564 + }, + { + "epoch": 0.7501096651557245, + "grad_norm": 1.3620133851355087, + "learning_rate": 1.4763725371918209e-05, + "loss": 0.5457814335823059, + "step": 2565 + }, + { + "epoch": 0.75040210557099, + "grad_norm": 1.4687420339775556, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.5889413952827454, + "step": 2566 + }, + { + "epoch": 0.7506945459862553, + "grad_norm": 1.8883582542449355, + "learning_rate": 1.4755221376325171e-05, + "loss": 0.6222226619720459, + "step": 2567 + }, + { + "epoch": 0.7509869864015207, + "grad_norm": 1.17580934018018, + "learning_rate": 1.475096771077552e-05, + "loss": 
0.5273243188858032, + "step": 2568 + }, + { + "epoch": 0.7512794268167861, + "grad_norm": 1.2062680853030614, + "learning_rate": 1.4746712934716055e-05, + "loss": 0.5665162801742554, + "step": 2569 + }, + { + "epoch": 0.7515718672320515, + "grad_norm": 1.6320800654071554, + "learning_rate": 1.4742457049141298e-05, + "loss": 0.5748391151428223, + "step": 2570 + }, + { + "epoch": 0.7518643076473168, + "grad_norm": 1.4197866961281498, + "learning_rate": 1.4738200055046044e-05, + "loss": 0.7002041339874268, + "step": 2571 + }, + { + "epoch": 0.7521567480625823, + "grad_norm": 1.3507056136966096, + "learning_rate": 1.4733941953425337e-05, + "loss": 0.6841630935668945, + "step": 2572 + }, + { + "epoch": 0.7524491884778476, + "grad_norm": 1.6017928671701795, + "learning_rate": 1.4729682745274478e-05, + "loss": 0.7047172784805298, + "step": 2573 + }, + { + "epoch": 0.752741628893113, + "grad_norm": 1.4397980876250445, + "learning_rate": 1.4725422431589035e-05, + "loss": 0.6979919672012329, + "step": 2574 + }, + { + "epoch": 0.7530340693083785, + "grad_norm": 1.3152000128748418, + "learning_rate": 1.4721161013364829e-05, + "loss": 0.6437125205993652, + "step": 2575 + }, + { + "epoch": 0.7533265097236438, + "grad_norm": 1.4573280156715103, + "learning_rate": 1.4716898491597942e-05, + "loss": 0.591254711151123, + "step": 2576 + }, + { + "epoch": 0.7536189501389092, + "grad_norm": 1.592793146861773, + "learning_rate": 1.4712634867284714e-05, + "loss": 0.6276297569274902, + "step": 2577 + }, + { + "epoch": 0.7539113905541746, + "grad_norm": 1.2004846116513588, + "learning_rate": 1.4708370141421737e-05, + "loss": 0.5310626029968262, + "step": 2578 + }, + { + "epoch": 0.75420383096944, + "grad_norm": 1.374287364754045, + "learning_rate": 1.4704104315005864e-05, + "loss": 0.5256849527359009, + "step": 2579 + }, + { + "epoch": 0.7544962713847053, + "grad_norm": 1.4473126972035357, + "learning_rate": 1.4699837389034212e-05, + "loss": 0.6050584316253662, + "step": 2580 + }, + { + 
"epoch": 0.7547887117999708, + "grad_norm": 1.3425248874126274, + "learning_rate": 1.4695569364504144e-05, + "loss": 0.5124386548995972, + "step": 2581 + }, + { + "epoch": 0.7550811522152362, + "grad_norm": 1.1600080124683732, + "learning_rate": 1.4691300242413289e-05, + "loss": 0.5631951093673706, + "step": 2582 + }, + { + "epoch": 0.7553735926305015, + "grad_norm": 1.3017433820111879, + "learning_rate": 1.4687030023759527e-05, + "loss": 0.6352444291114807, + "step": 2583 + }, + { + "epoch": 0.755666033045767, + "grad_norm": 1.4490307646785157, + "learning_rate": 1.4682758709540992e-05, + "loss": 0.6717500686645508, + "step": 2584 + }, + { + "epoch": 0.7559584734610323, + "grad_norm": 3.0905292476778428, + "learning_rate": 1.467848630075608e-05, + "loss": 0.5889217853546143, + "step": 2585 + }, + { + "epoch": 0.7562509138762977, + "grad_norm": 1.258529998432557, + "learning_rate": 1.4674212798403443e-05, + "loss": 0.49069908261299133, + "step": 2586 + }, + { + "epoch": 0.756543354291563, + "grad_norm": 1.1729027861993524, + "learning_rate": 1.4669938203481982e-05, + "loss": 0.6272397041320801, + "step": 2587 + }, + { + "epoch": 0.7568357947068285, + "grad_norm": 1.5090841451643915, + "learning_rate": 1.466566251699086e-05, + "loss": 0.6218451261520386, + "step": 2588 + }, + { + "epoch": 0.7571282351220939, + "grad_norm": 1.4025085245751263, + "learning_rate": 1.4661385739929492e-05, + "loss": 0.6174849271774292, + "step": 2589 + }, + { + "epoch": 0.7574206755373593, + "grad_norm": 1.3554209784525295, + "learning_rate": 1.465710787329755e-05, + "loss": 0.5595160126686096, + "step": 2590 + }, + { + "epoch": 0.7577131159526247, + "grad_norm": 1.5657464206953444, + "learning_rate": 1.4652828918094954e-05, + "loss": 0.757240891456604, + "step": 2591 + }, + { + "epoch": 0.75800555636789, + "grad_norm": 1.3337551846990978, + "learning_rate": 1.4648548875321893e-05, + "loss": 0.630811333656311, + "step": 2592 + }, + { + "epoch": 0.7582979967831555, + "grad_norm": 
1.208341715070646, + "learning_rate": 1.4644267745978797e-05, + "loss": 0.5857812762260437, + "step": 2593 + }, + { + "epoch": 0.7585904371984208, + "grad_norm": 1.1785954348430454, + "learning_rate": 1.463998553106635e-05, + "loss": 0.5869519710540771, + "step": 2594 + }, + { + "epoch": 0.7588828776136862, + "grad_norm": 1.2035584714461103, + "learning_rate": 1.4635702231585498e-05, + "loss": 0.5610413551330566, + "step": 2595 + }, + { + "epoch": 0.7591753180289516, + "grad_norm": 1.255732340436211, + "learning_rate": 1.4631417848537435e-05, + "loss": 0.5634676218032837, + "step": 2596 + }, + { + "epoch": 0.759467758444217, + "grad_norm": 1.2847976698363035, + "learning_rate": 1.4627132382923607e-05, + "loss": 0.6813392639160156, + "step": 2597 + }, + { + "epoch": 0.7597601988594824, + "grad_norm": 1.5611350123657577, + "learning_rate": 1.4622845835745723e-05, + "loss": 0.644945502281189, + "step": 2598 + }, + { + "epoch": 0.7600526392747478, + "grad_norm": 1.4458723370490596, + "learning_rate": 1.461855820800573e-05, + "loss": 0.7432133555412292, + "step": 2599 + }, + { + "epoch": 0.7603450796900132, + "grad_norm": 1.1406983279122715, + "learning_rate": 1.4614269500705832e-05, + "loss": 0.4729112982749939, + "step": 2600 + }, + { + "epoch": 0.7606375201052785, + "grad_norm": 1.4806970647351285, + "learning_rate": 1.4609979714848499e-05, + "loss": 0.7146443128585815, + "step": 2601 + }, + { + "epoch": 0.760929960520544, + "grad_norm": 1.4348530933940364, + "learning_rate": 1.4605688851436436e-05, + "loss": 0.5959945917129517, + "step": 2602 + }, + { + "epoch": 0.7612224009358093, + "grad_norm": 1.3380784718799885, + "learning_rate": 1.4601396911472605e-05, + "loss": 0.6091525554656982, + "step": 2603 + }, + { + "epoch": 0.7615148413510747, + "grad_norm": 1.3043703832448297, + "learning_rate": 1.4597103895960228e-05, + "loss": 0.5101523399353027, + "step": 2604 + }, + { + "epoch": 0.7618072817663402, + "grad_norm": 1.3937793894568855, + "learning_rate": 
1.4592809805902762e-05, + "loss": 0.6036165952682495, + "step": 2605 + }, + { + "epoch": 0.7620997221816055, + "grad_norm": 1.361507946530242, + "learning_rate": 1.4588514642303928e-05, + "loss": 0.6094970703125, + "step": 2606 + }, + { + "epoch": 0.7623921625968709, + "grad_norm": 1.3770518433820003, + "learning_rate": 1.4584218406167697e-05, + "loss": 0.49754881858825684, + "step": 2607 + }, + { + "epoch": 0.7626846030121363, + "grad_norm": 1.3703785644048119, + "learning_rate": 1.4579921098498285e-05, + "loss": 0.6066807508468628, + "step": 2608 + }, + { + "epoch": 0.7629770434274017, + "grad_norm": 1.4768479795454132, + "learning_rate": 1.4575622720300162e-05, + "loss": 0.5758910179138184, + "step": 2609 + }, + { + "epoch": 0.763269483842667, + "grad_norm": 1.4281250780822374, + "learning_rate": 1.457132327257805e-05, + "loss": 0.6641621589660645, + "step": 2610 + }, + { + "epoch": 0.7635619242579325, + "grad_norm": 1.506727865728889, + "learning_rate": 1.4567022756336916e-05, + "loss": 0.7024788856506348, + "step": 2611 + }, + { + "epoch": 0.7638543646731978, + "grad_norm": 1.2921755321984356, + "learning_rate": 1.4562721172581982e-05, + "loss": 0.6066344380378723, + "step": 2612 + }, + { + "epoch": 0.7641468050884632, + "grad_norm": 1.3533854830579282, + "learning_rate": 1.4558418522318713e-05, + "loss": 0.566038966178894, + "step": 2613 + }, + { + "epoch": 0.7644392455037287, + "grad_norm": 1.3370326372322123, + "learning_rate": 1.4554114806552833e-05, + "loss": 0.5817335844039917, + "step": 2614 + }, + { + "epoch": 0.764731685918994, + "grad_norm": 1.2813703243908812, + "learning_rate": 1.4549810026290305e-05, + "loss": 0.6001763343811035, + "step": 2615 + }, + { + "epoch": 0.7650241263342594, + "grad_norm": 1.617460530676573, + "learning_rate": 1.4545504182537346e-05, + "loss": 0.6363068222999573, + "step": 2616 + }, + { + "epoch": 0.7653165667495248, + "grad_norm": 1.4805158326873171, + "learning_rate": 1.4541197276300424e-05, + "loss": 0.669566810131073, 
+ "step": 2617 + }, + { + "epoch": 0.7656090071647902, + "grad_norm": 1.2122677055370945, + "learning_rate": 1.4536889308586245e-05, + "loss": 0.47967004776000977, + "step": 2618 + }, + { + "epoch": 0.7659014475800555, + "grad_norm": 1.310958704364757, + "learning_rate": 1.4532580280401777e-05, + "loss": 0.5803399085998535, + "step": 2619 + }, + { + "epoch": 0.766193887995321, + "grad_norm": 1.3185113057937472, + "learning_rate": 1.452827019275423e-05, + "loss": 0.6870115995407104, + "step": 2620 + }, + { + "epoch": 0.7664863284105864, + "grad_norm": 1.307156915151953, + "learning_rate": 1.4523959046651058e-05, + "loss": 0.6190885901451111, + "step": 2621 + }, + { + "epoch": 0.7667787688258517, + "grad_norm": 1.4891479565012034, + "learning_rate": 1.4519646843099961e-05, + "loss": 0.6624859571456909, + "step": 2622 + }, + { + "epoch": 0.7670712092411172, + "grad_norm": 1.253302711959068, + "learning_rate": 1.4515333583108896e-05, + "loss": 0.5770546197891235, + "step": 2623 + }, + { + "epoch": 0.7673636496563825, + "grad_norm": 1.3410371709150275, + "learning_rate": 1.451101926768606e-05, + "loss": 0.6843355894088745, + "step": 2624 + }, + { + "epoch": 0.7676560900716479, + "grad_norm": 1.0930173610522418, + "learning_rate": 1.4506703897839895e-05, + "loss": 0.5293717384338379, + "step": 2625 + }, + { + "epoch": 0.7679485304869133, + "grad_norm": 1.1789701874259584, + "learning_rate": 1.45023874745791e-05, + "loss": 0.44534316658973694, + "step": 2626 + }, + { + "epoch": 0.7682409709021787, + "grad_norm": 4.2234169958332295, + "learning_rate": 1.4498069998912603e-05, + "loss": 0.7279446721076965, + "step": 2627 + }, + { + "epoch": 0.7685334113174441, + "grad_norm": 1.3924343198630234, + "learning_rate": 1.4493751471849596e-05, + "loss": 0.6990453600883484, + "step": 2628 + }, + { + "epoch": 0.7688258517327095, + "grad_norm": 1.3337373981179779, + "learning_rate": 1.44894318943995e-05, + "loss": 0.6610965728759766, + "step": 2629 + }, + { + "epoch": 
0.7691182921479749, + "grad_norm": 1.285212706548779, + "learning_rate": 1.4485111267571999e-05, + "loss": 0.5124749541282654, + "step": 2630 + }, + { + "epoch": 0.7694107325632402, + "grad_norm": 1.3445630320041935, + "learning_rate": 1.448078959237701e-05, + "loss": 0.7191518545150757, + "step": 2631 + }, + { + "epoch": 0.7697031729785057, + "grad_norm": 1.1499690572165278, + "learning_rate": 1.4476466869824694e-05, + "loss": 0.5798880457878113, + "step": 2632 + }, + { + "epoch": 0.769995613393771, + "grad_norm": 1.3900006441925277, + "learning_rate": 1.4472143100925467e-05, + "loss": 0.5187106728553772, + "step": 2633 + }, + { + "epoch": 0.7702880538090364, + "grad_norm": 1.1672945310140501, + "learning_rate": 1.4467818286689981e-05, + "loss": 0.5794588327407837, + "step": 2634 + }, + { + "epoch": 0.7705804942243017, + "grad_norm": 1.2435528275045493, + "learning_rate": 1.4463492428129133e-05, + "loss": 0.4884936809539795, + "step": 2635 + }, + { + "epoch": 0.7708729346395672, + "grad_norm": 1.3037745440935204, + "learning_rate": 1.4459165526254074e-05, + "loss": 0.5782946348190308, + "step": 2636 + }, + { + "epoch": 0.7711653750548326, + "grad_norm": 1.2531837165046444, + "learning_rate": 1.445483758207618e-05, + "loss": 0.5173349380493164, + "step": 2637 + }, + { + "epoch": 0.771457815470098, + "grad_norm": 1.4752149684021225, + "learning_rate": 1.4450508596607087e-05, + "loss": 0.616407573223114, + "step": 2638 + }, + { + "epoch": 0.7717502558853634, + "grad_norm": 1.4855666629653779, + "learning_rate": 1.4446178570858672e-05, + "loss": 0.537878155708313, + "step": 2639 + }, + { + "epoch": 0.7720426963006287, + "grad_norm": 1.2968861628303388, + "learning_rate": 1.4441847505843048e-05, + "loss": 0.674277663230896, + "step": 2640 + }, + { + "epoch": 0.7723351367158942, + "grad_norm": 1.440782866010467, + "learning_rate": 1.4437515402572576e-05, + "loss": 0.5064860582351685, + "step": 2641 + }, + { + "epoch": 0.7726275771311595, + "grad_norm": 
1.2859384806045262, + "learning_rate": 1.4433182262059861e-05, + "loss": 0.6256883144378662, + "step": 2642 + }, + { + "epoch": 0.7729200175464249, + "grad_norm": 1.2490391757844836, + "learning_rate": 1.4428848085317744e-05, + "loss": 0.6023700833320618, + "step": 2643 + }, + { + "epoch": 0.7732124579616904, + "grad_norm": 1.5137270909206324, + "learning_rate": 1.4424512873359316e-05, + "loss": 0.5670932531356812, + "step": 2644 + }, + { + "epoch": 0.7735048983769557, + "grad_norm": 1.406486208295682, + "learning_rate": 1.4420176627197906e-05, + "loss": 0.760460376739502, + "step": 2645 + }, + { + "epoch": 0.7737973387922211, + "grad_norm": 1.3383411751300025, + "learning_rate": 1.4415839347847082e-05, + "loss": 0.5680848956108093, + "step": 2646 + }, + { + "epoch": 0.7740897792074865, + "grad_norm": 1.2948318300140997, + "learning_rate": 1.4411501036320661e-05, + "loss": 0.5962368249893188, + "step": 2647 + }, + { + "epoch": 0.7743822196227519, + "grad_norm": 1.3851281269469669, + "learning_rate": 1.4407161693632697e-05, + "loss": 0.7149791121482849, + "step": 2648 + }, + { + "epoch": 0.7746746600380172, + "grad_norm": 1.4438569377090373, + "learning_rate": 1.440282132079748e-05, + "loss": 0.5943992733955383, + "step": 2649 + }, + { + "epoch": 0.7749671004532827, + "grad_norm": 1.681920535370579, + "learning_rate": 1.439847991882955e-05, + "loss": 0.7265899181365967, + "step": 2650 + }, + { + "epoch": 0.775259540868548, + "grad_norm": 1.257384791880329, + "learning_rate": 1.4394137488743682e-05, + "loss": 0.6011309027671814, + "step": 2651 + }, + { + "epoch": 0.7755519812838134, + "grad_norm": 1.4419500386554907, + "learning_rate": 1.4389794031554894e-05, + "loss": 0.6853964328765869, + "step": 2652 + }, + { + "epoch": 0.7758444216990789, + "grad_norm": 1.4140520249216477, + "learning_rate": 1.438544954827844e-05, + "loss": 0.6598547697067261, + "step": 2653 + }, + { + "epoch": 0.7761368621143442, + "grad_norm": 1.3919438302264315, + "learning_rate": 
1.4381104039929819e-05, + "loss": 0.5776119232177734, + "step": 2654 + }, + { + "epoch": 0.7764293025296096, + "grad_norm": 1.182931573556341, + "learning_rate": 1.4376757507524766e-05, + "loss": 0.6026376485824585, + "step": 2655 + }, + { + "epoch": 0.776721742944875, + "grad_norm": 1.2883148172478378, + "learning_rate": 1.4372409952079256e-05, + "loss": 0.5776997804641724, + "step": 2656 + }, + { + "epoch": 0.7770141833601404, + "grad_norm": 1.5317545348037325, + "learning_rate": 1.4368061374609505e-05, + "loss": 0.5766068696975708, + "step": 2657 + }, + { + "epoch": 0.7773066237754057, + "grad_norm": 1.0428168520269592, + "learning_rate": 1.4363711776131966e-05, + "loss": 0.4783105254173279, + "step": 2658 + }, + { + "epoch": 0.7775990641906712, + "grad_norm": 1.4837098758543301, + "learning_rate": 1.4359361157663332e-05, + "loss": 0.6563695073127747, + "step": 2659 + }, + { + "epoch": 0.7778915046059366, + "grad_norm": 1.0898257169197185, + "learning_rate": 1.4355009520220531e-05, + "loss": 0.5177119374275208, + "step": 2660 + }, + { + "epoch": 0.7781839450212019, + "grad_norm": 1.3520526907259511, + "learning_rate": 1.4350656864820733e-05, + "loss": 0.6590641736984253, + "step": 2661 + }, + { + "epoch": 0.7784763854364674, + "grad_norm": 1.2923155412118275, + "learning_rate": 1.4346303192481348e-05, + "loss": 0.6012274622917175, + "step": 2662 + }, + { + "epoch": 0.7787688258517327, + "grad_norm": 1.439032337982527, + "learning_rate": 1.4341948504220016e-05, + "loss": 0.6731704473495483, + "step": 2663 + }, + { + "epoch": 0.7790612662669981, + "grad_norm": 1.4598986218346195, + "learning_rate": 1.4337592801054623e-05, + "loss": 0.6827171444892883, + "step": 2664 + }, + { + "epoch": 0.7793537066822634, + "grad_norm": 1.3963311439466064, + "learning_rate": 1.4333236084003282e-05, + "loss": 0.6654937267303467, + "step": 2665 + }, + { + "epoch": 0.7796461470975289, + "grad_norm": 1.276825216432019, + "learning_rate": 1.4328878354084355e-05, + "loss": 
0.5673532485961914, + "step": 2666 + }, + { + "epoch": 0.7799385875127943, + "grad_norm": 1.3049192363130713, + "learning_rate": 1.432451961231643e-05, + "loss": 0.5401986241340637, + "step": 2667 + }, + { + "epoch": 0.7802310279280597, + "grad_norm": 1.2877259559166432, + "learning_rate": 1.4320159859718341e-05, + "loss": 0.6134701371192932, + "step": 2668 + }, + { + "epoch": 0.7805234683433251, + "grad_norm": 1.5022932512908924, + "learning_rate": 1.4315799097309152e-05, + "loss": 0.6913554668426514, + "step": 2669 + }, + { + "epoch": 0.7808159087585904, + "grad_norm": 1.6126405133572825, + "learning_rate": 1.4311437326108167e-05, + "loss": 0.6969482898712158, + "step": 2670 + }, + { + "epoch": 0.7811083491738559, + "grad_norm": 1.343855488902383, + "learning_rate": 1.4307074547134918e-05, + "loss": 0.6612537503242493, + "step": 2671 + }, + { + "epoch": 0.7814007895891212, + "grad_norm": 1.1627822310905236, + "learning_rate": 1.430271076140918e-05, + "loss": 0.5545899868011475, + "step": 2672 + }, + { + "epoch": 0.7816932300043866, + "grad_norm": 1.1885930128001867, + "learning_rate": 1.4298345969950965e-05, + "loss": 0.6635574698448181, + "step": 2673 + }, + { + "epoch": 0.781985670419652, + "grad_norm": 1.4316816688950922, + "learning_rate": 1.4293980173780514e-05, + "loss": 0.5859510898590088, + "step": 2674 + }, + { + "epoch": 0.7822781108349174, + "grad_norm": 1.246244040215616, + "learning_rate": 1.4289613373918304e-05, + "loss": 0.5839825868606567, + "step": 2675 + }, + { + "epoch": 0.7825705512501828, + "grad_norm": 1.7192756445293216, + "learning_rate": 1.428524557138505e-05, + "loss": 0.6376889944076538, + "step": 2676 + }, + { + "epoch": 0.7828629916654481, + "grad_norm": 1.2061132029389496, + "learning_rate": 1.4280876767201696e-05, + "loss": 0.5473129749298096, + "step": 2677 + }, + { + "epoch": 0.7831554320807136, + "grad_norm": 1.2355367438994083, + "learning_rate": 1.4276506962389429e-05, + "loss": 0.6723904609680176, + "step": 2678 + }, + { + 
"epoch": 0.7834478724959789, + "grad_norm": 1.318329485547163, + "learning_rate": 1.4272136157969658e-05, + "loss": 0.6036845445632935, + "step": 2679 + }, + { + "epoch": 0.7837403129112444, + "grad_norm": 1.4527977807212105, + "learning_rate": 1.4267764354964038e-05, + "loss": 0.5993655920028687, + "step": 2680 + }, + { + "epoch": 0.7840327533265097, + "grad_norm": 1.5159579383707373, + "learning_rate": 1.4263391554394448e-05, + "loss": 0.6678075194358826, + "step": 2681 + }, + { + "epoch": 0.7843251937417751, + "grad_norm": 1.2588619303254647, + "learning_rate": 1.4259017757283003e-05, + "loss": 0.5627151727676392, + "step": 2682 + }, + { + "epoch": 0.7846176341570406, + "grad_norm": 1.2632820141578516, + "learning_rate": 1.4254642964652053e-05, + "loss": 0.6060316562652588, + "step": 2683 + }, + { + "epoch": 0.7849100745723059, + "grad_norm": 1.590473454276912, + "learning_rate": 1.4250267177524177e-05, + "loss": 0.6535854935646057, + "step": 2684 + }, + { + "epoch": 0.7852025149875713, + "grad_norm": 1.499355267260573, + "learning_rate": 1.4245890396922195e-05, + "loss": 0.7141643762588501, + "step": 2685 + }, + { + "epoch": 0.7854949554028366, + "grad_norm": 1.5067703709229516, + "learning_rate": 1.4241512623869143e-05, + "loss": 0.6685847640037537, + "step": 2686 + }, + { + "epoch": 0.7857873958181021, + "grad_norm": 1.4195544467165693, + "learning_rate": 1.4237133859388305e-05, + "loss": 0.6745196580886841, + "step": 2687 + }, + { + "epoch": 0.7860798362333674, + "grad_norm": 1.5617010746630147, + "learning_rate": 1.423275410450319e-05, + "loss": 0.6891968250274658, + "step": 2688 + }, + { + "epoch": 0.7863722766486329, + "grad_norm": 1.3584703297700564, + "learning_rate": 1.422837336023754e-05, + "loss": 0.5614763498306274, + "step": 2689 + }, + { + "epoch": 0.7866647170638982, + "grad_norm": 1.3595148335065306, + "learning_rate": 1.4223991627615324e-05, + "loss": 0.5867494344711304, + "step": 2690 + }, + { + "epoch": 0.7869571574791636, + "grad_norm": 
1.453264768444311, + "learning_rate": 1.421960890766075e-05, + "loss": 0.644777774810791, + "step": 2691 + }, + { + "epoch": 0.787249597894429, + "grad_norm": 1.3023857436912896, + "learning_rate": 1.4215225201398249e-05, + "loss": 0.7237588167190552, + "step": 2692 + }, + { + "epoch": 0.7875420383096944, + "grad_norm": 1.45851809360972, + "learning_rate": 1.4210840509852484e-05, + "loss": 0.6314423680305481, + "step": 2693 + }, + { + "epoch": 0.7878344787249598, + "grad_norm": 1.2286351961246127, + "learning_rate": 1.4206454834048353e-05, + "loss": 0.5298433303833008, + "step": 2694 + }, + { + "epoch": 0.7881269191402251, + "grad_norm": 1.1185262454319822, + "learning_rate": 1.420206817501098e-05, + "loss": 0.507548451423645, + "step": 2695 + }, + { + "epoch": 0.7884193595554906, + "grad_norm": 1.7207072983596743, + "learning_rate": 1.4197680533765721e-05, + "loss": 0.7742520570755005, + "step": 2696 + }, + { + "epoch": 0.7887117999707559, + "grad_norm": 1.3752660802878722, + "learning_rate": 1.4193291911338161e-05, + "loss": 0.6261187195777893, + "step": 2697 + }, + { + "epoch": 0.7890042403860213, + "grad_norm": 1.521521524262885, + "learning_rate": 1.4188902308754108e-05, + "loss": 0.7501171827316284, + "step": 2698 + }, + { + "epoch": 0.7892966808012868, + "grad_norm": 1.3001128857102173, + "learning_rate": 1.4184511727039612e-05, + "loss": 0.5590647459030151, + "step": 2699 + }, + { + "epoch": 0.7895891212165521, + "grad_norm": 1.4479349527989895, + "learning_rate": 1.4180120167220941e-05, + "loss": 0.586786150932312, + "step": 2700 + }, + { + "epoch": 0.7898815616318176, + "grad_norm": 1.2133244570308048, + "learning_rate": 1.4175727630324598e-05, + "loss": 0.5208219289779663, + "step": 2701 + }, + { + "epoch": 0.7901740020470829, + "grad_norm": 1.2365924450408214, + "learning_rate": 1.4171334117377312e-05, + "loss": 0.5925623178482056, + "step": 2702 + }, + { + "epoch": 0.7904664424623483, + "grad_norm": 1.5006045037979843, + "learning_rate": 
1.4166939629406034e-05, + "loss": 0.7095032930374146, + "step": 2703 + }, + { + "epoch": 0.7907588828776136, + "grad_norm": 1.167282378609361, + "learning_rate": 1.4162544167437955e-05, + "loss": 0.5683872699737549, + "step": 2704 + }, + { + "epoch": 0.7910513232928791, + "grad_norm": 1.2605941476894575, + "learning_rate": 1.4158147732500482e-05, + "loss": 0.7079274654388428, + "step": 2705 + }, + { + "epoch": 0.7913437637081445, + "grad_norm": 1.3186161570017685, + "learning_rate": 1.415375032562126e-05, + "loss": 0.6336439847946167, + "step": 2706 + }, + { + "epoch": 0.7916362041234098, + "grad_norm": 1.14446239802259, + "learning_rate": 1.414935194782816e-05, + "loss": 0.4842381477355957, + "step": 2707 + }, + { + "epoch": 0.7919286445386753, + "grad_norm": 1.4296190875249344, + "learning_rate": 1.4144952600149267e-05, + "loss": 0.5439653396606445, + "step": 2708 + }, + { + "epoch": 0.7922210849539406, + "grad_norm": 1.2988205927389838, + "learning_rate": 1.4140552283612906e-05, + "loss": 0.6365468502044678, + "step": 2709 + }, + { + "epoch": 0.792513525369206, + "grad_norm": 1.3854921286863888, + "learning_rate": 1.4136150999247623e-05, + "loss": 0.6192438006401062, + "step": 2710 + }, + { + "epoch": 0.7928059657844714, + "grad_norm": 1.2293031316317269, + "learning_rate": 1.4131748748082191e-05, + "loss": 0.5695269703865051, + "step": 2711 + }, + { + "epoch": 0.7930984061997368, + "grad_norm": 1.3405661548900325, + "learning_rate": 1.4127345531145614e-05, + "loss": 0.6892319321632385, + "step": 2712 + }, + { + "epoch": 0.7933908466150021, + "grad_norm": 1.5220370415080073, + "learning_rate": 1.4122941349467109e-05, + "loss": 0.6294678449630737, + "step": 2713 + }, + { + "epoch": 0.7936832870302676, + "grad_norm": 1.2086123903849104, + "learning_rate": 1.4118536204076135e-05, + "loss": 0.6666272878646851, + "step": 2714 + }, + { + "epoch": 0.793975727445533, + "grad_norm": 1.2066166036349477, + "learning_rate": 1.4114130096002363e-05, + "loss": 
0.5981796383857727, + "step": 2715 + }, + { + "epoch": 0.7942681678607983, + "grad_norm": 1.5676320725913573, + "learning_rate": 1.4109723026275695e-05, + "loss": 0.6120023131370544, + "step": 2716 + }, + { + "epoch": 0.7945606082760638, + "grad_norm": 1.536602454646116, + "learning_rate": 1.4105314995926257e-05, + "loss": 0.5892866849899292, + "step": 2717 + }, + { + "epoch": 0.7948530486913291, + "grad_norm": 1.504529299257153, + "learning_rate": 1.4100906005984404e-05, + "loss": 0.7625553607940674, + "step": 2718 + }, + { + "epoch": 0.7951454891065945, + "grad_norm": 1.4565362056936688, + "learning_rate": 1.40964960574807e-05, + "loss": 0.643633246421814, + "step": 2719 + }, + { + "epoch": 0.7954379295218599, + "grad_norm": 1.2108583839611744, + "learning_rate": 1.4092085151445953e-05, + "loss": 0.46422284841537476, + "step": 2720 + }, + { + "epoch": 0.7957303699371253, + "grad_norm": 1.2654408745652597, + "learning_rate": 1.4087673288911182e-05, + "loss": 0.6290001273155212, + "step": 2721 + }, + { + "epoch": 0.7960228103523908, + "grad_norm": 1.2400549293858325, + "learning_rate": 1.4083260470907632e-05, + "loss": 0.5175197124481201, + "step": 2722 + }, + { + "epoch": 0.7963152507676561, + "grad_norm": 1.4748861405916942, + "learning_rate": 1.4078846698466776e-05, + "loss": 0.6475427150726318, + "step": 2723 + }, + { + "epoch": 0.7966076911829215, + "grad_norm": 1.3254407316825372, + "learning_rate": 1.40744319726203e-05, + "loss": 0.5978254079818726, + "step": 2724 + }, + { + "epoch": 0.7969001315981868, + "grad_norm": 1.2991181525686113, + "learning_rate": 1.4070016294400124e-05, + "loss": 0.5738629102706909, + "step": 2725 + }, + { + "epoch": 0.7971925720134523, + "grad_norm": 1.3493198611941248, + "learning_rate": 1.4065599664838388e-05, + "loss": 0.5809024572372437, + "step": 2726 + }, + { + "epoch": 0.7974850124287176, + "grad_norm": 1.1539725667160117, + "learning_rate": 1.4061182084967446e-05, + "loss": 0.5907782316207886, + "step": 2727 + }, + { + 
"epoch": 0.797777452843983, + "grad_norm": 1.4493981600012322, + "learning_rate": 1.4056763555819887e-05, + "loss": 0.7640036344528198, + "step": 2728 + }, + { + "epoch": 0.7980698932592484, + "grad_norm": 1.5601806517528776, + "learning_rate": 1.4052344078428513e-05, + "loss": 0.7472168207168579, + "step": 2729 + }, + { + "epoch": 0.7983623336745138, + "grad_norm": 1.6018546047693625, + "learning_rate": 1.4047923653826347e-05, + "loss": 0.6726990342140198, + "step": 2730 + }, + { + "epoch": 0.7986547740897793, + "grad_norm": 1.3791137229331067, + "learning_rate": 1.404350228304664e-05, + "loss": 0.5949650406837463, + "step": 2731 + }, + { + "epoch": 0.7989472145050446, + "grad_norm": 1.386756095528374, + "learning_rate": 1.403907996712286e-05, + "loss": 0.5578774213790894, + "step": 2732 + }, + { + "epoch": 0.79923965492031, + "grad_norm": 1.5271585141569006, + "learning_rate": 1.4034656707088692e-05, + "loss": 0.6092333197593689, + "step": 2733 + }, + { + "epoch": 0.7995320953355753, + "grad_norm": 1.3098390209876276, + "learning_rate": 1.4030232503978053e-05, + "loss": 0.5095718502998352, + "step": 2734 + }, + { + "epoch": 0.7998245357508408, + "grad_norm": 1.3675399597044373, + "learning_rate": 1.4025807358825072e-05, + "loss": 0.5155727863311768, + "step": 2735 + }, + { + "epoch": 0.8001169761661061, + "grad_norm": 1.3309663791332569, + "learning_rate": 1.4021381272664094e-05, + "loss": 0.5752589702606201, + "step": 2736 + }, + { + "epoch": 0.8004094165813715, + "grad_norm": 1.3619611747950222, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.6334787607192993, + "step": 2737 + }, + { + "epoch": 0.800701856996637, + "grad_norm": 1.3830503239164076, + "learning_rate": 1.4012526281456666e-05, + "loss": 0.7406032085418701, + "step": 2738 + }, + { + "epoch": 0.8009942974119023, + "grad_norm": 1.2904369174268238, + "learning_rate": 1.4008097378480014e-05, + "loss": 0.5805078744888306, + "step": 2739 + }, + { + "epoch": 0.8012867378271677, + "grad_norm": 
1.3584200788658642, + "learning_rate": 1.4003667538634972e-05, + "loss": 0.6849163770675659, + "step": 2740 + }, + { + "epoch": 0.8015791782424331, + "grad_norm": 1.5354340760410032, + "learning_rate": 1.3999236762956985e-05, + "loss": 0.7707695960998535, + "step": 2741 + }, + { + "epoch": 0.8018716186576985, + "grad_norm": 1.426293329050591, + "learning_rate": 1.3994805052481715e-05, + "loss": 0.6253059506416321, + "step": 2742 + }, + { + "epoch": 0.8021640590729638, + "grad_norm": 1.274928204575108, + "learning_rate": 1.3990372408245057e-05, + "loss": 0.6450316905975342, + "step": 2743 + }, + { + "epoch": 0.8024564994882293, + "grad_norm": 1.2867865996346037, + "learning_rate": 1.398593883128311e-05, + "loss": 0.672899603843689, + "step": 2744 + }, + { + "epoch": 0.8027489399034947, + "grad_norm": 1.38176481949922, + "learning_rate": 1.3981504322632198e-05, + "loss": 0.6203787326812744, + "step": 2745 + }, + { + "epoch": 0.80304138031876, + "grad_norm": 1.296034523853111, + "learning_rate": 1.3977068883328854e-05, + "loss": 0.541740894317627, + "step": 2746 + }, + { + "epoch": 0.8033338207340255, + "grad_norm": 1.3608273440615848, + "learning_rate": 1.3972632514409843e-05, + "loss": 0.5566504001617432, + "step": 2747 + }, + { + "epoch": 0.8036262611492908, + "grad_norm": 1.378445494532888, + "learning_rate": 1.3968195216912135e-05, + "loss": 0.6911404728889465, + "step": 2748 + }, + { + "epoch": 0.8039187015645562, + "grad_norm": 1.3758218413869647, + "learning_rate": 1.3963756991872921e-05, + "loss": 0.6744735240936279, + "step": 2749 + }, + { + "epoch": 0.8042111419798216, + "grad_norm": 1.3810636187989935, + "learning_rate": 1.3959317840329613e-05, + "loss": 0.6660502552986145, + "step": 2750 + }, + { + "epoch": 0.804503582395087, + "grad_norm": 1.611467815082346, + "learning_rate": 1.3954877763319832e-05, + "loss": 0.607395589351654, + "step": 2751 + }, + { + "epoch": 0.8047960228103523, + "grad_norm": 1.3065536354182021, + "learning_rate": 
1.395043676188142e-05, + "loss": 0.53249192237854, + "step": 2752 + }, + { + "epoch": 0.8050884632256178, + "grad_norm": 1.384670069600496, + "learning_rate": 1.394599483705243e-05, + "loss": 0.5728630423545837, + "step": 2753 + }, + { + "epoch": 0.8053809036408832, + "grad_norm": 1.354298055615179, + "learning_rate": 1.3941551989871142e-05, + "loss": 0.6912537813186646, + "step": 2754 + }, + { + "epoch": 0.8056733440561485, + "grad_norm": 1.2211163784496284, + "learning_rate": 1.3937108221376041e-05, + "loss": 0.6002523899078369, + "step": 2755 + }, + { + "epoch": 0.805965784471414, + "grad_norm": 1.165855753943377, + "learning_rate": 1.3932663532605832e-05, + "loss": 0.6573797464370728, + "step": 2756 + }, + { + "epoch": 0.8062582248866793, + "grad_norm": 1.2846173311931015, + "learning_rate": 1.3928217924599433e-05, + "loss": 0.6997278928756714, + "step": 2757 + }, + { + "epoch": 0.8065506653019447, + "grad_norm": 1.3457721921363819, + "learning_rate": 1.3923771398395978e-05, + "loss": 0.565264105796814, + "step": 2758 + }, + { + "epoch": 0.8068431057172101, + "grad_norm": 1.7064740069380804, + "learning_rate": 1.3919323955034815e-05, + "loss": 0.8065239191055298, + "step": 2759 + }, + { + "epoch": 0.8071355461324755, + "grad_norm": 1.4850507802988735, + "learning_rate": 1.3914875595555509e-05, + "loss": 0.556678056716919, + "step": 2760 + }, + { + "epoch": 0.807427986547741, + "grad_norm": 1.653442619870376, + "learning_rate": 1.3910426320997834e-05, + "loss": 0.5528635382652283, + "step": 2761 + }, + { + "epoch": 0.8077204269630063, + "grad_norm": 1.4210714864438183, + "learning_rate": 1.3905976132401785e-05, + "loss": 0.6127038598060608, + "step": 2762 + }, + { + "epoch": 0.8080128673782717, + "grad_norm": 1.4473812948635245, + "learning_rate": 1.390152503080756e-05, + "loss": 0.6311757564544678, + "step": 2763 + }, + { + "epoch": 0.808305307793537, + "grad_norm": 1.256496005559394, + "learning_rate": 1.389707301725558e-05, + "loss": 0.669788122177124, + 
"step": 2764 + }, + { + "epoch": 0.8085977482088025, + "grad_norm": 1.1602455830470428, + "learning_rate": 1.3892620092786477e-05, + "loss": 0.48408570885658264, + "step": 2765 + }, + { + "epoch": 0.8088901886240678, + "grad_norm": 1.3816192110102654, + "learning_rate": 1.3888166258441098e-05, + "loss": 0.5648288726806641, + "step": 2766 + }, + { + "epoch": 0.8091826290393332, + "grad_norm": 1.359222924847667, + "learning_rate": 1.3883711515260497e-05, + "loss": 0.5894806385040283, + "step": 2767 + }, + { + "epoch": 0.8094750694545986, + "grad_norm": 1.609438084965147, + "learning_rate": 1.3879255864285939e-05, + "loss": 0.8325392603874207, + "step": 2768 + }, + { + "epoch": 0.809767509869864, + "grad_norm": 1.3200888192290248, + "learning_rate": 1.387479930655891e-05, + "loss": 0.5282119512557983, + "step": 2769 + }, + { + "epoch": 0.8100599502851294, + "grad_norm": 1.2020970963419326, + "learning_rate": 1.3870341843121104e-05, + "loss": 0.7565277218818665, + "step": 2770 + }, + { + "epoch": 0.8103523907003948, + "grad_norm": 1.20769025145285, + "learning_rate": 1.3865883475014424e-05, + "loss": 0.5767146944999695, + "step": 2771 + }, + { + "epoch": 0.8106448311156602, + "grad_norm": 1.3747646237948088, + "learning_rate": 1.3861424203280987e-05, + "loss": 0.5988898873329163, + "step": 2772 + }, + { + "epoch": 0.8109372715309255, + "grad_norm": 1.2837797411261327, + "learning_rate": 1.3856964028963119e-05, + "loss": 0.5752500295639038, + "step": 2773 + }, + { + "epoch": 0.811229711946191, + "grad_norm": 1.3281997353125305, + "learning_rate": 1.385250295310336e-05, + "loss": 0.6834297776222229, + "step": 2774 + }, + { + "epoch": 0.8115221523614563, + "grad_norm": 1.376792748908409, + "learning_rate": 1.3848040976744459e-05, + "loss": 0.5667037963867188, + "step": 2775 + }, + { + "epoch": 0.8118145927767217, + "grad_norm": 1.33236222276005, + "learning_rate": 1.3843578100929375e-05, + "loss": 0.5618781447410583, + "step": 2776 + }, + { + "epoch": 0.8121070331919872, 
+ "grad_norm": 1.4974631308124338, + "learning_rate": 1.3839114326701281e-05, + "loss": 0.538033664226532, + "step": 2777 + }, + { + "epoch": 0.8123994736072525, + "grad_norm": 1.3236430994846111, + "learning_rate": 1.3834649655103556e-05, + "loss": 0.7218335270881653, + "step": 2778 + }, + { + "epoch": 0.812691914022518, + "grad_norm": 1.3045533775783231, + "learning_rate": 1.383018408717979e-05, + "loss": 0.5979611873626709, + "step": 2779 + }, + { + "epoch": 0.8129843544377833, + "grad_norm": 1.191818251767074, + "learning_rate": 1.3825717623973775e-05, + "loss": 0.4958215355873108, + "step": 2780 + }, + { + "epoch": 0.8132767948530487, + "grad_norm": 1.4132643925978479, + "learning_rate": 1.3821250266529531e-05, + "loss": 0.6759654879570007, + "step": 2781 + }, + { + "epoch": 0.813569235268314, + "grad_norm": 1.1873413404245543, + "learning_rate": 1.3816782015891272e-05, + "loss": 0.5499521493911743, + "step": 2782 + }, + { + "epoch": 0.8138616756835795, + "grad_norm": 1.327517100573182, + "learning_rate": 1.3812312873103425e-05, + "loss": 0.5308753252029419, + "step": 2783 + }, + { + "epoch": 0.8141541160988449, + "grad_norm": 1.4850132833469487, + "learning_rate": 1.3807842839210617e-05, + "loss": 0.585492730140686, + "step": 2784 + }, + { + "epoch": 0.8144465565141102, + "grad_norm": 1.5985853231384999, + "learning_rate": 1.3803371915257702e-05, + "loss": 0.6598281860351562, + "step": 2785 + }, + { + "epoch": 0.8147389969293757, + "grad_norm": 1.2500600856454092, + "learning_rate": 1.3798900102289726e-05, + "loss": 0.6819334030151367, + "step": 2786 + }, + { + "epoch": 0.815031437344641, + "grad_norm": 2.1106639284366877, + "learning_rate": 1.3794427401351946e-05, + "loss": 0.6548545360565186, + "step": 2787 + }, + { + "epoch": 0.8153238777599064, + "grad_norm": 1.4934248295829666, + "learning_rate": 1.3789953813489834e-05, + "loss": 0.7836263179779053, + "step": 2788 + }, + { + "epoch": 0.8156163181751718, + "grad_norm": 1.3092153960785353, + 
"learning_rate": 1.3785479339749062e-05, + "loss": 0.6108324527740479, + "step": 2789 + }, + { + "epoch": 0.8159087585904372, + "grad_norm": 1.4189973842835568, + "learning_rate": 1.378100398117551e-05, + "loss": 0.7079485058784485, + "step": 2790 + }, + { + "epoch": 0.8162011990057025, + "grad_norm": 1.2593140459847156, + "learning_rate": 1.3776527738815264e-05, + "loss": 0.5935578346252441, + "step": 2791 + }, + { + "epoch": 0.816493639420968, + "grad_norm": 1.159439153093783, + "learning_rate": 1.3772050613714623e-05, + "loss": 0.5559983253479004, + "step": 2792 + }, + { + "epoch": 0.8167860798362334, + "grad_norm": 1.2282449471592758, + "learning_rate": 1.3767572606920083e-05, + "loss": 0.6230447292327881, + "step": 2793 + }, + { + "epoch": 0.8170785202514987, + "grad_norm": 1.3750755360912204, + "learning_rate": 1.3763093719478357e-05, + "loss": 0.5672184824943542, + "step": 2794 + }, + { + "epoch": 0.8173709606667642, + "grad_norm": 1.3345649111405589, + "learning_rate": 1.3758613952436353e-05, + "loss": 0.6933468580245972, + "step": 2795 + }, + { + "epoch": 0.8176634010820295, + "grad_norm": 1.299919441217989, + "learning_rate": 1.3754133306841188e-05, + "loss": 0.5873827934265137, + "step": 2796 + }, + { + "epoch": 0.8179558414972949, + "grad_norm": 1.3238138716227077, + "learning_rate": 1.3749651783740188e-05, + "loss": 0.6061393022537231, + "step": 2797 + }, + { + "epoch": 0.8182482819125603, + "grad_norm": 1.3503137209197107, + "learning_rate": 1.3745169384180886e-05, + "loss": 0.6218947768211365, + "step": 2798 + }, + { + "epoch": 0.8185407223278257, + "grad_norm": 1.584036085033884, + "learning_rate": 1.3740686109211008e-05, + "loss": 0.6092264652252197, + "step": 2799 + }, + { + "epoch": 0.8188331627430911, + "grad_norm": 1.4327213465282531, + "learning_rate": 1.3736201959878497e-05, + "loss": 0.6145539283752441, + "step": 2800 + }, + { + "epoch": 0.8191256031583565, + "grad_norm": 1.1433366189059146, + "learning_rate": 1.3731716937231493e-05, + 
"loss": 0.4637746214866638, + "step": 2801 + }, + { + "epoch": 0.8194180435736219, + "grad_norm": 1.2802202387296946, + "learning_rate": 1.3727231042318345e-05, + "loss": 0.6102726459503174, + "step": 2802 + }, + { + "epoch": 0.8197104839888872, + "grad_norm": 1.3432330324336637, + "learning_rate": 1.3722744276187603e-05, + "loss": 0.5885297060012817, + "step": 2803 + }, + { + "epoch": 0.8200029244041527, + "grad_norm": 1.4575985112282515, + "learning_rate": 1.3718256639888021e-05, + "loss": 0.592369019985199, + "step": 2804 + }, + { + "epoch": 0.820295364819418, + "grad_norm": 1.4943856663354038, + "learning_rate": 1.3713768134468557e-05, + "loss": 0.5194098949432373, + "step": 2805 + }, + { + "epoch": 0.8205878052346834, + "grad_norm": 1.3716539173176907, + "learning_rate": 1.370927876097837e-05, + "loss": 0.6033506393432617, + "step": 2806 + }, + { + "epoch": 0.8208802456499488, + "grad_norm": 1.686602588559283, + "learning_rate": 1.3704788520466828e-05, + "loss": 0.6866108179092407, + "step": 2807 + }, + { + "epoch": 0.8211726860652142, + "grad_norm": 1.564205528186879, + "learning_rate": 1.3700297413983492e-05, + "loss": 0.7325261831283569, + "step": 2808 + }, + { + "epoch": 0.8214651264804796, + "grad_norm": 1.531257665763453, + "learning_rate": 1.3695805442578136e-05, + "loss": 0.5422608852386475, + "step": 2809 + }, + { + "epoch": 0.821757566895745, + "grad_norm": 1.5581516895112182, + "learning_rate": 1.369131260730073e-05, + "loss": 0.6124732494354248, + "step": 2810 + }, + { + "epoch": 0.8220500073110104, + "grad_norm": 1.3009124551880797, + "learning_rate": 1.3686818909201442e-05, + "loss": 0.6097716093063354, + "step": 2811 + }, + { + "epoch": 0.8223424477262757, + "grad_norm": 1.302794206877671, + "learning_rate": 1.3682324349330652e-05, + "loss": 0.6283478140830994, + "step": 2812 + }, + { + "epoch": 0.8226348881415412, + "grad_norm": 1.6179042229288885, + "learning_rate": 1.3677828928738934e-05, + "loss": 0.6590027213096619, + "step": 2813 + }, + { 
+ "epoch": 0.8229273285568065, + "grad_norm": 1.5247617474384554, + "learning_rate": 1.3673332648477065e-05, + "loss": 0.6417049169540405, + "step": 2814 + }, + { + "epoch": 0.8232197689720719, + "grad_norm": 1.510678230362789, + "learning_rate": 1.3668835509596023e-05, + "loss": 0.6217149496078491, + "step": 2815 + }, + { + "epoch": 0.8235122093873374, + "grad_norm": 1.9022694632783144, + "learning_rate": 1.3664337513146993e-05, + "loss": 0.7530043125152588, + "step": 2816 + }, + { + "epoch": 0.8238046498026027, + "grad_norm": 1.3235640761468095, + "learning_rate": 1.3659838660181341e-05, + "loss": 0.6690578460693359, + "step": 2817 + }, + { + "epoch": 0.8240970902178681, + "grad_norm": 1.5311368229830338, + "learning_rate": 1.3655338951750657e-05, + "loss": 0.5348777174949646, + "step": 2818 + }, + { + "epoch": 0.8243895306331335, + "grad_norm": 1.494896630136579, + "learning_rate": 1.3650838388906718e-05, + "loss": 0.7076361179351807, + "step": 2819 + }, + { + "epoch": 0.8246819710483989, + "grad_norm": 1.611810759372966, + "learning_rate": 1.3646336972701507e-05, + "loss": 0.6649855375289917, + "step": 2820 + }, + { + "epoch": 0.8249744114636642, + "grad_norm": 1.4188027146347701, + "learning_rate": 1.3641834704187194e-05, + "loss": 0.6484942436218262, + "step": 2821 + }, + { + "epoch": 0.8252668518789297, + "grad_norm": 1.066364944063908, + "learning_rate": 1.3637331584416163e-05, + "loss": 0.5167717337608337, + "step": 2822 + }, + { + "epoch": 0.8255592922941951, + "grad_norm": 1.4320675291883214, + "learning_rate": 1.3632827614440988e-05, + "loss": 0.7808440327644348, + "step": 2823 + }, + { + "epoch": 0.8258517327094604, + "grad_norm": 1.6437853600585473, + "learning_rate": 1.3628322795314449e-05, + "loss": 0.551183819770813, + "step": 2824 + }, + { + "epoch": 0.8261441731247259, + "grad_norm": 1.3439080199790612, + "learning_rate": 1.3623817128089513e-05, + "loss": 0.6084691286087036, + "step": 2825 + }, + { + "epoch": 0.8264366135399912, + "grad_norm": 
1.3974747336185755, + "learning_rate": 1.3619310613819363e-05, + "loss": 0.6251019239425659, + "step": 2826 + }, + { + "epoch": 0.8267290539552566, + "grad_norm": 1.237260204163714, + "learning_rate": 1.3614803253557358e-05, + "loss": 0.5037761926651001, + "step": 2827 + }, + { + "epoch": 0.827021494370522, + "grad_norm": 1.3461097726205675, + "learning_rate": 1.3610295048357072e-05, + "loss": 0.5606831312179565, + "step": 2828 + }, + { + "epoch": 0.8273139347857874, + "grad_norm": 1.3850167464051482, + "learning_rate": 1.360578599927227e-05, + "loss": 0.6664785146713257, + "step": 2829 + }, + { + "epoch": 0.8276063752010527, + "grad_norm": 1.3613746427457352, + "learning_rate": 1.360127610735691e-05, + "loss": 0.7105492353439331, + "step": 2830 + }, + { + "epoch": 0.8278988156163182, + "grad_norm": 1.3577681820511107, + "learning_rate": 1.3596765373665162e-05, + "loss": 0.6255359053611755, + "step": 2831 + }, + { + "epoch": 0.8281912560315836, + "grad_norm": 1.3150522794807806, + "learning_rate": 1.3592253799251377e-05, + "loss": 0.5422149300575256, + "step": 2832 + }, + { + "epoch": 0.8284836964468489, + "grad_norm": 1.4383576380181533, + "learning_rate": 1.3587741385170104e-05, + "loss": 0.6044044494628906, + "step": 2833 + }, + { + "epoch": 0.8287761368621144, + "grad_norm": 1.2478223452248756, + "learning_rate": 1.3583228132476094e-05, + "loss": 0.6256763935089111, + "step": 2834 + }, + { + "epoch": 0.8290685772773797, + "grad_norm": 1.2507601544621354, + "learning_rate": 1.3578714042224297e-05, + "loss": 0.6759064793586731, + "step": 2835 + }, + { + "epoch": 0.8293610176926451, + "grad_norm": 1.3610869198536528, + "learning_rate": 1.3574199115469852e-05, + "loss": 0.5819023251533508, + "step": 2836 + }, + { + "epoch": 0.8296534581079105, + "grad_norm": 1.331505314238688, + "learning_rate": 1.3569683353268098e-05, + "loss": 0.5412642359733582, + "step": 2837 + }, + { + "epoch": 0.8299458985231759, + "grad_norm": 1.0998151045906572, + "learning_rate": 
1.356516675667456e-05, + "loss": 0.5129171013832092, + "step": 2838 + }, + { + "epoch": 0.8302383389384413, + "grad_norm": 1.310393887156268, + "learning_rate": 1.356064932674497e-05, + "loss": 0.5165198445320129, + "step": 2839 + }, + { + "epoch": 0.8305307793537067, + "grad_norm": 1.287643091691659, + "learning_rate": 1.3556131064535249e-05, + "loss": 0.6545724272727966, + "step": 2840 + }, + { + "epoch": 0.8308232197689721, + "grad_norm": 1.2180901867245224, + "learning_rate": 1.3551611971101513e-05, + "loss": 0.5715968608856201, + "step": 2841 + }, + { + "epoch": 0.8311156601842374, + "grad_norm": 1.1619522611517994, + "learning_rate": 1.3547092047500074e-05, + "loss": 0.7063779830932617, + "step": 2842 + }, + { + "epoch": 0.8314081005995029, + "grad_norm": 1.2876429096537105, + "learning_rate": 1.3542571294787437e-05, + "loss": 0.6391212940216064, + "step": 2843 + }, + { + "epoch": 0.8317005410147682, + "grad_norm": 1.3047489403917027, + "learning_rate": 1.3538049714020298e-05, + "loss": 0.7145380973815918, + "step": 2844 + }, + { + "epoch": 0.8319929814300336, + "grad_norm": 1.4749234473747483, + "learning_rate": 1.3533527306255547e-05, + "loss": 0.7262213230133057, + "step": 2845 + }, + { + "epoch": 0.832285421845299, + "grad_norm": 1.5661213009447377, + "learning_rate": 1.3529004072550276e-05, + "loss": 0.7621959447860718, + "step": 2846 + }, + { + "epoch": 0.8325778622605644, + "grad_norm": 1.2349365167185542, + "learning_rate": 1.3524480013961757e-05, + "loss": 0.6372592449188232, + "step": 2847 + }, + { + "epoch": 0.8328703026758298, + "grad_norm": 1.5746526285594844, + "learning_rate": 1.3519955131547469e-05, + "loss": 0.6223774552345276, + "step": 2848 + }, + { + "epoch": 0.8331627430910952, + "grad_norm": 1.3246634087041118, + "learning_rate": 1.3515429426365066e-05, + "loss": 0.6500433683395386, + "step": 2849 + }, + { + "epoch": 0.8334551835063606, + "grad_norm": 1.4424195637381385, + "learning_rate": 1.3510902899472408e-05, + "loss": 
0.6136040687561035, + "step": 2850 + }, + { + "epoch": 0.8337476239216259, + "grad_norm": 1.512738908953339, + "learning_rate": 1.3506375551927546e-05, + "loss": 0.5297173261642456, + "step": 2851 + }, + { + "epoch": 0.8340400643368914, + "grad_norm": 1.4629352546381682, + "learning_rate": 1.3501847384788718e-05, + "loss": 0.6215870976448059, + "step": 2852 + }, + { + "epoch": 0.8343325047521567, + "grad_norm": 1.3184866454725659, + "learning_rate": 1.3497318399114354e-05, + "loss": 0.5507583618164062, + "step": 2853 + }, + { + "epoch": 0.8346249451674221, + "grad_norm": 1.6022185079697295, + "learning_rate": 1.349278859596308e-05, + "loss": 0.6348794102668762, + "step": 2854 + }, + { + "epoch": 0.8349173855826876, + "grad_norm": 1.4038791520130975, + "learning_rate": 1.3488257976393708e-05, + "loss": 0.7009605765342712, + "step": 2855 + }, + { + "epoch": 0.8352098259979529, + "grad_norm": 1.2288500000369813, + "learning_rate": 1.3483726541465238e-05, + "loss": 0.6268658638000488, + "step": 2856 + }, + { + "epoch": 0.8355022664132183, + "grad_norm": 1.1391793971559063, + "learning_rate": 1.3479194292236875e-05, + "loss": 0.7187683582305908, + "step": 2857 + }, + { + "epoch": 0.8357947068284837, + "grad_norm": 1.5724396660128028, + "learning_rate": 1.3474661229768002e-05, + "loss": 0.7016449570655823, + "step": 2858 + }, + { + "epoch": 0.8360871472437491, + "grad_norm": 1.5882858400771258, + "learning_rate": 1.347012735511819e-05, + "loss": 0.5852428674697876, + "step": 2859 + }, + { + "epoch": 0.8363795876590144, + "grad_norm": 1.4143289380031852, + "learning_rate": 1.3465592669347207e-05, + "loss": 0.6232450008392334, + "step": 2860 + }, + { + "epoch": 0.8366720280742799, + "grad_norm": 1.3444277392597084, + "learning_rate": 1.346105717351501e-05, + "loss": 0.526097297668457, + "step": 2861 + }, + { + "epoch": 0.8369644684895453, + "grad_norm": 1.5627282993073515, + "learning_rate": 1.3456520868681741e-05, + "loss": 0.6065535545349121, + "step": 2862 + }, + { + 
"epoch": 0.8372569089048106, + "grad_norm": 1.3941305759607394, + "learning_rate": 1.3451983755907736e-05, + "loss": 0.5836296677589417, + "step": 2863 + }, + { + "epoch": 0.8375493493200761, + "grad_norm": 1.336778139255592, + "learning_rate": 1.3447445836253519e-05, + "loss": 0.678827166557312, + "step": 2864 + }, + { + "epoch": 0.8378417897353414, + "grad_norm": 1.3002974651392025, + "learning_rate": 1.3442907110779794e-05, + "loss": 0.5206096172332764, + "step": 2865 + }, + { + "epoch": 0.8381342301506068, + "grad_norm": 1.3468789034772342, + "learning_rate": 1.3438367580547468e-05, + "loss": 0.6424980163574219, + "step": 2866 + }, + { + "epoch": 0.8384266705658722, + "grad_norm": 1.1467777796306478, + "learning_rate": 1.3433827246617624e-05, + "loss": 0.6293484568595886, + "step": 2867 + }, + { + "epoch": 0.8387191109811376, + "grad_norm": 1.2601562582063903, + "learning_rate": 1.3429286110051539e-05, + "loss": 0.5912167429924011, + "step": 2868 + }, + { + "epoch": 0.8390115513964029, + "grad_norm": 1.5181261084157656, + "learning_rate": 1.342474417191068e-05, + "loss": 0.6571674346923828, + "step": 2869 + }, + { + "epoch": 0.8393039918116684, + "grad_norm": 1.421037061270542, + "learning_rate": 1.342020143325669e-05, + "loss": 0.5519720911979675, + "step": 2870 + }, + { + "epoch": 0.8395964322269338, + "grad_norm": 1.3997247827352193, + "learning_rate": 1.341565789515141e-05, + "loss": 0.6465001106262207, + "step": 2871 + }, + { + "epoch": 0.8398888726421991, + "grad_norm": 1.398359818513133, + "learning_rate": 1.3411113558656865e-05, + "loss": 0.6022073030471802, + "step": 2872 + }, + { + "epoch": 0.8401813130574646, + "grad_norm": 1.361775248337709, + "learning_rate": 1.3406568424835264e-05, + "loss": 0.610893726348877, + "step": 2873 + }, + { + "epoch": 0.8404737534727299, + "grad_norm": 1.427563498701008, + "learning_rate": 1.340202249474901e-05, + "loss": 0.5296563506126404, + "step": 2874 + }, + { + "epoch": 0.8407661938879953, + "grad_norm": 
1.170906744718837, + "learning_rate": 1.3397475769460679e-05, + "loss": 0.6327008605003357, + "step": 2875 + }, + { + "epoch": 0.8410586343032607, + "grad_norm": 1.3517928558744952, + "learning_rate": 1.3392928250033045e-05, + "loss": 0.6437617540359497, + "step": 2876 + }, + { + "epoch": 0.8413510747185261, + "grad_norm": 1.3416431365752262, + "learning_rate": 1.3388379937529063e-05, + "loss": 0.5627291202545166, + "step": 2877 + }, + { + "epoch": 0.8416435151337915, + "grad_norm": 1.3602688623647594, + "learning_rate": 1.3383830833011871e-05, + "loss": 0.5921163558959961, + "step": 2878 + }, + { + "epoch": 0.8419359555490569, + "grad_norm": 1.2033937218328357, + "learning_rate": 1.3379280937544797e-05, + "loss": 0.5749082565307617, + "step": 2879 + }, + { + "epoch": 0.8422283959643223, + "grad_norm": 1.462463173522237, + "learning_rate": 1.3374730252191347e-05, + "loss": 0.6294553279876709, + "step": 2880 + }, + { + "epoch": 0.8425208363795876, + "grad_norm": 1.222130659730857, + "learning_rate": 1.3370178778015223e-05, + "loss": 0.5172078609466553, + "step": 2881 + }, + { + "epoch": 0.8428132767948531, + "grad_norm": 1.3695607626504847, + "learning_rate": 1.3365626516080301e-05, + "loss": 0.44069811701774597, + "step": 2882 + }, + { + "epoch": 0.8431057172101184, + "grad_norm": 1.31704500891114, + "learning_rate": 1.336107346745064e-05, + "loss": 0.72663813829422, + "step": 2883 + }, + { + "epoch": 0.8433981576253838, + "grad_norm": 1.3488066557741722, + "learning_rate": 1.3356519633190495e-05, + "loss": 0.6562269926071167, + "step": 2884 + }, + { + "epoch": 0.8436905980406492, + "grad_norm": 1.3994820366244107, + "learning_rate": 1.3351965014364293e-05, + "loss": 0.699925422668457, + "step": 2885 + }, + { + "epoch": 0.8439830384559146, + "grad_norm": 1.3294441855934318, + "learning_rate": 1.3347409612036651e-05, + "loss": 0.5902425646781921, + "step": 2886 + }, + { + "epoch": 0.84427547887118, + "grad_norm": 1.401705271294413, + "learning_rate": 
1.3342853427272362e-05, + "loss": 0.613966703414917, + "step": 2887 + }, + { + "epoch": 0.8445679192864454, + "grad_norm": 1.2630848315271062, + "learning_rate": 1.333829646113641e-05, + "loss": 0.5864139199256897, + "step": 2888 + }, + { + "epoch": 0.8448603597017108, + "grad_norm": 1.5447722719058155, + "learning_rate": 1.3333738714693958e-05, + "loss": 0.5851572751998901, + "step": 2889 + }, + { + "epoch": 0.8451528001169761, + "grad_norm": 1.4679598706703352, + "learning_rate": 1.3329180189010348e-05, + "loss": 0.6564328074455261, + "step": 2890 + }, + { + "epoch": 0.8454452405322416, + "grad_norm": 1.3794930949186583, + "learning_rate": 1.3324620885151115e-05, + "loss": 0.6745615005493164, + "step": 2891 + }, + { + "epoch": 0.8457376809475069, + "grad_norm": 1.277678612967463, + "learning_rate": 1.3320060804181962e-05, + "loss": 0.5003606081008911, + "step": 2892 + }, + { + "epoch": 0.8460301213627723, + "grad_norm": 1.4995028165986726, + "learning_rate": 1.3315499947168781e-05, + "loss": 0.6646369695663452, + "step": 2893 + }, + { + "epoch": 0.8463225617780378, + "grad_norm": 1.3696086888087433, + "learning_rate": 1.3310938315177647e-05, + "loss": 0.6903572082519531, + "step": 2894 + }, + { + "epoch": 0.8466150021933031, + "grad_norm": 1.633835119151456, + "learning_rate": 1.330637590927481e-05, + "loss": 0.6221956610679626, + "step": 2895 + }, + { + "epoch": 0.8469074426085685, + "grad_norm": 1.5369372818354106, + "learning_rate": 1.3301812730526713e-05, + "loss": 0.5602666139602661, + "step": 2896 + }, + { + "epoch": 0.8471998830238339, + "grad_norm": 1.2910113915198014, + "learning_rate": 1.3297248779999963e-05, + "loss": 0.5843783617019653, + "step": 2897 + }, + { + "epoch": 0.8474923234390993, + "grad_norm": 1.304495064263293, + "learning_rate": 1.3292684058761357e-05, + "loss": 0.5040254592895508, + "step": 2898 + }, + { + "epoch": 0.8477847638543646, + "grad_norm": 1.4968280315795712, + "learning_rate": 1.3288118567877874e-05, + "loss": 
0.6180210709571838, + "step": 2899 + }, + { + "epoch": 0.8480772042696301, + "grad_norm": 1.345230482752467, + "learning_rate": 1.3283552308416668e-05, + "loss": 0.5050851106643677, + "step": 2900 + }, + { + "epoch": 0.8483696446848955, + "grad_norm": 1.604217394640997, + "learning_rate": 1.3278985281445072e-05, + "loss": 0.6627126932144165, + "step": 2901 + }, + { + "epoch": 0.8486620851001608, + "grad_norm": 1.40930260394039, + "learning_rate": 1.3274417488030607e-05, + "loss": 0.5984441041946411, + "step": 2902 + }, + { + "epoch": 0.8489545255154263, + "grad_norm": 1.3584927833580034, + "learning_rate": 1.3269848929240958e-05, + "loss": 0.611599326133728, + "step": 2903 + }, + { + "epoch": 0.8492469659306916, + "grad_norm": 1.4743229169395644, + "learning_rate": 1.3265279606144006e-05, + "loss": 0.6057847142219543, + "step": 2904 + }, + { + "epoch": 0.849539406345957, + "grad_norm": 1.5324921987406994, + "learning_rate": 1.3260709519807797e-05, + "loss": 0.7123644948005676, + "step": 2905 + }, + { + "epoch": 0.8498318467612224, + "grad_norm": 1.4337194400937256, + "learning_rate": 1.3256138671300564e-05, + "loss": 0.6193811893463135, + "step": 2906 + }, + { + "epoch": 0.8501242871764878, + "grad_norm": 1.6102821646068017, + "learning_rate": 1.3251567061690717e-05, + "loss": 0.5775484442710876, + "step": 2907 + }, + { + "epoch": 0.8504167275917531, + "grad_norm": 1.5171257755680165, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.5655511617660522, + "step": 2908 + }, + { + "epoch": 0.8507091680070186, + "grad_norm": 1.8299545213851978, + "learning_rate": 1.3242421563437688e-05, + "loss": 0.6216102838516235, + "step": 2909 + }, + { + "epoch": 0.851001608422284, + "grad_norm": 1.4045274179517395, + "learning_rate": 1.3237847676932217e-05, + "loss": 0.649554967880249, + "step": 2910 + }, + { + "epoch": 0.8512940488375493, + "grad_norm": 1.5965021256139, + "learning_rate": 1.3233273033599534e-05, + "loss": 0.6688281297683716, + "step": 2911 + }, + { + "epoch": 
0.8515864892528148, + "grad_norm": 1.2158705367599922, + "learning_rate": 1.322869763450894e-05, + "loss": 0.664188027381897, + "step": 2912 + }, + { + "epoch": 0.8518789296680801, + "grad_norm": 1.27994094299147, + "learning_rate": 1.3224121480729905e-05, + "loss": 0.47189265489578247, + "step": 2913 + }, + { + "epoch": 0.8521713700833455, + "grad_norm": 1.387813816085696, + "learning_rate": 1.3219544573332075e-05, + "loss": 0.6190480589866638, + "step": 2914 + }, + { + "epoch": 0.8524638104986109, + "grad_norm": 1.3459335682790516, + "learning_rate": 1.3214966913385277e-05, + "loss": 0.6564091444015503, + "step": 2915 + }, + { + "epoch": 0.8527562509138763, + "grad_norm": 1.563994961699158, + "learning_rate": 1.321038850195951e-05, + "loss": 0.6083766222000122, + "step": 2916 + }, + { + "epoch": 0.8530486913291417, + "grad_norm": 1.2689051257322506, + "learning_rate": 1.3205809340124951e-05, + "loss": 0.5262473821640015, + "step": 2917 + }, + { + "epoch": 0.8533411317444071, + "grad_norm": 1.3633671661320785, + "learning_rate": 1.320122942895195e-05, + "loss": 0.6170297861099243, + "step": 2918 + }, + { + "epoch": 0.8536335721596725, + "grad_norm": 1.3838619263880951, + "learning_rate": 1.3196648769511036e-05, + "loss": 0.5791536569595337, + "step": 2919 + }, + { + "epoch": 0.8539260125749378, + "grad_norm": 1.4116909766151964, + "learning_rate": 1.3192067362872904e-05, + "loss": 0.5870766639709473, + "step": 2920 + }, + { + "epoch": 0.8542184529902033, + "grad_norm": 1.5317627298998806, + "learning_rate": 1.3187485210108438e-05, + "loss": 0.604548990726471, + "step": 2921 + }, + { + "epoch": 0.8545108934054686, + "grad_norm": 1.3458362989469688, + "learning_rate": 1.3182902312288682e-05, + "loss": 0.5292568206787109, + "step": 2922 + }, + { + "epoch": 0.854803333820734, + "grad_norm": 1.276264176970529, + "learning_rate": 1.3178318670484862e-05, + "loss": 0.5638582706451416, + "step": 2923 + }, + { + "epoch": 0.8550957742359994, + "grad_norm": 
1.5369089697533718, + "learning_rate": 1.317373428576838e-05, + "loss": 0.5730164051055908, + "step": 2924 + }, + { + "epoch": 0.8553882146512648, + "grad_norm": 1.369500285153578, + "learning_rate": 1.3169149159210803e-05, + "loss": 0.6170799732208252, + "step": 2925 + }, + { + "epoch": 0.8556806550665302, + "grad_norm": 1.3401436683949477, + "learning_rate": 1.3164563291883879e-05, + "loss": 0.591925323009491, + "step": 2926 + }, + { + "epoch": 0.8559730954817956, + "grad_norm": 1.5178314176439451, + "learning_rate": 1.3159976684859528e-05, + "loss": 0.7269439697265625, + "step": 2927 + }, + { + "epoch": 0.856265535897061, + "grad_norm": 1.552203527248451, + "learning_rate": 1.3155389339209839e-05, + "loss": 0.615471363067627, + "step": 2928 + }, + { + "epoch": 0.8565579763123263, + "grad_norm": 1.4397776020126687, + "learning_rate": 1.3150801256007076e-05, + "loss": 0.6264692544937134, + "step": 2929 + }, + { + "epoch": 0.8568504167275918, + "grad_norm": 1.203302342126932, + "learning_rate": 1.314621243632368e-05, + "loss": 0.5729779005050659, + "step": 2930 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 1.3833464526102248, + "learning_rate": 1.314162288123225e-05, + "loss": 0.6462980508804321, + "step": 2931 + }, + { + "epoch": 0.8574352975581225, + "grad_norm": 1.1795102455310789, + "learning_rate": 1.3137032591805577e-05, + "loss": 0.5493176579475403, + "step": 2932 + }, + { + "epoch": 0.857727737973388, + "grad_norm": 1.1422942251299026, + "learning_rate": 1.3132441569116608e-05, + "loss": 0.49161234498023987, + "step": 2933 + }, + { + "epoch": 0.8580201783886533, + "grad_norm": 1.428090020215004, + "learning_rate": 1.312784981423847e-05, + "loss": 0.6724506616592407, + "step": 2934 + }, + { + "epoch": 0.8583126188039187, + "grad_norm": 1.6216709335890533, + "learning_rate": 1.3123257328244455e-05, + "loss": 0.6180965900421143, + "step": 2935 + }, + { + "epoch": 0.8586050592191841, + "grad_norm": 1.6797724821518334, + "learning_rate": 
1.3118664112208027e-05, + "loss": 0.6676491498947144, + "step": 2936 + }, + { + "epoch": 0.8588974996344495, + "grad_norm": 1.1911121778916818, + "learning_rate": 1.3114070167202827e-05, + "loss": 0.5964041948318481, + "step": 2937 + }, + { + "epoch": 0.8591899400497148, + "grad_norm": 1.3660050885815391, + "learning_rate": 1.3109475494302657e-05, + "loss": 0.708328366279602, + "step": 2938 + }, + { + "epoch": 0.8594823804649803, + "grad_norm": 1.6146616988047677, + "learning_rate": 1.3104880094581495e-05, + "loss": 0.6360403299331665, + "step": 2939 + }, + { + "epoch": 0.8597748208802457, + "grad_norm": 1.5628439078603966, + "learning_rate": 1.3100283969113494e-05, + "loss": 0.5450131893157959, + "step": 2940 + }, + { + "epoch": 0.860067261295511, + "grad_norm": 1.2422442713506727, + "learning_rate": 1.3095687118972962e-05, + "loss": 0.4472329020500183, + "step": 2941 + }, + { + "epoch": 0.8603597017107765, + "grad_norm": 1.2824654152788901, + "learning_rate": 1.3091089545234387e-05, + "loss": 0.6853972673416138, + "step": 2942 + }, + { + "epoch": 0.8606521421260418, + "grad_norm": 1.5236765495118778, + "learning_rate": 1.3086491248972429e-05, + "loss": 0.6547979116439819, + "step": 2943 + }, + { + "epoch": 0.8609445825413072, + "grad_norm": 1.2521364069886292, + "learning_rate": 1.3081892231261903e-05, + "loss": 0.46194693446159363, + "step": 2944 + }, + { + "epoch": 0.8612370229565726, + "grad_norm": 1.3749685968664958, + "learning_rate": 1.307729249317781e-05, + "loss": 0.5715345144271851, + "step": 2945 + }, + { + "epoch": 0.861529463371838, + "grad_norm": 1.2925136251134925, + "learning_rate": 1.3072692035795305e-05, + "loss": 0.5590982437133789, + "step": 2946 + }, + { + "epoch": 0.8618219037871033, + "grad_norm": 1.4594997051230878, + "learning_rate": 1.3068090860189719e-05, + "loss": 0.5435009002685547, + "step": 2947 + }, + { + "epoch": 0.8621143442023688, + "grad_norm": 1.1604259212434795, + "learning_rate": 1.3063488967436548e-05, + "loss": 
0.4528965651988983, + "step": 2948 + }, + { + "epoch": 0.8624067846176342, + "grad_norm": 1.1967844606343032, + "learning_rate": 1.3058886358611457e-05, + "loss": 0.5520291328430176, + "step": 2949 + }, + { + "epoch": 0.8626992250328995, + "grad_norm": 1.3959982999797578, + "learning_rate": 1.305428303479028e-05, + "loss": 0.6444021463394165, + "step": 2950 + }, + { + "epoch": 0.862991665448165, + "grad_norm": 1.597979452275331, + "learning_rate": 1.3049678997049016e-05, + "loss": 0.7808041572570801, + "step": 2951 + }, + { + "epoch": 0.8632841058634303, + "grad_norm": 1.6855013913251111, + "learning_rate": 1.3045074246463825e-05, + "loss": 0.6297428607940674, + "step": 2952 + }, + { + "epoch": 0.8635765462786957, + "grad_norm": 1.5203533995419023, + "learning_rate": 1.3040468784111045e-05, + "loss": 0.5776612162590027, + "step": 2953 + }, + { + "epoch": 0.8638689866939611, + "grad_norm": 1.3696314111811954, + "learning_rate": 1.3035862611067169e-05, + "loss": 0.49298524856567383, + "step": 2954 + }, + { + "epoch": 0.8641614271092265, + "grad_norm": 1.7023849342400221, + "learning_rate": 1.303125572840887e-05, + "loss": 0.8061650991439819, + "step": 2955 + }, + { + "epoch": 0.8644538675244919, + "grad_norm": 1.4509821363343893, + "learning_rate": 1.3026648137212976e-05, + "loss": 0.7741662859916687, + "step": 2956 + }, + { + "epoch": 0.8647463079397573, + "grad_norm": 1.350671993753925, + "learning_rate": 1.302203983855648e-05, + "loss": 0.5589889287948608, + "step": 2957 + }, + { + "epoch": 0.8650387483550227, + "grad_norm": 1.400755532782556, + "learning_rate": 1.3017430833516547e-05, + "loss": 0.5801941752433777, + "step": 2958 + }, + { + "epoch": 0.865331188770288, + "grad_norm": 1.3298019485580883, + "learning_rate": 1.30128211231705e-05, + "loss": 0.5874185562133789, + "step": 2959 + }, + { + "epoch": 0.8656236291855535, + "grad_norm": 1.1737111706818832, + "learning_rate": 1.3008210708595837e-05, + "loss": 0.6062727570533752, + "step": 2960 + }, + { + 
"epoch": 0.8659160696008188, + "grad_norm": 1.3334829952801492, + "learning_rate": 1.3003599590870209e-05, + "loss": 0.571448802947998, + "step": 2961 + }, + { + "epoch": 0.8662085100160842, + "grad_norm": 1.3654619359177553, + "learning_rate": 1.2998987771071442e-05, + "loss": 0.7001944780349731, + "step": 2962 + }, + { + "epoch": 0.8665009504313496, + "grad_norm": 1.48577297171421, + "learning_rate": 1.2994375250277516e-05, + "loss": 0.49182790517807007, + "step": 2963 + }, + { + "epoch": 0.866793390846615, + "grad_norm": 1.342673325945858, + "learning_rate": 1.298976202956658e-05, + "loss": 0.5299041271209717, + "step": 2964 + }, + { + "epoch": 0.8670858312618804, + "grad_norm": 1.1975267191215118, + "learning_rate": 1.2985148110016947e-05, + "loss": 0.4955265522003174, + "step": 2965 + }, + { + "epoch": 0.8673782716771458, + "grad_norm": 1.132262479106049, + "learning_rate": 1.2980533492707094e-05, + "loss": 0.6395630836486816, + "step": 2966 + }, + { + "epoch": 0.8676707120924112, + "grad_norm": 1.1303573523984183, + "learning_rate": 1.2975918178715661e-05, + "loss": 0.5926274061203003, + "step": 2967 + }, + { + "epoch": 0.8679631525076765, + "grad_norm": 1.194805436445147, + "learning_rate": 1.2971302169121447e-05, + "loss": 0.5556914806365967, + "step": 2968 + }, + { + "epoch": 0.868255592922942, + "grad_norm": 1.2766981949480176, + "learning_rate": 1.2966685465003415e-05, + "loss": 0.5347195863723755, + "step": 2969 + }, + { + "epoch": 0.8685480333382073, + "grad_norm": 1.3728880032694415, + "learning_rate": 1.2962068067440694e-05, + "loss": 0.6839208006858826, + "step": 2970 + }, + { + "epoch": 0.8688404737534727, + "grad_norm": 1.1132776608061867, + "learning_rate": 1.295744997751257e-05, + "loss": 0.5741337537765503, + "step": 2971 + }, + { + "epoch": 0.8691329141687382, + "grad_norm": 1.536125480269087, + "learning_rate": 1.29528311962985e-05, + "loss": 0.7383404970169067, + "step": 2972 + }, + { + "epoch": 0.8694253545840035, + "grad_norm": 
1.4560088611056379, + "learning_rate": 1.294821172487809e-05, + "loss": 0.5075374245643616, + "step": 2973 + }, + { + "epoch": 0.8697177949992689, + "grad_norm": 1.235849675897421, + "learning_rate": 1.2943591564331113e-05, + "loss": 0.557248592376709, + "step": 2974 + }, + { + "epoch": 0.8700102354145343, + "grad_norm": 1.3655420768672006, + "learning_rate": 1.2938970715737506e-05, + "loss": 0.5687203407287598, + "step": 2975 + }, + { + "epoch": 0.8703026758297997, + "grad_norm": 1.3479345698129241, + "learning_rate": 1.2934349180177364e-05, + "loss": 0.5946108102798462, + "step": 2976 + }, + { + "epoch": 0.870595116245065, + "grad_norm": 1.258994257926457, + "learning_rate": 1.2929726958730942e-05, + "loss": 0.6103173494338989, + "step": 2977 + }, + { + "epoch": 0.8708875566603305, + "grad_norm": 1.4914714674105345, + "learning_rate": 1.2925104052478657e-05, + "loss": 0.7007244825363159, + "step": 2978 + }, + { + "epoch": 0.8711799970755959, + "grad_norm": 1.4140285074261345, + "learning_rate": 1.2920480462501082e-05, + "loss": 0.6157742142677307, + "step": 2979 + }, + { + "epoch": 0.8714724374908612, + "grad_norm": 1.4708644175648395, + "learning_rate": 1.2915856189878956e-05, + "loss": 0.6501113176345825, + "step": 2980 + }, + { + "epoch": 0.8717648779061267, + "grad_norm": 1.2555000815915451, + "learning_rate": 1.2911231235693178e-05, + "loss": 0.5084626078605652, + "step": 2981 + }, + { + "epoch": 0.872057318321392, + "grad_norm": 1.343175395168551, + "learning_rate": 1.2906605601024796e-05, + "loss": 0.5953651666641235, + "step": 2982 + }, + { + "epoch": 0.8723497587366574, + "grad_norm": 1.218776434986359, + "learning_rate": 1.290197928695503e-05, + "loss": 0.5733205676078796, + "step": 2983 + }, + { + "epoch": 0.8726421991519228, + "grad_norm": 1.5420791901099857, + "learning_rate": 1.2897352294565248e-05, + "loss": 0.5976133942604065, + "step": 2984 + }, + { + "epoch": 0.8729346395671882, + "grad_norm": 1.2904353456419873, + "learning_rate": 
1.2892724624936983e-05, + "loss": 0.5092414617538452, + "step": 2985 + }, + { + "epoch": 0.8732270799824535, + "grad_norm": 1.4935525581566107, + "learning_rate": 1.2888096279151926e-05, + "loss": 0.7244688272476196, + "step": 2986 + }, + { + "epoch": 0.873519520397719, + "grad_norm": 1.5818576721862576, + "learning_rate": 1.2883467258291922e-05, + "loss": 0.6943881511688232, + "step": 2987 + }, + { + "epoch": 0.8738119608129844, + "grad_norm": 1.422762914124539, + "learning_rate": 1.287883756343898e-05, + "loss": 0.6484338641166687, + "step": 2988 + }, + { + "epoch": 0.8741044012282497, + "grad_norm": 1.23046146833686, + "learning_rate": 1.2874207195675262e-05, + "loss": 0.620865523815155, + "step": 2989 + }, + { + "epoch": 0.8743968416435152, + "grad_norm": 1.245843663622743, + "learning_rate": 1.2869576156083085e-05, + "loss": 0.5290236473083496, + "step": 2990 + }, + { + "epoch": 0.8746892820587805, + "grad_norm": 1.383695697280258, + "learning_rate": 1.2864944445744932e-05, + "loss": 0.7140257358551025, + "step": 2991 + }, + { + "epoch": 0.8749817224740459, + "grad_norm": 1.3901579888827407, + "learning_rate": 1.286031206574343e-05, + "loss": 0.7167611122131348, + "step": 2992 + }, + { + "epoch": 0.8752741628893113, + "grad_norm": 1.4097752029885913, + "learning_rate": 1.2855679017161372e-05, + "loss": 0.5631322860717773, + "step": 2993 + }, + { + "epoch": 0.8755666033045767, + "grad_norm": 1.4535459078300315, + "learning_rate": 1.2851045301081714e-05, + "loss": 0.6250770092010498, + "step": 2994 + }, + { + "epoch": 0.8758590437198421, + "grad_norm": 1.3041015408341177, + "learning_rate": 1.2846410918587546e-05, + "loss": 0.5121266841888428, + "step": 2995 + }, + { + "epoch": 0.8761514841351075, + "grad_norm": 1.2982813372349626, + "learning_rate": 1.2841775870762134e-05, + "loss": 0.6075780987739563, + "step": 2996 + }, + { + "epoch": 0.8764439245503729, + "grad_norm": 1.2610269556078437, + "learning_rate": 1.283714015868889e-05, + "loss": 0.516838014125824, 
+ "step": 2997 + }, + { + "epoch": 0.8767363649656382, + "grad_norm": 1.3997368275790003, + "learning_rate": 1.2832503783451384e-05, + "loss": 0.6952051520347595, + "step": 2998 + }, + { + "epoch": 0.8770288053809037, + "grad_norm": 1.668277066498958, + "learning_rate": 1.2827866746133342e-05, + "loss": 0.8039685487747192, + "step": 2999 + }, + { + "epoch": 0.877321245796169, + "grad_norm": 1.3275322129226486, + "learning_rate": 1.2823229047818642e-05, + "loss": 0.6200549602508545, + "step": 3000 + }, + { + "epoch": 0.8776136862114344, + "grad_norm": 1.3153882408773916, + "learning_rate": 1.2818590689591315e-05, + "loss": 0.6666116714477539, + "step": 3001 + }, + { + "epoch": 0.8779061266266998, + "grad_norm": 1.4255915593552042, + "learning_rate": 1.2813951672535551e-05, + "loss": 0.566741943359375, + "step": 3002 + }, + { + "epoch": 0.8781985670419652, + "grad_norm": 1.2390037918473238, + "learning_rate": 1.2809311997735697e-05, + "loss": 0.6103402376174927, + "step": 3003 + }, + { + "epoch": 0.8784910074572306, + "grad_norm": 1.1444583076116077, + "learning_rate": 1.280467166627624e-05, + "loss": 0.48296916484832764, + "step": 3004 + }, + { + "epoch": 0.878783447872496, + "grad_norm": 1.4235586871910597, + "learning_rate": 1.2800030679241834e-05, + "loss": 0.5995723605155945, + "step": 3005 + }, + { + "epoch": 0.8790758882877614, + "grad_norm": 1.5173093942193803, + "learning_rate": 1.2795389037717286e-05, + "loss": 0.6199642419815063, + "step": 3006 + }, + { + "epoch": 0.8793683287030267, + "grad_norm": 1.5757356892284924, + "learning_rate": 1.279074674278754e-05, + "loss": 0.6740807294845581, + "step": 3007 + }, + { + "epoch": 0.8796607691182922, + "grad_norm": 1.4923318097982954, + "learning_rate": 1.2786103795537714e-05, + "loss": 0.7330688238143921, + "step": 3008 + }, + { + "epoch": 0.8799532095335575, + "grad_norm": 1.1357910142893406, + "learning_rate": 1.2781460197053066e-05, + "loss": 0.5048441290855408, + "step": 3009 + }, + { + "epoch": 
0.8802456499488229, + "grad_norm": 1.2484561154788956, + "learning_rate": 1.277681594841901e-05, + "loss": 0.6103702187538147, + "step": 3010 + }, + { + "epoch": 0.8805380903640884, + "grad_norm": 1.3117487221252475, + "learning_rate": 1.2772171050721107e-05, + "loss": 0.5223366022109985, + "step": 3011 + }, + { + "epoch": 0.8808305307793537, + "grad_norm": 1.5806437295259135, + "learning_rate": 1.2767525505045078e-05, + "loss": 0.708305835723877, + "step": 3012 + }, + { + "epoch": 0.8811229711946191, + "grad_norm": 1.324207789268205, + "learning_rate": 1.2762879312476785e-05, + "loss": 0.6827911734580994, + "step": 3013 + }, + { + "epoch": 0.8814154116098845, + "grad_norm": 1.7302207886555443, + "learning_rate": 1.2758232474102254e-05, + "loss": 0.6977027654647827, + "step": 3014 + }, + { + "epoch": 0.8817078520251499, + "grad_norm": 1.235299173012923, + "learning_rate": 1.2753584991007654e-05, + "loss": 0.5534720420837402, + "step": 3015 + }, + { + "epoch": 0.8820002924404152, + "grad_norm": 1.1722300923390174, + "learning_rate": 1.2748936864279305e-05, + "loss": 0.541682243347168, + "step": 3016 + }, + { + "epoch": 0.8822927328556807, + "grad_norm": 1.4134630737456748, + "learning_rate": 1.2744288095003674e-05, + "loss": 0.6195456981658936, + "step": 3017 + }, + { + "epoch": 0.8825851732709461, + "grad_norm": 1.1963339495389647, + "learning_rate": 1.2739638684267387e-05, + "loss": 0.5050234794616699, + "step": 3018 + }, + { + "epoch": 0.8828776136862114, + "grad_norm": 1.1967088542641229, + "learning_rate": 1.2734988633157218e-05, + "loss": 0.5397066473960876, + "step": 3019 + }, + { + "epoch": 0.8831700541014769, + "grad_norm": 1.3480056981854442, + "learning_rate": 1.273033794276008e-05, + "loss": 0.5932190418243408, + "step": 3020 + }, + { + "epoch": 0.8834624945167422, + "grad_norm": 1.2383533139434324, + "learning_rate": 1.2725686614163055e-05, + "loss": 0.5780059099197388, + "step": 3021 + }, + { + "epoch": 0.8837549349320076, + "grad_norm": 
1.4379159594856536, + "learning_rate": 1.2721034648453353e-05, + "loss": 0.5850226879119873, + "step": 3022 + }, + { + "epoch": 0.884047375347273, + "grad_norm": 1.351057706249645, + "learning_rate": 1.2716382046718346e-05, + "loss": 0.6684393882751465, + "step": 3023 + }, + { + "epoch": 0.8843398157625384, + "grad_norm": 1.3578422906902012, + "learning_rate": 1.271172881004555e-05, + "loss": 0.6045842170715332, + "step": 3024 + }, + { + "epoch": 0.8846322561778037, + "grad_norm": 1.4246831207517041, + "learning_rate": 1.2707074939522633e-05, + "loss": 0.6769551038742065, + "step": 3025 + }, + { + "epoch": 0.8849246965930692, + "grad_norm": 1.541147063192512, + "learning_rate": 1.2702420436237408e-05, + "loss": 0.5581091642379761, + "step": 3026 + }, + { + "epoch": 0.8852171370083346, + "grad_norm": 1.381695049653859, + "learning_rate": 1.269776530127784e-05, + "loss": 0.5010186433792114, + "step": 3027 + }, + { + "epoch": 0.8855095774235999, + "grad_norm": 1.3620137613749654, + "learning_rate": 1.2693109535732034e-05, + "loss": 0.4537884294986725, + "step": 3028 + }, + { + "epoch": 0.8858020178388654, + "grad_norm": 1.155156838639785, + "learning_rate": 1.2688453140688246e-05, + "loss": 0.5920443534851074, + "step": 3029 + }, + { + "epoch": 0.8860944582541307, + "grad_norm": 1.4193738144287875, + "learning_rate": 1.2683796117234884e-05, + "loss": 0.564072847366333, + "step": 3030 + }, + { + "epoch": 0.8863868986693961, + "grad_norm": 1.3206014730711304, + "learning_rate": 1.26791384664605e-05, + "loss": 0.657585620880127, + "step": 3031 + }, + { + "epoch": 0.8866793390846615, + "grad_norm": 1.4850669504718117, + "learning_rate": 1.2674480189453786e-05, + "loss": 0.6864298582077026, + "step": 3032 + }, + { + "epoch": 0.8869717794999269, + "grad_norm": 1.4143994971740543, + "learning_rate": 1.266982128730359e-05, + "loss": 0.6416069865226746, + "step": 3033 + }, + { + "epoch": 0.8872642199151923, + "grad_norm": 1.3298070008922416, + "learning_rate": 
1.2665161761098899e-05, + "loss": 0.6405118703842163, + "step": 3034 + }, + { + "epoch": 0.8875566603304577, + "grad_norm": 1.4036133965159712, + "learning_rate": 1.266050161192885e-05, + "loss": 0.649673342704773, + "step": 3035 + }, + { + "epoch": 0.8878491007457231, + "grad_norm": 1.359043965576467, + "learning_rate": 1.2655840840882729e-05, + "loss": 0.5914620161056519, + "step": 3036 + }, + { + "epoch": 0.8881415411609884, + "grad_norm": 1.5837746169822255, + "learning_rate": 1.2651179449049958e-05, + "loss": 0.6080621480941772, + "step": 3037 + }, + { + "epoch": 0.8884339815762539, + "grad_norm": 1.5302588008128089, + "learning_rate": 1.264651743752011e-05, + "loss": 0.657015860080719, + "step": 3038 + }, + { + "epoch": 0.8887264219915192, + "grad_norm": 1.3603604072518423, + "learning_rate": 1.26418548073829e-05, + "loss": 0.5384848713874817, + "step": 3039 + }, + { + "epoch": 0.8890188624067846, + "grad_norm": 1.5457096573294893, + "learning_rate": 1.2637191559728195e-05, + "loss": 0.7452554106712341, + "step": 3040 + }, + { + "epoch": 0.88931130282205, + "grad_norm": 1.4411555623785637, + "learning_rate": 1.2632527695645993e-05, + "loss": 0.743236780166626, + "step": 3041 + }, + { + "epoch": 0.8896037432373154, + "grad_norm": 1.5417347407679962, + "learning_rate": 1.2627863216226453e-05, + "loss": 0.557692289352417, + "step": 3042 + }, + { + "epoch": 0.8898961836525808, + "grad_norm": 1.3302198914823486, + "learning_rate": 1.2623198122559863e-05, + "loss": 0.5637259483337402, + "step": 3043 + }, + { + "epoch": 0.8901886240678462, + "grad_norm": 1.4403910054587767, + "learning_rate": 1.261853241573666e-05, + "loss": 0.5217350721359253, + "step": 3044 + }, + { + "epoch": 0.8904810644831116, + "grad_norm": 1.4659582389098327, + "learning_rate": 1.2613866096847423e-05, + "loss": 0.5971624255180359, + "step": 3045 + }, + { + "epoch": 0.8907735048983769, + "grad_norm": 1.5641010174504344, + "learning_rate": 1.260919916698288e-05, + "loss": 0.6586427092552185, + 
"step": 3046 + }, + { + "epoch": 0.8910659453136424, + "grad_norm": 1.8045032510726307, + "learning_rate": 1.2604531627233895e-05, + "loss": 0.7059915661811829, + "step": 3047 + }, + { + "epoch": 0.8913583857289077, + "grad_norm": 1.3406441666811264, + "learning_rate": 1.2599863478691483e-05, + "loss": 0.582252025604248, + "step": 3048 + }, + { + "epoch": 0.8916508261441731, + "grad_norm": 1.2760858553291834, + "learning_rate": 1.2595194722446786e-05, + "loss": 0.6901981830596924, + "step": 3049 + }, + { + "epoch": 0.8919432665594386, + "grad_norm": 1.5789638647855007, + "learning_rate": 1.2590525359591101e-05, + "loss": 0.7462388873100281, + "step": 3050 + }, + { + "epoch": 0.8922357069747039, + "grad_norm": 1.1893369289763132, + "learning_rate": 1.2585855391215866e-05, + "loss": 0.4963245391845703, + "step": 3051 + }, + { + "epoch": 0.8925281473899693, + "grad_norm": 1.427293357699651, + "learning_rate": 1.2581184818412655e-05, + "loss": 0.6408337354660034, + "step": 3052 + }, + { + "epoch": 0.8928205878052347, + "grad_norm": 1.3357664905418998, + "learning_rate": 1.257651364227319e-05, + "loss": 0.44528326392173767, + "step": 3053 + }, + { + "epoch": 0.8931130282205001, + "grad_norm": 1.4527206031665332, + "learning_rate": 1.2571841863889322e-05, + "loss": 0.4595017731189728, + "step": 3054 + }, + { + "epoch": 0.8934054686357654, + "grad_norm": 1.435143014894245, + "learning_rate": 1.2567169484353057e-05, + "loss": 0.6934910416603088, + "step": 3055 + }, + { + "epoch": 0.8936979090510309, + "grad_norm": 1.3543177360296097, + "learning_rate": 1.2562496504756535e-05, + "loss": 0.6392845511436462, + "step": 3056 + }, + { + "epoch": 0.8939903494662963, + "grad_norm": 1.3638361282130094, + "learning_rate": 1.255782292619203e-05, + "loss": 0.5506458878517151, + "step": 3057 + }, + { + "epoch": 0.8942827898815616, + "grad_norm": 1.3861859212756857, + "learning_rate": 1.255314874975197e-05, + "loss": 0.5871223211288452, + "step": 3058 + }, + { + "epoch": 
0.8945752302968271, + "grad_norm": 1.4446737131271559, + "learning_rate": 1.254847397652892e-05, + "loss": 0.603033185005188, + "step": 3059 + }, + { + "epoch": 0.8948676707120924, + "grad_norm": 1.4764688506929942, + "learning_rate": 1.2543798607615566e-05, + "loss": 0.667452335357666, + "step": 3060 + }, + { + "epoch": 0.8951601111273578, + "grad_norm": 1.5052245195755742, + "learning_rate": 1.2539122644104755e-05, + "loss": 0.6264449954032898, + "step": 3061 + }, + { + "epoch": 0.8954525515426232, + "grad_norm": 1.2694525054193362, + "learning_rate": 1.2534446087089465e-05, + "loss": 0.6085609793663025, + "step": 3062 + }, + { + "epoch": 0.8957449919578886, + "grad_norm": 1.5027824768205942, + "learning_rate": 1.252976893766281e-05, + "loss": 0.6414828896522522, + "step": 3063 + }, + { + "epoch": 0.8960374323731539, + "grad_norm": 1.5067492390612103, + "learning_rate": 1.2525091196918049e-05, + "loss": 0.714614987373352, + "step": 3064 + }, + { + "epoch": 0.8963298727884194, + "grad_norm": 1.4473594871396505, + "learning_rate": 1.2520412865948574e-05, + "loss": 0.5966176986694336, + "step": 3065 + }, + { + "epoch": 0.8966223132036848, + "grad_norm": 1.234582474772498, + "learning_rate": 1.2515733945847914e-05, + "loss": 0.5162957906723022, + "step": 3066 + }, + { + "epoch": 0.8969147536189501, + "grad_norm": 1.5378382727824902, + "learning_rate": 1.2511054437709743e-05, + "loss": 0.6460821628570557, + "step": 3067 + }, + { + "epoch": 0.8972071940342156, + "grad_norm": 1.3526579806372556, + "learning_rate": 1.2506374342627861e-05, + "loss": 0.6802507638931274, + "step": 3068 + }, + { + "epoch": 0.8974996344494809, + "grad_norm": 1.4306769896677902, + "learning_rate": 1.2501693661696218e-05, + "loss": 0.5966957807540894, + "step": 3069 + }, + { + "epoch": 0.8977920748647463, + "grad_norm": 1.336293797847081, + "learning_rate": 1.2497012396008893e-05, + "loss": 0.607227087020874, + "step": 3070 + }, + { + "epoch": 0.8980845152800117, + "grad_norm": 
1.360686606627987, + "learning_rate": 1.2492330546660098e-05, + "loss": 0.6544637084007263, + "step": 3071 + }, + { + "epoch": 0.8983769556952771, + "grad_norm": 1.410133865972111, + "learning_rate": 1.2487648114744196e-05, + "loss": 0.5896593332290649, + "step": 3072 + }, + { + "epoch": 0.8986693961105425, + "grad_norm": 1.296908458370691, + "learning_rate": 1.248296510135567e-05, + "loss": 0.5710231065750122, + "step": 3073 + }, + { + "epoch": 0.8989618365258079, + "grad_norm": 1.2057046094411794, + "learning_rate": 1.2478281507589147e-05, + "loss": 0.5918926000595093, + "step": 3074 + }, + { + "epoch": 0.8992542769410733, + "grad_norm": 1.5306817529094334, + "learning_rate": 1.2473597334539392e-05, + "loss": 0.681663453578949, + "step": 3075 + }, + { + "epoch": 0.8995467173563386, + "grad_norm": 1.2671727964507529, + "learning_rate": 1.24689125833013e-05, + "loss": 0.5229436159133911, + "step": 3076 + }, + { + "epoch": 0.8998391577716041, + "grad_norm": 1.5769374861363958, + "learning_rate": 1.2464227254969903e-05, + "loss": 0.7165119051933289, + "step": 3077 + }, + { + "epoch": 0.9001315981868694, + "grad_norm": 1.2324966791017462, + "learning_rate": 1.2459541350640368e-05, + "loss": 0.514594554901123, + "step": 3078 + }, + { + "epoch": 0.9004240386021348, + "grad_norm": 1.4144268048636097, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.6173784732818604, + "step": 3079 + }, + { + "epoch": 0.9007164790174002, + "grad_norm": 1.6555744107314199, + "learning_rate": 1.245016781836822e-05, + "loss": 0.6796407103538513, + "step": 3080 + }, + { + "epoch": 0.9010089194326656, + "grad_norm": 1.3666754181554102, + "learning_rate": 1.2445480192616619e-05, + "loss": 0.6901683807373047, + "step": 3081 + }, + { + "epoch": 0.901301359847931, + "grad_norm": 1.295839204252469, + "learning_rate": 1.2440791995248886e-05, + "loss": 0.6215920448303223, + "step": 3082 + }, + { + "epoch": 0.9015938002631964, + "grad_norm": 1.29381925555321, + "learning_rate": 
1.243610322736087e-05, + "loss": 0.6109690070152283, + "step": 3083 + }, + { + "epoch": 0.9018862406784618, + "grad_norm": 1.3751453546430485, + "learning_rate": 1.2431413890048534e-05, + "loss": 0.5273362398147583, + "step": 3084 + }, + { + "epoch": 0.9021786810937271, + "grad_norm": 1.197511083408015, + "learning_rate": 1.2426723984407982e-05, + "loss": 0.5219408273696899, + "step": 3085 + }, + { + "epoch": 0.9024711215089926, + "grad_norm": 1.4389803986869047, + "learning_rate": 1.2422033511535458e-05, + "loss": 0.6894690990447998, + "step": 3086 + }, + { + "epoch": 0.9027635619242579, + "grad_norm": 1.2949596320128054, + "learning_rate": 1.2417342472527325e-05, + "loss": 0.6135656833648682, + "step": 3087 + }, + { + "epoch": 0.9030560023395233, + "grad_norm": 1.4997841327771624, + "learning_rate": 1.2412650868480088e-05, + "loss": 0.595108151435852, + "step": 3088 + }, + { + "epoch": 0.9033484427547888, + "grad_norm": 1.4068106482758378, + "learning_rate": 1.2407958700490376e-05, + "loss": 0.6445261240005493, + "step": 3089 + }, + { + "epoch": 0.9036408831700541, + "grad_norm": 1.1391728287440939, + "learning_rate": 1.240326596965496e-05, + "loss": 0.5601890087127686, + "step": 3090 + }, + { + "epoch": 0.9039333235853195, + "grad_norm": 1.4556896662499954, + "learning_rate": 1.239857267707074e-05, + "loss": 0.6229134798049927, + "step": 3091 + }, + { + "epoch": 0.9042257640005849, + "grad_norm": 1.3633245090329542, + "learning_rate": 1.2393878823834737e-05, + "loss": 0.5769803524017334, + "step": 3092 + }, + { + "epoch": 0.9045182044158503, + "grad_norm": 1.5373386649577192, + "learning_rate": 1.2389184411044113e-05, + "loss": 0.8101233243942261, + "step": 3093 + }, + { + "epoch": 0.9048106448311156, + "grad_norm": 1.3507156228218853, + "learning_rate": 1.2384489439796159e-05, + "loss": 0.5562945604324341, + "step": 3094 + }, + { + "epoch": 0.9051030852463811, + "grad_norm": 1.6942487879562902, + "learning_rate": 1.2379793911188299e-05, + "loss": 
0.5764975547790527, + "step": 3095 + }, + { + "epoch": 0.9053955256616465, + "grad_norm": 1.4280019855873591, + "learning_rate": 1.2375097826318079e-05, + "loss": 0.5951659083366394, + "step": 3096 + }, + { + "epoch": 0.9056879660769118, + "grad_norm": 1.3804272066554735, + "learning_rate": 1.2370401186283186e-05, + "loss": 0.5550940632820129, + "step": 3097 + }, + { + "epoch": 0.9059804064921773, + "grad_norm": 1.5012418323017303, + "learning_rate": 1.2365703992181425e-05, + "loss": 0.5423737168312073, + "step": 3098 + }, + { + "epoch": 0.9062728469074426, + "grad_norm": 1.3277873552974655, + "learning_rate": 1.236100624511074e-05, + "loss": 0.633366048336029, + "step": 3099 + }, + { + "epoch": 0.906565287322708, + "grad_norm": 1.2113954677804317, + "learning_rate": 1.2356307946169202e-05, + "loss": 0.6067361831665039, + "step": 3100 + }, + { + "epoch": 0.9068577277379734, + "grad_norm": 1.1800329005672614, + "learning_rate": 1.2351609096455006e-05, + "loss": 0.6039519309997559, + "step": 3101 + }, + { + "epoch": 0.9071501681532388, + "grad_norm": 1.3373962705942997, + "learning_rate": 1.2346909697066486e-05, + "loss": 0.5643757581710815, + "step": 3102 + }, + { + "epoch": 0.9074426085685041, + "grad_norm": 1.4963223668806274, + "learning_rate": 1.2342209749102088e-05, + "loss": 0.5406394004821777, + "step": 3103 + }, + { + "epoch": 0.9077350489837696, + "grad_norm": 1.3377232980381308, + "learning_rate": 1.2337509253660404e-05, + "loss": 0.5845915079116821, + "step": 3104 + }, + { + "epoch": 0.908027489399035, + "grad_norm": 1.614536146442758, + "learning_rate": 1.2332808211840147e-05, + "loss": 0.6912981271743774, + "step": 3105 + }, + { + "epoch": 0.9083199298143003, + "grad_norm": 1.2433178855630291, + "learning_rate": 1.2328106624740151e-05, + "loss": 0.5571672320365906, + "step": 3106 + }, + { + "epoch": 0.9086123702295658, + "grad_norm": 1.0866011599268561, + "learning_rate": 1.2323404493459386e-05, + "loss": 0.5219087600708008, + "step": 3107 + }, + { + 
"epoch": 0.9089048106448311, + "grad_norm": 1.3110052749572634, + "learning_rate": 1.2318701819096952e-05, + "loss": 0.5780971050262451, + "step": 3108 + }, + { + "epoch": 0.9091972510600965, + "grad_norm": 1.4311943893173962, + "learning_rate": 1.2313998602752063e-05, + "loss": 0.6206589937210083, + "step": 3109 + }, + { + "epoch": 0.9094896914753618, + "grad_norm": 1.4768884476442792, + "learning_rate": 1.2309294845524068e-05, + "loss": 0.6063584089279175, + "step": 3110 + }, + { + "epoch": 0.9097821318906273, + "grad_norm": 1.7547035202334638, + "learning_rate": 1.2304590548512445e-05, + "loss": 0.5733555555343628, + "step": 3111 + }, + { + "epoch": 0.9100745723058927, + "grad_norm": 1.0786362412869268, + "learning_rate": 1.2299885712816792e-05, + "loss": 0.5227848887443542, + "step": 3112 + }, + { + "epoch": 0.910367012721158, + "grad_norm": 1.3268713618037162, + "learning_rate": 1.2295180339536839e-05, + "loss": 0.6357969045639038, + "step": 3113 + }, + { + "epoch": 0.9106594531364235, + "grad_norm": 1.4243975329678797, + "learning_rate": 1.2290474429772438e-05, + "loss": 0.6194056272506714, + "step": 3114 + }, + { + "epoch": 0.9109518935516888, + "grad_norm": 1.3151715542581663, + "learning_rate": 1.2285767984623563e-05, + "loss": 0.5274733304977417, + "step": 3115 + }, + { + "epoch": 0.9112443339669543, + "grad_norm": 1.370068266036648, + "learning_rate": 1.228106100519032e-05, + "loss": 0.5612698197364807, + "step": 3116 + }, + { + "epoch": 0.9115367743822196, + "grad_norm": 1.578530779654035, + "learning_rate": 1.2276353492572937e-05, + "loss": 0.6261074542999268, + "step": 3117 + }, + { + "epoch": 0.911829214797485, + "grad_norm": 1.2011662273206838, + "learning_rate": 1.2271645447871764e-05, + "loss": 0.6407681703567505, + "step": 3118 + }, + { + "epoch": 0.9121216552127503, + "grad_norm": 1.869370443317622, + "learning_rate": 1.226693687218728e-05, + "loss": 0.7862328290939331, + "step": 3119 + }, + { + "epoch": 0.9124140956280158, + "grad_norm": 
1.4175623746202768, + "learning_rate": 1.2262227766620083e-05, + "loss": 0.5079205632209778, + "step": 3120 + }, + { + "epoch": 0.9127065360432812, + "grad_norm": 1.5666620241066453, + "learning_rate": 1.2257518132270903e-05, + "loss": 0.6074210405349731, + "step": 3121 + }, + { + "epoch": 0.9129989764585466, + "grad_norm": 1.5222891825114737, + "learning_rate": 1.2252807970240582e-05, + "loss": 0.642460823059082, + "step": 3122 + }, + { + "epoch": 0.913291416873812, + "grad_norm": 1.5105961127505823, + "learning_rate": 1.22480972816301e-05, + "loss": 0.5996612310409546, + "step": 3123 + }, + { + "epoch": 0.9135838572890773, + "grad_norm": 1.4191755584361432, + "learning_rate": 1.2243386067540548e-05, + "loss": 0.5629523992538452, + "step": 3124 + }, + { + "epoch": 0.9138762977043428, + "grad_norm": 1.488297008451051, + "learning_rate": 1.223867432907314e-05, + "loss": 0.5794960260391235, + "step": 3125 + }, + { + "epoch": 0.9141687381196081, + "grad_norm": 1.4839380471480481, + "learning_rate": 1.2233962067329217e-05, + "loss": 0.6665213108062744, + "step": 3126 + }, + { + "epoch": 0.9144611785348735, + "grad_norm": 1.7069185609011637, + "learning_rate": 1.2229249283410245e-05, + "loss": 0.6834249496459961, + "step": 3127 + }, + { + "epoch": 0.914753618950139, + "grad_norm": 1.472483487554638, + "learning_rate": 1.2224535978417809e-05, + "loss": 0.5709845423698425, + "step": 3128 + }, + { + "epoch": 0.9150460593654043, + "grad_norm": 1.3783113695609808, + "learning_rate": 1.2219822153453613e-05, + "loss": 0.5455344915390015, + "step": 3129 + }, + { + "epoch": 0.9153384997806697, + "grad_norm": 1.5138708664001599, + "learning_rate": 1.2215107809619483e-05, + "loss": 0.6291406154632568, + "step": 3130 + }, + { + "epoch": 0.915630940195935, + "grad_norm": 1.340686035335307, + "learning_rate": 1.2210392948017371e-05, + "loss": 0.5953069925308228, + "step": 3131 + }, + { + "epoch": 0.9159233806112005, + "grad_norm": 1.3390197673162056, + "learning_rate": 
1.2205677569749347e-05, + "loss": 0.6958901882171631, + "step": 3132 + }, + { + "epoch": 0.9162158210264658, + "grad_norm": 2.251590691230911, + "learning_rate": 1.2200961675917605e-05, + "loss": 0.5867033004760742, + "step": 3133 + }, + { + "epoch": 0.9165082614417313, + "grad_norm": 1.2167957981489814, + "learning_rate": 1.2196245267624449e-05, + "loss": 0.5364042520523071, + "step": 3134 + }, + { + "epoch": 0.9168007018569967, + "grad_norm": 1.0997310314063415, + "learning_rate": 1.2191528345972318e-05, + "loss": 0.5141438841819763, + "step": 3135 + }, + { + "epoch": 0.917093142272262, + "grad_norm": 1.1435709173541644, + "learning_rate": 1.218681091206376e-05, + "loss": 0.5024605393409729, + "step": 3136 + }, + { + "epoch": 0.9173855826875275, + "grad_norm": 1.4583614763595478, + "learning_rate": 1.2182092967001447e-05, + "loss": 0.567114531993866, + "step": 3137 + }, + { + "epoch": 0.9176780231027928, + "grad_norm": 1.4993671644221835, + "learning_rate": 1.217737451188817e-05, + "loss": 0.7224113941192627, + "step": 3138 + }, + { + "epoch": 0.9179704635180582, + "grad_norm": 1.368376715547139, + "learning_rate": 1.2172655547826839e-05, + "loss": 0.6033936738967896, + "step": 3139 + }, + { + "epoch": 0.9182629039333235, + "grad_norm": 1.4327847369216065, + "learning_rate": 1.2167936075920486e-05, + "loss": 0.5555745363235474, + "step": 3140 + }, + { + "epoch": 0.918555344348589, + "grad_norm": 1.1757378939927343, + "learning_rate": 1.2163216097272255e-05, + "loss": 0.5939170718193054, + "step": 3141 + }, + { + "epoch": 0.9188477847638543, + "grad_norm": 1.49535441688526, + "learning_rate": 1.2158495612985415e-05, + "loss": 0.7141895294189453, + "step": 3142 + }, + { + "epoch": 0.9191402251791198, + "grad_norm": 1.5558405168210478, + "learning_rate": 1.2153774624163345e-05, + "loss": 0.585646390914917, + "step": 3143 + }, + { + "epoch": 0.9194326655943852, + "grad_norm": 1.114182805953909, + "learning_rate": 1.2149053131909556e-05, + "loss": 0.5378825068473816, 
+ "step": 3144 + }, + { + "epoch": 0.9197251060096505, + "grad_norm": 1.383902731385194, + "learning_rate": 1.2144331137327663e-05, + "loss": 0.569821834564209, + "step": 3145 + }, + { + "epoch": 0.920017546424916, + "grad_norm": 1.6457891792908532, + "learning_rate": 1.2139608641521406e-05, + "loss": 0.6101462244987488, + "step": 3146 + }, + { + "epoch": 0.9203099868401813, + "grad_norm": 1.2016357640033675, + "learning_rate": 1.2134885645594637e-05, + "loss": 0.5481746792793274, + "step": 3147 + }, + { + "epoch": 0.9206024272554467, + "grad_norm": 1.538402380383642, + "learning_rate": 1.2130162150651326e-05, + "loss": 0.7075197696685791, + "step": 3148 + }, + { + "epoch": 0.920894867670712, + "grad_norm": 1.7217246005422928, + "learning_rate": 1.2125438157795567e-05, + "loss": 0.6375464200973511, + "step": 3149 + }, + { + "epoch": 0.9211873080859775, + "grad_norm": 1.3850395600859229, + "learning_rate": 1.2120713668131558e-05, + "loss": 0.6954327821731567, + "step": 3150 + }, + { + "epoch": 0.9214797485012429, + "grad_norm": 1.3658544095341296, + "learning_rate": 1.2115988682763626e-05, + "loss": 0.5855636596679688, + "step": 3151 + }, + { + "epoch": 0.9217721889165083, + "grad_norm": 1.4751760026778278, + "learning_rate": 1.2111263202796206e-05, + "loss": 0.6056143641471863, + "step": 3152 + }, + { + "epoch": 0.9220646293317737, + "grad_norm": 1.551741495670365, + "learning_rate": 1.2106537229333848e-05, + "loss": 0.7918239831924438, + "step": 3153 + }, + { + "epoch": 0.922357069747039, + "grad_norm": 1.7033588700340108, + "learning_rate": 1.2101810763481218e-05, + "loss": 0.7772212028503418, + "step": 3154 + }, + { + "epoch": 0.9226495101623045, + "grad_norm": 1.511966147005096, + "learning_rate": 1.2097083806343104e-05, + "loss": 0.6332443356513977, + "step": 3155 + }, + { + "epoch": 0.9229419505775698, + "grad_norm": 1.358434184305942, + "learning_rate": 1.2092356359024399e-05, + "loss": 0.6254568099975586, + "step": 3156 + }, + { + "epoch": 
0.9232343909928352, + "grad_norm": 1.5630990314712985, + "learning_rate": 1.208762842263012e-05, + "loss": 0.6178697347640991, + "step": 3157 + }, + { + "epoch": 0.9235268314081005, + "grad_norm": 1.1998616171531247, + "learning_rate": 1.2082899998265387e-05, + "loss": 0.5049355030059814, + "step": 3158 + }, + { + "epoch": 0.923819271823366, + "grad_norm": 1.4513160919924062, + "learning_rate": 1.2078171087035444e-05, + "loss": 0.7013234496116638, + "step": 3159 + }, + { + "epoch": 0.9241117122386314, + "grad_norm": 1.4119575222677514, + "learning_rate": 1.2073441690045647e-05, + "loss": 0.576643705368042, + "step": 3160 + }, + { + "epoch": 0.9244041526538967, + "grad_norm": 1.2307321356514476, + "learning_rate": 1.2068711808401459e-05, + "loss": 0.5163617134094238, + "step": 3161 + }, + { + "epoch": 0.9246965930691622, + "grad_norm": 1.39625806011197, + "learning_rate": 1.2063981443208466e-05, + "loss": 0.571370005607605, + "step": 3162 + }, + { + "epoch": 0.9249890334844275, + "grad_norm": 1.3814954844513003, + "learning_rate": 1.2059250595572358e-05, + "loss": 0.7424927949905396, + "step": 3163 + }, + { + "epoch": 0.925281473899693, + "grad_norm": 1.398481393831642, + "learning_rate": 1.2054519266598946e-05, + "loss": 0.6661131381988525, + "step": 3164 + }, + { + "epoch": 0.9255739143149583, + "grad_norm": 1.382448951979987, + "learning_rate": 1.2049787457394145e-05, + "loss": 0.6416351795196533, + "step": 3165 + }, + { + "epoch": 0.9258663547302237, + "grad_norm": 1.5012000035545232, + "learning_rate": 1.2045055169063988e-05, + "loss": 0.6708394289016724, + "step": 3166 + }, + { + "epoch": 0.9261587951454892, + "grad_norm": 1.5269915566780659, + "learning_rate": 1.2040322402714624e-05, + "loss": 0.536340057849884, + "step": 3167 + }, + { + "epoch": 0.9264512355607545, + "grad_norm": 1.4556897812811458, + "learning_rate": 1.20355891594523e-05, + "loss": 0.5621340274810791, + "step": 3168 + }, + { + "epoch": 0.9267436759760199, + "grad_norm": 1.274628172323648, + 
"learning_rate": 1.2030855440383387e-05, + "loss": 0.5972496271133423, + "step": 3169 + }, + { + "epoch": 0.9270361163912852, + "grad_norm": 1.4230845419048714, + "learning_rate": 1.2026121246614362e-05, + "loss": 0.567542314529419, + "step": 3170 + }, + { + "epoch": 0.9273285568065507, + "grad_norm": 1.092340586033623, + "learning_rate": 1.2021386579251814e-05, + "loss": 0.5487483739852905, + "step": 3171 + }, + { + "epoch": 0.927620997221816, + "grad_norm": 1.6219751059797927, + "learning_rate": 1.2016651439402445e-05, + "loss": 0.7988057136535645, + "step": 3172 + }, + { + "epoch": 0.9279134376370815, + "grad_norm": 1.2231171520157942, + "learning_rate": 1.2011915828173066e-05, + "loss": 0.5333850979804993, + "step": 3173 + }, + { + "epoch": 0.9282058780523469, + "grad_norm": 1.1146388373256622, + "learning_rate": 1.2007179746670592e-05, + "loss": 0.5640296936035156, + "step": 3174 + }, + { + "epoch": 0.9284983184676122, + "grad_norm": 1.7918188640848236, + "learning_rate": 1.2002443196002057e-05, + "loss": 0.7154449820518494, + "step": 3175 + }, + { + "epoch": 0.9287907588828777, + "grad_norm": 1.533684329230312, + "learning_rate": 1.1997706177274597e-05, + "loss": 0.8660446405410767, + "step": 3176 + }, + { + "epoch": 0.929083199298143, + "grad_norm": 1.498753630747748, + "learning_rate": 1.1992968691595465e-05, + "loss": 0.601166307926178, + "step": 3177 + }, + { + "epoch": 0.9293756397134084, + "grad_norm": 1.4563708289231845, + "learning_rate": 1.1988230740072022e-05, + "loss": 0.6197638511657715, + "step": 3178 + }, + { + "epoch": 0.9296680801286737, + "grad_norm": 1.2218794629813654, + "learning_rate": 1.198349232381173e-05, + "loss": 0.5716423988342285, + "step": 3179 + }, + { + "epoch": 0.9299605205439392, + "grad_norm": 1.1601969521725652, + "learning_rate": 1.197875344392217e-05, + "loss": 0.4319373071193695, + "step": 3180 + }, + { + "epoch": 0.9302529609592045, + "grad_norm": 1.3226372570662766, + "learning_rate": 1.1974014101511018e-05, + "loss": 
0.5299028158187866, + "step": 3181 + }, + { + "epoch": 0.93054540137447, + "grad_norm": 1.4024951088839022, + "learning_rate": 1.1969274297686075e-05, + "loss": 0.7085509300231934, + "step": 3182 + }, + { + "epoch": 0.9308378417897354, + "grad_norm": 1.3237854936063287, + "learning_rate": 1.1964534033555237e-05, + "loss": 0.6025770902633667, + "step": 3183 + }, + { + "epoch": 0.9311302822050007, + "grad_norm": 1.2585066067859425, + "learning_rate": 1.1959793310226518e-05, + "loss": 0.5624677538871765, + "step": 3184 + }, + { + "epoch": 0.9314227226202662, + "grad_norm": 1.3607236544497474, + "learning_rate": 1.1955052128808025e-05, + "loss": 0.602645754814148, + "step": 3185 + }, + { + "epoch": 0.9317151630355315, + "grad_norm": 1.5196424442530971, + "learning_rate": 1.1950310490407984e-05, + "loss": 0.6495026350021362, + "step": 3186 + }, + { + "epoch": 0.9320076034507969, + "grad_norm": 1.2037819566859902, + "learning_rate": 1.1945568396134721e-05, + "loss": 0.50370192527771, + "step": 3187 + }, + { + "epoch": 0.9323000438660622, + "grad_norm": 1.4578860564520788, + "learning_rate": 1.1940825847096677e-05, + "loss": 0.5717373490333557, + "step": 3188 + }, + { + "epoch": 0.9325924842813277, + "grad_norm": 1.2463647398252022, + "learning_rate": 1.1936082844402395e-05, + "loss": 0.5863519310951233, + "step": 3189 + }, + { + "epoch": 0.9328849246965931, + "grad_norm": 1.3634372027202455, + "learning_rate": 1.1931339389160516e-05, + "loss": 0.6607284545898438, + "step": 3190 + }, + { + "epoch": 0.9331773651118584, + "grad_norm": 1.2667041686104175, + "learning_rate": 1.1926595482479799e-05, + "loss": 0.5578058958053589, + "step": 3191 + }, + { + "epoch": 0.9334698055271239, + "grad_norm": 1.577459199872034, + "learning_rate": 1.19218511254691e-05, + "loss": 0.6839171648025513, + "step": 3192 + }, + { + "epoch": 0.9337622459423892, + "grad_norm": 1.4197717809462, + "learning_rate": 1.1917106319237386e-05, + "loss": 0.5071141719818115, + "step": 3193 + }, + { + "epoch": 
0.9340546863576547, + "grad_norm": 1.3302825340941604, + "learning_rate": 1.1912361064893726e-05, + "loss": 0.5112525820732117, + "step": 3194 + }, + { + "epoch": 0.93434712677292, + "grad_norm": 1.3701575961238917, + "learning_rate": 1.1907615363547299e-05, + "loss": 0.5661873817443848, + "step": 3195 + }, + { + "epoch": 0.9346395671881854, + "grad_norm": 1.3078991902724904, + "learning_rate": 1.190286921630737e-05, + "loss": 0.5520195364952087, + "step": 3196 + }, + { + "epoch": 0.9349320076034507, + "grad_norm": 1.1923433518822224, + "learning_rate": 1.1898122624283337e-05, + "loss": 0.560089111328125, + "step": 3197 + }, + { + "epoch": 0.9352244480187162, + "grad_norm": 1.3393482355065873, + "learning_rate": 1.1893375588584681e-05, + "loss": 0.6431207656860352, + "step": 3198 + }, + { + "epoch": 0.9355168884339816, + "grad_norm": 1.6025933525200546, + "learning_rate": 1.1888628110320995e-05, + "loss": 0.7365666031837463, + "step": 3199 + }, + { + "epoch": 0.935809328849247, + "grad_norm": 1.5181397488734587, + "learning_rate": 1.1883880190601968e-05, + "loss": 0.5455417633056641, + "step": 3200 + }, + { + "epoch": 0.9361017692645124, + "grad_norm": 1.2648151177686433, + "learning_rate": 1.1879131830537403e-05, + "loss": 0.5749938488006592, + "step": 3201 + }, + { + "epoch": 0.9363942096797777, + "grad_norm": 1.4774526931967815, + "learning_rate": 1.1874383031237196e-05, + "loss": 0.588424563407898, + "step": 3202 + }, + { + "epoch": 0.9366866500950431, + "grad_norm": 1.7045519601542285, + "learning_rate": 1.1869633793811352e-05, + "loss": 0.7039792537689209, + "step": 3203 + }, + { + "epoch": 0.9369790905103085, + "grad_norm": 1.3777530310932211, + "learning_rate": 1.1864884119369977e-05, + "loss": 0.5972777009010315, + "step": 3204 + }, + { + "epoch": 0.9372715309255739, + "grad_norm": 1.5348242749242778, + "learning_rate": 1.1860134009023281e-05, + "loss": 0.6510647535324097, + "step": 3205 + }, + { + "epoch": 0.9375639713408394, + "grad_norm": 
1.3174058455781212, + "learning_rate": 1.1855383463881566e-05, + "loss": 0.606874406337738, + "step": 3206 + }, + { + "epoch": 0.9378564117561047, + "grad_norm": 1.4675285988638056, + "learning_rate": 1.1850632485055247e-05, + "loss": 0.5527048110961914, + "step": 3207 + }, + { + "epoch": 0.9381488521713701, + "grad_norm": 1.3531723389548285, + "learning_rate": 1.1845881073654838e-05, + "loss": 0.6297399997711182, + "step": 3208 + }, + { + "epoch": 0.9384412925866354, + "grad_norm": 1.4561464002236073, + "learning_rate": 1.184112923079095e-05, + "loss": 0.5852634310722351, + "step": 3209 + }, + { + "epoch": 0.9387337330019009, + "grad_norm": 1.276124242645333, + "learning_rate": 1.1836376957574301e-05, + "loss": 0.5648211240768433, + "step": 3210 + }, + { + "epoch": 0.9390261734171662, + "grad_norm": 1.4542765956455581, + "learning_rate": 1.1831624255115703e-05, + "loss": 0.5547506213188171, + "step": 3211 + }, + { + "epoch": 0.9393186138324316, + "grad_norm": 1.3882723904405088, + "learning_rate": 1.1826871124526072e-05, + "loss": 0.5927829146385193, + "step": 3212 + }, + { + "epoch": 0.9396110542476971, + "grad_norm": 1.4870159815211654, + "learning_rate": 1.182211756691642e-05, + "loss": 0.5705278515815735, + "step": 3213 + }, + { + "epoch": 0.9399034946629624, + "grad_norm": 1.3481561389317809, + "learning_rate": 1.1817363583397868e-05, + "loss": 0.547038197517395, + "step": 3214 + }, + { + "epoch": 0.9401959350782279, + "grad_norm": 1.6799026497887648, + "learning_rate": 1.1812609175081626e-05, + "loss": 0.6136760115623474, + "step": 3215 + }, + { + "epoch": 0.9404883754934932, + "grad_norm": 1.3697737055687615, + "learning_rate": 1.1807854343079015e-05, + "loss": 0.5784845352172852, + "step": 3216 + }, + { + "epoch": 0.9407808159087586, + "grad_norm": 1.306268521565337, + "learning_rate": 1.1803099088501439e-05, + "loss": 0.6629599332809448, + "step": 3217 + }, + { + "epoch": 0.9410732563240239, + "grad_norm": 1.3560413521315915, + "learning_rate": 
1.1798343412460416e-05, + "loss": 0.6058052778244019, + "step": 3218 + }, + { + "epoch": 0.9413656967392894, + "grad_norm": 1.236587656133179, + "learning_rate": 1.1793587316067552e-05, + "loss": 0.5689725875854492, + "step": 3219 + }, + { + "epoch": 0.9416581371545547, + "grad_norm": 1.2722209400014248, + "learning_rate": 1.1788830800434561e-05, + "loss": 0.5718861818313599, + "step": 3220 + }, + { + "epoch": 0.9419505775698201, + "grad_norm": 1.4517063699959183, + "learning_rate": 1.1784073866673245e-05, + "loss": 0.6061254739761353, + "step": 3221 + }, + { + "epoch": 0.9422430179850856, + "grad_norm": 1.3732176542504997, + "learning_rate": 1.1779316515895511e-05, + "loss": 0.6805517077445984, + "step": 3222 + }, + { + "epoch": 0.9425354584003509, + "grad_norm": 1.3828844754339646, + "learning_rate": 1.1774558749213358e-05, + "loss": 0.5553466081619263, + "step": 3223 + }, + { + "epoch": 0.9428278988156163, + "grad_norm": 1.2173236944216692, + "learning_rate": 1.176980056773889e-05, + "loss": 0.6408798694610596, + "step": 3224 + }, + { + "epoch": 0.9431203392308817, + "grad_norm": 1.222815565053331, + "learning_rate": 1.1765041972584296e-05, + "loss": 0.5269505381584167, + "step": 3225 + }, + { + "epoch": 0.9434127796461471, + "grad_norm": 1.424391391794669, + "learning_rate": 1.1760282964861873e-05, + "loss": 0.682415246963501, + "step": 3226 + }, + { + "epoch": 0.9437052200614124, + "grad_norm": 1.4623421356805024, + "learning_rate": 1.1755523545684016e-05, + "loss": 0.507567286491394, + "step": 3227 + }, + { + "epoch": 0.9439976604766779, + "grad_norm": 1.4192334343942388, + "learning_rate": 1.1750763716163199e-05, + "loss": 0.6977763175964355, + "step": 3228 + }, + { + "epoch": 0.9442901008919433, + "grad_norm": 1.3754010773945908, + "learning_rate": 1.1746003477412007e-05, + "loss": 0.5626407861709595, + "step": 3229 + }, + { + "epoch": 0.9445825413072086, + "grad_norm": 1.537446067568307, + "learning_rate": 1.1741242830543118e-05, + "loss": 
0.5280323624610901, + "step": 3230 + }, + { + "epoch": 0.9448749817224741, + "grad_norm": 1.564549447099706, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.6236885190010071, + "step": 3231 + }, + { + "epoch": 0.9451674221377394, + "grad_norm": 1.2957140073878561, + "learning_rate": 1.1731720316903435e-05, + "loss": 0.5250823497772217, + "step": 3232 + }, + { + "epoch": 0.9454598625530048, + "grad_norm": 1.3562245135276858, + "learning_rate": 1.1726958452358472e-05, + "loss": 0.5885770320892334, + "step": 3233 + }, + { + "epoch": 0.9457523029682702, + "grad_norm": 1.5466392002562799, + "learning_rate": 1.1722196184147467e-05, + "loss": 0.7812498807907104, + "step": 3234 + }, + { + "epoch": 0.9460447433835356, + "grad_norm": 2.1182720670568678, + "learning_rate": 1.1717433513383575e-05, + "loss": 0.6763796210289001, + "step": 3235 + }, + { + "epoch": 0.9463371837988009, + "grad_norm": 1.4130641179603503, + "learning_rate": 1.1712670441180045e-05, + "loss": 0.5983982682228088, + "step": 3236 + }, + { + "epoch": 0.9466296242140664, + "grad_norm": 1.4075974845813908, + "learning_rate": 1.1707906968650214e-05, + "loss": 0.6665002107620239, + "step": 3237 + }, + { + "epoch": 0.9469220646293318, + "grad_norm": 1.3129047594602676, + "learning_rate": 1.1703143096907507e-05, + "loss": 0.7676652669906616, + "step": 3238 + }, + { + "epoch": 0.9472145050445971, + "grad_norm": 1.552106023331421, + "learning_rate": 1.1698378827065461e-05, + "loss": 0.710014820098877, + "step": 3239 + }, + { + "epoch": 0.9475069454598626, + "grad_norm": 1.3709978679968329, + "learning_rate": 1.169361416023769e-05, + "loss": 0.5800554752349854, + "step": 3240 + }, + { + "epoch": 0.9477993858751279, + "grad_norm": 1.2790925568283578, + "learning_rate": 1.1688849097537904e-05, + "loss": 0.602012574672699, + "step": 3241 + }, + { + "epoch": 0.9480918262903933, + "grad_norm": 1.4089569844293444, + "learning_rate": 1.1684083640079912e-05, + "loss": 0.4943910241127014, + "step": 3242 + }, + { + 
"epoch": 0.9483842667056587, + "grad_norm": 1.3173293444454082, + "learning_rate": 1.1679317788977609e-05, + "loss": 0.49094298481941223, + "step": 3243 + }, + { + "epoch": 0.9486767071209241, + "grad_norm": 1.1684708220820899, + "learning_rate": 1.1674551545344983e-05, + "loss": 0.46416157484054565, + "step": 3244 + }, + { + "epoch": 0.9489691475361896, + "grad_norm": 1.3422229221849986, + "learning_rate": 1.1669784910296114e-05, + "loss": 0.5170255899429321, + "step": 3245 + }, + { + "epoch": 0.9492615879514549, + "grad_norm": 1.3467691134757651, + "learning_rate": 1.1665017884945174e-05, + "loss": 0.7673200368881226, + "step": 3246 + }, + { + "epoch": 0.9495540283667203, + "grad_norm": 1.194998950326605, + "learning_rate": 1.1660250470406426e-05, + "loss": 0.49335333704948425, + "step": 3247 + }, + { + "epoch": 0.9498464687819856, + "grad_norm": 1.5055569823397887, + "learning_rate": 1.1655482667794228e-05, + "loss": 0.6620640754699707, + "step": 3248 + }, + { + "epoch": 0.9501389091972511, + "grad_norm": 1.5536985980342881, + "learning_rate": 1.1650714478223022e-05, + "loss": 0.600047767162323, + "step": 3249 + }, + { + "epoch": 0.9504313496125164, + "grad_norm": 1.449375702915225, + "learning_rate": 1.164594590280734e-05, + "loss": 0.668572187423706, + "step": 3250 + }, + { + "epoch": 0.9507237900277818, + "grad_norm": 1.28696773590094, + "learning_rate": 1.1641176942661812e-05, + "loss": 0.4460945725440979, + "step": 3251 + }, + { + "epoch": 0.9510162304430473, + "grad_norm": 1.553130185640807, + "learning_rate": 1.1636407598901154e-05, + "loss": 0.6650545597076416, + "step": 3252 + }, + { + "epoch": 0.9513086708583126, + "grad_norm": 1.4537452557116313, + "learning_rate": 1.1631637872640166e-05, + "loss": 0.5631237030029297, + "step": 3253 + }, + { + "epoch": 0.951601111273578, + "grad_norm": 1.2642307643713007, + "learning_rate": 1.162686776499375e-05, + "loss": 0.650580883026123, + "step": 3254 + }, + { + "epoch": 0.9518935516888434, + "grad_norm": 
1.2808622379645098, + "learning_rate": 1.1622097277076883e-05, + "loss": 0.5606606602668762, + "step": 3255 + }, + { + "epoch": 0.9521859921041088, + "grad_norm": 1.6059525544711786, + "learning_rate": 1.1617326410004639e-05, + "loss": 0.667366623878479, + "step": 3256 + }, + { + "epoch": 0.9524784325193741, + "grad_norm": 1.2848877829061671, + "learning_rate": 1.1612555164892181e-05, + "loss": 0.5895084738731384, + "step": 3257 + }, + { + "epoch": 0.9527708729346396, + "grad_norm": 1.3031742059601414, + "learning_rate": 1.1607783542854759e-05, + "loss": 0.6468119025230408, + "step": 3258 + }, + { + "epoch": 0.9530633133499049, + "grad_norm": 1.567653748749065, + "learning_rate": 1.1603011545007708e-05, + "loss": 0.7178056240081787, + "step": 3259 + }, + { + "epoch": 0.9533557537651703, + "grad_norm": 1.0796246328531958, + "learning_rate": 1.1598239172466457e-05, + "loss": 0.42994585633277893, + "step": 3260 + }, + { + "epoch": 0.9536481941804358, + "grad_norm": 1.3208710287997751, + "learning_rate": 1.1593466426346513e-05, + "loss": 0.4939822554588318, + "step": 3261 + }, + { + "epoch": 0.9539406345957011, + "grad_norm": 1.4828958620285886, + "learning_rate": 1.1588693307763483e-05, + "loss": 0.4252137839794159, + "step": 3262 + }, + { + "epoch": 0.9542330750109665, + "grad_norm": 1.4293991408504185, + "learning_rate": 1.1583919817833051e-05, + "loss": 0.5772995948791504, + "step": 3263 + }, + { + "epoch": 0.9545255154262319, + "grad_norm": 1.4892265763022432, + "learning_rate": 1.1579145957670992e-05, + "loss": 0.6784560680389404, + "step": 3264 + }, + { + "epoch": 0.9548179558414973, + "grad_norm": 1.4340903064465058, + "learning_rate": 1.1574371728393169e-05, + "loss": 0.5373483896255493, + "step": 3265 + }, + { + "epoch": 0.9551103962567626, + "grad_norm": 1.5590731671081544, + "learning_rate": 1.1569597131115523e-05, + "loss": 0.7517837285995483, + "step": 3266 + }, + { + "epoch": 0.9554028366720281, + "grad_norm": 1.2323534514024168, + "learning_rate": 
1.1564822166954092e-05, + "loss": 0.6715551614761353, + "step": 3267 + }, + { + "epoch": 0.9556952770872935, + "grad_norm": 1.5740418428519831, + "learning_rate": 1.1560046837024994e-05, + "loss": 0.6892265677452087, + "step": 3268 + }, + { + "epoch": 0.9559877175025588, + "grad_norm": 1.1845546480418727, + "learning_rate": 1.1555271142444433e-05, + "loss": 0.5564894676208496, + "step": 3269 + }, + { + "epoch": 0.9562801579178243, + "grad_norm": 1.4735106062071393, + "learning_rate": 1.15504950843287e-05, + "loss": 0.6211465001106262, + "step": 3270 + }, + { + "epoch": 0.9565725983330896, + "grad_norm": 1.360797371118281, + "learning_rate": 1.1545718663794165e-05, + "loss": 0.6189093589782715, + "step": 3271 + }, + { + "epoch": 0.956865038748355, + "grad_norm": 1.332461163898103, + "learning_rate": 1.1540941881957293e-05, + "loss": 0.6600508689880371, + "step": 3272 + }, + { + "epoch": 0.9571574791636204, + "grad_norm": 1.1722369932825303, + "learning_rate": 1.1536164739934626e-05, + "loss": 0.5891202688217163, + "step": 3273 + }, + { + "epoch": 0.9574499195788858, + "grad_norm": 1.450456789269031, + "learning_rate": 1.1531387238842788e-05, + "loss": 0.5996856093406677, + "step": 3274 + }, + { + "epoch": 0.9577423599941511, + "grad_norm": 1.3947581203143906, + "learning_rate": 1.15266093797985e-05, + "loss": 0.5645085573196411, + "step": 3275 + }, + { + "epoch": 0.9580348004094166, + "grad_norm": 1.3192013477387883, + "learning_rate": 1.1521831163918545e-05, + "loss": 0.5934250354766846, + "step": 3276 + }, + { + "epoch": 0.958327240824682, + "grad_norm": 1.3125475487560205, + "learning_rate": 1.151705259231981e-05, + "loss": 0.6659657955169678, + "step": 3277 + }, + { + "epoch": 0.9586196812399473, + "grad_norm": 1.4439329469838202, + "learning_rate": 1.1512273666119255e-05, + "loss": 0.518921434879303, + "step": 3278 + }, + { + "epoch": 0.9589121216552128, + "grad_norm": 1.5520324796179028, + "learning_rate": 1.1507494386433927e-05, + "loss": 0.6015551686286926, 
+ "step": 3279 + }, + { + "epoch": 0.9592045620704781, + "grad_norm": 1.3864839845404684, + "learning_rate": 1.150271475438095e-05, + "loss": 0.5590265393257141, + "step": 3280 + }, + { + "epoch": 0.9594970024857435, + "grad_norm": 2.135782810317134, + "learning_rate": 1.149793477107754e-05, + "loss": 0.5820340514183044, + "step": 3281 + }, + { + "epoch": 0.9597894429010089, + "grad_norm": 1.5263684685914536, + "learning_rate": 1.1493154437640981e-05, + "loss": 0.5356709957122803, + "step": 3282 + }, + { + "epoch": 0.9600818833162743, + "grad_norm": 1.6754028625571513, + "learning_rate": 1.1488373755188651e-05, + "loss": 0.7024146318435669, + "step": 3283 + }, + { + "epoch": 0.9603743237315397, + "grad_norm": 1.1672092433368113, + "learning_rate": 1.1483592724838007e-05, + "loss": 0.4929785132408142, + "step": 3284 + }, + { + "epoch": 0.9606667641468051, + "grad_norm": 1.288237919875972, + "learning_rate": 1.147881134770658e-05, + "loss": 0.6902902126312256, + "step": 3285 + }, + { + "epoch": 0.9609592045620705, + "grad_norm": 1.3348356135288268, + "learning_rate": 1.1474029624911997e-05, + "loss": 0.5339258313179016, + "step": 3286 + }, + { + "epoch": 0.9612516449773358, + "grad_norm": 1.4657145875756896, + "learning_rate": 1.146924755757195e-05, + "loss": 0.6998730897903442, + "step": 3287 + }, + { + "epoch": 0.9615440853926013, + "grad_norm": 1.257948537764273, + "learning_rate": 1.1464465146804218e-05, + "loss": 0.6174519062042236, + "step": 3288 + }, + { + "epoch": 0.9618365258078666, + "grad_norm": 1.812192547108516, + "learning_rate": 1.145968239372666e-05, + "loss": 0.5395258665084839, + "step": 3289 + }, + { + "epoch": 0.962128966223132, + "grad_norm": 1.4759469600623887, + "learning_rate": 1.1454899299457221e-05, + "loss": 0.6355341672897339, + "step": 3290 + }, + { + "epoch": 0.9624214066383975, + "grad_norm": 1.519697305957534, + "learning_rate": 1.1450115865113916e-05, + "loss": 0.5315179228782654, + "step": 3291 + }, + { + "epoch": 0.9627138470536628, 
+ "grad_norm": 1.468105168017502, + "learning_rate": 1.1445332091814844e-05, + "loss": 0.5595142841339111, + "step": 3292 + }, + { + "epoch": 0.9630062874689282, + "grad_norm": 1.2033736096293444, + "learning_rate": 1.1440547980678185e-05, + "loss": 0.5509291291236877, + "step": 3293 + }, + { + "epoch": 0.9632987278841936, + "grad_norm": 1.5381505996084959, + "learning_rate": 1.1435763532822191e-05, + "loss": 0.6831322908401489, + "step": 3294 + }, + { + "epoch": 0.963591168299459, + "grad_norm": 1.3733453232745707, + "learning_rate": 1.1430978749365203e-05, + "loss": 0.5494598150253296, + "step": 3295 + }, + { + "epoch": 0.9638836087147243, + "grad_norm": 1.498661160088125, + "learning_rate": 1.142619363142563e-05, + "loss": 0.5613550543785095, + "step": 3296 + }, + { + "epoch": 0.9641760491299898, + "grad_norm": 1.5212850266198317, + "learning_rate": 1.1421408180121972e-05, + "loss": 0.656089186668396, + "step": 3297 + }, + { + "epoch": 0.9644684895452551, + "grad_norm": 1.1510410875603876, + "learning_rate": 1.1416622396572791e-05, + "loss": 0.5913431644439697, + "step": 3298 + }, + { + "epoch": 0.9647609299605205, + "grad_norm": 1.3644056514467953, + "learning_rate": 1.1411836281896737e-05, + "loss": 0.6706565022468567, + "step": 3299 + }, + { + "epoch": 0.965053370375786, + "grad_norm": 1.3661421058655916, + "learning_rate": 1.1407049837212539e-05, + "loss": 0.6169217824935913, + "step": 3300 + }, + { + "epoch": 0.9653458107910513, + "grad_norm": 1.2988460072876178, + "learning_rate": 1.1402263063638994e-05, + "loss": 0.5516680479049683, + "step": 3301 + }, + { + "epoch": 0.9656382512063167, + "grad_norm": 1.2914486970247845, + "learning_rate": 1.1397475962294986e-05, + "loss": 0.7105098962783813, + "step": 3302 + }, + { + "epoch": 0.9659306916215821, + "grad_norm": 1.5297340917133426, + "learning_rate": 1.139268853429947e-05, + "loss": 0.6183327436447144, + "step": 3303 + }, + { + "epoch": 0.9662231320368475, + "grad_norm": 1.4183780196378124, + 
"learning_rate": 1.1387900780771472e-05, + "loss": 0.6160033941268921, + "step": 3304 + }, + { + "epoch": 0.9665155724521128, + "grad_norm": 1.4212044707464202, + "learning_rate": 1.1383112702830108e-05, + "loss": 0.5526994466781616, + "step": 3305 + }, + { + "epoch": 0.9668080128673783, + "grad_norm": 1.381901469460175, + "learning_rate": 1.137832430159456e-05, + "loss": 0.5476477742195129, + "step": 3306 + }, + { + "epoch": 0.9671004532826437, + "grad_norm": 1.3794404018811846, + "learning_rate": 1.1373535578184083e-05, + "loss": 0.558393657207489, + "step": 3307 + }, + { + "epoch": 0.967392893697909, + "grad_norm": 1.4577860579810487, + "learning_rate": 1.1368746533718017e-05, + "loss": 0.6302276849746704, + "step": 3308 + }, + { + "epoch": 0.9676853341131745, + "grad_norm": 1.2805956031485568, + "learning_rate": 1.1363957169315773e-05, + "loss": 0.619697630405426, + "step": 3309 + }, + { + "epoch": 0.9679777745284398, + "grad_norm": 1.4119075289775231, + "learning_rate": 1.135916748609683e-05, + "loss": 0.564563512802124, + "step": 3310 + }, + { + "epoch": 0.9682702149437052, + "grad_norm": 1.6014783450991135, + "learning_rate": 1.1354377485180756e-05, + "loss": 0.6238751411437988, + "step": 3311 + }, + { + "epoch": 0.9685626553589706, + "grad_norm": 1.4620948350058627, + "learning_rate": 1.1349587167687177e-05, + "loss": 0.8079221844673157, + "step": 3312 + }, + { + "epoch": 0.968855095774236, + "grad_norm": 1.4034979651528738, + "learning_rate": 1.1344796534735805e-05, + "loss": 0.5547629594802856, + "step": 3313 + }, + { + "epoch": 0.9691475361895013, + "grad_norm": 1.2187187942390127, + "learning_rate": 1.134000558744642e-05, + "loss": 0.630042552947998, + "step": 3314 + }, + { + "epoch": 0.9694399766047668, + "grad_norm": 1.284912675244452, + "learning_rate": 1.1335214326938872e-05, + "loss": 0.5283412337303162, + "step": 3315 + }, + { + "epoch": 0.9697324170200322, + "grad_norm": 1.3484514955842084, + "learning_rate": 1.1330422754333097e-05, + "loss": 
0.6356452703475952, + "step": 3316 + }, + { + "epoch": 0.9700248574352975, + "grad_norm": 1.265116321608699, + "learning_rate": 1.132563087074909e-05, + "loss": 0.6531886458396912, + "step": 3317 + }, + { + "epoch": 0.970317297850563, + "grad_norm": 1.6209665553722108, + "learning_rate": 1.1320838677306927e-05, + "loss": 0.5725178718566895, + "step": 3318 + }, + { + "epoch": 0.9706097382658283, + "grad_norm": 1.460783947968998, + "learning_rate": 1.1316046175126758e-05, + "loss": 0.6341495513916016, + "step": 3319 + }, + { + "epoch": 0.9709021786810937, + "grad_norm": 1.428850290510927, + "learning_rate": 1.1311253365328794e-05, + "loss": 0.5792768597602844, + "step": 3320 + }, + { + "epoch": 0.9711946190963591, + "grad_norm": 1.2539734431492524, + "learning_rate": 1.1306460249033326e-05, + "loss": 0.5495700836181641, + "step": 3321 + }, + { + "epoch": 0.9714870595116245, + "grad_norm": 1.3779597112573112, + "learning_rate": 1.1301666827360721e-05, + "loss": 0.7092291116714478, + "step": 3322 + }, + { + "epoch": 0.97177949992689, + "grad_norm": 1.210154083257435, + "learning_rate": 1.1296873101431409e-05, + "loss": 0.5368257761001587, + "step": 3323 + }, + { + "epoch": 0.9720719403421553, + "grad_norm": 1.2901315838159502, + "learning_rate": 1.1292079072365898e-05, + "loss": 0.6116393804550171, + "step": 3324 + }, + { + "epoch": 0.9723643807574207, + "grad_norm": 1.6375876584807947, + "learning_rate": 1.1287284741284757e-05, + "loss": 0.5654028654098511, + "step": 3325 + }, + { + "epoch": 0.972656821172686, + "grad_norm": 1.4007947938241085, + "learning_rate": 1.1282490109308633e-05, + "loss": 0.6436389684677124, + "step": 3326 + }, + { + "epoch": 0.9729492615879515, + "grad_norm": 1.6286174854172328, + "learning_rate": 1.1277695177558243e-05, + "loss": 0.7687330842018127, + "step": 3327 + }, + { + "epoch": 0.9732417020032168, + "grad_norm": 1.3338540478099405, + "learning_rate": 1.1272899947154377e-05, + "loss": 0.5350443124771118, + "step": 3328 + }, + { + 
"epoch": 0.9735341424184822, + "grad_norm": 1.5528633763871835, + "learning_rate": 1.1268104419217884e-05, + "loss": 0.6032785773277283, + "step": 3329 + }, + { + "epoch": 0.9738265828337477, + "grad_norm": 1.410347655987774, + "learning_rate": 1.1263308594869697e-05, + "loss": 0.5756093263626099, + "step": 3330 + }, + { + "epoch": 0.974119023249013, + "grad_norm": 1.5831169693775362, + "learning_rate": 1.1258512475230807e-05, + "loss": 0.6977418065071106, + "step": 3331 + }, + { + "epoch": 0.9744114636642784, + "grad_norm": 1.3726893652594243, + "learning_rate": 1.1253716061422275e-05, + "loss": 0.5409448146820068, + "step": 3332 + }, + { + "epoch": 0.9747039040795438, + "grad_norm": 1.3626349639764654, + "learning_rate": 1.1248919354565237e-05, + "loss": 0.5863862037658691, + "step": 3333 + }, + { + "epoch": 0.9749963444948092, + "grad_norm": 1.313934697737098, + "learning_rate": 1.1244122355780895e-05, + "loss": 0.6039433479309082, + "step": 3334 + }, + { + "epoch": 0.9752887849100745, + "grad_norm": 1.4813691831553626, + "learning_rate": 1.1239325066190513e-05, + "loss": 0.6696581840515137, + "step": 3335 + }, + { + "epoch": 0.97558122532534, + "grad_norm": 1.5159715106591773, + "learning_rate": 1.1234527486915439e-05, + "loss": 0.6308715343475342, + "step": 3336 + }, + { + "epoch": 0.9758736657406053, + "grad_norm": 1.4927391317525602, + "learning_rate": 1.1229729619077065e-05, + "loss": 0.580268383026123, + "step": 3337 + }, + { + "epoch": 0.9761661061558707, + "grad_norm": 1.775582999909584, + "learning_rate": 1.1224931463796871e-05, + "loss": 0.8080834746360779, + "step": 3338 + }, + { + "epoch": 0.9764585465711362, + "grad_norm": 1.3814988427954438, + "learning_rate": 1.1220133022196395e-05, + "loss": 0.4933619499206543, + "step": 3339 + }, + { + "epoch": 0.9767509869864015, + "grad_norm": 1.26412210808527, + "learning_rate": 1.1215334295397244e-05, + "loss": 0.5639102458953857, + "step": 3340 + }, + { + "epoch": 0.9770434274016669, + "grad_norm": 
1.3947001629341338, + "learning_rate": 1.1210535284521094e-05, + "loss": 0.6332741975784302, + "step": 3341 + }, + { + "epoch": 0.9773358678169323, + "grad_norm": 1.4234927806293247, + "learning_rate": 1.1205735990689677e-05, + "loss": 0.5425227880477905, + "step": 3342 + }, + { + "epoch": 0.9776283082321977, + "grad_norm": 1.2841671137073696, + "learning_rate": 1.1200936415024804e-05, + "loss": 0.48746997117996216, + "step": 3343 + }, + { + "epoch": 0.977920748647463, + "grad_norm": 1.3045240526527524, + "learning_rate": 1.1196136558648345e-05, + "loss": 0.5509577393531799, + "step": 3344 + }, + { + "epoch": 0.9782131890627285, + "grad_norm": 1.5306708658005588, + "learning_rate": 1.1191336422682237e-05, + "loss": 0.5939484238624573, + "step": 3345 + }, + { + "epoch": 0.9785056294779939, + "grad_norm": 1.4772741629174198, + "learning_rate": 1.1186536008248487e-05, + "loss": 0.6078917384147644, + "step": 3346 + }, + { + "epoch": 0.9787980698932592, + "grad_norm": 1.4449426772113496, + "learning_rate": 1.1181735316469157e-05, + "loss": 0.5578145980834961, + "step": 3347 + }, + { + "epoch": 0.9790905103085247, + "grad_norm": 1.5556898331182667, + "learning_rate": 1.1176934348466384e-05, + "loss": 0.6809493899345398, + "step": 3348 + }, + { + "epoch": 0.97938295072379, + "grad_norm": 1.3454886518258895, + "learning_rate": 1.117213310536236e-05, + "loss": 0.6057093143463135, + "step": 3349 + }, + { + "epoch": 0.9796753911390554, + "grad_norm": 1.2918762120947054, + "learning_rate": 1.1167331588279351e-05, + "loss": 0.6656113266944885, + "step": 3350 + }, + { + "epoch": 0.9799678315543208, + "grad_norm": 1.3588186351553628, + "learning_rate": 1.1162529798339682e-05, + "loss": 0.5260547399520874, + "step": 3351 + }, + { + "epoch": 0.9802602719695862, + "grad_norm": 1.4059510686804249, + "learning_rate": 1.115772773666574e-05, + "loss": 0.6918379068374634, + "step": 3352 + }, + { + "epoch": 0.9805527123848515, + "grad_norm": 1.4859264660633271, + "learning_rate": 
1.115292540437998e-05, + "loss": 0.7128825187683105, + "step": 3353 + }, + { + "epoch": 0.980845152800117, + "grad_norm": 1.7806281788252345, + "learning_rate": 1.1148122802604913e-05, + "loss": 0.6858257055282593, + "step": 3354 + }, + { + "epoch": 0.9811375932153824, + "grad_norm": 1.3250069966815017, + "learning_rate": 1.1143319932463124e-05, + "loss": 0.540290117263794, + "step": 3355 + }, + { + "epoch": 0.9814300336306477, + "grad_norm": 1.3692222106755043, + "learning_rate": 1.1138516795077251e-05, + "loss": 0.7293038368225098, + "step": 3356 + }, + { + "epoch": 0.9817224740459132, + "grad_norm": 1.2337952733643827, + "learning_rate": 1.1133713391570003e-05, + "loss": 0.5981270670890808, + "step": 3357 + }, + { + "epoch": 0.9820149144611785, + "grad_norm": 1.282642205016649, + "learning_rate": 1.1128909723064138e-05, + "loss": 0.6175673604011536, + "step": 3358 + }, + { + "epoch": 0.9823073548764439, + "grad_norm": 1.233452486411816, + "learning_rate": 1.112410579068249e-05, + "loss": 0.5385074615478516, + "step": 3359 + }, + { + "epoch": 0.9825997952917093, + "grad_norm": 1.372295513124522, + "learning_rate": 1.1119301595547952e-05, + "loss": 0.5754122734069824, + "step": 3360 + }, + { + "epoch": 0.9828922357069747, + "grad_norm": 1.4139982265628481, + "learning_rate": 1.1114497138783469e-05, + "loss": 0.5817348957061768, + "step": 3361 + }, + { + "epoch": 0.9831846761222401, + "grad_norm": 1.5953096945649214, + "learning_rate": 1.1109692421512058e-05, + "loss": 0.7561115026473999, + "step": 3362 + }, + { + "epoch": 0.9834771165375055, + "grad_norm": 1.4339527302516233, + "learning_rate": 1.1104887444856786e-05, + "loss": 0.5972003936767578, + "step": 3363 + }, + { + "epoch": 0.9837695569527709, + "grad_norm": 1.7933233288020083, + "learning_rate": 1.1100082209940795e-05, + "loss": 0.7569154500961304, + "step": 3364 + }, + { + "epoch": 0.9840619973680362, + "grad_norm": 1.6291951934588174, + "learning_rate": 1.1095276717887273e-05, + "loss": 
0.587831437587738, + "step": 3365 + }, + { + "epoch": 0.9843544377833017, + "grad_norm": 1.3893746663182953, + "learning_rate": 1.109047096981948e-05, + "loss": 0.5265868902206421, + "step": 3366 + }, + { + "epoch": 0.984646878198567, + "grad_norm": 1.5308570155926502, + "learning_rate": 1.1085664966860728e-05, + "loss": 0.6065980792045593, + "step": 3367 + }, + { + "epoch": 0.9849393186138324, + "grad_norm": 1.2582827679300745, + "learning_rate": 1.1080858710134392e-05, + "loss": 0.5859705209732056, + "step": 3368 + }, + { + "epoch": 0.9852317590290979, + "grad_norm": 1.2323676627113982, + "learning_rate": 1.1076052200763903e-05, + "loss": 0.508766770362854, + "step": 3369 + }, + { + "epoch": 0.9855241994443632, + "grad_norm": 1.36193145330846, + "learning_rate": 1.1071245439872752e-05, + "loss": 0.569848358631134, + "step": 3370 + }, + { + "epoch": 0.9858166398596286, + "grad_norm": 1.5268801014665052, + "learning_rate": 1.1066438428584496e-05, + "loss": 0.6665600538253784, + "step": 3371 + }, + { + "epoch": 0.986109080274894, + "grad_norm": 4.0352208239875536, + "learning_rate": 1.1061631168022742e-05, + "loss": 0.5942315459251404, + "step": 3372 + }, + { + "epoch": 0.9864015206901594, + "grad_norm": 1.3552035470831052, + "learning_rate": 1.1056823659311158e-05, + "loss": 0.5270178318023682, + "step": 3373 + }, + { + "epoch": 0.9866939611054247, + "grad_norm": 1.484191192307279, + "learning_rate": 1.1052015903573465e-05, + "loss": 0.6879183053970337, + "step": 3374 + }, + { + "epoch": 0.9869864015206902, + "grad_norm": 1.3455375539569006, + "learning_rate": 1.1047207901933453e-05, + "loss": 0.5980993509292603, + "step": 3375 + }, + { + "epoch": 0.9872788419359555, + "grad_norm": 1.3905728698834559, + "learning_rate": 1.1042399655514961e-05, + "loss": 0.5616245865821838, + "step": 3376 + }, + { + "epoch": 0.9875712823512209, + "grad_norm": 1.186489901347366, + "learning_rate": 1.1037591165441887e-05, + "loss": 0.6233900785446167, + "step": 3377 + }, + { + 
"epoch": 0.9878637227664864, + "grad_norm": 1.2146885941659273, + "learning_rate": 1.1032782432838188e-05, + "loss": 0.612476110458374, + "step": 3378 + }, + { + "epoch": 0.9881561631817517, + "grad_norm": 1.4001611534955285, + "learning_rate": 1.1027973458827874e-05, + "loss": 0.7109482288360596, + "step": 3379 + }, + { + "epoch": 0.9884486035970171, + "grad_norm": 1.4339596644962305, + "learning_rate": 1.1023164244535013e-05, + "loss": 0.7105005383491516, + "step": 3380 + }, + { + "epoch": 0.9887410440122825, + "grad_norm": 1.1897152470249062, + "learning_rate": 1.1018354791083731e-05, + "loss": 0.5401301383972168, + "step": 3381 + }, + { + "epoch": 0.9890334844275479, + "grad_norm": 1.2391450524860042, + "learning_rate": 1.101354509959821e-05, + "loss": 0.504487156867981, + "step": 3382 + }, + { + "epoch": 0.9893259248428132, + "grad_norm": 1.5778073649668172, + "learning_rate": 1.1008735171202685e-05, + "loss": 0.5634675025939941, + "step": 3383 + }, + { + "epoch": 0.9896183652580787, + "grad_norm": 1.2596231385186676, + "learning_rate": 1.1003925007021444e-05, + "loss": 0.4828820824623108, + "step": 3384 + }, + { + "epoch": 0.9899108056733441, + "grad_norm": 1.5274466661026922, + "learning_rate": 1.0999114608178837e-05, + "loss": 0.7154384851455688, + "step": 3385 + }, + { + "epoch": 0.9902032460886094, + "grad_norm": 1.4762279403432657, + "learning_rate": 1.0994303975799268e-05, + "loss": 0.626085638999939, + "step": 3386 + }, + { + "epoch": 0.9904956865038749, + "grad_norm": 1.2276097303271793, + "learning_rate": 1.0989493111007186e-05, + "loss": 0.5179756283760071, + "step": 3387 + }, + { + "epoch": 0.9907881269191402, + "grad_norm": 1.443725456432181, + "learning_rate": 1.0984682014927108e-05, + "loss": 0.6992131471633911, + "step": 3388 + }, + { + "epoch": 0.9910805673344056, + "grad_norm": 1.3252934977411588, + "learning_rate": 1.0979870688683598e-05, + "loss": 0.5791709423065186, + "step": 3389 + }, + { + "epoch": 0.991373007749671, + "grad_norm": 
1.2293406038140111, + "learning_rate": 1.097505913340127e-05, + "loss": 0.4703817367553711, + "step": 3390 + }, + { + "epoch": 0.9916654481649364, + "grad_norm": 1.7130975290215298, + "learning_rate": 1.0970247350204797e-05, + "loss": 0.6042051911354065, + "step": 3391 + }, + { + "epoch": 0.9919578885802017, + "grad_norm": 1.5075227997294136, + "learning_rate": 1.0965435340218905e-05, + "loss": 0.6806557178497314, + "step": 3392 + }, + { + "epoch": 0.9922503289954672, + "grad_norm": 1.4336313879655775, + "learning_rate": 1.0960623104568373e-05, + "loss": 0.6372751593589783, + "step": 3393 + }, + { + "epoch": 0.9925427694107326, + "grad_norm": 1.2403325317456615, + "learning_rate": 1.0955810644378031e-05, + "loss": 0.48651185631752014, + "step": 3394 + }, + { + "epoch": 0.9928352098259979, + "grad_norm": 1.5056465468012041, + "learning_rate": 1.0950997960772764e-05, + "loss": 0.5244222283363342, + "step": 3395 + }, + { + "epoch": 0.9931276502412634, + "grad_norm": 1.4445958557594307, + "learning_rate": 1.0946185054877505e-05, + "loss": 0.6194322109222412, + "step": 3396 + }, + { + "epoch": 0.9934200906565287, + "grad_norm": 1.4199918179889868, + "learning_rate": 1.0941371927817241e-05, + "loss": 0.690010666847229, + "step": 3397 + }, + { + "epoch": 0.9937125310717941, + "grad_norm": 1.9110036566867663, + "learning_rate": 1.0936558580717013e-05, + "loss": 0.7332549095153809, + "step": 3398 + }, + { + "epoch": 0.9940049714870595, + "grad_norm": 1.428619260140058, + "learning_rate": 1.093174501470191e-05, + "loss": 0.5264838337898254, + "step": 3399 + }, + { + "epoch": 0.9942974119023249, + "grad_norm": 1.1922668548863515, + "learning_rate": 1.092693123089708e-05, + "loss": 0.624382734298706, + "step": 3400 + }, + { + "epoch": 0.9945898523175903, + "grad_norm": 1.6559518933415514, + "learning_rate": 1.0922117230427705e-05, + "loss": 0.6340548992156982, + "step": 3401 + }, + { + "epoch": 0.9948822927328557, + "grad_norm": 1.194444639014181, + "learning_rate": 
1.0917303014419036e-05, + "loss": 0.4452754855155945, + "step": 3402 + }, + { + "epoch": 0.9951747331481211, + "grad_norm": 1.4241998861848877, + "learning_rate": 1.0912488583996364e-05, + "loss": 0.6180763244628906, + "step": 3403 + }, + { + "epoch": 0.9954671735633864, + "grad_norm": 1.7347993099568695, + "learning_rate": 1.0907673940285032e-05, + "loss": 0.7079293727874756, + "step": 3404 + }, + { + "epoch": 0.9957596139786519, + "grad_norm": 1.6216897448198107, + "learning_rate": 1.090285908441044e-05, + "loss": 0.6608254909515381, + "step": 3405 + }, + { + "epoch": 0.9960520543939172, + "grad_norm": 1.6873856420041173, + "learning_rate": 1.0898044017498024e-05, + "loss": 0.6450251340866089, + "step": 3406 + }, + { + "epoch": 0.9963444948091826, + "grad_norm": 1.4055094844579619, + "learning_rate": 1.089322874067328e-05, + "loss": 0.6267623901367188, + "step": 3407 + }, + { + "epoch": 0.9966369352244481, + "grad_norm": 1.6519553259967432, + "learning_rate": 1.0888413255061747e-05, + "loss": 0.6756424903869629, + "step": 3408 + }, + { + "epoch": 0.9969293756397134, + "grad_norm": 1.4122044676522614, + "learning_rate": 1.0883597561789017e-05, + "loss": 0.6578212976455688, + "step": 3409 + }, + { + "epoch": 0.9972218160549788, + "grad_norm": 1.600222297323414, + "learning_rate": 1.087878166198073e-05, + "loss": 0.8186248540878296, + "step": 3410 + }, + { + "epoch": 0.9975142564702442, + "grad_norm": 1.4575083835366422, + "learning_rate": 1.0873965556762573e-05, + "loss": 0.6689319610595703, + "step": 3411 + }, + { + "epoch": 0.9978066968855096, + "grad_norm": 1.5562694813418687, + "learning_rate": 1.0869149247260282e-05, + "loss": 0.5471278429031372, + "step": 3412 + }, + { + "epoch": 0.9980991373007749, + "grad_norm": 1.239131034827953, + "learning_rate": 1.0864332734599636e-05, + "loss": 0.4673747420310974, + "step": 3413 + }, + { + "epoch": 0.9983915777160404, + "grad_norm": 1.4054798008983762, + "learning_rate": 1.085951601990647e-05, + "loss": 
0.5777568221092224, + "step": 3414 + }, + { + "epoch": 0.9986840181313057, + "grad_norm": 1.6708797545900484, + "learning_rate": 1.0854699104306661e-05, + "loss": 0.6758528351783752, + "step": 3415 + }, + { + "epoch": 0.9989764585465711, + "grad_norm": 1.169154860422915, + "learning_rate": 1.0849881988926132e-05, + "loss": 0.5759919881820679, + "step": 3416 + }, + { + "epoch": 0.9992688989618366, + "grad_norm": 1.3291108456245637, + "learning_rate": 1.0845064674890857e-05, + "loss": 0.606694221496582, + "step": 3417 + }, + { + "epoch": 0.9995613393771019, + "grad_norm": 1.475290016916602, + "learning_rate": 1.0840247163326851e-05, + "loss": 0.627873957157135, + "step": 3418 + }, + { + "epoch": 0.9998537797923673, + "grad_norm": 1.4144594545282698, + "learning_rate": 1.083542945536018e-05, + "loss": 0.5560880303382874, + "step": 3419 + }, + { + "epoch": 1.0, + "grad_norm": 2.3650000488034633, + "learning_rate": 1.0830611552116952e-05, + "loss": 0.5983354449272156, + "step": 3420 + }, + { + "epoch": 1.0002924404152653, + "grad_norm": 1.1169918975180415, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.5012353658676147, + "step": 3421 + }, + { + "epoch": 1.0005848808305309, + "grad_norm": 1.6136465051179143, + "learning_rate": 1.0820975164305498e-05, + "loss": 0.4585106372833252, + "step": 3422 + }, + { + "epoch": 1.0008773212457962, + "grad_norm": 1.2831850675969656, + "learning_rate": 1.0816156681989717e-05, + "loss": 0.5790318846702576, + "step": 3423 + }, + { + "epoch": 1.0011697616610615, + "grad_norm": 1.5258008126885618, + "learning_rate": 1.0811338008902277e-05, + "loss": 0.6016381978988647, + "step": 3424 + }, + { + "epoch": 1.0014622020763269, + "grad_norm": 1.328199543518758, + "learning_rate": 1.0806519146169507e-05, + "loss": 0.5756744146347046, + "step": 3425 + }, + { + "epoch": 1.0017546424915924, + "grad_norm": 1.1865012964818713, + "learning_rate": 1.0801700094917792e-05, + "loss": 0.4776861369609833, + "step": 3426 + }, + { + "epoch": 
1.0020470829068577, + "grad_norm": 1.8629358545914494, + "learning_rate": 1.0796880856273557e-05, + "loss": 0.645842969417572, + "step": 3427 + }, + { + "epoch": 1.002339523322123, + "grad_norm": 1.1125775865964678, + "learning_rate": 1.0792061431363266e-05, + "loss": 0.5645815134048462, + "step": 3428 + }, + { + "epoch": 1.0026319637373886, + "grad_norm": 1.4821141209987578, + "learning_rate": 1.0787241821313428e-05, + "loss": 0.5477975606918335, + "step": 3429 + }, + { + "epoch": 1.002924404152654, + "grad_norm": 1.0992693186116131, + "learning_rate": 1.0782422027250604e-05, + "loss": 0.4064188599586487, + "step": 3430 + }, + { + "epoch": 1.0032168445679193, + "grad_norm": 1.3634803374266724, + "learning_rate": 1.0777602050301384e-05, + "loss": 0.5360208749771118, + "step": 3431 + }, + { + "epoch": 1.0035092849831846, + "grad_norm": 1.4203435807547533, + "learning_rate": 1.0772781891592419e-05, + "loss": 0.6189982891082764, + "step": 3432 + }, + { + "epoch": 1.0038017253984501, + "grad_norm": 1.4406563602891276, + "learning_rate": 1.0767961552250382e-05, + "loss": 0.4623541533946991, + "step": 3433 + }, + { + "epoch": 1.0040941658137155, + "grad_norm": 1.4714321386033957, + "learning_rate": 1.0763141033402e-05, + "loss": 0.6094095706939697, + "step": 3434 + }, + { + "epoch": 1.0043866062289808, + "grad_norm": 1.8852494834868845, + "learning_rate": 1.0758320336174042e-05, + "loss": 0.6997445821762085, + "step": 3435 + }, + { + "epoch": 1.0046790466442463, + "grad_norm": 1.3591852438815977, + "learning_rate": 1.0753499461693316e-05, + "loss": 0.5447323322296143, + "step": 3436 + }, + { + "epoch": 1.0049714870595117, + "grad_norm": 1.526403087538078, + "learning_rate": 1.0748678411086672e-05, + "loss": 0.5851927995681763, + "step": 3437 + }, + { + "epoch": 1.005263927474777, + "grad_norm": 1.2443699762001765, + "learning_rate": 1.0743857185481006e-05, + "loss": 0.5897810459136963, + "step": 3438 + }, + { + "epoch": 1.0055563678900423, + "grad_norm": 
1.277276792826896, + "learning_rate": 1.073903578600324e-05, + "loss": 0.47671592235565186, + "step": 3439 + }, + { + "epoch": 1.0058488083053079, + "grad_norm": 1.5091606917661848, + "learning_rate": 1.0734214213780355e-05, + "loss": 0.5586696863174438, + "step": 3440 + }, + { + "epoch": 1.0061412487205732, + "grad_norm": 1.7171075095449666, + "learning_rate": 1.0729392469939362e-05, + "loss": 0.6817598342895508, + "step": 3441 + }, + { + "epoch": 1.0064336891358385, + "grad_norm": 1.4899951597044825, + "learning_rate": 1.0724570555607311e-05, + "loss": 0.6503750085830688, + "step": 3442 + }, + { + "epoch": 1.0067261295511039, + "grad_norm": 1.516461978227071, + "learning_rate": 1.07197484719113e-05, + "loss": 0.7121564149856567, + "step": 3443 + }, + { + "epoch": 1.0070185699663694, + "grad_norm": 1.2899445236891802, + "learning_rate": 1.071492621997846e-05, + "loss": 0.5760178565979004, + "step": 3444 + }, + { + "epoch": 1.0073110103816347, + "grad_norm": 1.2567067936293974, + "learning_rate": 1.0710103800935965e-05, + "loss": 0.4555765390396118, + "step": 3445 + }, + { + "epoch": 1.0076034507969, + "grad_norm": 1.73824720674272, + "learning_rate": 1.0705281215911021e-05, + "loss": 0.6098523736000061, + "step": 3446 + }, + { + "epoch": 1.0078958912121656, + "grad_norm": 1.3529009112365886, + "learning_rate": 1.070045846603088e-05, + "loss": 0.49828749895095825, + "step": 3447 + }, + { + "epoch": 1.008188331627431, + "grad_norm": 1.6747165622943363, + "learning_rate": 1.0695635552422834e-05, + "loss": 0.5134999752044678, + "step": 3448 + }, + { + "epoch": 1.0084807720426963, + "grad_norm": 1.6379844761327287, + "learning_rate": 1.0690812476214209e-05, + "loss": 0.53546142578125, + "step": 3449 + }, + { + "epoch": 1.0087732124579616, + "grad_norm": 1.353591975524027, + "learning_rate": 1.0685989238532364e-05, + "loss": 0.4955276846885681, + "step": 3450 + }, + { + "epoch": 1.0090656528732271, + "grad_norm": 1.5308502126967132, + "learning_rate": 
1.0681165840504708e-05, + "loss": 0.5693827271461487, + "step": 3451 + }, + { + "epoch": 1.0093580932884925, + "grad_norm": 1.2544327118971752, + "learning_rate": 1.0676342283258676e-05, + "loss": 0.5023596286773682, + "step": 3452 + }, + { + "epoch": 1.0096505337037578, + "grad_norm": 1.4830383604575028, + "learning_rate": 1.0671518567921748e-05, + "loss": 0.5601100921630859, + "step": 3453 + }, + { + "epoch": 1.0099429741190233, + "grad_norm": 1.5483896672555095, + "learning_rate": 1.0666694695621438e-05, + "loss": 0.5744563341140747, + "step": 3454 + }, + { + "epoch": 1.0102354145342887, + "grad_norm": 1.2243241739970807, + "learning_rate": 1.0661870667485298e-05, + "loss": 0.531909704208374, + "step": 3455 + }, + { + "epoch": 1.010527854949554, + "grad_norm": 1.5063779223920848, + "learning_rate": 1.0657046484640911e-05, + "loss": 0.5737274885177612, + "step": 3456 + }, + { + "epoch": 1.0108202953648193, + "grad_norm": 1.3852723907754825, + "learning_rate": 1.0652222148215905e-05, + "loss": 0.5550329089164734, + "step": 3457 + }, + { + "epoch": 1.0111127357800849, + "grad_norm": 1.6139287553682227, + "learning_rate": 1.0647397659337936e-05, + "loss": 0.47795504331588745, + "step": 3458 + }, + { + "epoch": 1.0114051761953502, + "grad_norm": 1.4543285146976004, + "learning_rate": 1.0642573019134703e-05, + "loss": 0.6817550659179688, + "step": 3459 + }, + { + "epoch": 1.0116976166106155, + "grad_norm": 1.1722820118460164, + "learning_rate": 1.063774822873393e-05, + "loss": 0.45271044969558716, + "step": 3460 + }, + { + "epoch": 1.011990057025881, + "grad_norm": 1.537598582173988, + "learning_rate": 1.0632923289263389e-05, + "loss": 0.611709475517273, + "step": 3461 + }, + { + "epoch": 1.0122824974411464, + "grad_norm": 1.4188302760105698, + "learning_rate": 1.0628098201850876e-05, + "loss": 0.5101709961891174, + "step": 3462 + }, + { + "epoch": 1.0125749378564117, + "grad_norm": 1.433548611715836, + "learning_rate": 1.0623272967624227e-05, + "loss": 
0.6550514698028564, + "step": 3463 + }, + { + "epoch": 1.012867378271677, + "grad_norm": 1.2796248072280718, + "learning_rate": 1.0618447587711312e-05, + "loss": 0.479978084564209, + "step": 3464 + }, + { + "epoch": 1.0131598186869426, + "grad_norm": 1.5575466316491844, + "learning_rate": 1.0613622063240035e-05, + "loss": 0.5616719722747803, + "step": 3465 + }, + { + "epoch": 1.013452259102208, + "grad_norm": 1.5865800035698945, + "learning_rate": 1.060879639533833e-05, + "loss": 0.5160953998565674, + "step": 3466 + }, + { + "epoch": 1.0137446995174733, + "grad_norm": 1.5690447549246889, + "learning_rate": 1.0603970585134168e-05, + "loss": 0.6069898009300232, + "step": 3467 + }, + { + "epoch": 1.0140371399327388, + "grad_norm": 1.4806335128762829, + "learning_rate": 1.0599144633755555e-05, + "loss": 0.5800961256027222, + "step": 3468 + }, + { + "epoch": 1.0143295803480041, + "grad_norm": 1.2794607035027592, + "learning_rate": 1.0594318542330528e-05, + "loss": 0.5286555290222168, + "step": 3469 + }, + { + "epoch": 1.0146220207632695, + "grad_norm": 1.3098421389423984, + "learning_rate": 1.0589492311987157e-05, + "loss": 0.44960829615592957, + "step": 3470 + }, + { + "epoch": 1.0149144611785348, + "grad_norm": 1.787788159345536, + "learning_rate": 1.0584665943853538e-05, + "loss": 0.5799434781074524, + "step": 3471 + }, + { + "epoch": 1.0152069015938003, + "grad_norm": 1.3655057393381103, + "learning_rate": 1.057983943905781e-05, + "loss": 0.5142421126365662, + "step": 3472 + }, + { + "epoch": 1.0154993420090657, + "grad_norm": 1.3605211166498987, + "learning_rate": 1.0575012798728141e-05, + "loss": 0.5184981226921082, + "step": 3473 + }, + { + "epoch": 1.015791782424331, + "grad_norm": 1.6630390830837942, + "learning_rate": 1.0570186023992724e-05, + "loss": 0.5747173428535461, + "step": 3474 + }, + { + "epoch": 1.0160842228395965, + "grad_norm": 1.4307323575447104, + "learning_rate": 1.0565359115979792e-05, + "loss": 0.5994119644165039, + "step": 3475 + }, + { + 
"epoch": 1.0163766632548619, + "grad_norm": 1.4001969418816858, + "learning_rate": 1.0560532075817605e-05, + "loss": 0.5020599365234375, + "step": 3476 + }, + { + "epoch": 1.0166691036701272, + "grad_norm": 1.5266027572877992, + "learning_rate": 1.0555704904634451e-05, + "loss": 0.5023698806762695, + "step": 3477 + }, + { + "epoch": 1.0169615440853925, + "grad_norm": 1.3247610849347196, + "learning_rate": 1.0550877603558656e-05, + "loss": 0.3998676538467407, + "step": 3478 + }, + { + "epoch": 1.017253984500658, + "grad_norm": 1.2513443496343235, + "learning_rate": 1.0546050173718569e-05, + "loss": 0.5083760619163513, + "step": 3479 + }, + { + "epoch": 1.0175464249159234, + "grad_norm": 1.3684676716830397, + "learning_rate": 1.0541222616242575e-05, + "loss": 0.49840620160102844, + "step": 3480 + }, + { + "epoch": 1.0178388653311887, + "grad_norm": 1.3303553104888959, + "learning_rate": 1.0536394932259085e-05, + "loss": 0.5302960276603699, + "step": 3481 + }, + { + "epoch": 1.018131305746454, + "grad_norm": 1.338379797222235, + "learning_rate": 1.0531567122896543e-05, + "loss": 0.5694236755371094, + "step": 3482 + }, + { + "epoch": 1.0184237461617196, + "grad_norm": 1.4305833876226657, + "learning_rate": 1.0526739189283414e-05, + "loss": 0.5155326128005981, + "step": 3483 + }, + { + "epoch": 1.018716186576985, + "grad_norm": 1.3829306833852764, + "learning_rate": 1.0521911132548207e-05, + "loss": 0.6254806518554688, + "step": 3484 + }, + { + "epoch": 1.0190086269922503, + "grad_norm": 1.9177430357611984, + "learning_rate": 1.0517082953819442e-05, + "loss": 0.5623525977134705, + "step": 3485 + }, + { + "epoch": 1.0193010674075158, + "grad_norm": 1.67092732120196, + "learning_rate": 1.051225465422568e-05, + "loss": 0.6289865970611572, + "step": 3486 + }, + { + "epoch": 1.0195935078227811, + "grad_norm": 1.4045798370952283, + "learning_rate": 1.050742623489551e-05, + "loss": 0.5935345888137817, + "step": 3487 + }, + { + "epoch": 1.0198859482380465, + "grad_norm": 
1.696103524125264, + "learning_rate": 1.0502597696957542e-05, + "loss": 0.5223839282989502, + "step": 3488 + }, + { + "epoch": 1.0201783886533118, + "grad_norm": 1.9382869881093494, + "learning_rate": 1.0497769041540418e-05, + "loss": 0.6766373515129089, + "step": 3489 + }, + { + "epoch": 1.0204708290685773, + "grad_norm": 1.7017290392950901, + "learning_rate": 1.0492940269772806e-05, + "loss": 0.4934672713279724, + "step": 3490 + }, + { + "epoch": 1.0207632694838427, + "grad_norm": 1.345123127698455, + "learning_rate": 1.0488111382783403e-05, + "loss": 0.5207735300064087, + "step": 3491 + }, + { + "epoch": 1.021055709899108, + "grad_norm": 1.6293706929191067, + "learning_rate": 1.0483282381700933e-05, + "loss": 0.6090695261955261, + "step": 3492 + }, + { + "epoch": 1.0213481503143735, + "grad_norm": 1.2927953162345942, + "learning_rate": 1.0478453267654147e-05, + "loss": 0.5777665376663208, + "step": 3493 + }, + { + "epoch": 1.0216405907296389, + "grad_norm": 1.5951555841510592, + "learning_rate": 1.0473624041771814e-05, + "loss": 0.7241395711898804, + "step": 3494 + }, + { + "epoch": 1.0219330311449042, + "grad_norm": 1.4480767991556562, + "learning_rate": 1.0468794705182742e-05, + "loss": 0.45545506477355957, + "step": 3495 + }, + { + "epoch": 1.0222254715601695, + "grad_norm": 1.422698945534055, + "learning_rate": 1.0463965259015761e-05, + "loss": 0.5519885420799255, + "step": 3496 + }, + { + "epoch": 1.022517911975435, + "grad_norm": 1.509316262763282, + "learning_rate": 1.045913570439972e-05, + "loss": 0.558646559715271, + "step": 3497 + }, + { + "epoch": 1.0228103523907004, + "grad_norm": 1.4960690347564465, + "learning_rate": 1.0454306042463499e-05, + "loss": 0.5259999632835388, + "step": 3498 + }, + { + "epoch": 1.0231027928059657, + "grad_norm": 1.2679527875669403, + "learning_rate": 1.0449476274336004e-05, + "loss": 0.4711627960205078, + "step": 3499 + }, + { + "epoch": 1.0233952332212313, + "grad_norm": 1.5395810801486782, + "learning_rate": 
1.0444646401146161e-05, + "loss": 0.5893874168395996, + "step": 3500 + }, + { + "epoch": 1.0236876736364966, + "grad_norm": 1.498228532943397, + "learning_rate": 1.0439816424022926e-05, + "loss": 0.5596123933792114, + "step": 3501 + }, + { + "epoch": 1.023980114051762, + "grad_norm": 1.3706228388690522, + "learning_rate": 1.0434986344095276e-05, + "loss": 0.5228658318519592, + "step": 3502 + }, + { + "epoch": 1.0242725544670273, + "grad_norm": 1.3956010390337459, + "learning_rate": 1.0430156162492216e-05, + "loss": 0.5520567297935486, + "step": 3503 + }, + { + "epoch": 1.0245649948822928, + "grad_norm": 1.2988010194163804, + "learning_rate": 1.0425325880342762e-05, + "loss": 0.531911313533783, + "step": 3504 + }, + { + "epoch": 1.0248574352975581, + "grad_norm": 1.5296749459710133, + "learning_rate": 1.0420495498775974e-05, + "loss": 0.58717942237854, + "step": 3505 + }, + { + "epoch": 1.0251498757128235, + "grad_norm": 1.3937094974123596, + "learning_rate": 1.0415665018920919e-05, + "loss": 0.4972108006477356, + "step": 3506 + }, + { + "epoch": 1.025442316128089, + "grad_norm": 1.4653045497635373, + "learning_rate": 1.0410834441906692e-05, + "loss": 0.567977249622345, + "step": 3507 + }, + { + "epoch": 1.0257347565433543, + "grad_norm": 1.4984249963013099, + "learning_rate": 1.0406003768862416e-05, + "loss": 0.568755567073822, + "step": 3508 + }, + { + "epoch": 1.0260271969586197, + "grad_norm": 1.5140899451878516, + "learning_rate": 1.0401173000917224e-05, + "loss": 0.5668960809707642, + "step": 3509 + }, + { + "epoch": 1.026319637373885, + "grad_norm": 1.5737165138245863, + "learning_rate": 1.0396342139200282e-05, + "loss": 0.5956743955612183, + "step": 3510 + }, + { + "epoch": 1.0266120777891505, + "grad_norm": 1.3000472899601168, + "learning_rate": 1.0391511184840775e-05, + "loss": 0.5258834362030029, + "step": 3511 + }, + { + "epoch": 1.0269045182044159, + "grad_norm": 1.52676259543146, + "learning_rate": 1.038668013896791e-05, + "loss": 0.7358168363571167, + 
"step": 3512 + }, + { + "epoch": 1.0271969586196812, + "grad_norm": 1.6868440270891885, + "learning_rate": 1.0381849002710914e-05, + "loss": 0.5845209956169128, + "step": 3513 + }, + { + "epoch": 1.0274893990349467, + "grad_norm": 1.4837942506085555, + "learning_rate": 1.0377017777199034e-05, + "loss": 0.4475495219230652, + "step": 3514 + }, + { + "epoch": 1.027781839450212, + "grad_norm": 1.2830033919091985, + "learning_rate": 1.0372186463561542e-05, + "loss": 0.5555804371833801, + "step": 3515 + }, + { + "epoch": 1.0280742798654774, + "grad_norm": 1.65016913167245, + "learning_rate": 1.0367355062927726e-05, + "loss": 0.5927316546440125, + "step": 3516 + }, + { + "epoch": 1.0283667202807427, + "grad_norm": 1.3376999356667882, + "learning_rate": 1.0362523576426897e-05, + "loss": 0.47281715273857117, + "step": 3517 + }, + { + "epoch": 1.0286591606960083, + "grad_norm": 1.4195049172993812, + "learning_rate": 1.0357692005188387e-05, + "loss": 0.5275483727455139, + "step": 3518 + }, + { + "epoch": 1.0289516011112736, + "grad_norm": 1.6670234220228792, + "learning_rate": 1.0352860350341547e-05, + "loss": 0.5740839242935181, + "step": 3519 + }, + { + "epoch": 1.029244041526539, + "grad_norm": 1.3668449892598942, + "learning_rate": 1.0348028613015747e-05, + "loss": 0.6030054688453674, + "step": 3520 + }, + { + "epoch": 1.0295364819418042, + "grad_norm": 1.4423080423666719, + "learning_rate": 1.034319679434037e-05, + "loss": 0.5415347814559937, + "step": 3521 + }, + { + "epoch": 1.0298289223570698, + "grad_norm": 1.4756281264212951, + "learning_rate": 1.033836489544483e-05, + "loss": 0.5850083231925964, + "step": 3522 + }, + { + "epoch": 1.0301213627723351, + "grad_norm": 1.516707487989418, + "learning_rate": 1.0333532917458556e-05, + "loss": 0.47614163160324097, + "step": 3523 + }, + { + "epoch": 1.0304138031876005, + "grad_norm": 1.5357316287676814, + "learning_rate": 1.0328700861510987e-05, + "loss": 0.5645745992660522, + "step": 3524 + }, + { + "epoch": 
1.030706243602866, + "grad_norm": 1.3186548714848774, + "learning_rate": 1.0323868728731591e-05, + "loss": 0.5729008913040161, + "step": 3525 + }, + { + "epoch": 1.0309986840181313, + "grad_norm": 1.373781447264802, + "learning_rate": 1.031903652024985e-05, + "loss": 0.5177778005599976, + "step": 3526 + }, + { + "epoch": 1.0312911244333967, + "grad_norm": 1.390457184292636, + "learning_rate": 1.0314204237195263e-05, + "loss": 0.49413079023361206, + "step": 3527 + }, + { + "epoch": 1.031583564848662, + "grad_norm": 1.4789369230243037, + "learning_rate": 1.0309371880697342e-05, + "loss": 0.5074756145477295, + "step": 3528 + }, + { + "epoch": 1.0318760052639275, + "grad_norm": 1.590543948205407, + "learning_rate": 1.0304539451885629e-05, + "loss": 0.5601285696029663, + "step": 3529 + }, + { + "epoch": 1.0321684456791929, + "grad_norm": 1.3273904087281212, + "learning_rate": 1.029970695188967e-05, + "loss": 0.48358121514320374, + "step": 3530 + }, + { + "epoch": 1.0324608860944582, + "grad_norm": 1.4772927313727484, + "learning_rate": 1.0294874381839033e-05, + "loss": 0.4472161829471588, + "step": 3531 + }, + { + "epoch": 1.0327533265097237, + "grad_norm": 1.4129544794929634, + "learning_rate": 1.02900417428633e-05, + "loss": 0.6011627912521362, + "step": 3532 + }, + { + "epoch": 1.033045766924989, + "grad_norm": 1.354725840134447, + "learning_rate": 1.0285209036092076e-05, + "loss": 0.5212395191192627, + "step": 3533 + }, + { + "epoch": 1.0333382073402544, + "grad_norm": 1.844431950477259, + "learning_rate": 1.0280376262654971e-05, + "loss": 0.5433810949325562, + "step": 3534 + }, + { + "epoch": 1.0336306477555197, + "grad_norm": 1.4124385690995565, + "learning_rate": 1.0275543423681622e-05, + "loss": 0.5215464234352112, + "step": 3535 + }, + { + "epoch": 1.0339230881707853, + "grad_norm": 1.3386210311441036, + "learning_rate": 1.0270710520301672e-05, + "loss": 0.511099100112915, + "step": 3536 + }, + { + "epoch": 1.0342155285860506, + "grad_norm": 1.3822305233430652, 
+ "learning_rate": 1.0265877553644783e-05, + "loss": 0.4954407811164856, + "step": 3537 + }, + { + "epoch": 1.034507969001316, + "grad_norm": 1.5424734752588294, + "learning_rate": 1.0261044524840633e-05, + "loss": 0.5491081476211548, + "step": 3538 + }, + { + "epoch": 1.0348004094165815, + "grad_norm": 1.5108040554468096, + "learning_rate": 1.0256211435018912e-05, + "loss": 0.43202829360961914, + "step": 3539 + }, + { + "epoch": 1.0350928498318468, + "grad_norm": 1.5814180623509084, + "learning_rate": 1.0251378285309326e-05, + "loss": 0.4721212089061737, + "step": 3540 + }, + { + "epoch": 1.0353852902471121, + "grad_norm": 1.6070602892086314, + "learning_rate": 1.0246545076841596e-05, + "loss": 0.5621099472045898, + "step": 3541 + }, + { + "epoch": 1.0356777306623774, + "grad_norm": 1.5170284121136077, + "learning_rate": 1.0241711810745452e-05, + "loss": 0.5572346448898315, + "step": 3542 + }, + { + "epoch": 1.035970171077643, + "grad_norm": 1.3590672633285579, + "learning_rate": 1.023687848815064e-05, + "loss": 0.40916550159454346, + "step": 3543 + }, + { + "epoch": 1.0362626114929083, + "grad_norm": 1.5018716604616227, + "learning_rate": 1.0232045110186926e-05, + "loss": 0.5370572805404663, + "step": 3544 + }, + { + "epoch": 1.0365550519081737, + "grad_norm": 1.603253593979403, + "learning_rate": 1.0227211677984074e-05, + "loss": 0.5381634831428528, + "step": 3545 + }, + { + "epoch": 1.0368474923234392, + "grad_norm": 1.3795492267662186, + "learning_rate": 1.0222378192671878e-05, + "loss": 0.4807749092578888, + "step": 3546 + }, + { + "epoch": 1.0371399327387045, + "grad_norm": 1.4973562396665303, + "learning_rate": 1.0217544655380129e-05, + "loss": 0.5673447847366333, + "step": 3547 + }, + { + "epoch": 1.0374323731539699, + "grad_norm": 1.6360254172890698, + "learning_rate": 1.0212711067238639e-05, + "loss": 0.5259549021720886, + "step": 3548 + }, + { + "epoch": 1.0377248135692352, + "grad_norm": 1.4439961362376934, + "learning_rate": 1.0207877429377232e-05, + 
"loss": 0.48267534375190735, + "step": 3549 + }, + { + "epoch": 1.0380172539845007, + "grad_norm": 1.438603988067733, + "learning_rate": 1.0203043742925738e-05, + "loss": 0.44843387603759766, + "step": 3550 + }, + { + "epoch": 1.038309694399766, + "grad_norm": 1.5765887333733293, + "learning_rate": 1.0198210009014005e-05, + "loss": 0.8050575256347656, + "step": 3551 + }, + { + "epoch": 1.0386021348150314, + "grad_norm": 1.3559927051954717, + "learning_rate": 1.0193376228771887e-05, + "loss": 0.590203046798706, + "step": 3552 + }, + { + "epoch": 1.0388945752302967, + "grad_norm": 1.4420953878245995, + "learning_rate": 1.0188542403329252e-05, + "loss": 0.5974458456039429, + "step": 3553 + }, + { + "epoch": 1.0391870156455623, + "grad_norm": 1.4408311686918343, + "learning_rate": 1.0183708533815975e-05, + "loss": 0.4628743827342987, + "step": 3554 + }, + { + "epoch": 1.0394794560608276, + "grad_norm": 1.538902326182442, + "learning_rate": 1.0178874621361944e-05, + "loss": 0.6738137006759644, + "step": 3555 + }, + { + "epoch": 1.039771896476093, + "grad_norm": 1.2584091446339778, + "learning_rate": 1.0174040667097061e-05, + "loss": 0.48062413930892944, + "step": 3556 + }, + { + "epoch": 1.0400643368913585, + "grad_norm": 1.4180020858721523, + "learning_rate": 1.016920667215123e-05, + "loss": 0.564401388168335, + "step": 3557 + }, + { + "epoch": 1.0403567773066238, + "grad_norm": 1.5220611788966263, + "learning_rate": 1.0164372637654367e-05, + "loss": 0.4035246968269348, + "step": 3558 + }, + { + "epoch": 1.0406492177218891, + "grad_norm": 1.3759176374876299, + "learning_rate": 1.0159538564736399e-05, + "loss": 0.4484536051750183, + "step": 3559 + }, + { + "epoch": 1.0409416581371547, + "grad_norm": 1.5320485493087415, + "learning_rate": 1.0154704454527265e-05, + "loss": 0.6257200837135315, + "step": 3560 + }, + { + "epoch": 1.04123409855242, + "grad_norm": 1.7250809702027206, + "learning_rate": 1.0149870308156899e-05, + "loss": 0.5541477799415588, + "step": 3561 + }, + 
{ + "epoch": 1.0415265389676853, + "grad_norm": 1.5360272319586679, + "learning_rate": 1.0145036126755264e-05, + "loss": 0.6248821020126343, + "step": 3562 + }, + { + "epoch": 1.0418189793829506, + "grad_norm": 1.3930925306710389, + "learning_rate": 1.0140201911452318e-05, + "loss": 0.574689507484436, + "step": 3563 + }, + { + "epoch": 1.0421114197982162, + "grad_norm": 1.45907196010364, + "learning_rate": 1.0135367663378025e-05, + "loss": 0.5873313546180725, + "step": 3564 + }, + { + "epoch": 1.0424038602134815, + "grad_norm": 1.7911480245961826, + "learning_rate": 1.0130533383662361e-05, + "loss": 0.6662088632583618, + "step": 3565 + }, + { + "epoch": 1.0426963006287469, + "grad_norm": 1.688392121046196, + "learning_rate": 1.0125699073435316e-05, + "loss": 0.6517773866653442, + "step": 3566 + }, + { + "epoch": 1.0429887410440122, + "grad_norm": 1.8273298961737783, + "learning_rate": 1.0120864733826877e-05, + "loss": 0.6311444640159607, + "step": 3567 + }, + { + "epoch": 1.0432811814592777, + "grad_norm": 1.4367651958960501, + "learning_rate": 1.0116030365967037e-05, + "loss": 0.49060457944869995, + "step": 3568 + }, + { + "epoch": 1.043573621874543, + "grad_norm": 1.609897253932932, + "learning_rate": 1.0111195970985813e-05, + "loss": 0.5405893921852112, + "step": 3569 + }, + { + "epoch": 1.0438660622898084, + "grad_norm": 1.4830806836977097, + "learning_rate": 1.01063615500132e-05, + "loss": 0.482162743806839, + "step": 3570 + }, + { + "epoch": 1.044158502705074, + "grad_norm": 1.4107369824500982, + "learning_rate": 1.0101527104179224e-05, + "loss": 0.4542362093925476, + "step": 3571 + }, + { + "epoch": 1.0444509431203393, + "grad_norm": 1.5628480243599212, + "learning_rate": 1.00966926346139e-05, + "loss": 0.6157265305519104, + "step": 3572 + }, + { + "epoch": 1.0447433835356046, + "grad_norm": 1.6143915430154057, + "learning_rate": 1.0091858142447266e-05, + "loss": 0.6591875553131104, + "step": 3573 + }, + { + "epoch": 1.04503582395087, + "grad_norm": 
1.410506710976703, + "learning_rate": 1.0087023628809347e-05, + "loss": 0.5686256885528564, + "step": 3574 + }, + { + "epoch": 1.0453282643661355, + "grad_norm": 1.2971662039691743, + "learning_rate": 1.0082189094830183e-05, + "loss": 0.45131799578666687, + "step": 3575 + }, + { + "epoch": 1.0456207047814008, + "grad_norm": 1.6508365467694242, + "learning_rate": 1.0077354541639821e-05, + "loss": 0.5787829160690308, + "step": 3576 + }, + { + "epoch": 1.0459131451966661, + "grad_norm": 1.6915833775625508, + "learning_rate": 1.0072519970368303e-05, + "loss": 0.5755574107170105, + "step": 3577 + }, + { + "epoch": 1.0462055856119317, + "grad_norm": 1.4591194150184388, + "learning_rate": 1.0067685382145683e-05, + "loss": 0.5017693638801575, + "step": 3578 + }, + { + "epoch": 1.046498026027197, + "grad_norm": 1.508478769597254, + "learning_rate": 1.0062850778102017e-05, + "loss": 0.5096016526222229, + "step": 3579 + }, + { + "epoch": 1.0467904664424623, + "grad_norm": 1.443966956114079, + "learning_rate": 1.0058016159367365e-05, + "loss": 0.4988967180252075, + "step": 3580 + }, + { + "epoch": 1.0470829068577276, + "grad_norm": 1.5186890104543016, + "learning_rate": 1.0053181527071786e-05, + "loss": 0.5410172939300537, + "step": 3581 + }, + { + "epoch": 1.0473753472729932, + "grad_norm": 1.7546625585964495, + "learning_rate": 1.004834688234535e-05, + "loss": 0.5980710983276367, + "step": 3582 + }, + { + "epoch": 1.0476677876882585, + "grad_norm": 1.347751797857706, + "learning_rate": 1.0043512226318124e-05, + "loss": 0.4737449586391449, + "step": 3583 + }, + { + "epoch": 1.0479602281035238, + "grad_norm": 1.5493397390355739, + "learning_rate": 1.003867756012018e-05, + "loss": 0.6106469631195068, + "step": 3584 + }, + { + "epoch": 1.0482526685187894, + "grad_norm": 1.6077524420960543, + "learning_rate": 1.0033842884881593e-05, + "loss": 0.48002901673316956, + "step": 3585 + }, + { + "epoch": 1.0485451089340547, + "grad_norm": 1.4065529576638647, + "learning_rate": 
1.0029008201732433e-05, + "loss": 0.5101731419563293, + "step": 3586 + }, + { + "epoch": 1.04883754934932, + "grad_norm": 1.6961382740739117, + "learning_rate": 1.0024173511802786e-05, + "loss": 0.6350706219673157, + "step": 3587 + }, + { + "epoch": 1.0491299897645854, + "grad_norm": 1.4947432010936612, + "learning_rate": 1.0019338816222725e-05, + "loss": 0.5268979072570801, + "step": 3588 + }, + { + "epoch": 1.049422430179851, + "grad_norm": 1.4955724361545546, + "learning_rate": 1.0014504116122335e-05, + "loss": 0.5670457482337952, + "step": 3589 + }, + { + "epoch": 1.0497148705951163, + "grad_norm": 1.7472274991386971, + "learning_rate": 1.0009669412631697e-05, + "loss": 0.6200711727142334, + "step": 3590 + }, + { + "epoch": 1.0500073110103816, + "grad_norm": 1.5117580085419962, + "learning_rate": 1.0004834706880891e-05, + "loss": 0.44014686346054077, + "step": 3591 + }, + { + "epoch": 1.050299751425647, + "grad_norm": 1.4806608082423456, + "learning_rate": 1e-05, + "loss": 0.4690900146961212, + "step": 3592 + }, + { + "epoch": 1.0505921918409125, + "grad_norm": 1.5061085663062508, + "learning_rate": 9.995165293119112e-06, + "loss": 0.5791969299316406, + "step": 3593 + }, + { + "epoch": 1.0508846322561778, + "grad_norm": 1.403652610849375, + "learning_rate": 9.990330587368306e-06, + "loss": 0.5566244125366211, + "step": 3594 + }, + { + "epoch": 1.0511770726714431, + "grad_norm": 1.47068511144412, + "learning_rate": 9.985495883877668e-06, + "loss": 0.5201646685600281, + "step": 3595 + }, + { + "epoch": 1.0514695130867087, + "grad_norm": 1.3147681531847344, + "learning_rate": 9.980661183777277e-06, + "loss": 0.44774526357650757, + "step": 3596 + }, + { + "epoch": 1.051761953501974, + "grad_norm": 1.641682032458417, + "learning_rate": 9.975826488197217e-06, + "loss": 0.5346901416778564, + "step": 3597 + }, + { + "epoch": 1.0520543939172393, + "grad_norm": 1.516503297952313, + "learning_rate": 9.970991798267568e-06, + "loss": 0.4639764428138733, + "step": 3598 + }, 
+ { + "epoch": 1.0523468343325049, + "grad_norm": 1.5385061459553095, + "learning_rate": 9.966157115118412e-06, + "loss": 0.5505763292312622, + "step": 3599 + }, + { + "epoch": 1.0526392747477702, + "grad_norm": 1.5065604638146801, + "learning_rate": 9.961322439879821e-06, + "loss": 0.5187631845474243, + "step": 3600 + }, + { + "epoch": 1.0529317151630355, + "grad_norm": 1.5837365707911437, + "learning_rate": 9.95648777368188e-06, + "loss": 0.5990081429481506, + "step": 3601 + }, + { + "epoch": 1.0532241555783008, + "grad_norm": 1.5943954940503307, + "learning_rate": 9.951653117654653e-06, + "loss": 0.5926306843757629, + "step": 3602 + }, + { + "epoch": 1.0535165959935664, + "grad_norm": 1.5828616151591308, + "learning_rate": 9.946818472928215e-06, + "loss": 0.5294582843780518, + "step": 3603 + }, + { + "epoch": 1.0538090364088317, + "grad_norm": 1.4492789926079117, + "learning_rate": 9.941983840632637e-06, + "loss": 0.5442140102386475, + "step": 3604 + }, + { + "epoch": 1.054101476824097, + "grad_norm": 1.5960181258924353, + "learning_rate": 9.937149221897984e-06, + "loss": 0.5888028740882874, + "step": 3605 + }, + { + "epoch": 1.0543939172393624, + "grad_norm": 1.6823030520405429, + "learning_rate": 9.93231461785432e-06, + "loss": 0.7545796632766724, + "step": 3606 + }, + { + "epoch": 1.054686357654628, + "grad_norm": 1.4193397986001617, + "learning_rate": 9.9274800296317e-06, + "loss": 0.4850383996963501, + "step": 3607 + }, + { + "epoch": 1.0549787980698933, + "grad_norm": 1.7761903590602732, + "learning_rate": 9.922645458360182e-06, + "loss": 0.5658243894577026, + "step": 3608 + }, + { + "epoch": 1.0552712384851586, + "grad_norm": 1.913627443584159, + "learning_rate": 9.917810905169818e-06, + "loss": 0.6526712775230408, + "step": 3609 + }, + { + "epoch": 1.0555636789004241, + "grad_norm": 1.7132894383948376, + "learning_rate": 9.912976371190657e-06, + "loss": 0.6125987768173218, + "step": 3610 + }, + { + "epoch": 1.0558561193156895, + "grad_norm": 
1.3139938490016692, + "learning_rate": 9.908141857552737e-06, + "loss": 0.40159785747528076, + "step": 3611 + }, + { + "epoch": 1.0561485597309548, + "grad_norm": 1.7052081125083998, + "learning_rate": 9.903307365386103e-06, + "loss": 0.6628924608230591, + "step": 3612 + }, + { + "epoch": 1.05644100014622, + "grad_norm": 1.638888923278887, + "learning_rate": 9.898472895820783e-06, + "loss": 0.6083816289901733, + "step": 3613 + }, + { + "epoch": 1.0567334405614857, + "grad_norm": 1.564812875636552, + "learning_rate": 9.893638449986806e-06, + "loss": 0.5349488854408264, + "step": 3614 + }, + { + "epoch": 1.057025880976751, + "grad_norm": 1.5340813216184335, + "learning_rate": 9.888804029014194e-06, + "loss": 0.6119222044944763, + "step": 3615 + }, + { + "epoch": 1.0573183213920163, + "grad_norm": 1.367693459120948, + "learning_rate": 9.883969634032964e-06, + "loss": 0.531359851360321, + "step": 3616 + }, + { + "epoch": 1.0576107618072819, + "grad_norm": 1.6344237981695606, + "learning_rate": 9.879135266173127e-06, + "loss": 0.6604791879653931, + "step": 3617 + }, + { + "epoch": 1.0579032022225472, + "grad_norm": 1.4352324880813543, + "learning_rate": 9.874300926564689e-06, + "loss": 0.4691445231437683, + "step": 3618 + }, + { + "epoch": 1.0581956426378125, + "grad_norm": 1.2910646539258182, + "learning_rate": 9.869466616337642e-06, + "loss": 0.5690087080001831, + "step": 3619 + }, + { + "epoch": 1.0584880830530778, + "grad_norm": 1.403700057828388, + "learning_rate": 9.86463233662198e-06, + "loss": 0.5426729917526245, + "step": 3620 + }, + { + "epoch": 1.0587805234683434, + "grad_norm": 1.578075476325045, + "learning_rate": 9.859798088547687e-06, + "loss": 0.5640411376953125, + "step": 3621 + }, + { + "epoch": 1.0590729638836087, + "grad_norm": 1.4838032713556162, + "learning_rate": 9.854963873244738e-06, + "loss": 0.6724091172218323, + "step": 3622 + }, + { + "epoch": 1.059365404298874, + "grad_norm": 1.4145337335983883, + "learning_rate": 9.850129691843105e-06, + 
"loss": 0.5448887348175049, + "step": 3623 + }, + { + "epoch": 1.0596578447141396, + "grad_norm": 1.5190623574509117, + "learning_rate": 9.845295545472742e-06, + "loss": 0.5555344820022583, + "step": 3624 + }, + { + "epoch": 1.059950285129405, + "grad_norm": 1.6879154347320564, + "learning_rate": 9.840461435263604e-06, + "loss": 0.5053969621658325, + "step": 3625 + }, + { + "epoch": 1.0602427255446703, + "grad_norm": 1.5675488432589333, + "learning_rate": 9.835627362345636e-06, + "loss": 0.5866390466690063, + "step": 3626 + }, + { + "epoch": 1.0605351659599356, + "grad_norm": 1.81247497722172, + "learning_rate": 9.830793327848773e-06, + "loss": 0.5936717987060547, + "step": 3627 + }, + { + "epoch": 1.0608276063752011, + "grad_norm": 1.5536122437945554, + "learning_rate": 9.82595933290294e-06, + "loss": 0.6009070873260498, + "step": 3628 + }, + { + "epoch": 1.0611200467904665, + "grad_norm": 1.588445125911092, + "learning_rate": 9.821125378638059e-06, + "loss": 0.5361435413360596, + "step": 3629 + }, + { + "epoch": 1.0614124872057318, + "grad_norm": 1.4856331412797505, + "learning_rate": 9.816291466184025e-06, + "loss": 0.5763939619064331, + "step": 3630 + }, + { + "epoch": 1.061704927620997, + "grad_norm": 1.618308780160016, + "learning_rate": 9.81145759667075e-06, + "loss": 0.57512366771698, + "step": 3631 + }, + { + "epoch": 1.0619973680362627, + "grad_norm": 1.4990484363196022, + "learning_rate": 9.806623771228115e-06, + "loss": 0.6144367456436157, + "step": 3632 + }, + { + "epoch": 1.062289808451528, + "grad_norm": 1.5222649609215075, + "learning_rate": 9.801789990985997e-06, + "loss": 0.5715698003768921, + "step": 3633 + }, + { + "epoch": 1.0625822488667933, + "grad_norm": 1.3438421364889925, + "learning_rate": 9.796956257074263e-06, + "loss": 0.632681131362915, + "step": 3634 + }, + { + "epoch": 1.0628746892820589, + "grad_norm": 1.2996961363437054, + "learning_rate": 9.79212257062277e-06, + "loss": 0.5362547636032104, + "step": 3635 + }, + { + "epoch": 
1.0631671296973242, + "grad_norm": 1.2451948790215157, + "learning_rate": 9.787288932761361e-06, + "loss": 0.553846538066864, + "step": 3636 + }, + { + "epoch": 1.0634595701125895, + "grad_norm": 2.0033616068213456, + "learning_rate": 9.782455344619871e-06, + "loss": 0.7200362682342529, + "step": 3637 + }, + { + "epoch": 1.063752010527855, + "grad_norm": 1.5986858016901493, + "learning_rate": 9.777621807328126e-06, + "loss": 0.5544596910476685, + "step": 3638 + }, + { + "epoch": 1.0640444509431204, + "grad_norm": 1.9336329750915207, + "learning_rate": 9.772788322015926e-06, + "loss": 0.687321126461029, + "step": 3639 + }, + { + "epoch": 1.0643368913583857, + "grad_norm": 1.4658162923896687, + "learning_rate": 9.767954889813076e-06, + "loss": 0.4986167550086975, + "step": 3640 + }, + { + "epoch": 1.064629331773651, + "grad_norm": 1.6835767903522258, + "learning_rate": 9.763121511849358e-06, + "loss": 0.5021307468414307, + "step": 3641 + }, + { + "epoch": 1.0649217721889166, + "grad_norm": 1.6084332451713093, + "learning_rate": 9.758288189254548e-06, + "loss": 0.5542711019515991, + "step": 3642 + }, + { + "epoch": 1.065214212604182, + "grad_norm": 1.4567212868909125, + "learning_rate": 9.753454923158407e-06, + "loss": 0.5161126852035522, + "step": 3643 + }, + { + "epoch": 1.0655066530194472, + "grad_norm": 1.3588587385016027, + "learning_rate": 9.748621714690674e-06, + "loss": 0.6041361093521118, + "step": 3644 + }, + { + "epoch": 1.0657990934347126, + "grad_norm": 1.5312936542968558, + "learning_rate": 9.74378856498109e-06, + "loss": 0.5252672433853149, + "step": 3645 + }, + { + "epoch": 1.0660915338499781, + "grad_norm": 1.508976518247356, + "learning_rate": 9.738955475159369e-06, + "loss": 0.5198208093643188, + "step": 3646 + }, + { + "epoch": 1.0663839742652435, + "grad_norm": 1.617831688267231, + "learning_rate": 9.734122446355219e-06, + "loss": 0.5547968149185181, + "step": 3647 + }, + { + "epoch": 1.0666764146805088, + "grad_norm": 1.3192996989880752, + 
"learning_rate": 9.72928947969833e-06, + "loss": 0.5854370594024658, + "step": 3648 + }, + { + "epoch": 1.0669688550957743, + "grad_norm": 1.4612935433441103, + "learning_rate": 9.724456576318383e-06, + "loss": 0.5199173092842102, + "step": 3649 + }, + { + "epoch": 1.0672612955110397, + "grad_norm": 1.5597306303032106, + "learning_rate": 9.71962373734503e-06, + "loss": 0.49684566259384155, + "step": 3650 + }, + { + "epoch": 1.067553735926305, + "grad_norm": 1.5081407431370675, + "learning_rate": 9.714790963907927e-06, + "loss": 0.593805193901062, + "step": 3651 + }, + { + "epoch": 1.0678461763415703, + "grad_norm": 1.6501383657240702, + "learning_rate": 9.7099582571367e-06, + "loss": 0.5524622201919556, + "step": 3652 + }, + { + "epoch": 1.0681386167568359, + "grad_norm": 1.589706723326761, + "learning_rate": 9.70512561816097e-06, + "loss": 0.5796955227851868, + "step": 3653 + }, + { + "epoch": 1.0684310571721012, + "grad_norm": 1.6252059263075247, + "learning_rate": 9.700293048110335e-06, + "loss": 0.5470535159111023, + "step": 3654 + }, + { + "epoch": 1.0687234975873665, + "grad_norm": 1.180447413588476, + "learning_rate": 9.695460548114374e-06, + "loss": 0.5438790321350098, + "step": 3655 + }, + { + "epoch": 1.069015938002632, + "grad_norm": 1.5271792603913512, + "learning_rate": 9.69062811930266e-06, + "loss": 0.6324823498725891, + "step": 3656 + }, + { + "epoch": 1.0693083784178974, + "grad_norm": 1.5347219744388463, + "learning_rate": 9.68579576280474e-06, + "loss": 0.5261266231536865, + "step": 3657 + }, + { + "epoch": 1.0696008188331627, + "grad_norm": 1.408009396375569, + "learning_rate": 9.680963479750152e-06, + "loss": 0.49827292561531067, + "step": 3658 + }, + { + "epoch": 1.069893259248428, + "grad_norm": 1.8715423798930795, + "learning_rate": 9.67613127126841e-06, + "loss": 0.5273935794830322, + "step": 3659 + }, + { + "epoch": 1.0701856996636936, + "grad_norm": 1.5578682729768194, + "learning_rate": 9.671299138489017e-06, + "loss": 
0.5816709995269775, + "step": 3660 + }, + { + "epoch": 1.070478140078959, + "grad_norm": 1.7016426471813102, + "learning_rate": 9.66646708254145e-06, + "loss": 0.5591616630554199, + "step": 3661 + }, + { + "epoch": 1.0707705804942242, + "grad_norm": 1.5738449439513973, + "learning_rate": 9.661635104555172e-06, + "loss": 0.581566572189331, + "step": 3662 + }, + { + "epoch": 1.0710630209094898, + "grad_norm": 1.5518333497561696, + "learning_rate": 9.656803205659632e-06, + "loss": 0.5339047312736511, + "step": 3663 + }, + { + "epoch": 1.0713554613247551, + "grad_norm": 1.6271916881343873, + "learning_rate": 9.651971386984258e-06, + "loss": 0.5200103521347046, + "step": 3664 + }, + { + "epoch": 1.0716479017400204, + "grad_norm": 1.6521270716003156, + "learning_rate": 9.647139649658454e-06, + "loss": 0.7201805114746094, + "step": 3665 + }, + { + "epoch": 1.0719403421552858, + "grad_norm": 1.534541270100013, + "learning_rate": 9.642307994811614e-06, + "loss": 0.4801551103591919, + "step": 3666 + }, + { + "epoch": 1.0722327825705513, + "grad_norm": 1.5215862158184845, + "learning_rate": 9.637476423573106e-06, + "loss": 0.5809728503227234, + "step": 3667 + }, + { + "epoch": 1.0725252229858167, + "grad_norm": 1.6423129831570165, + "learning_rate": 9.632644937072277e-06, + "loss": 0.6493573188781738, + "step": 3668 + }, + { + "epoch": 1.072817663401082, + "grad_norm": 1.5984538738730298, + "learning_rate": 9.627813536438461e-06, + "loss": 0.5858349800109863, + "step": 3669 + }, + { + "epoch": 1.0731101038163473, + "grad_norm": 1.5154205099747375, + "learning_rate": 9.622982222800968e-06, + "loss": 0.604835033416748, + "step": 3670 + }, + { + "epoch": 1.0734025442316129, + "grad_norm": 1.6814842296922758, + "learning_rate": 9.618150997289091e-06, + "loss": 0.6168441772460938, + "step": 3671 + }, + { + "epoch": 1.0736949846468782, + "grad_norm": 1.4221905571438933, + "learning_rate": 9.613319861032093e-06, + "loss": 0.5297094583511353, + "step": 3672 + }, + { + "epoch": 
1.0739874250621435, + "grad_norm": 1.4440813284349416, + "learning_rate": 9.608488815159226e-06, + "loss": 0.513571560382843, + "step": 3673 + }, + { + "epoch": 1.074279865477409, + "grad_norm": 1.4202335692197015, + "learning_rate": 9.603657860799721e-06, + "loss": 0.4383837580680847, + "step": 3674 + }, + { + "epoch": 1.0745723058926744, + "grad_norm": 1.660966167075539, + "learning_rate": 9.59882699908278e-06, + "loss": 0.5428420305252075, + "step": 3675 + }, + { + "epoch": 1.0748647463079397, + "grad_norm": 1.331252403406651, + "learning_rate": 9.593996231137587e-06, + "loss": 0.5193662047386169, + "step": 3676 + }, + { + "epoch": 1.0751571867232053, + "grad_norm": 1.1890998376752542, + "learning_rate": 9.589165558093311e-06, + "loss": 0.47949904203414917, + "step": 3677 + }, + { + "epoch": 1.0754496271384706, + "grad_norm": 1.4440336102087743, + "learning_rate": 9.584334981079085e-06, + "loss": 0.5092326402664185, + "step": 3678 + }, + { + "epoch": 1.075742067553736, + "grad_norm": 1.642845621448486, + "learning_rate": 9.579504501224028e-06, + "loss": 0.6627280712127686, + "step": 3679 + }, + { + "epoch": 1.0760345079690012, + "grad_norm": 1.4633415466571795, + "learning_rate": 9.57467411965724e-06, + "loss": 0.45087775588035583, + "step": 3680 + }, + { + "epoch": 1.0763269483842668, + "grad_norm": 1.5441336288481917, + "learning_rate": 9.569843837507788e-06, + "loss": 0.5745380520820618, + "step": 3681 + }, + { + "epoch": 1.0766193887995321, + "grad_norm": 1.4663672637613454, + "learning_rate": 9.565013655904728e-06, + "loss": 0.4410436749458313, + "step": 3682 + }, + { + "epoch": 1.0769118292147974, + "grad_norm": 1.5197962338342057, + "learning_rate": 9.560183575977079e-06, + "loss": 0.4991244375705719, + "step": 3683 + }, + { + "epoch": 1.0772042696300628, + "grad_norm": 1.760205368894331, + "learning_rate": 9.555353598853842e-06, + "loss": 0.6316145658493042, + "step": 3684 + }, + { + "epoch": 1.0774967100453283, + "grad_norm": 1.7400994246729, + 
"learning_rate": 9.550523725664e-06, + "loss": 0.5593908429145813, + "step": 3685 + }, + { + "epoch": 1.0777891504605936, + "grad_norm": 1.360696277932948, + "learning_rate": 9.545693957536503e-06, + "loss": 0.5491319894790649, + "step": 3686 + }, + { + "epoch": 1.078081590875859, + "grad_norm": 1.6733496726210937, + "learning_rate": 9.540864295600282e-06, + "loss": 0.6299821138381958, + "step": 3687 + }, + { + "epoch": 1.0783740312911245, + "grad_norm": 1.584478567774571, + "learning_rate": 9.536034740984244e-06, + "loss": 0.5673841238021851, + "step": 3688 + }, + { + "epoch": 1.0786664717063899, + "grad_norm": 1.2029070866459273, + "learning_rate": 9.53120529481726e-06, + "loss": 0.45966464281082153, + "step": 3689 + }, + { + "epoch": 1.0789589121216552, + "grad_norm": 1.5763188044346095, + "learning_rate": 9.526375958228191e-06, + "loss": 0.5831631422042847, + "step": 3690 + }, + { + "epoch": 1.0792513525369205, + "grad_norm": 1.6299976133727174, + "learning_rate": 9.52154673234586e-06, + "loss": 0.5456256866455078, + "step": 3691 + }, + { + "epoch": 1.079543792952186, + "grad_norm": 1.4868906970264604, + "learning_rate": 9.516717618299069e-06, + "loss": 0.46428292989730835, + "step": 3692 + }, + { + "epoch": 1.0798362333674514, + "grad_norm": 1.4498481381133475, + "learning_rate": 9.511888617216602e-06, + "loss": 0.47320839762687683, + "step": 3693 + }, + { + "epoch": 1.0801286737827167, + "grad_norm": 1.4932376641022789, + "learning_rate": 9.507059730227199e-06, + "loss": 0.5205492973327637, + "step": 3694 + }, + { + "epoch": 1.0804211141979823, + "grad_norm": 1.631704411581211, + "learning_rate": 9.502230958459587e-06, + "loss": 0.42696553468704224, + "step": 3695 + }, + { + "epoch": 1.0807135546132476, + "grad_norm": 1.5001123816983175, + "learning_rate": 9.497402303042463e-06, + "loss": 0.5147116780281067, + "step": 3696 + }, + { + "epoch": 1.081005995028513, + "grad_norm": 1.38029323867701, + "learning_rate": 9.492573765104494e-06, + "loss": 
0.5080294609069824, + "step": 3697 + }, + { + "epoch": 1.0812984354437782, + "grad_norm": 1.6652094239637947, + "learning_rate": 9.487745345774323e-06, + "loss": 0.6228866577148438, + "step": 3698 + }, + { + "epoch": 1.0815908758590438, + "grad_norm": 1.5822778586922481, + "learning_rate": 9.482917046180563e-06, + "loss": 0.5560915470123291, + "step": 3699 + }, + { + "epoch": 1.0818833162743091, + "grad_norm": 1.5535091238731367, + "learning_rate": 9.4780888674518e-06, + "loss": 0.5245859622955322, + "step": 3700 + }, + { + "epoch": 1.0821757566895744, + "grad_norm": 1.5051094804368905, + "learning_rate": 9.47326081071659e-06, + "loss": 0.6462790966033936, + "step": 3701 + }, + { + "epoch": 1.08246819710484, + "grad_norm": 1.5924758840128848, + "learning_rate": 9.468432877103462e-06, + "loss": 0.5196692943572998, + "step": 3702 + }, + { + "epoch": 1.0827606375201053, + "grad_norm": 1.7568328506180717, + "learning_rate": 9.463605067740917e-06, + "loss": 0.5487779974937439, + "step": 3703 + }, + { + "epoch": 1.0830530779353706, + "grad_norm": 1.6433197945872438, + "learning_rate": 9.458777383757428e-06, + "loss": 0.5471592545509338, + "step": 3704 + }, + { + "epoch": 1.083345518350636, + "grad_norm": 1.7295248979937683, + "learning_rate": 9.453949826281436e-06, + "loss": 0.6927378177642822, + "step": 3705 + }, + { + "epoch": 1.0836379587659015, + "grad_norm": 1.645450906929874, + "learning_rate": 9.449122396441344e-06, + "loss": 0.569003164768219, + "step": 3706 + }, + { + "epoch": 1.0839303991811668, + "grad_norm": 1.5204128580175535, + "learning_rate": 9.444295095365549e-06, + "loss": 0.5655964612960815, + "step": 3707 + }, + { + "epoch": 1.0842228395964322, + "grad_norm": 1.5653417821245283, + "learning_rate": 9.439467924182397e-06, + "loss": 0.6223032474517822, + "step": 3708 + }, + { + "epoch": 1.0845152800116975, + "grad_norm": 1.8058201614843348, + "learning_rate": 9.43464088402021e-06, + "loss": 0.6553555727005005, + "step": 3709 + }, + { + "epoch": 
1.084807720426963, + "grad_norm": 1.7065419655088354, + "learning_rate": 9.429813976007277e-06, + "loss": 0.534509539604187, + "step": 3710 + }, + { + "epoch": 1.0851001608422284, + "grad_norm": 1.7341944929762452, + "learning_rate": 9.42498720127186e-06, + "loss": 0.5801417827606201, + "step": 3711 + }, + { + "epoch": 1.0853926012574937, + "grad_norm": 1.4311879630985456, + "learning_rate": 9.42016056094219e-06, + "loss": 0.47260361909866333, + "step": 3712 + }, + { + "epoch": 1.0856850416727593, + "grad_norm": 1.5640804855296242, + "learning_rate": 9.415334056146464e-06, + "loss": 0.5924841165542603, + "step": 3713 + }, + { + "epoch": 1.0859774820880246, + "grad_norm": 1.7346051575584198, + "learning_rate": 9.410507688012847e-06, + "loss": 0.6029725074768066, + "step": 3714 + }, + { + "epoch": 1.08626992250329, + "grad_norm": 1.6762909361099274, + "learning_rate": 9.405681457669472e-06, + "loss": 0.5838413834571838, + "step": 3715 + }, + { + "epoch": 1.0865623629185555, + "grad_norm": 1.277586165055191, + "learning_rate": 9.400855366244445e-06, + "loss": 0.4739546775817871, + "step": 3716 + }, + { + "epoch": 1.0868548033338208, + "grad_norm": 1.5391172094714582, + "learning_rate": 9.396029414865832e-06, + "loss": 0.4870055913925171, + "step": 3717 + }, + { + "epoch": 1.0871472437490861, + "grad_norm": 1.4254039758246118, + "learning_rate": 9.39120360466167e-06, + "loss": 0.5572132468223572, + "step": 3718 + }, + { + "epoch": 1.0874396841643514, + "grad_norm": 1.6824352313774058, + "learning_rate": 9.386377936759966e-06, + "loss": 0.5601439476013184, + "step": 3719 + }, + { + "epoch": 1.087732124579617, + "grad_norm": 1.4548205788512927, + "learning_rate": 9.38155241228869e-06, + "loss": 0.4551504850387573, + "step": 3720 + }, + { + "epoch": 1.0880245649948823, + "grad_norm": 1.447968175073075, + "learning_rate": 9.376727032375773e-06, + "loss": 0.5656375885009766, + "step": 3721 + }, + { + "epoch": 1.0883170054101476, + "grad_norm": 1.4767808933411752, + 
"learning_rate": 9.371901798149124e-06, + "loss": 0.5597153902053833, + "step": 3722 + }, + { + "epoch": 1.088609445825413, + "grad_norm": 1.5252235269095387, + "learning_rate": 9.367076710736613e-06, + "loss": 0.5946288108825684, + "step": 3723 + }, + { + "epoch": 1.0889018862406785, + "grad_norm": 1.9924638298376933, + "learning_rate": 9.36225177126607e-06, + "loss": 0.5951449871063232, + "step": 3724 + }, + { + "epoch": 1.0891943266559438, + "grad_norm": 1.7845167649533908, + "learning_rate": 9.3574269808653e-06, + "loss": 0.5755487680435181, + "step": 3725 + }, + { + "epoch": 1.0894867670712092, + "grad_norm": 1.5254834641419546, + "learning_rate": 9.352602340662065e-06, + "loss": 0.5118892788887024, + "step": 3726 + }, + { + "epoch": 1.0897792074864747, + "grad_norm": 1.596558008598135, + "learning_rate": 9.347777851784097e-06, + "loss": 0.5652351975440979, + "step": 3727 + }, + { + "epoch": 1.09007164790174, + "grad_norm": 1.5215560380827415, + "learning_rate": 9.34295351535909e-06, + "loss": 0.624887228012085, + "step": 3728 + }, + { + "epoch": 1.0903640883170054, + "grad_norm": 1.447383452488018, + "learning_rate": 9.338129332514705e-06, + "loss": 0.534363329410553, + "step": 3729 + }, + { + "epoch": 1.0906565287322707, + "grad_norm": 1.477841435635963, + "learning_rate": 9.333305304378565e-06, + "loss": 0.6203521490097046, + "step": 3730 + }, + { + "epoch": 1.0909489691475363, + "grad_norm": 1.7401174715864398, + "learning_rate": 9.328481432078254e-06, + "loss": 0.64560866355896, + "step": 3731 + }, + { + "epoch": 1.0912414095628016, + "grad_norm": 1.5841972191853104, + "learning_rate": 9.323657716741327e-06, + "loss": 0.5389514565467834, + "step": 3732 + }, + { + "epoch": 1.091533849978067, + "grad_norm": 1.4621625707128454, + "learning_rate": 9.318834159495295e-06, + "loss": 0.5245277881622314, + "step": 3733 + }, + { + "epoch": 1.0918262903933325, + "grad_norm": 1.6486990138865423, + "learning_rate": 9.314010761467637e-06, + "loss": 0.603967010974884, + 
"step": 3734 + }, + { + "epoch": 1.0921187308085978, + "grad_norm": 1.7983997195133608, + "learning_rate": 9.309187523785794e-06, + "loss": 0.5426995754241943, + "step": 3735 + }, + { + "epoch": 1.092411171223863, + "grad_norm": 1.6248514181798874, + "learning_rate": 9.30436444757717e-06, + "loss": 0.5400352478027344, + "step": 3736 + }, + { + "epoch": 1.0927036116391284, + "grad_norm": 1.5009984854869718, + "learning_rate": 9.299541533969121e-06, + "loss": 0.5016524195671082, + "step": 3737 + }, + { + "epoch": 1.092996052054394, + "grad_norm": 1.7929437285814107, + "learning_rate": 9.294718784088982e-06, + "loss": 0.526217520236969, + "step": 3738 + }, + { + "epoch": 1.0932884924696593, + "grad_norm": 1.7293517567202035, + "learning_rate": 9.289896199064038e-06, + "loss": 0.525063157081604, + "step": 3739 + }, + { + "epoch": 1.0935809328849246, + "grad_norm": 1.269101628653969, + "learning_rate": 9.285073780021541e-06, + "loss": 0.3792048692703247, + "step": 3740 + }, + { + "epoch": 1.0938733733001902, + "grad_norm": 1.4416380651624152, + "learning_rate": 9.280251528088702e-06, + "loss": 0.5326308012008667, + "step": 3741 + }, + { + "epoch": 1.0941658137154555, + "grad_norm": 1.3946561055322027, + "learning_rate": 9.275429444392692e-06, + "loss": 0.5675199627876282, + "step": 3742 + }, + { + "epoch": 1.0944582541307208, + "grad_norm": 1.640552639536372, + "learning_rate": 9.270607530060643e-06, + "loss": 0.6525516510009766, + "step": 3743 + }, + { + "epoch": 1.0947506945459862, + "grad_norm": 1.563647681973335, + "learning_rate": 9.265785786219647e-06, + "loss": 0.6376343369483948, + "step": 3744 + }, + { + "epoch": 1.0950431349612517, + "grad_norm": 1.7701418719133022, + "learning_rate": 9.260964213996763e-06, + "loss": 0.6440377235412598, + "step": 3745 + }, + { + "epoch": 1.095335575376517, + "grad_norm": 1.886853414823259, + "learning_rate": 9.256142814518997e-06, + "loss": 0.5971434116363525, + "step": 3746 + }, + { + "epoch": 1.0956280157917824, + 
"grad_norm": 1.3797760891901851, + "learning_rate": 9.251321588913331e-06, + "loss": 0.5096890330314636, + "step": 3747 + }, + { + "epoch": 1.0959204562070477, + "grad_norm": 1.7099901744739332, + "learning_rate": 9.246500538306686e-06, + "loss": 0.4303498864173889, + "step": 3748 + }, + { + "epoch": 1.0962128966223132, + "grad_norm": 1.5934571510718554, + "learning_rate": 9.241679663825961e-06, + "loss": 0.5484192371368408, + "step": 3749 + }, + { + "epoch": 1.0965053370375786, + "grad_norm": 1.6268147624989107, + "learning_rate": 9.236858966598004e-06, + "loss": 0.6057884693145752, + "step": 3750 + }, + { + "epoch": 1.096797777452844, + "grad_norm": 1.565840426411154, + "learning_rate": 9.232038447749623e-06, + "loss": 0.5261536836624146, + "step": 3751 + }, + { + "epoch": 1.0970902178681095, + "grad_norm": 1.3242416099520606, + "learning_rate": 9.227218108407586e-06, + "loss": 0.470365047454834, + "step": 3752 + }, + { + "epoch": 1.0973826582833748, + "grad_norm": 1.8694075496184692, + "learning_rate": 9.222397949698618e-06, + "loss": 0.6158323287963867, + "step": 3753 + }, + { + "epoch": 1.09767509869864, + "grad_norm": 1.4353847976975904, + "learning_rate": 9.217577972749401e-06, + "loss": 0.582190990447998, + "step": 3754 + }, + { + "epoch": 1.0979675391139057, + "grad_norm": 1.5377732823861585, + "learning_rate": 9.212758178686575e-06, + "loss": 0.4939305782318115, + "step": 3755 + }, + { + "epoch": 1.098259979529171, + "grad_norm": 1.501946006392042, + "learning_rate": 9.207938568636739e-06, + "loss": 0.576829731464386, + "step": 3756 + }, + { + "epoch": 1.0985524199444363, + "grad_norm": 1.582387804664269, + "learning_rate": 9.203119143726445e-06, + "loss": 0.581257164478302, + "step": 3757 + }, + { + "epoch": 1.0988448603597016, + "grad_norm": 1.4501950316688965, + "learning_rate": 9.19829990508221e-06, + "loss": 0.6105127334594727, + "step": 3758 + }, + { + "epoch": 1.0991373007749672, + "grad_norm": 1.7379854400774775, + "learning_rate": 
9.193480853830495e-06, + "loss": 0.5311432480812073, + "step": 3759 + }, + { + "epoch": 1.0994297411902325, + "grad_norm": 1.3707297007944412, + "learning_rate": 9.188661991097726e-06, + "loss": 0.44334596395492554, + "step": 3760 + }, + { + "epoch": 1.0997221816054978, + "grad_norm": 1.6175971035022318, + "learning_rate": 9.183843318010285e-06, + "loss": 0.5795773267745972, + "step": 3761 + }, + { + "epoch": 1.1000146220207632, + "grad_norm": 1.4465404341375856, + "learning_rate": 9.179024835694504e-06, + "loss": 0.619825541973114, + "step": 3762 + }, + { + "epoch": 1.1003070624360287, + "grad_norm": 1.754450237430447, + "learning_rate": 9.174206545276678e-06, + "loss": 0.633934497833252, + "step": 3763 + }, + { + "epoch": 1.100599502851294, + "grad_norm": 1.34560762533496, + "learning_rate": 9.169388447883053e-06, + "loss": 0.48922473192214966, + "step": 3764 + }, + { + "epoch": 1.1008919432665594, + "grad_norm": 1.7340747304342141, + "learning_rate": 9.164570544639825e-06, + "loss": 0.6125025153160095, + "step": 3765 + }, + { + "epoch": 1.101184383681825, + "grad_norm": 1.4327034643571392, + "learning_rate": 9.159752836673154e-06, + "loss": 0.5428078174591064, + "step": 3766 + }, + { + "epoch": 1.1014768240970902, + "grad_norm": 1.4335551572069505, + "learning_rate": 9.154935325109148e-06, + "loss": 0.5848157405853271, + "step": 3767 + }, + { + "epoch": 1.1017692645123556, + "grad_norm": 1.5053579548838565, + "learning_rate": 9.150118011073872e-06, + "loss": 0.5150102376937866, + "step": 3768 + }, + { + "epoch": 1.102061704927621, + "grad_norm": 1.429407171536289, + "learning_rate": 9.145300895693344e-06, + "loss": 0.6106699705123901, + "step": 3769 + }, + { + "epoch": 1.1023541453428864, + "grad_norm": 1.4079938603953852, + "learning_rate": 9.140483980093534e-06, + "loss": 0.5819482803344727, + "step": 3770 + }, + { + "epoch": 1.1026465857581518, + "grad_norm": 1.7060315490040079, + "learning_rate": 9.135667265400369e-06, + "loss": 0.6499812602996826, + "step": 
3771 + }, + { + "epoch": 1.102939026173417, + "grad_norm": 1.520551323323022, + "learning_rate": 9.130850752739724e-06, + "loss": 0.5375189781188965, + "step": 3772 + }, + { + "epoch": 1.1032314665886827, + "grad_norm": 1.5200340564855783, + "learning_rate": 9.12603444323743e-06, + "loss": 0.5582318902015686, + "step": 3773 + }, + { + "epoch": 1.103523907003948, + "grad_norm": 1.6010357553720616, + "learning_rate": 9.121218338019273e-06, + "loss": 0.5549799203872681, + "step": 3774 + }, + { + "epoch": 1.1038163474192133, + "grad_norm": 1.672600820514396, + "learning_rate": 9.116402438210988e-06, + "loss": 0.4942197799682617, + "step": 3775 + }, + { + "epoch": 1.1041087878344786, + "grad_norm": 1.294858704528479, + "learning_rate": 9.11158674493826e-06, + "loss": 0.5039837956428528, + "step": 3776 + }, + { + "epoch": 1.1044012282497442, + "grad_norm": 1.4904483423531274, + "learning_rate": 9.106771259326726e-06, + "loss": 0.49781280755996704, + "step": 3777 + }, + { + "epoch": 1.1046936686650095, + "grad_norm": 1.5058975394537781, + "learning_rate": 9.101955982501981e-06, + "loss": 0.41755813360214233, + "step": 3778 + }, + { + "epoch": 1.1049861090802748, + "grad_norm": 1.3247888444316807, + "learning_rate": 9.097140915589564e-06, + "loss": 0.5605067014694214, + "step": 3779 + }, + { + "epoch": 1.1052785494955404, + "grad_norm": 1.6960736504408462, + "learning_rate": 9.092326059714971e-06, + "loss": 0.6291122436523438, + "step": 3780 + }, + { + "epoch": 1.1055709899108057, + "grad_norm": 1.5309788529424204, + "learning_rate": 9.087511416003636e-06, + "loss": 0.5164260864257812, + "step": 3781 + }, + { + "epoch": 1.105863430326071, + "grad_norm": 1.481065256446166, + "learning_rate": 9.082696985580964e-06, + "loss": 0.5002986192703247, + "step": 3782 + }, + { + "epoch": 1.1061558707413364, + "grad_norm": 1.8553995759252653, + "learning_rate": 9.077882769572295e-06, + "loss": 0.5149055123329163, + "step": 3783 + }, + { + "epoch": 1.106448311156602, + "grad_norm": 
1.4637547819206846, + "learning_rate": 9.073068769102925e-06, + "loss": 0.5375808477401733, + "step": 3784 + }, + { + "epoch": 1.1067407515718672, + "grad_norm": 1.4438276838658128, + "learning_rate": 9.06825498529809e-06, + "loss": 0.5574408173561096, + "step": 3785 + }, + { + "epoch": 1.1070331919871326, + "grad_norm": 1.8566533611842586, + "learning_rate": 9.063441419282989e-06, + "loss": 0.7410034537315369, + "step": 3786 + }, + { + "epoch": 1.107325632402398, + "grad_norm": 1.4780218137550694, + "learning_rate": 9.058628072182759e-06, + "loss": 0.4890757203102112, + "step": 3787 + }, + { + "epoch": 1.1076180728176634, + "grad_norm": 1.449027088222319, + "learning_rate": 9.053814945122496e-06, + "loss": 0.5012304782867432, + "step": 3788 + }, + { + "epoch": 1.1079105132329288, + "grad_norm": 1.6277147220392454, + "learning_rate": 9.049002039227239e-06, + "loss": 0.5235648155212402, + "step": 3789 + }, + { + "epoch": 1.108202953648194, + "grad_norm": 1.5099212526378973, + "learning_rate": 9.044189355621969e-06, + "loss": 0.44732457399368286, + "step": 3790 + }, + { + "epoch": 1.1084953940634596, + "grad_norm": 1.6131396298332503, + "learning_rate": 9.039376895431627e-06, + "loss": 0.5771712064743042, + "step": 3791 + }, + { + "epoch": 1.108787834478725, + "grad_norm": 2.537465666899194, + "learning_rate": 9.034564659781096e-06, + "loss": 0.5361784100532532, + "step": 3792 + }, + { + "epoch": 1.1090802748939903, + "grad_norm": 1.3520934517992165, + "learning_rate": 9.029752649795203e-06, + "loss": 0.5305893421173096, + "step": 3793 + }, + { + "epoch": 1.1093727153092559, + "grad_norm": 1.3303918593615456, + "learning_rate": 9.02494086659873e-06, + "loss": 0.5094715356826782, + "step": 3794 + }, + { + "epoch": 1.1096651557245212, + "grad_norm": 1.4162243148383913, + "learning_rate": 9.020129311316405e-06, + "loss": 0.5406676530838013, + "step": 3795 + }, + { + "epoch": 1.1099575961397865, + "grad_norm": 1.5431545303983976, + "learning_rate": 9.015317985072893e-06, 
+ "loss": 0.5170687437057495, + "step": 3796 + }, + { + "epoch": 1.1102500365550518, + "grad_norm": 1.454438976249235, + "learning_rate": 9.010506888992814e-06, + "loss": 0.4632429778575897, + "step": 3797 + }, + { + "epoch": 1.1105424769703174, + "grad_norm": 1.8257270837662332, + "learning_rate": 9.005696024200734e-06, + "loss": 0.5614180564880371, + "step": 3798 + }, + { + "epoch": 1.1108349173855827, + "grad_norm": 1.5187438448472135, + "learning_rate": 9.000885391821164e-06, + "loss": 0.5660920143127441, + "step": 3799 + }, + { + "epoch": 1.111127357800848, + "grad_norm": 1.616333702810617, + "learning_rate": 8.996074992978558e-06, + "loss": 0.6346436142921448, + "step": 3800 + }, + { + "epoch": 1.1114197982161134, + "grad_norm": 1.2613316779938173, + "learning_rate": 8.991264828797319e-06, + "loss": 0.4295850396156311, + "step": 3801 + }, + { + "epoch": 1.111712238631379, + "grad_norm": 1.4545086499056976, + "learning_rate": 8.986454900401791e-06, + "loss": 0.4797070622444153, + "step": 3802 + }, + { + "epoch": 1.1120046790466442, + "grad_norm": 1.3353593055033692, + "learning_rate": 8.98164520891627e-06, + "loss": 0.4912114143371582, + "step": 3803 + }, + { + "epoch": 1.1122971194619096, + "grad_norm": 1.6135433736276805, + "learning_rate": 8.976835755464988e-06, + "loss": 0.4156647026538849, + "step": 3804 + }, + { + "epoch": 1.1125895598771751, + "grad_norm": 1.6120031027815822, + "learning_rate": 8.97202654117213e-06, + "loss": 0.4527992010116577, + "step": 3805 + }, + { + "epoch": 1.1128820002924404, + "grad_norm": 1.6881758541294942, + "learning_rate": 8.967217567161817e-06, + "loss": 0.5969425439834595, + "step": 3806 + }, + { + "epoch": 1.1131744407077058, + "grad_norm": 1.5313114259080804, + "learning_rate": 8.962408834558116e-06, + "loss": 0.5867633819580078, + "step": 3807 + }, + { + "epoch": 1.113466881122971, + "grad_norm": 1.4924056676350326, + "learning_rate": 8.957600344485042e-06, + "loss": 0.549109697341919, + "step": 3808 + }, + { + 
"epoch": 1.1137593215382366, + "grad_norm": 1.6602567019426782, + "learning_rate": 8.952792098066549e-06, + "loss": 0.6336593627929688, + "step": 3809 + }, + { + "epoch": 1.114051761953502, + "grad_norm": 1.4867429859275132, + "learning_rate": 8.947984096426537e-06, + "loss": 0.5403220653533936, + "step": 3810 + }, + { + "epoch": 1.1143442023687673, + "grad_norm": 1.3422567204959701, + "learning_rate": 8.943176340688846e-06, + "loss": 0.37941914796829224, + "step": 3811 + }, + { + "epoch": 1.1146366427840328, + "grad_norm": 1.7322077540170269, + "learning_rate": 8.938368831977262e-06, + "loss": 0.5509335994720459, + "step": 3812 + }, + { + "epoch": 1.1149290831992982, + "grad_norm": 1.7077554301344111, + "learning_rate": 8.933561571415506e-06, + "loss": 0.5798860788345337, + "step": 3813 + }, + { + "epoch": 1.1152215236145635, + "grad_norm": 1.8693354922278385, + "learning_rate": 8.92875456012725e-06, + "loss": 0.5549412965774536, + "step": 3814 + }, + { + "epoch": 1.1155139640298288, + "grad_norm": 1.5992402094758784, + "learning_rate": 8.9239477992361e-06, + "loss": 0.4707058072090149, + "step": 3815 + }, + { + "epoch": 1.1158064044450944, + "grad_norm": 1.5838333385974708, + "learning_rate": 8.919141289865611e-06, + "loss": 0.4717002511024475, + "step": 3816 + }, + { + "epoch": 1.1160988448603597, + "grad_norm": 1.288572308356885, + "learning_rate": 8.914335033139274e-06, + "loss": 0.48403650522232056, + "step": 3817 + }, + { + "epoch": 1.116391285275625, + "grad_norm": 1.6715157915340426, + "learning_rate": 8.909529030180522e-06, + "loss": 0.48592090606689453, + "step": 3818 + }, + { + "epoch": 1.1166837256908906, + "grad_norm": 1.566157541574177, + "learning_rate": 8.904723282112728e-06, + "loss": 0.5052220225334167, + "step": 3819 + }, + { + "epoch": 1.116976166106156, + "grad_norm": 1.6151321192825796, + "learning_rate": 8.899917790059208e-06, + "loss": 0.7858535051345825, + "step": 3820 + }, + { + "epoch": 1.1172686065214212, + "grad_norm": 
1.8369545909174703, + "learning_rate": 8.895112555143217e-06, + "loss": 0.6768159866333008, + "step": 3821 + }, + { + "epoch": 1.1175610469366866, + "grad_norm": 1.8079763728482598, + "learning_rate": 8.890307578487947e-06, + "loss": 0.5661243200302124, + "step": 3822 + }, + { + "epoch": 1.1178534873519521, + "grad_norm": 1.7067515294047517, + "learning_rate": 8.885502861216535e-06, + "loss": 0.5129438638687134, + "step": 3823 + }, + { + "epoch": 1.1181459277672174, + "grad_norm": 1.5735393429941704, + "learning_rate": 8.880698404452051e-06, + "loss": 0.4813467264175415, + "step": 3824 + }, + { + "epoch": 1.1184383681824828, + "grad_norm": 1.5840908667031388, + "learning_rate": 8.87589420931751e-06, + "loss": 0.5165577530860901, + "step": 3825 + }, + { + "epoch": 1.118730808597748, + "grad_norm": 1.4425390765128903, + "learning_rate": 8.871090276935863e-06, + "loss": 0.47335073351860046, + "step": 3826 + }, + { + "epoch": 1.1190232490130136, + "grad_norm": 1.6934955516318184, + "learning_rate": 8.86628660843e-06, + "loss": 0.4902348518371582, + "step": 3827 + }, + { + "epoch": 1.119315689428279, + "grad_norm": 1.7245920316429901, + "learning_rate": 8.861483204922752e-06, + "loss": 0.5933388471603394, + "step": 3828 + }, + { + "epoch": 1.1196081298435443, + "grad_norm": 1.5505961542425288, + "learning_rate": 8.85668006753688e-06, + "loss": 0.4898201823234558, + "step": 3829 + }, + { + "epoch": 1.1199005702588098, + "grad_norm": 1.3287782031202422, + "learning_rate": 8.851877197395088e-06, + "loss": 0.4745003879070282, + "step": 3830 + }, + { + "epoch": 1.1201930106740752, + "grad_norm": 1.5081067046883336, + "learning_rate": 8.847074595620024e-06, + "loss": 0.5246972441673279, + "step": 3831 + }, + { + "epoch": 1.1204854510893405, + "grad_norm": 1.498399687409688, + "learning_rate": 8.842272263334263e-06, + "loss": 0.5196787714958191, + "step": 3832 + }, + { + "epoch": 1.120777891504606, + "grad_norm": 2.0301798075149446, + "learning_rate": 8.83747020166032e-06, + 
"loss": 0.6721034049987793, + "step": 3833 + }, + { + "epoch": 1.1210703319198714, + "grad_norm": 1.4930580521199184, + "learning_rate": 8.832668411720652e-06, + "loss": 0.5654234886169434, + "step": 3834 + }, + { + "epoch": 1.1213627723351367, + "grad_norm": 1.91001506609742, + "learning_rate": 8.827866894637642e-06, + "loss": 0.7520767450332642, + "step": 3835 + }, + { + "epoch": 1.121655212750402, + "grad_norm": 1.3753523987373926, + "learning_rate": 8.82306565153362e-06, + "loss": 0.43645960092544556, + "step": 3836 + }, + { + "epoch": 1.1219476531656676, + "grad_norm": 1.688542605024225, + "learning_rate": 8.818264683530845e-06, + "loss": 0.5802274942398071, + "step": 3837 + }, + { + "epoch": 1.122240093580933, + "grad_norm": 1.5706370149670577, + "learning_rate": 8.813463991751516e-06, + "loss": 0.5593410134315491, + "step": 3838 + }, + { + "epoch": 1.1225325339961982, + "grad_norm": 1.56949134961986, + "learning_rate": 8.808663577317765e-06, + "loss": 0.6126681566238403, + "step": 3839 + }, + { + "epoch": 1.1228249744114636, + "grad_norm": 1.6396156905409707, + "learning_rate": 8.80386344135166e-06, + "loss": 0.6245180368423462, + "step": 3840 + }, + { + "epoch": 1.123117414826729, + "grad_norm": 1.3541654958690765, + "learning_rate": 8.799063584975201e-06, + "loss": 0.6611473560333252, + "step": 3841 + }, + { + "epoch": 1.1234098552419944, + "grad_norm": 1.2310988629927149, + "learning_rate": 8.79426400931033e-06, + "loss": 0.40020978450775146, + "step": 3842 + }, + { + "epoch": 1.1237022956572598, + "grad_norm": 1.3644507366239775, + "learning_rate": 8.789464715478913e-06, + "loss": 0.4965318441390991, + "step": 3843 + }, + { + "epoch": 1.1239947360725253, + "grad_norm": 1.4326851806590044, + "learning_rate": 8.784665704602758e-06, + "loss": 0.4838374853134155, + "step": 3844 + }, + { + "epoch": 1.1242871764877906, + "grad_norm": 1.389039662475551, + "learning_rate": 8.77986697780361e-06, + "loss": 0.5756508708000183, + "step": 3845 + }, + { + "epoch": 
1.124579616903056, + "grad_norm": 1.484286888056792, + "learning_rate": 8.775068536203132e-06, + "loss": 0.5341511964797974, + "step": 3846 + }, + { + "epoch": 1.1248720573183213, + "grad_norm": 1.5007549282773276, + "learning_rate": 8.77027038092294e-06, + "loss": 0.6239134073257446, + "step": 3847 + }, + { + "epoch": 1.1251644977335868, + "grad_norm": 1.818555508500906, + "learning_rate": 8.765472513084566e-06, + "loss": 0.5642406940460205, + "step": 3848 + }, + { + "epoch": 1.1254569381488522, + "grad_norm": 1.5841401225303304, + "learning_rate": 8.760674933809488e-06, + "loss": 0.5242771506309509, + "step": 3849 + }, + { + "epoch": 1.1257493785641175, + "grad_norm": 1.5608207104848433, + "learning_rate": 8.755877644219108e-06, + "loss": 0.5205737352371216, + "step": 3850 + }, + { + "epoch": 1.126041818979383, + "grad_norm": 1.6760248633979633, + "learning_rate": 8.751080645434768e-06, + "loss": 0.5005168318748474, + "step": 3851 + }, + { + "epoch": 1.1263342593946484, + "grad_norm": 1.539730717074913, + "learning_rate": 8.74628393857773e-06, + "loss": 0.44978275895118713, + "step": 3852 + }, + { + "epoch": 1.1266266998099137, + "grad_norm": 1.3558571119447433, + "learning_rate": 8.741487524769198e-06, + "loss": 0.43631571531295776, + "step": 3853 + }, + { + "epoch": 1.126919140225179, + "grad_norm": 1.220093214706796, + "learning_rate": 8.736691405130306e-06, + "loss": 0.4196016788482666, + "step": 3854 + }, + { + "epoch": 1.1272115806404446, + "grad_norm": 1.6296314839875645, + "learning_rate": 8.731895580782118e-06, + "loss": 0.6389856338500977, + "step": 3855 + }, + { + "epoch": 1.12750402105571, + "grad_norm": 1.726359030533187, + "learning_rate": 8.72710005284563e-06, + "loss": 0.5465584993362427, + "step": 3856 + }, + { + "epoch": 1.1277964614709752, + "grad_norm": 1.469192647678069, + "learning_rate": 8.722304822441757e-06, + "loss": 0.5513765811920166, + "step": 3857 + }, + { + "epoch": 1.1280889018862408, + "grad_norm": 1.5516390698184288, + 
"learning_rate": 8.717509890691369e-06, + "loss": 0.6984349489212036, + "step": 3858 + }, + { + "epoch": 1.128381342301506, + "grad_norm": 1.6096511723205336, + "learning_rate": 8.712715258715248e-06, + "loss": 0.5311027765274048, + "step": 3859 + }, + { + "epoch": 1.1286737827167714, + "grad_norm": 1.5113126886002746, + "learning_rate": 8.707920927634105e-06, + "loss": 0.4598672091960907, + "step": 3860 + }, + { + "epoch": 1.1289662231320368, + "grad_norm": 1.8202302284240548, + "learning_rate": 8.703126898568591e-06, + "loss": 0.6177612543106079, + "step": 3861 + }, + { + "epoch": 1.1292586635473023, + "grad_norm": 1.8043964275332298, + "learning_rate": 8.69833317263928e-06, + "loss": 0.6442389488220215, + "step": 3862 + }, + { + "epoch": 1.1295511039625676, + "grad_norm": 1.4793698971631246, + "learning_rate": 8.693539750966672e-06, + "loss": 0.5925737023353577, + "step": 3863 + }, + { + "epoch": 1.129843544377833, + "grad_norm": 1.3730688779887357, + "learning_rate": 8.688746634671207e-06, + "loss": 0.46009114384651184, + "step": 3864 + }, + { + "epoch": 1.1301359847930983, + "grad_norm": 1.6065358861472605, + "learning_rate": 8.683953824873246e-06, + "loss": 0.5438460111618042, + "step": 3865 + }, + { + "epoch": 1.1304284252083638, + "grad_norm": 1.6436751318662282, + "learning_rate": 8.679161322693073e-06, + "loss": 0.5355101823806763, + "step": 3866 + }, + { + "epoch": 1.1307208656236292, + "grad_norm": 1.5636124606467166, + "learning_rate": 8.67436912925091e-06, + "loss": 0.4494459629058838, + "step": 3867 + }, + { + "epoch": 1.1310133060388945, + "grad_norm": 1.5118698872161136, + "learning_rate": 8.669577245666905e-06, + "loss": 0.5828550457954407, + "step": 3868 + }, + { + "epoch": 1.13130574645416, + "grad_norm": 1.43455699505813, + "learning_rate": 8.664785673061127e-06, + "loss": 0.4956590235233307, + "step": 3869 + }, + { + "epoch": 1.1315981868694254, + "grad_norm": 1.5146504272638424, + "learning_rate": 8.659994412553582e-06, + "loss": 
0.5447779893875122, + "step": 3870 + }, + { + "epoch": 1.1318906272846907, + "grad_norm": 1.6512585184867246, + "learning_rate": 8.655203465264196e-06, + "loss": 0.6275361776351929, + "step": 3871 + }, + { + "epoch": 1.1321830676999562, + "grad_norm": 1.564521343459816, + "learning_rate": 8.650412832312823e-06, + "loss": 0.47899991273880005, + "step": 3872 + }, + { + "epoch": 1.1324755081152216, + "grad_norm": 1.1875547206815094, + "learning_rate": 8.645622514819243e-06, + "loss": 0.3356127142906189, + "step": 3873 + }, + { + "epoch": 1.132767948530487, + "grad_norm": 1.4442401622701144, + "learning_rate": 8.640832513903168e-06, + "loss": 0.48855727910995483, + "step": 3874 + }, + { + "epoch": 1.1330603889457522, + "grad_norm": 1.4528018972795056, + "learning_rate": 8.636042830684227e-06, + "loss": 0.46642380952835083, + "step": 3875 + }, + { + "epoch": 1.1333528293610178, + "grad_norm": 1.8421536572224761, + "learning_rate": 8.631253466281984e-06, + "loss": 0.6179598569869995, + "step": 3876 + }, + { + "epoch": 1.133645269776283, + "grad_norm": 1.6762180368596016, + "learning_rate": 8.626464421815919e-06, + "loss": 0.6361704468727112, + "step": 3877 + }, + { + "epoch": 1.1339377101915484, + "grad_norm": 1.574443230288469, + "learning_rate": 8.621675698405446e-06, + "loss": 0.6243701577186584, + "step": 3878 + }, + { + "epoch": 1.1342301506068138, + "grad_norm": 1.6113304231540622, + "learning_rate": 8.616887297169895e-06, + "loss": 0.5402215123176575, + "step": 3879 + }, + { + "epoch": 1.1345225910220793, + "grad_norm": 1.6390191276422172, + "learning_rate": 8.61209921922853e-06, + "loss": 0.6050009727478027, + "step": 3880 + }, + { + "epoch": 1.1348150314373446, + "grad_norm": 1.6106875040973343, + "learning_rate": 8.607311465700534e-06, + "loss": 0.5705801248550415, + "step": 3881 + }, + { + "epoch": 1.13510747185261, + "grad_norm": 1.5380461037587805, + "learning_rate": 8.602524037705018e-06, + "loss": 0.5467248558998108, + "step": 3882 + }, + { + "epoch": 
1.1353999122678755, + "grad_norm": 1.7121108266736746, + "learning_rate": 8.597736936361007e-06, + "loss": 0.5903012752532959, + "step": 3883 + }, + { + "epoch": 1.1356923526831408, + "grad_norm": 1.6218348221942134, + "learning_rate": 8.592950162787463e-06, + "loss": 0.6034090518951416, + "step": 3884 + }, + { + "epoch": 1.1359847930984062, + "grad_norm": 1.3056254339924755, + "learning_rate": 8.588163718103264e-06, + "loss": 0.4282987117767334, + "step": 3885 + }, + { + "epoch": 1.1362772335136717, + "grad_norm": 1.5127630417626896, + "learning_rate": 8.583377603427212e-06, + "loss": 0.47374194860458374, + "step": 3886 + }, + { + "epoch": 1.136569673928937, + "grad_norm": 1.5841934947134406, + "learning_rate": 8.578591819878033e-06, + "loss": 0.43954724073410034, + "step": 3887 + }, + { + "epoch": 1.1368621143442024, + "grad_norm": 1.4278799477191386, + "learning_rate": 8.573806368574372e-06, + "loss": 0.4731065034866333, + "step": 3888 + }, + { + "epoch": 1.1371545547594677, + "grad_norm": 1.4184800646863156, + "learning_rate": 8.5690212506348e-06, + "loss": 0.5241256356239319, + "step": 3889 + }, + { + "epoch": 1.1374469951747332, + "grad_norm": 1.631021419370316, + "learning_rate": 8.56423646717781e-06, + "loss": 0.5823307037353516, + "step": 3890 + }, + { + "epoch": 1.1377394355899986, + "grad_norm": 1.654201038343883, + "learning_rate": 8.55945201932182e-06, + "loss": 0.5360631346702576, + "step": 3891 + }, + { + "epoch": 1.138031876005264, + "grad_norm": 1.5773624073994579, + "learning_rate": 8.554667908185158e-06, + "loss": 0.5227797627449036, + "step": 3892 + }, + { + "epoch": 1.1383243164205292, + "grad_norm": 1.7414634806893152, + "learning_rate": 8.549884134886089e-06, + "loss": 0.6232806444168091, + "step": 3893 + }, + { + "epoch": 1.1386167568357948, + "grad_norm": 1.567438316916472, + "learning_rate": 8.545100700542782e-06, + "loss": 0.6697877049446106, + "step": 3894 + }, + { + "epoch": 1.13890919725106, + "grad_norm": 1.5115348655280192, + 
"learning_rate": 8.540317606273343e-06, + "loss": 0.6348206400871277, + "step": 3895 + }, + { + "epoch": 1.1392016376663254, + "grad_norm": 1.5453537409734852, + "learning_rate": 8.535534853195786e-06, + "loss": 0.5578476190567017, + "step": 3896 + }, + { + "epoch": 1.139494078081591, + "grad_norm": 1.6674507953444782, + "learning_rate": 8.530752442428055e-06, + "loss": 0.6439946889877319, + "step": 3897 + }, + { + "epoch": 1.1397865184968563, + "grad_norm": 1.5233786551580588, + "learning_rate": 8.525970375088006e-06, + "loss": 0.5292261242866516, + "step": 3898 + }, + { + "epoch": 1.1400789589121216, + "grad_norm": 1.6870433422022266, + "learning_rate": 8.521188652293421e-06, + "loss": 0.5836480855941772, + "step": 3899 + }, + { + "epoch": 1.140371399327387, + "grad_norm": 1.731988866581243, + "learning_rate": 8.516407275161998e-06, + "loss": 0.5166354775428772, + "step": 3900 + }, + { + "epoch": 1.1406638397426525, + "grad_norm": 1.61853635946673, + "learning_rate": 8.511626244811352e-06, + "loss": 0.5236127972602844, + "step": 3901 + }, + { + "epoch": 1.1409562801579178, + "grad_norm": 1.3903461786321225, + "learning_rate": 8.506845562359022e-06, + "loss": 0.4900703430175781, + "step": 3902 + }, + { + "epoch": 1.1412487205731832, + "grad_norm": 1.484704749479714, + "learning_rate": 8.502065228922464e-06, + "loss": 0.5200212001800537, + "step": 3903 + }, + { + "epoch": 1.1415411609884485, + "grad_norm": 1.588712114908106, + "learning_rate": 8.497285245619053e-06, + "loss": 0.5553300976753235, + "step": 3904 + }, + { + "epoch": 1.141833601403714, + "grad_norm": 1.5947362241383982, + "learning_rate": 8.492505613566075e-06, + "loss": 0.5650131702423096, + "step": 3905 + }, + { + "epoch": 1.1421260418189794, + "grad_norm": 1.7370414648582224, + "learning_rate": 8.487726333880746e-06, + "loss": 0.4732077121734619, + "step": 3906 + }, + { + "epoch": 1.1424184822342447, + "grad_norm": 1.4560698890341355, + "learning_rate": 8.482947407680193e-06, + "loss": 
0.46741920709609985, + "step": 3907 + }, + { + "epoch": 1.1427109226495102, + "grad_norm": 1.717154367813477, + "learning_rate": 8.478168836081457e-06, + "loss": 0.606191873550415, + "step": 3908 + }, + { + "epoch": 1.1430033630647756, + "grad_norm": 1.4018605845855592, + "learning_rate": 8.473390620201505e-06, + "loss": 0.4373897314071655, + "step": 3909 + }, + { + "epoch": 1.143295803480041, + "grad_norm": 1.4705540951964773, + "learning_rate": 8.468612761157215e-06, + "loss": 0.5460623502731323, + "step": 3910 + }, + { + "epoch": 1.1435882438953064, + "grad_norm": 1.311062743935516, + "learning_rate": 8.463835260065379e-06, + "loss": 0.4939531087875366, + "step": 3911 + }, + { + "epoch": 1.1438806843105718, + "grad_norm": 1.4297382144675803, + "learning_rate": 8.459058118042708e-06, + "loss": 0.544964611530304, + "step": 3912 + }, + { + "epoch": 1.144173124725837, + "grad_norm": 1.600083847682917, + "learning_rate": 8.454281336205836e-06, + "loss": 0.6118921041488647, + "step": 3913 + }, + { + "epoch": 1.1444655651411024, + "grad_norm": 1.4018893231050742, + "learning_rate": 8.449504915671304e-06, + "loss": 0.561060905456543, + "step": 3914 + }, + { + "epoch": 1.144758005556368, + "grad_norm": 1.3967184209578762, + "learning_rate": 8.444728857555572e-06, + "loss": 0.430827796459198, + "step": 3915 + }, + { + "epoch": 1.1450504459716333, + "grad_norm": 1.7776445971156332, + "learning_rate": 8.439953162975011e-06, + "loss": 0.5482884645462036, + "step": 3916 + }, + { + "epoch": 1.1453428863868986, + "grad_norm": 1.8487697311002218, + "learning_rate": 8.435177833045911e-06, + "loss": 0.6614879965782166, + "step": 3917 + }, + { + "epoch": 1.145635326802164, + "grad_norm": 1.686073678884194, + "learning_rate": 8.430402868884482e-06, + "loss": 0.6290509104728699, + "step": 3918 + }, + { + "epoch": 1.1459277672174295, + "grad_norm": 1.479686951025449, + "learning_rate": 8.425628271606836e-06, + "loss": 0.404970645904541, + "step": 3919 + }, + { + "epoch": 
1.1462202076326948, + "grad_norm": 1.3978968237521616, + "learning_rate": 8.420854042329011e-06, + "loss": 0.4902762174606323, + "step": 3920 + }, + { + "epoch": 1.1465126480479602, + "grad_norm": 1.6869046118960203, + "learning_rate": 8.416080182166955e-06, + "loss": 0.5757346153259277, + "step": 3921 + }, + { + "epoch": 1.1468050884632257, + "grad_norm": 1.5541954452670608, + "learning_rate": 8.41130669223652e-06, + "loss": 0.5453485250473022, + "step": 3922 + }, + { + "epoch": 1.147097528878491, + "grad_norm": 1.7189844130617113, + "learning_rate": 8.40653357365349e-06, + "loss": 0.5660290122032166, + "step": 3923 + }, + { + "epoch": 1.1473899692937564, + "grad_norm": 1.576466831282747, + "learning_rate": 8.40176082753355e-06, + "loss": 0.46013498306274414, + "step": 3924 + }, + { + "epoch": 1.147682409709022, + "grad_norm": 1.4364824711460213, + "learning_rate": 8.396988454992296e-06, + "loss": 0.5183000564575195, + "step": 3925 + }, + { + "epoch": 1.1479748501242872, + "grad_norm": 1.7258149850246205, + "learning_rate": 8.392216457145246e-06, + "loss": 0.5407284498214722, + "step": 3926 + }, + { + "epoch": 1.1482672905395526, + "grad_norm": 1.735168999167248, + "learning_rate": 8.387444835107824e-06, + "loss": 0.5960655808448792, + "step": 3927 + }, + { + "epoch": 1.148559730954818, + "grad_norm": 1.2891916158500891, + "learning_rate": 8.382673589995365e-06, + "loss": 0.4363316297531128, + "step": 3928 + }, + { + "epoch": 1.1488521713700834, + "grad_norm": 1.4413045514377891, + "learning_rate": 8.377902722923122e-06, + "loss": 0.5143908262252808, + "step": 3929 + }, + { + "epoch": 1.1491446117853488, + "grad_norm": 1.696244956095385, + "learning_rate": 8.373132235006254e-06, + "loss": 0.6016460657119751, + "step": 3930 + }, + { + "epoch": 1.149437052200614, + "grad_norm": 1.4319561508465357, + "learning_rate": 8.368362127359835e-06, + "loss": 0.5120511651039124, + "step": 3931 + }, + { + "epoch": 1.1497294926158794, + "grad_norm": 1.5254857110351325, + 
"learning_rate": 8.363592401098853e-06, + "loss": 0.49658435583114624, + "step": 3932 + }, + { + "epoch": 1.150021933031145, + "grad_norm": 1.7705521617533395, + "learning_rate": 8.358823057338188e-06, + "loss": 0.584032416343689, + "step": 3933 + }, + { + "epoch": 1.1503143734464103, + "grad_norm": 1.5012587623360505, + "learning_rate": 8.35405409719266e-06, + "loss": 0.4673706293106079, + "step": 3934 + }, + { + "epoch": 1.1506068138616756, + "grad_norm": 1.618555555366979, + "learning_rate": 8.349285521776982e-06, + "loss": 0.633565366268158, + "step": 3935 + }, + { + "epoch": 1.1508992542769412, + "grad_norm": 1.6576478038135816, + "learning_rate": 8.344517332205774e-06, + "loss": 0.6029015779495239, + "step": 3936 + }, + { + "epoch": 1.1511916946922065, + "grad_norm": 1.519081286345544, + "learning_rate": 8.339749529593574e-06, + "loss": 0.45594489574432373, + "step": 3937 + }, + { + "epoch": 1.1514841351074718, + "grad_norm": 1.5262842564669963, + "learning_rate": 8.334982115054828e-06, + "loss": 0.4413541257381439, + "step": 3938 + }, + { + "epoch": 1.1517765755227372, + "grad_norm": 1.6373893488771099, + "learning_rate": 8.330215089703887e-06, + "loss": 0.5674389004707336, + "step": 3939 + }, + { + "epoch": 1.1520690159380027, + "grad_norm": 1.386401958621656, + "learning_rate": 8.325448454655019e-06, + "loss": 0.43449294567108154, + "step": 3940 + }, + { + "epoch": 1.152361456353268, + "grad_norm": 1.6283403091444353, + "learning_rate": 8.320682211022393e-06, + "loss": 0.5190714597702026, + "step": 3941 + }, + { + "epoch": 1.1526538967685334, + "grad_norm": 1.5774508757028434, + "learning_rate": 8.31591635992009e-06, + "loss": 0.56162428855896, + "step": 3942 + }, + { + "epoch": 1.1529463371837987, + "grad_norm": 1.4891934876919055, + "learning_rate": 8.311150902462096e-06, + "loss": 0.5588958263397217, + "step": 3943 + }, + { + "epoch": 1.1532387775990642, + "grad_norm": 1.5108312938903155, + "learning_rate": 8.306385839762312e-06, + "loss": 
0.5438264608383179, + "step": 3944 + }, + { + "epoch": 1.1535312180143296, + "grad_norm": 1.575513080138648, + "learning_rate": 8.30162117293454e-06, + "loss": 0.5860258340835571, + "step": 3945 + }, + { + "epoch": 1.153823658429595, + "grad_norm": 1.552005958726473, + "learning_rate": 8.296856903092494e-06, + "loss": 0.4742947220802307, + "step": 3946 + }, + { + "epoch": 1.1541160988448604, + "grad_norm": 1.44195573685015, + "learning_rate": 8.292093031349791e-06, + "loss": 0.47963109612464905, + "step": 3947 + }, + { + "epoch": 1.1544085392601258, + "grad_norm": 1.5340226225614597, + "learning_rate": 8.287329558819957e-06, + "loss": 0.5404704213142395, + "step": 3948 + }, + { + "epoch": 1.154700979675391, + "grad_norm": 1.8054477659796657, + "learning_rate": 8.282566486616425e-06, + "loss": 0.6559766530990601, + "step": 3949 + }, + { + "epoch": 1.1549934200906566, + "grad_norm": 1.507763379787764, + "learning_rate": 8.277803815852535e-06, + "loss": 0.4462929368019104, + "step": 3950 + }, + { + "epoch": 1.155285860505922, + "grad_norm": 1.6398920335039024, + "learning_rate": 8.273041547641531e-06, + "loss": 0.5672504901885986, + "step": 3951 + }, + { + "epoch": 1.1555783009211873, + "grad_norm": 1.5384582587859306, + "learning_rate": 8.268279683096567e-06, + "loss": 0.4040188193321228, + "step": 3952 + }, + { + "epoch": 1.1558707413364526, + "grad_norm": 1.4954603260099153, + "learning_rate": 8.263518223330698e-06, + "loss": 0.4639814794063568, + "step": 3953 + }, + { + "epoch": 1.1561631817517182, + "grad_norm": 1.3560290444841174, + "learning_rate": 8.258757169456885e-06, + "loss": 0.384866327047348, + "step": 3954 + }, + { + "epoch": 1.1564556221669835, + "grad_norm": 1.5360587849114566, + "learning_rate": 8.253996522587997e-06, + "loss": 0.452106773853302, + "step": 3955 + }, + { + "epoch": 1.1567480625822488, + "grad_norm": 1.5044138285106523, + "learning_rate": 8.249236283836806e-06, + "loss": 0.487504780292511, + "step": 3956 + }, + { + "epoch": 
1.1570405029975142, + "grad_norm": 1.6199121483000312, + "learning_rate": 8.244476454315989e-06, + "loss": 0.6225916147232056, + "step": 3957 + }, + { + "epoch": 1.1573329434127797, + "grad_norm": 1.7421167385988239, + "learning_rate": 8.239717035138128e-06, + "loss": 0.5254271030426025, + "step": 3958 + }, + { + "epoch": 1.157625383828045, + "grad_norm": 1.6240162719096014, + "learning_rate": 8.234958027415707e-06, + "loss": 0.5759135484695435, + "step": 3959 + }, + { + "epoch": 1.1579178242433104, + "grad_norm": 1.6959935899735565, + "learning_rate": 8.230199432261115e-06, + "loss": 0.5720966458320618, + "step": 3960 + }, + { + "epoch": 1.158210264658576, + "grad_norm": 1.5797174163929866, + "learning_rate": 8.225441250786643e-06, + "loss": 0.4807323217391968, + "step": 3961 + }, + { + "epoch": 1.1585027050738412, + "grad_norm": 1.6197693861653146, + "learning_rate": 8.22068348410449e-06, + "loss": 0.5049746036529541, + "step": 3962 + }, + { + "epoch": 1.1587951454891066, + "grad_norm": 1.673364031578337, + "learning_rate": 8.215926133326758e-06, + "loss": 0.5321973562240601, + "step": 3963 + }, + { + "epoch": 1.159087585904372, + "grad_norm": 1.3992709586079797, + "learning_rate": 8.211169199565444e-06, + "loss": 0.5176634788513184, + "step": 3964 + }, + { + "epoch": 1.1593800263196374, + "grad_norm": 1.5661593234971032, + "learning_rate": 8.20641268393245e-06, + "loss": 0.5345112681388855, + "step": 3965 + }, + { + "epoch": 1.1596724667349028, + "grad_norm": 1.8309312482061675, + "learning_rate": 8.201656587539589e-06, + "loss": 0.47578325867652893, + "step": 3966 + }, + { + "epoch": 1.159964907150168, + "grad_norm": 1.5996140092470157, + "learning_rate": 8.196900911498563e-06, + "loss": 0.5018264651298523, + "step": 3967 + }, + { + "epoch": 1.1602573475654336, + "grad_norm": 1.530612277867195, + "learning_rate": 8.192145656920989e-06, + "loss": 0.4643394351005554, + "step": 3968 + }, + { + "epoch": 1.160549787980699, + "grad_norm": 1.6066179328722245, + 
"learning_rate": 8.187390824918375e-06, + "loss": 0.5391045808792114, + "step": 3969 + }, + { + "epoch": 1.1608422283959643, + "grad_norm": 1.4691594768883462, + "learning_rate": 8.182636416602136e-06, + "loss": 0.5168124437332153, + "step": 3970 + }, + { + "epoch": 1.1611346688112296, + "grad_norm": 1.4702658109064293, + "learning_rate": 8.177882433083583e-06, + "loss": 0.5821055173873901, + "step": 3971 + }, + { + "epoch": 1.1614271092264952, + "grad_norm": 1.597748811964364, + "learning_rate": 8.173128875473933e-06, + "loss": 0.6031824946403503, + "step": 3972 + }, + { + "epoch": 1.1617195496417605, + "grad_norm": 1.598311083454874, + "learning_rate": 8.1683757448843e-06, + "loss": 0.5085259675979614, + "step": 3973 + }, + { + "epoch": 1.1620119900570258, + "grad_norm": 1.6218562380492636, + "learning_rate": 8.163623042425702e-06, + "loss": 0.5654903650283813, + "step": 3974 + }, + { + "epoch": 1.1623044304722914, + "grad_norm": 1.6279393236171642, + "learning_rate": 8.158870769209051e-06, + "loss": 0.3920902609825134, + "step": 3975 + }, + { + "epoch": 1.1625968708875567, + "grad_norm": 1.6100798425685794, + "learning_rate": 8.154118926345165e-06, + "loss": 0.5334979891777039, + "step": 3976 + }, + { + "epoch": 1.162889311302822, + "grad_norm": 1.7332980039574648, + "learning_rate": 8.149367514944754e-06, + "loss": 0.6212184429168701, + "step": 3977 + }, + { + "epoch": 1.1631817517180874, + "grad_norm": 1.847204612085083, + "learning_rate": 8.144616536118437e-06, + "loss": 0.71863853931427, + "step": 3978 + }, + { + "epoch": 1.163474192133353, + "grad_norm": 1.7297963031597574, + "learning_rate": 8.139865990976722e-06, + "loss": 0.5263794660568237, + "step": 3979 + }, + { + "epoch": 1.1637666325486182, + "grad_norm": 1.5706968019905152, + "learning_rate": 8.135115880630025e-06, + "loss": 0.5035576224327087, + "step": 3980 + }, + { + "epoch": 1.1640590729638836, + "grad_norm": 1.4183002447341373, + "learning_rate": 8.130366206188651e-06, + "loss": 
0.5695084929466248, + "step": 3981 + }, + { + "epoch": 1.1643515133791489, + "grad_norm": 1.51980370598088, + "learning_rate": 8.125616968762806e-06, + "loss": 0.5826396942138672, + "step": 3982 + }, + { + "epoch": 1.1646439537944144, + "grad_norm": 1.5991682342910063, + "learning_rate": 8.1208681694626e-06, + "loss": 0.5132841467857361, + "step": 3983 + }, + { + "epoch": 1.1649363942096798, + "grad_norm": 1.7073185800473716, + "learning_rate": 8.116119809398034e-06, + "loss": 0.6572669744491577, + "step": 3984 + }, + { + "epoch": 1.165228834624945, + "grad_norm": 1.8729301131644296, + "learning_rate": 8.111371889679007e-06, + "loss": 0.5365801453590393, + "step": 3985 + }, + { + "epoch": 1.1655212750402106, + "grad_norm": 1.4561472169130645, + "learning_rate": 8.10662441141532e-06, + "loss": 0.44511687755584717, + "step": 3986 + }, + { + "epoch": 1.165813715455476, + "grad_norm": 1.596383666869324, + "learning_rate": 8.101877375716666e-06, + "loss": 0.47212404012680054, + "step": 3987 + }, + { + "epoch": 1.1661061558707413, + "grad_norm": 1.5859450593798408, + "learning_rate": 8.097130783692631e-06, + "loss": 0.5942205786705017, + "step": 3988 + }, + { + "epoch": 1.1663985962860068, + "grad_norm": 1.6678058947227146, + "learning_rate": 8.092384636452708e-06, + "loss": 0.49162304401397705, + "step": 3989 + }, + { + "epoch": 1.1666910367012722, + "grad_norm": 2.635849062548634, + "learning_rate": 8.087638935106277e-06, + "loss": 0.6544803380966187, + "step": 3990 + }, + { + "epoch": 1.1669834771165375, + "grad_norm": 1.677008396527972, + "learning_rate": 8.082893680762619e-06, + "loss": 0.5572186708450317, + "step": 3991 + }, + { + "epoch": 1.1672759175318028, + "grad_norm": 1.6443546400872178, + "learning_rate": 8.078148874530906e-06, + "loss": 0.5836775898933411, + "step": 3992 + }, + { + "epoch": 1.1675683579470684, + "grad_norm": 1.3079140035223278, + "learning_rate": 8.073404517520208e-06, + "loss": 0.5507068634033203, + "step": 3993 + }, + { + "epoch": 
1.1678607983623337, + "grad_norm": 1.9861505555993526, + "learning_rate": 8.068660610839489e-06, + "loss": 0.5312684178352356, + "step": 3994 + }, + { + "epoch": 1.168153238777599, + "grad_norm": 1.6552821635427635, + "learning_rate": 8.06391715559761e-06, + "loss": 0.5555688142776489, + "step": 3995 + }, + { + "epoch": 1.1684456791928644, + "grad_norm": 1.6871575092969338, + "learning_rate": 8.059174152903324e-06, + "loss": 0.5724596977233887, + "step": 3996 + }, + { + "epoch": 1.16873811960813, + "grad_norm": 1.6937240718062052, + "learning_rate": 8.054431603865282e-06, + "loss": 0.6212218999862671, + "step": 3997 + }, + { + "epoch": 1.1690305600233952, + "grad_norm": 1.4230492945656301, + "learning_rate": 8.049689509592023e-06, + "loss": 0.5061509609222412, + "step": 3998 + }, + { + "epoch": 1.1693230004386606, + "grad_norm": 1.695677070671476, + "learning_rate": 8.044947871191982e-06, + "loss": 0.6143001914024353, + "step": 3999 + }, + { + "epoch": 1.169615440853926, + "grad_norm": 1.32203821023621, + "learning_rate": 8.040206689773487e-06, + "loss": 0.5079911351203918, + "step": 4000 + }, + { + "epoch": 1.1699078812691914, + "grad_norm": 1.6638666755727167, + "learning_rate": 8.035465966444764e-06, + "loss": 0.6104908585548401, + "step": 4001 + }, + { + "epoch": 1.1702003216844568, + "grad_norm": 1.680147004679776, + "learning_rate": 8.03072570231393e-06, + "loss": 0.5953013896942139, + "step": 4002 + }, + { + "epoch": 1.1704927620997223, + "grad_norm": 1.3075175590117196, + "learning_rate": 8.025985898488986e-06, + "loss": 0.4541323781013489, + "step": 4003 + }, + { + "epoch": 1.1707852025149876, + "grad_norm": 1.5142685350846732, + "learning_rate": 8.021246556077838e-06, + "loss": 0.5708850026130676, + "step": 4004 + }, + { + "epoch": 1.171077642930253, + "grad_norm": 1.6574340180310174, + "learning_rate": 8.016507676188275e-06, + "loss": 0.5430601835250854, + "step": 4005 + }, + { + "epoch": 1.1713700833455183, + "grad_norm": 1.504278683910439, + 
"learning_rate": 8.011769259927981e-06, + "loss": 0.5621174573898315, + "step": 4006 + }, + { + "epoch": 1.1716625237607838, + "grad_norm": 1.5473800123062453, + "learning_rate": 8.007031308404536e-06, + "loss": 0.48092782497406006, + "step": 4007 + }, + { + "epoch": 1.1719549641760492, + "grad_norm": 1.9401766125340165, + "learning_rate": 8.002293822725404e-06, + "loss": 0.5770663022994995, + "step": 4008 + }, + { + "epoch": 1.1722474045913145, + "grad_norm": 1.7123399188942874, + "learning_rate": 7.997556803997945e-06, + "loss": 0.5692728757858276, + "step": 4009 + }, + { + "epoch": 1.1725398450065798, + "grad_norm": 1.6496884851556144, + "learning_rate": 7.99282025332941e-06, + "loss": 0.6256895065307617, + "step": 4010 + }, + { + "epoch": 1.1728322854218454, + "grad_norm": 1.4529405935008253, + "learning_rate": 7.988084171826937e-06, + "loss": 0.4272884130477905, + "step": 4011 + }, + { + "epoch": 1.1731247258371107, + "grad_norm": 1.548325850009333, + "learning_rate": 7.983348560597557e-06, + "loss": 0.5113184452056885, + "step": 4012 + }, + { + "epoch": 1.173417166252376, + "grad_norm": 1.4489276426544837, + "learning_rate": 7.978613420748186e-06, + "loss": 0.45635539293289185, + "step": 4013 + }, + { + "epoch": 1.1737096066676416, + "grad_norm": 1.6347983233600756, + "learning_rate": 7.973878753385638e-06, + "loss": 0.5539636611938477, + "step": 4014 + }, + { + "epoch": 1.174002047082907, + "grad_norm": 1.4140693021111321, + "learning_rate": 7.969144559616615e-06, + "loss": 0.6083431243896484, + "step": 4015 + }, + { + "epoch": 1.1742944874981722, + "grad_norm": 1.372102806580561, + "learning_rate": 7.9644108405477e-06, + "loss": 0.5268326997756958, + "step": 4016 + }, + { + "epoch": 1.1745869279134376, + "grad_norm": 1.5989300144328094, + "learning_rate": 7.95967759728538e-06, + "loss": 0.5680301189422607, + "step": 4017 + }, + { + "epoch": 1.174879368328703, + "grad_norm": 1.62105042514946, + "learning_rate": 7.954944830936012e-06, + "loss": 
0.5457121133804321, + "step": 4018 + }, + { + "epoch": 1.1751718087439684, + "grad_norm": 1.59579517284719, + "learning_rate": 7.950212542605857e-06, + "loss": 0.5358338356018066, + "step": 4019 + }, + { + "epoch": 1.1754642491592338, + "grad_norm": 1.5630110417390142, + "learning_rate": 7.945480733401056e-06, + "loss": 0.6094579696655273, + "step": 4020 + }, + { + "epoch": 1.175756689574499, + "grad_norm": 1.6732097827507912, + "learning_rate": 7.940749404427642e-06, + "loss": 0.5108463764190674, + "step": 4021 + }, + { + "epoch": 1.1760491299897646, + "grad_norm": 1.6495017651653137, + "learning_rate": 7.936018556791537e-06, + "loss": 0.4946494698524475, + "step": 4022 + }, + { + "epoch": 1.17634157040503, + "grad_norm": 1.7757854212337651, + "learning_rate": 7.931288191598543e-06, + "loss": 0.5056017637252808, + "step": 4023 + }, + { + "epoch": 1.1766340108202953, + "grad_norm": 1.5276820294687934, + "learning_rate": 7.926558309954354e-06, + "loss": 0.5242294073104858, + "step": 4024 + }, + { + "epoch": 1.1769264512355608, + "grad_norm": 1.682705103807737, + "learning_rate": 7.921828912964556e-06, + "loss": 0.5667276382446289, + "step": 4025 + }, + { + "epoch": 1.1772188916508262, + "grad_norm": 1.6370912674167624, + "learning_rate": 7.917100001734614e-06, + "loss": 0.5282422304153442, + "step": 4026 + }, + { + "epoch": 1.1775113320660915, + "grad_norm": 1.6026370834828365, + "learning_rate": 7.912371577369881e-06, + "loss": 0.4887520670890808, + "step": 4027 + }, + { + "epoch": 1.177803772481357, + "grad_norm": 1.6287890532743194, + "learning_rate": 7.907643640975603e-06, + "loss": 0.5082155466079712, + "step": 4028 + }, + { + "epoch": 1.1780962128966224, + "grad_norm": 1.649507152949628, + "learning_rate": 7.902916193656898e-06, + "loss": 0.5432984828948975, + "step": 4029 + }, + { + "epoch": 1.1783886533118877, + "grad_norm": 1.7249606112651144, + "learning_rate": 7.898189236518783e-06, + "loss": 0.4313681721687317, + "step": 4030 + }, + { + "epoch": 
1.178681093727153, + "grad_norm": 1.494399406404666, + "learning_rate": 7.893462770666155e-06, + "loss": 0.6051831245422363, + "step": 4031 + }, + { + "epoch": 1.1789735341424186, + "grad_norm": 1.5057449817059945, + "learning_rate": 7.888736797203796e-06, + "loss": 0.45805442333221436, + "step": 4032 + }, + { + "epoch": 1.179265974557684, + "grad_norm": 1.7917233044229635, + "learning_rate": 7.884011317236376e-06, + "loss": 0.4998340606689453, + "step": 4033 + }, + { + "epoch": 1.1795584149729492, + "grad_norm": 1.651259706746187, + "learning_rate": 7.879286331868443e-06, + "loss": 0.5298212170600891, + "step": 4034 + }, + { + "epoch": 1.1798508553882145, + "grad_norm": 1.7028183419777814, + "learning_rate": 7.874561842204437e-06, + "loss": 0.5104682445526123, + "step": 4035 + }, + { + "epoch": 1.18014329580348, + "grad_norm": 1.6393724776910414, + "learning_rate": 7.869837849348676e-06, + "loss": 0.5793051719665527, + "step": 4036 + }, + { + "epoch": 1.1804357362187454, + "grad_norm": 1.4839435154715734, + "learning_rate": 7.865114354405367e-06, + "loss": 0.42913323640823364, + "step": 4037 + }, + { + "epoch": 1.1807281766340108, + "grad_norm": 2.104724599006863, + "learning_rate": 7.860391358478596e-06, + "loss": 0.5183675289154053, + "step": 4038 + }, + { + "epoch": 1.1810206170492763, + "grad_norm": 1.5685744104736703, + "learning_rate": 7.855668862672339e-06, + "loss": 0.444034218788147, + "step": 4039 + }, + { + "epoch": 1.1813130574645416, + "grad_norm": 1.486556561749613, + "learning_rate": 7.850946868090446e-06, + "loss": 0.4357207417488098, + "step": 4040 + }, + { + "epoch": 1.181605497879807, + "grad_norm": 1.6923285770365775, + "learning_rate": 7.846225375836657e-06, + "loss": 0.4517707824707031, + "step": 4041 + }, + { + "epoch": 1.1818979382950725, + "grad_norm": 1.5865011864132745, + "learning_rate": 7.841504387014589e-06, + "loss": 0.4437381625175476, + "step": 4042 + }, + { + "epoch": 1.1821903787103378, + "grad_norm": 1.4744521314451464, + 
"learning_rate": 7.836783902727746e-06, + "loss": 0.5364828109741211, + "step": 4043 + }, + { + "epoch": 1.1824828191256032, + "grad_norm": 1.650227369991675, + "learning_rate": 7.832063924079516e-06, + "loss": 0.4814251661300659, + "step": 4044 + }, + { + "epoch": 1.1827752595408685, + "grad_norm": 1.9016693432010778, + "learning_rate": 7.827344452173163e-06, + "loss": 0.5376232862472534, + "step": 4045 + }, + { + "epoch": 1.183067699956134, + "grad_norm": 1.7007887018924743, + "learning_rate": 7.822625488111833e-06, + "loss": 0.6005147695541382, + "step": 4046 + }, + { + "epoch": 1.1833601403713994, + "grad_norm": 1.5696670197669271, + "learning_rate": 7.817907032998556e-06, + "loss": 0.5276827216148376, + "step": 4047 + }, + { + "epoch": 1.1836525807866647, + "grad_norm": 2.1978111734105994, + "learning_rate": 7.813189087936243e-06, + "loss": 0.6425626277923584, + "step": 4048 + }, + { + "epoch": 1.18394502120193, + "grad_norm": 1.272646490936496, + "learning_rate": 7.808471654027685e-06, + "loss": 0.44388407468795776, + "step": 4049 + }, + { + "epoch": 1.1842374616171956, + "grad_norm": 1.743245771156321, + "learning_rate": 7.803754732375554e-06, + "loss": 0.5044336318969727, + "step": 4050 + }, + { + "epoch": 1.184529902032461, + "grad_norm": 1.9415496480441554, + "learning_rate": 7.7990383240824e-06, + "loss": 0.6964906454086304, + "step": 4051 + }, + { + "epoch": 1.1848223424477262, + "grad_norm": 1.9029191440552455, + "learning_rate": 7.794322430250654e-06, + "loss": 0.6093637943267822, + "step": 4052 + }, + { + "epoch": 1.1851147828629918, + "grad_norm": 1.8079016024144563, + "learning_rate": 7.78960705198263e-06, + "loss": 0.5264803171157837, + "step": 4053 + }, + { + "epoch": 1.185407223278257, + "grad_norm": 1.444425047773482, + "learning_rate": 7.78489219038052e-06, + "loss": 0.5336456298828125, + "step": 4054 + }, + { + "epoch": 1.1856996636935224, + "grad_norm": 1.7563642817078289, + "learning_rate": 7.78017784654639e-06, + "loss": 
0.5266311168670654, + "step": 4055 + }, + { + "epoch": 1.1859921041087877, + "grad_norm": 1.6538609406479838, + "learning_rate": 7.775464021582195e-06, + "loss": 0.6281685829162598, + "step": 4056 + }, + { + "epoch": 1.1862845445240533, + "grad_norm": 1.6081255371588656, + "learning_rate": 7.770750716589758e-06, + "loss": 0.560591995716095, + "step": 4057 + }, + { + "epoch": 1.1865769849393186, + "grad_norm": 1.7912692279763305, + "learning_rate": 7.766037932670786e-06, + "loss": 0.5751859545707703, + "step": 4058 + }, + { + "epoch": 1.186869425354584, + "grad_norm": 1.629657999448518, + "learning_rate": 7.761325670926864e-06, + "loss": 0.5404624938964844, + "step": 4059 + }, + { + "epoch": 1.1871618657698493, + "grad_norm": 1.6997280704374504, + "learning_rate": 7.756613932459456e-06, + "loss": 0.4714626669883728, + "step": 4060 + }, + { + "epoch": 1.1874543061851148, + "grad_norm": 1.4471766418666208, + "learning_rate": 7.751902718369903e-06, + "loss": 0.5449519157409668, + "step": 4061 + }, + { + "epoch": 1.1877467466003802, + "grad_norm": 1.6279611933236646, + "learning_rate": 7.747192029759419e-06, + "loss": 0.6518754959106445, + "step": 4062 + }, + { + "epoch": 1.1880391870156455, + "grad_norm": 1.4655931664348079, + "learning_rate": 7.7424818677291e-06, + "loss": 0.47224369645118713, + "step": 4063 + }, + { + "epoch": 1.188331627430911, + "grad_norm": 1.6924798895194766, + "learning_rate": 7.737772233379919e-06, + "loss": 0.5482417345046997, + "step": 4064 + }, + { + "epoch": 1.1886240678461764, + "grad_norm": 1.3910277085667344, + "learning_rate": 7.733063127812724e-06, + "loss": 0.5401996374130249, + "step": 4065 + }, + { + "epoch": 1.1889165082614417, + "grad_norm": 2.4517381628425547, + "learning_rate": 7.72835455212824e-06, + "loss": 0.4678424596786499, + "step": 4066 + }, + { + "epoch": 1.1892089486767072, + "grad_norm": 1.6156459518768798, + "learning_rate": 7.72364650742707e-06, + "loss": 0.5191294550895691, + "step": 4067 + }, + { + "epoch": 
1.1895013890919726, + "grad_norm": 1.4433917939096517, + "learning_rate": 7.718938994809685e-06, + "loss": 0.44018834829330444, + "step": 4068 + }, + { + "epoch": 1.1897938295072379, + "grad_norm": 1.3955169745603861, + "learning_rate": 7.714232015376442e-06, + "loss": 0.47852614521980286, + "step": 4069 + }, + { + "epoch": 1.1900862699225032, + "grad_norm": 1.523334975304476, + "learning_rate": 7.709525570227567e-06, + "loss": 0.5748994946479797, + "step": 4070 + }, + { + "epoch": 1.1903787103377688, + "grad_norm": 1.7714529908638612, + "learning_rate": 7.704819660463164e-06, + "loss": 0.5015645027160645, + "step": 4071 + }, + { + "epoch": 1.190671150753034, + "grad_norm": 1.8100962592275294, + "learning_rate": 7.70011428718321e-06, + "loss": 0.6200511455535889, + "step": 4072 + }, + { + "epoch": 1.1909635911682994, + "grad_norm": 1.531990990921369, + "learning_rate": 7.69540945148756e-06, + "loss": 0.6311289668083191, + "step": 4073 + }, + { + "epoch": 1.1912560315835647, + "grad_norm": 1.5403717728586237, + "learning_rate": 7.690705154475937e-06, + "loss": 0.5707247257232666, + "step": 4074 + }, + { + "epoch": 1.1915484719988303, + "grad_norm": 2.0693191702072107, + "learning_rate": 7.686001397247944e-06, + "loss": 0.5616360902786255, + "step": 4075 + }, + { + "epoch": 1.1918409124140956, + "grad_norm": 1.7144278887449431, + "learning_rate": 7.681298180903054e-06, + "loss": 0.5955555438995361, + "step": 4076 + }, + { + "epoch": 1.192133352829361, + "grad_norm": 1.6252826516162207, + "learning_rate": 7.676595506540615e-06, + "loss": 0.5057257413864136, + "step": 4077 + }, + { + "epoch": 1.1924257932446265, + "grad_norm": 1.8445544744897249, + "learning_rate": 7.671893375259854e-06, + "loss": 0.5795278549194336, + "step": 4078 + }, + { + "epoch": 1.1927182336598918, + "grad_norm": 1.5541021220011975, + "learning_rate": 7.66719178815986e-06, + "loss": 0.5213087797164917, + "step": 4079 + }, + { + "epoch": 1.1930106740751572, + "grad_norm": 1.5183242886274189, + 
"learning_rate": 7.662490746339601e-06, + "loss": 0.5333693027496338, + "step": 4080 + }, + { + "epoch": 1.1933031144904227, + "grad_norm": 1.5253876680230323, + "learning_rate": 7.657790250897916e-06, + "loss": 0.4705297648906708, + "step": 4081 + }, + { + "epoch": 1.193595554905688, + "grad_norm": 1.5875026444946445, + "learning_rate": 7.65309030293352e-06, + "loss": 0.5376054644584656, + "step": 4082 + }, + { + "epoch": 1.1938879953209534, + "grad_norm": 1.4103452849520708, + "learning_rate": 7.648390903544997e-06, + "loss": 0.47457355260849, + "step": 4083 + }, + { + "epoch": 1.1941804357362187, + "grad_norm": 1.5068528532277095, + "learning_rate": 7.6436920538308e-06, + "loss": 0.48752763867378235, + "step": 4084 + }, + { + "epoch": 1.1944728761514842, + "grad_norm": 1.5226531730849548, + "learning_rate": 7.63899375488926e-06, + "loss": 0.48227858543395996, + "step": 4085 + }, + { + "epoch": 1.1947653165667496, + "grad_norm": 1.4101996785965327, + "learning_rate": 7.634296007818576e-06, + "loss": 0.4294116497039795, + "step": 4086 + }, + { + "epoch": 1.1950577569820149, + "grad_norm": 1.24669252589954, + "learning_rate": 7.629598813716817e-06, + "loss": 0.5562552809715271, + "step": 4087 + }, + { + "epoch": 1.1953501973972802, + "grad_norm": 1.6628602240304204, + "learning_rate": 7.624902173681923e-06, + "loss": 0.6466431617736816, + "step": 4088 + }, + { + "epoch": 1.1956426378125458, + "grad_norm": 1.7596122427030323, + "learning_rate": 7.620206088811704e-06, + "loss": 0.7183903455734253, + "step": 4089 + }, + { + "epoch": 1.195935078227811, + "grad_norm": 1.666055880141139, + "learning_rate": 7.615510560203841e-06, + "loss": 0.5667496919631958, + "step": 4090 + }, + { + "epoch": 1.1962275186430764, + "grad_norm": 1.5232682591562918, + "learning_rate": 7.610815588955888e-06, + "loss": 0.5603050589561462, + "step": 4091 + }, + { + "epoch": 1.196519959058342, + "grad_norm": 1.6556784363331365, + "learning_rate": 7.606121176165267e-06, + "loss": 
0.5305474996566772, + "step": 4092 + }, + { + "epoch": 1.1968123994736073, + "grad_norm": 2.0140258709167163, + "learning_rate": 7.6014273229292625e-06, + "loss": 0.7321374416351318, + "step": 4093 + }, + { + "epoch": 1.1971048398888726, + "grad_norm": 1.5891169030075603, + "learning_rate": 7.5967340303450385e-06, + "loss": 0.44885972142219543, + "step": 4094 + }, + { + "epoch": 1.197397280304138, + "grad_norm": 1.7955283190373275, + "learning_rate": 7.592041299509624e-06, + "loss": 0.593859076499939, + "step": 4095 + }, + { + "epoch": 1.1976897207194035, + "grad_norm": 1.4553428657338656, + "learning_rate": 7.587349131519913e-06, + "loss": 0.6701182723045349, + "step": 4096 + }, + { + "epoch": 1.1979821611346688, + "grad_norm": 1.9268795339399152, + "learning_rate": 7.582657527472674e-06, + "loss": 0.6456711292266846, + "step": 4097 + }, + { + "epoch": 1.1982746015499341, + "grad_norm": 1.6602700214896833, + "learning_rate": 7.577966488464543e-06, + "loss": 0.5933864116668701, + "step": 4098 + }, + { + "epoch": 1.1985670419651995, + "grad_norm": 1.398305705152583, + "learning_rate": 7.5732760155920175e-06, + "loss": 0.4609876275062561, + "step": 4099 + }, + { + "epoch": 1.198859482380465, + "grad_norm": 1.4827488477589208, + "learning_rate": 7.568586109951468e-06, + "loss": 0.540961503982544, + "step": 4100 + }, + { + "epoch": 1.1991519227957304, + "grad_norm": 1.629963355664156, + "learning_rate": 7.563896772639132e-06, + "loss": 0.5522942543029785, + "step": 4101 + }, + { + "epoch": 1.1994443632109957, + "grad_norm": 1.739784480619601, + "learning_rate": 7.559208004751114e-06, + "loss": 0.483737587928772, + "step": 4102 + }, + { + "epoch": 1.1997368036262612, + "grad_norm": 1.2743684109876499, + "learning_rate": 7.554519807383384e-06, + "loss": 0.3760339915752411, + "step": 4103 + }, + { + "epoch": 1.2000292440415266, + "grad_norm": 1.494486709964621, + "learning_rate": 7.549832181631782e-06, + "loss": 0.5034801959991455, + "step": 4104 + }, + { + "epoch": 
1.2003216844567919, + "grad_norm": 1.6403057961263519, + "learning_rate": 7.545145128592009e-06, + "loss": 0.5605261325836182, + "step": 4105 + }, + { + "epoch": 1.2006141248720574, + "grad_norm": 1.4179033673825343, + "learning_rate": 7.540458649359637e-06, + "loss": 0.4724245071411133, + "step": 4106 + }, + { + "epoch": 1.2009065652873228, + "grad_norm": 1.609040907971216, + "learning_rate": 7.535772745030101e-06, + "loss": 0.564873218536377, + "step": 4107 + }, + { + "epoch": 1.201199005702588, + "grad_norm": 1.9431151220409157, + "learning_rate": 7.531087416698702e-06, + "loss": 0.699596643447876, + "step": 4108 + }, + { + "epoch": 1.2014914461178534, + "grad_norm": 1.5180492689699372, + "learning_rate": 7.526402665460612e-06, + "loss": 0.47448351979255676, + "step": 4109 + }, + { + "epoch": 1.201783886533119, + "grad_norm": 1.4606225624905942, + "learning_rate": 7.521718492410855e-06, + "loss": 0.4681323766708374, + "step": 4110 + }, + { + "epoch": 1.2020763269483843, + "grad_norm": 1.8550718864551587, + "learning_rate": 7.517034898644333e-06, + "loss": 0.6361842155456543, + "step": 4111 + }, + { + "epoch": 1.2023687673636496, + "grad_norm": 1.5211596606564617, + "learning_rate": 7.5123518852558075e-06, + "loss": 0.4732646942138672, + "step": 4112 + }, + { + "epoch": 1.202661207778915, + "grad_norm": 1.6512929892036816, + "learning_rate": 7.507669453339903e-06, + "loss": 0.57124263048172, + "step": 4113 + }, + { + "epoch": 1.2029536481941805, + "grad_norm": 1.758611342292707, + "learning_rate": 7.502987603991111e-06, + "loss": 0.5228173732757568, + "step": 4114 + }, + { + "epoch": 1.2032460886094458, + "grad_norm": 1.7352024129193708, + "learning_rate": 7.4983063383037864e-06, + "loss": 0.5501765012741089, + "step": 4115 + }, + { + "epoch": 1.2035385290247111, + "grad_norm": 1.6782467710972089, + "learning_rate": 7.493625657372141e-06, + "loss": 0.5062840580940247, + "step": 4116 + }, + { + "epoch": 1.2038309694399767, + "grad_norm": 1.6960273401585455, + 
"learning_rate": 7.4889455622902616e-06, + "loss": 0.7060763835906982, + "step": 4117 + }, + { + "epoch": 1.204123409855242, + "grad_norm": 1.478061987478783, + "learning_rate": 7.484266054152088e-06, + "loss": 0.42127668857574463, + "step": 4118 + }, + { + "epoch": 1.2044158502705073, + "grad_norm": 1.3574946815299211, + "learning_rate": 7.479587134051429e-06, + "loss": 0.490860253572464, + "step": 4119 + }, + { + "epoch": 1.204708290685773, + "grad_norm": 1.443033575116078, + "learning_rate": 7.474908803081955e-06, + "loss": 0.45786625146865845, + "step": 4120 + }, + { + "epoch": 1.2050007311010382, + "grad_norm": 1.810733388901398, + "learning_rate": 7.470231062337192e-06, + "loss": 0.5267277359962463, + "step": 4121 + }, + { + "epoch": 1.2052931715163036, + "grad_norm": 1.670838162040588, + "learning_rate": 7.465553912910539e-06, + "loss": 0.47834646701812744, + "step": 4122 + }, + { + "epoch": 1.2055856119315689, + "grad_norm": 1.4366745635956868, + "learning_rate": 7.460877355895249e-06, + "loss": 0.5348576903343201, + "step": 4123 + }, + { + "epoch": 1.2058780523468344, + "grad_norm": 1.7186674622129299, + "learning_rate": 7.456201392384437e-06, + "loss": 0.47992441058158875, + "step": 4124 + }, + { + "epoch": 1.2061704927620998, + "grad_norm": 1.6108537844876905, + "learning_rate": 7.451526023471085e-06, + "loss": 0.5693913698196411, + "step": 4125 + }, + { + "epoch": 1.206462933177365, + "grad_norm": 1.8995573488864546, + "learning_rate": 7.4468512502480305e-06, + "loss": 0.5165153741836548, + "step": 4126 + }, + { + "epoch": 1.2067553735926304, + "grad_norm": 1.4833110616884417, + "learning_rate": 7.442177073807973e-06, + "loss": 0.522534966468811, + "step": 4127 + }, + { + "epoch": 1.207047814007896, + "grad_norm": 2.0276890753098464, + "learning_rate": 7.43750349524347e-06, + "loss": 0.6298432946205139, + "step": 4128 + }, + { + "epoch": 1.2073402544231613, + "grad_norm": 1.5840348747117112, + "learning_rate": 7.432830515646947e-06, + "loss": 
0.5077394247055054, + "step": 4129 + }, + { + "epoch": 1.2076326948384266, + "grad_norm": 2.0187303897146682, + "learning_rate": 7.428158136110681e-06, + "loss": 0.6492841839790344, + "step": 4130 + }, + { + "epoch": 1.2079251352536922, + "grad_norm": 1.8651226738731277, + "learning_rate": 7.423486357726813e-06, + "loss": 0.5204535126686096, + "step": 4131 + }, + { + "epoch": 1.2082175756689575, + "grad_norm": 1.7208004693147547, + "learning_rate": 7.418815181587347e-06, + "loss": 0.56598961353302, + "step": 4132 + }, + { + "epoch": 1.2085100160842228, + "grad_norm": 1.7632065676998485, + "learning_rate": 7.4141446087841364e-06, + "loss": 0.486950159072876, + "step": 4133 + }, + { + "epoch": 1.2088024564994881, + "grad_norm": 1.8229002651567825, + "learning_rate": 7.4094746404089e-06, + "loss": 0.7218466997146606, + "step": 4134 + }, + { + "epoch": 1.2090948969147537, + "grad_norm": 1.715700034058204, + "learning_rate": 7.404805277553218e-06, + "loss": 0.6486172676086426, + "step": 4135 + }, + { + "epoch": 1.209387337330019, + "grad_norm": 1.8385918373460561, + "learning_rate": 7.400136521308521e-06, + "loss": 0.6160574555397034, + "step": 4136 + }, + { + "epoch": 1.2096797777452843, + "grad_norm": 1.6766631954981184, + "learning_rate": 7.395468372766107e-06, + "loss": 0.6184699535369873, + "step": 4137 + }, + { + "epoch": 1.2099722181605497, + "grad_norm": 1.6881704887676476, + "learning_rate": 7.390800833017124e-06, + "loss": 0.5795263051986694, + "step": 4138 + }, + { + "epoch": 1.2102646585758152, + "grad_norm": 1.6841718896097397, + "learning_rate": 7.386133903152581e-06, + "loss": 0.5409367084503174, + "step": 4139 + }, + { + "epoch": 1.2105570989910805, + "grad_norm": 1.678948206873695, + "learning_rate": 7.3814675842633465e-06, + "loss": 0.47924935817718506, + "step": 4140 + }, + { + "epoch": 1.2108495394063459, + "grad_norm": 1.6532202064740131, + "learning_rate": 7.376801877440143e-06, + "loss": 0.5737412571907043, + "step": 4141 + }, + { + "epoch": 
1.2111419798216114, + "grad_norm": 1.5307761286613382, + "learning_rate": 7.372136783773551e-06, + "loss": 0.538013219833374, + "step": 4142 + }, + { + "epoch": 1.2114344202368768, + "grad_norm": 1.4940902719253717, + "learning_rate": 7.367472304354011e-06, + "loss": 0.4523904323577881, + "step": 4143 + }, + { + "epoch": 1.211726860652142, + "grad_norm": 1.4793905716399964, + "learning_rate": 7.362808440271811e-06, + "loss": 0.5057293176651001, + "step": 4144 + }, + { + "epoch": 1.2120193010674076, + "grad_norm": 1.4373562566302274, + "learning_rate": 7.358145192617103e-06, + "loss": 0.4653171896934509, + "step": 4145 + }, + { + "epoch": 1.212311741482673, + "grad_norm": 1.6048946971271119, + "learning_rate": 7.353482562479896e-06, + "loss": 0.607070803642273, + "step": 4146 + }, + { + "epoch": 1.2126041818979383, + "grad_norm": 1.51939699208445, + "learning_rate": 7.348820550950047e-06, + "loss": 0.4721861481666565, + "step": 4147 + }, + { + "epoch": 1.2128966223132036, + "grad_norm": 1.8802239228266517, + "learning_rate": 7.3441591591172765e-06, + "loss": 0.6656746864318848, + "step": 4148 + }, + { + "epoch": 1.2131890627284692, + "grad_norm": 1.504596663567376, + "learning_rate": 7.339498388071154e-06, + "loss": 0.5231848359107971, + "step": 4149 + }, + { + "epoch": 1.2134815031437345, + "grad_norm": 1.4250712810936565, + "learning_rate": 7.334838238901106e-06, + "loss": 0.42241257429122925, + "step": 4150 + }, + { + "epoch": 1.2137739435589998, + "grad_norm": 1.4721862632309721, + "learning_rate": 7.3301787126964165e-06, + "loss": 0.427111953496933, + "step": 4151 + }, + { + "epoch": 1.2140663839742651, + "grad_norm": 2.038283523639075, + "learning_rate": 7.325519810546219e-06, + "loss": 0.6208339929580688, + "step": 4152 + }, + { + "epoch": 1.2143588243895307, + "grad_norm": 1.3970516014119925, + "learning_rate": 7.320861533539505e-06, + "loss": 0.5031273365020752, + "step": 4153 + }, + { + "epoch": 1.214651264804796, + "grad_norm": 1.5153681425347725, + 
"learning_rate": 7.3162038827651205e-06, + "loss": 0.5617444515228271, + "step": 4154 + }, + { + "epoch": 1.2149437052200613, + "grad_norm": 1.4855483785732004, + "learning_rate": 7.311546859311758e-06, + "loss": 0.4616255462169647, + "step": 4155 + }, + { + "epoch": 1.215236145635327, + "grad_norm": 1.5704453976932513, + "learning_rate": 7.306890464267972e-06, + "loss": 0.5799977779388428, + "step": 4156 + }, + { + "epoch": 1.2155285860505922, + "grad_norm": 1.521477491941422, + "learning_rate": 7.302234698722165e-06, + "loss": 0.5669786930084229, + "step": 4157 + }, + { + "epoch": 1.2158210264658575, + "grad_norm": 1.5325381791627977, + "learning_rate": 7.297579563762595e-06, + "loss": 0.5622642040252686, + "step": 4158 + }, + { + "epoch": 1.216113466881123, + "grad_norm": 1.8789411887268221, + "learning_rate": 7.292925060477367e-06, + "loss": 0.6896791458129883, + "step": 4159 + }, + { + "epoch": 1.2164059072963884, + "grad_norm": 1.5263918361022677, + "learning_rate": 7.288271189954451e-06, + "loss": 0.6704437136650085, + "step": 4160 + }, + { + "epoch": 1.2166983477116537, + "grad_norm": 1.6192057061391554, + "learning_rate": 7.2836179532816565e-06, + "loss": 0.6340646743774414, + "step": 4161 + }, + { + "epoch": 1.216990788126919, + "grad_norm": 1.4283430296516553, + "learning_rate": 7.278965351546648e-06, + "loss": 0.528992772102356, + "step": 4162 + }, + { + "epoch": 1.2172832285421846, + "grad_norm": 1.4842100691170903, + "learning_rate": 7.274313385836949e-06, + "loss": 0.45160621404647827, + "step": 4163 + }, + { + "epoch": 1.21757566895745, + "grad_norm": 1.3859373993268853, + "learning_rate": 7.269662057239919e-06, + "loss": 0.5398670434951782, + "step": 4164 + }, + { + "epoch": 1.2178681093727153, + "grad_norm": 1.7598892874276293, + "learning_rate": 7.265011366842785e-06, + "loss": 0.5174476504325867, + "step": 4165 + }, + { + "epoch": 1.2181605497879806, + "grad_norm": 1.663231631427072, + "learning_rate": 7.260361315732613e-06, + "loss": 
0.4830206632614136, + "step": 4166 + }, + { + "epoch": 1.2184529902032462, + "grad_norm": 1.4149457900973579, + "learning_rate": 7.2557119049963266e-06, + "loss": 0.42422181367874146, + "step": 4167 + }, + { + "epoch": 1.2187454306185115, + "grad_norm": 1.363467777836694, + "learning_rate": 7.251063135720699e-06, + "loss": 0.43544018268585205, + "step": 4168 + }, + { + "epoch": 1.2190378710337768, + "grad_norm": 1.4776092804767433, + "learning_rate": 7.2464150089923465e-06, + "loss": 0.5352005362510681, + "step": 4169 + }, + { + "epoch": 1.2193303114490424, + "grad_norm": 1.5459436268475357, + "learning_rate": 7.241767525897746e-06, + "loss": 0.4718678891658783, + "step": 4170 + }, + { + "epoch": 1.2196227518643077, + "grad_norm": 1.4994134423194976, + "learning_rate": 7.237120687523214e-06, + "loss": 0.618084192276001, + "step": 4171 + }, + { + "epoch": 1.219915192279573, + "grad_norm": 1.8137589794234399, + "learning_rate": 7.232474494954924e-06, + "loss": 0.625995397567749, + "step": 4172 + }, + { + "epoch": 1.2202076326948383, + "grad_norm": 1.4989590312422592, + "learning_rate": 7.227828949278894e-06, + "loss": 0.5382465124130249, + "step": 4173 + }, + { + "epoch": 1.220500073110104, + "grad_norm": 1.702878462884744, + "learning_rate": 7.223184051580992e-06, + "loss": 0.5299465656280518, + "step": 4174 + }, + { + "epoch": 1.2207925135253692, + "grad_norm": 1.7776293184889576, + "learning_rate": 7.218539802946934e-06, + "loss": 0.5899940729141235, + "step": 4175 + }, + { + "epoch": 1.2210849539406345, + "grad_norm": 1.9763552708522982, + "learning_rate": 7.213896204462286e-06, + "loss": 0.6126594543457031, + "step": 4176 + }, + { + "epoch": 1.2213773943558999, + "grad_norm": 1.6580044033592523, + "learning_rate": 7.20925325721246e-06, + "loss": 0.5576338768005371, + "step": 4177 + }, + { + "epoch": 1.2216698347711654, + "grad_norm": 1.5044012673537284, + "learning_rate": 7.204610962282717e-06, + "loss": 0.540515661239624, + "step": 4178 + }, + { + "epoch": 
1.2219622751864307, + "grad_norm": 1.5281012838641301, + "learning_rate": 7.1999693207581675e-06, + "loss": 0.5306440591812134, + "step": 4179 + }, + { + "epoch": 1.222254715601696, + "grad_norm": 2.02113466617051, + "learning_rate": 7.195328333723763e-06, + "loss": 0.6274853944778442, + "step": 4180 + }, + { + "epoch": 1.2225471560169616, + "grad_norm": 1.6954554706562375, + "learning_rate": 7.190688002264308e-06, + "loss": 0.5626333951950073, + "step": 4181 + }, + { + "epoch": 1.222839596432227, + "grad_norm": 1.6364457786315536, + "learning_rate": 7.18604832746445e-06, + "loss": 0.5938719511032104, + "step": 4182 + }, + { + "epoch": 1.2231320368474923, + "grad_norm": 1.4010331016668016, + "learning_rate": 7.181409310408688e-06, + "loss": 0.4599727988243103, + "step": 4183 + }, + { + "epoch": 1.2234244772627578, + "grad_norm": 1.516823379099723, + "learning_rate": 7.176770952181363e-06, + "loss": 0.5912302732467651, + "step": 4184 + }, + { + "epoch": 1.2237169176780232, + "grad_norm": 1.24563200951521, + "learning_rate": 7.172133253866662e-06, + "loss": 0.534631073474884, + "step": 4185 + }, + { + "epoch": 1.2240093580932885, + "grad_norm": 1.3825393422514298, + "learning_rate": 7.167496216548618e-06, + "loss": 0.5084418058395386, + "step": 4186 + }, + { + "epoch": 1.2243017985085538, + "grad_norm": 1.6343841724383257, + "learning_rate": 7.162859841311112e-06, + "loss": 0.6906956434249878, + "step": 4187 + }, + { + "epoch": 1.2245942389238194, + "grad_norm": 1.6583835426138527, + "learning_rate": 7.158224129237867e-06, + "loss": 0.5578658580780029, + "step": 4188 + }, + { + "epoch": 1.2248866793390847, + "grad_norm": 1.4116232043960963, + "learning_rate": 7.153589081412455e-06, + "loss": 0.4438907206058502, + "step": 4189 + }, + { + "epoch": 1.22517911975435, + "grad_norm": 1.9189119615156511, + "learning_rate": 7.148954698918289e-06, + "loss": 0.6366580724716187, + "step": 4190 + }, + { + "epoch": 1.2254715601696153, + "grad_norm": 1.674796821883658, + 
"learning_rate": 7.144320982838628e-06, + "loss": 0.5532524585723877, + "step": 4191 + }, + { + "epoch": 1.2257640005848809, + "grad_norm": 1.3678471530217577, + "learning_rate": 7.139687934256574e-06, + "loss": 0.4847594201564789, + "step": 4192 + }, + { + "epoch": 1.2260564410001462, + "grad_norm": 1.651582950772816, + "learning_rate": 7.135055554255073e-06, + "loss": 0.6273454427719116, + "step": 4193 + }, + { + "epoch": 1.2263488814154115, + "grad_norm": 1.826963047999446, + "learning_rate": 7.130423843916917e-06, + "loss": 0.6320512294769287, + "step": 4194 + }, + { + "epoch": 1.226641321830677, + "grad_norm": 1.9938752870068028, + "learning_rate": 7.125792804324741e-06, + "loss": 0.5499723553657532, + "step": 4195 + }, + { + "epoch": 1.2269337622459424, + "grad_norm": 1.531512294163018, + "learning_rate": 7.121162436561023e-06, + "loss": 0.5855484008789062, + "step": 4196 + }, + { + "epoch": 1.2272262026612077, + "grad_norm": 1.5092619418718032, + "learning_rate": 7.11653274170808e-06, + "loss": 0.5998305678367615, + "step": 4197 + }, + { + "epoch": 1.2275186430764733, + "grad_norm": 1.6613439290789596, + "learning_rate": 7.111903720848077e-06, + "loss": 0.6963703632354736, + "step": 4198 + }, + { + "epoch": 1.2278110834917386, + "grad_norm": 1.7273945695579416, + "learning_rate": 7.10727537506302e-06, + "loss": 0.5664974451065063, + "step": 4199 + }, + { + "epoch": 1.228103523907004, + "grad_norm": 1.8611907189119672, + "learning_rate": 7.102647705434755e-06, + "loss": 0.6502630710601807, + "step": 4200 + }, + { + "epoch": 1.2283959643222693, + "grad_norm": 1.5674599206950446, + "learning_rate": 7.098020713044973e-06, + "loss": 0.5727233290672302, + "step": 4201 + }, + { + "epoch": 1.2286884047375348, + "grad_norm": 1.4105219463780128, + "learning_rate": 7.093394398975206e-06, + "loss": 0.47885602712631226, + "step": 4202 + }, + { + "epoch": 1.2289808451528001, + "grad_norm": 1.62325320016664, + "learning_rate": 7.088768764306826e-06, + "loss": 
0.46089547872543335, + "step": 4203 + }, + { + "epoch": 1.2292732855680655, + "grad_norm": 1.3853973501267451, + "learning_rate": 7.084143810121044e-06, + "loss": 0.48920977115631104, + "step": 4204 + }, + { + "epoch": 1.2295657259833308, + "grad_norm": 1.4371671531095065, + "learning_rate": 7.07951953749892e-06, + "loss": 0.5320104956626892, + "step": 4205 + }, + { + "epoch": 1.2298581663985964, + "grad_norm": 1.693565977205871, + "learning_rate": 7.074895947521347e-06, + "loss": 0.6403206586837769, + "step": 4206 + }, + { + "epoch": 1.2301506068138617, + "grad_norm": 1.3774390509755927, + "learning_rate": 7.070273041269062e-06, + "loss": 0.5522217750549316, + "step": 4207 + }, + { + "epoch": 1.230443047229127, + "grad_norm": 1.644407790392686, + "learning_rate": 7.0656508198226405e-06, + "loss": 0.5235073566436768, + "step": 4208 + }, + { + "epoch": 1.2307354876443926, + "grad_norm": 1.9076552987416457, + "learning_rate": 7.061029284262497e-06, + "loss": 0.5972521305084229, + "step": 4209 + }, + { + "epoch": 1.2310279280596579, + "grad_norm": 1.7443828706372393, + "learning_rate": 7.0564084356688885e-06, + "loss": 0.5989280343055725, + "step": 4210 + }, + { + "epoch": 1.2313203684749232, + "grad_norm": 1.4723000244161777, + "learning_rate": 7.051788275121913e-06, + "loss": 0.5714213848114014, + "step": 4211 + }, + { + "epoch": 1.2316128088901885, + "grad_norm": 1.715005842824084, + "learning_rate": 7.047168803701502e-06, + "loss": 0.5588504076004028, + "step": 4212 + }, + { + "epoch": 1.231905249305454, + "grad_norm": 1.3648320017744335, + "learning_rate": 7.042550022487431e-06, + "loss": 0.47527533769607544, + "step": 4213 + }, + { + "epoch": 1.2321976897207194, + "grad_norm": 1.4838404108317171, + "learning_rate": 7.03793193255931e-06, + "loss": 0.5281137228012085, + "step": 4214 + }, + { + "epoch": 1.2324901301359847, + "grad_norm": 1.7839268972332825, + "learning_rate": 7.033314534996589e-06, + "loss": 0.5509631037712097, + "step": 4215 + }, + { + "epoch": 
1.23278257055125, + "grad_norm": 1.5991883103171023, + "learning_rate": 7.028697830878557e-06, + "loss": 0.5291438698768616, + "step": 4216 + }, + { + "epoch": 1.2330750109665156, + "grad_norm": 1.9253124571991533, + "learning_rate": 7.024081821284343e-06, + "loss": 0.5931780934333801, + "step": 4217 + }, + { + "epoch": 1.233367451381781, + "grad_norm": 1.5978832259158926, + "learning_rate": 7.019466507292908e-06, + "loss": 0.4883537292480469, + "step": 4218 + }, + { + "epoch": 1.2336598917970463, + "grad_norm": 1.5441369085427046, + "learning_rate": 7.014851889983058e-06, + "loss": 0.45155030488967896, + "step": 4219 + }, + { + "epoch": 1.2339523322123118, + "grad_norm": 1.7603110515675113, + "learning_rate": 7.010237970433426e-06, + "loss": 0.6107507944107056, + "step": 4220 + }, + { + "epoch": 1.2342447726275771, + "grad_norm": 1.4005214588133317, + "learning_rate": 7.0056247497224905e-06, + "loss": 0.41764840483665466, + "step": 4221 + }, + { + "epoch": 1.2345372130428425, + "grad_norm": 1.4727432689856292, + "learning_rate": 7.0010122289285635e-06, + "loss": 0.6786199808120728, + "step": 4222 + }, + { + "epoch": 1.234829653458108, + "grad_norm": 1.6328773458986388, + "learning_rate": 6.996400409129793e-06, + "loss": 0.5378292798995972, + "step": 4223 + }, + { + "epoch": 1.2351220938733734, + "grad_norm": 1.5470680329093456, + "learning_rate": 6.9917892914041685e-06, + "loss": 0.47646570205688477, + "step": 4224 + }, + { + "epoch": 1.2354145342886387, + "grad_norm": 1.6302332764801317, + "learning_rate": 6.987178876829503e-06, + "loss": 0.554225504398346, + "step": 4225 + }, + { + "epoch": 1.235706974703904, + "grad_norm": 1.8888599643549215, + "learning_rate": 6.982569166483459e-06, + "loss": 0.42614030838012695, + "step": 4226 + }, + { + "epoch": 1.2359994151191696, + "grad_norm": 1.523118498051214, + "learning_rate": 6.977960161443524e-06, + "loss": 0.5043676495552063, + "step": 4227 + }, + { + "epoch": 1.2362918555344349, + "grad_norm": 1.454372819437309, + 
"learning_rate": 6.973351862787029e-06, + "loss": 0.4905642569065094, + "step": 4228 + }, + { + "epoch": 1.2365842959497002, + "grad_norm": 1.6152329822736995, + "learning_rate": 6.9687442715911325e-06, + "loss": 0.5860332250595093, + "step": 4229 + }, + { + "epoch": 1.2368767363649655, + "grad_norm": 1.3841079659340747, + "learning_rate": 6.9641373889328345e-06, + "loss": 0.4900137782096863, + "step": 4230 + }, + { + "epoch": 1.237169176780231, + "grad_norm": 1.7249957815195471, + "learning_rate": 6.959531215888961e-06, + "loss": 0.5736855268478394, + "step": 4231 + }, + { + "epoch": 1.2374616171954964, + "grad_norm": 1.6635333389812996, + "learning_rate": 6.95492575353618e-06, + "loss": 0.6390400528907776, + "step": 4232 + }, + { + "epoch": 1.2377540576107617, + "grad_norm": 1.6623693676348965, + "learning_rate": 6.95032100295099e-06, + "loss": 0.6553822159767151, + "step": 4233 + }, + { + "epoch": 1.2380464980260273, + "grad_norm": 1.871056647578711, + "learning_rate": 6.945716965209723e-06, + "loss": 0.6685863733291626, + "step": 4234 + }, + { + "epoch": 1.2383389384412926, + "grad_norm": 1.7090289188063175, + "learning_rate": 6.941113641388542e-06, + "loss": 0.5172277688980103, + "step": 4235 + }, + { + "epoch": 1.238631378856558, + "grad_norm": 1.9648968097135298, + "learning_rate": 6.936511032563451e-06, + "loss": 0.6578007936477661, + "step": 4236 + }, + { + "epoch": 1.2389238192718235, + "grad_norm": 1.5304274814539944, + "learning_rate": 6.931909139810283e-06, + "loss": 0.5679500699043274, + "step": 4237 + }, + { + "epoch": 1.2392162596870888, + "grad_norm": 1.6592749019605815, + "learning_rate": 6.927307964204695e-06, + "loss": 0.49142318964004517, + "step": 4238 + }, + { + "epoch": 1.2395087001023541, + "grad_norm": 1.497996058585022, + "learning_rate": 6.9227075068221926e-06, + "loss": 0.5339487195014954, + "step": 4239 + }, + { + "epoch": 1.2398011405176195, + "grad_norm": 1.9993237065248757, + "learning_rate": 6.918107768738097e-06, + "loss": 
0.5845860242843628, + "step": 4240 + }, + { + "epoch": 1.240093580932885, + "grad_norm": 2.5543699126297823, + "learning_rate": 6.9135087510275735e-06, + "loss": 0.6767281889915466, + "step": 4241 + }, + { + "epoch": 1.2403860213481503, + "grad_norm": 1.850547226886836, + "learning_rate": 6.908910454765612e-06, + "loss": 0.6119472980499268, + "step": 4242 + }, + { + "epoch": 1.2406784617634157, + "grad_norm": 1.6013723709723773, + "learning_rate": 6.904312881027038e-06, + "loss": 0.6375409364700317, + "step": 4243 + }, + { + "epoch": 1.240970902178681, + "grad_norm": 1.9482571730059268, + "learning_rate": 6.899716030886508e-06, + "loss": 0.7059881687164307, + "step": 4244 + }, + { + "epoch": 1.2412633425939466, + "grad_norm": 1.9206862231453385, + "learning_rate": 6.895119905418504e-06, + "loss": 0.6463328003883362, + "step": 4245 + }, + { + "epoch": 1.2415557830092119, + "grad_norm": 1.5219372029025222, + "learning_rate": 6.890524505697345e-06, + "loss": 0.5374869108200073, + "step": 4246 + }, + { + "epoch": 1.2418482234244772, + "grad_norm": 1.625313205404651, + "learning_rate": 6.885929832797176e-06, + "loss": 0.5219276547431946, + "step": 4247 + }, + { + "epoch": 1.2421406638397428, + "grad_norm": 1.4315105659194174, + "learning_rate": 6.881335887791973e-06, + "loss": 0.4815624952316284, + "step": 4248 + }, + { + "epoch": 1.242433104255008, + "grad_norm": 1.318059168550072, + "learning_rate": 6.8767426717555475e-06, + "loss": 0.5111992955207825, + "step": 4249 + }, + { + "epoch": 1.2427255446702734, + "grad_norm": 1.6870166439076426, + "learning_rate": 6.872150185761533e-06, + "loss": 0.5331606268882751, + "step": 4250 + }, + { + "epoch": 1.2430179850855387, + "grad_norm": 1.5572023614320247, + "learning_rate": 6.867558430883393e-06, + "loss": 0.5375202894210815, + "step": 4251 + }, + { + "epoch": 1.2433104255008043, + "grad_norm": 1.495445158871636, + "learning_rate": 6.862967408194425e-06, + "loss": 0.5667152404785156, + "step": 4252 + }, + { + "epoch": 
1.2436028659160696, + "grad_norm": 2.036302557289267, + "learning_rate": 6.858377118767752e-06, + "loss": 0.5679255723953247, + "step": 4253 + }, + { + "epoch": 1.243895306331335, + "grad_norm": 1.7798647531094058, + "learning_rate": 6.853787563676324e-06, + "loss": 0.6097947359085083, + "step": 4254 + }, + { + "epoch": 1.2441877467466003, + "grad_norm": 1.458407608257313, + "learning_rate": 6.849198743992927e-06, + "loss": 0.41869044303894043, + "step": 4255 + }, + { + "epoch": 1.2444801871618658, + "grad_norm": 1.595586166137391, + "learning_rate": 6.8446106607901655e-06, + "loss": 0.6414821147918701, + "step": 4256 + }, + { + "epoch": 1.2447726275771311, + "grad_norm": 1.9180058965370612, + "learning_rate": 6.840023315140476e-06, + "loss": 0.5985021591186523, + "step": 4257 + }, + { + "epoch": 1.2450650679923965, + "grad_norm": 1.429348085027092, + "learning_rate": 6.8354367081161235e-06, + "loss": 0.4718092381954193, + "step": 4258 + }, + { + "epoch": 1.245357508407662, + "grad_norm": 1.374927912317877, + "learning_rate": 6.8308508407892e-06, + "loss": 0.46431800723075867, + "step": 4259 + }, + { + "epoch": 1.2456499488229273, + "grad_norm": 1.4906925043469428, + "learning_rate": 6.826265714231624e-06, + "loss": 0.5499997735023499, + "step": 4260 + }, + { + "epoch": 1.2459423892381927, + "grad_norm": 1.605653884930273, + "learning_rate": 6.8216813295151415e-06, + "loss": 0.6078206300735474, + "step": 4261 + }, + { + "epoch": 1.2462348296534582, + "grad_norm": 1.6116067904051048, + "learning_rate": 6.817097687711322e-06, + "loss": 0.5706520080566406, + "step": 4262 + }, + { + "epoch": 1.2465272700687235, + "grad_norm": 1.4579793726336556, + "learning_rate": 6.812514789891566e-06, + "loss": 0.5210137367248535, + "step": 4263 + }, + { + "epoch": 1.2468197104839889, + "grad_norm": 1.5969341972097826, + "learning_rate": 6.807932637127097e-06, + "loss": 0.42632028460502625, + "step": 4264 + }, + { + "epoch": 1.2471121508992542, + "grad_norm": 1.3281470644259092, + 
"learning_rate": 6.803351230488967e-06, + "loss": 0.49990004301071167, + "step": 4265 + }, + { + "epoch": 1.2474045913145198, + "grad_norm": 1.6439327542913937, + "learning_rate": 6.798770571048052e-06, + "loss": 0.557829737663269, + "step": 4266 + }, + { + "epoch": 1.247697031729785, + "grad_norm": 1.6838717466364301, + "learning_rate": 6.794190659875052e-06, + "loss": 0.4784187078475952, + "step": 4267 + }, + { + "epoch": 1.2479894721450504, + "grad_norm": 1.6243877795123443, + "learning_rate": 6.789611498040492e-06, + "loss": 0.4795057773590088, + "step": 4268 + }, + { + "epoch": 1.2482819125603157, + "grad_norm": 1.4149752899303223, + "learning_rate": 6.785033086614725e-06, + "loss": 0.415715754032135, + "step": 4269 + }, + { + "epoch": 1.2485743529755813, + "grad_norm": 1.4478921102692126, + "learning_rate": 6.7804554266679266e-06, + "loss": 0.49056607484817505, + "step": 4270 + }, + { + "epoch": 1.2488667933908466, + "grad_norm": 1.8227279880342706, + "learning_rate": 6.775878519270098e-06, + "loss": 0.5268200039863586, + "step": 4271 + }, + { + "epoch": 1.249159233806112, + "grad_norm": 1.5664194732567784, + "learning_rate": 6.771302365491064e-06, + "loss": 0.6250356435775757, + "step": 4272 + }, + { + "epoch": 1.2494516742213775, + "grad_norm": 1.5152208337758115, + "learning_rate": 6.76672696640047e-06, + "loss": 0.5403029918670654, + "step": 4273 + }, + { + "epoch": 1.2497441146366428, + "grad_norm": 1.6699524807174595, + "learning_rate": 6.762152323067787e-06, + "loss": 0.47006577253341675, + "step": 4274 + }, + { + "epoch": 1.2500365550519081, + "grad_norm": 1.7406248179582138, + "learning_rate": 6.7575784365623134e-06, + "loss": 0.5088232755661011, + "step": 4275 + }, + { + "epoch": 1.2503289954671737, + "grad_norm": 1.7598214720338152, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.5438642501831055, + "step": 4276 + }, + { + "epoch": 1.250621435882439, + "grad_norm": 1.4316922317447767, + "learning_rate": 6.748432938309286e-06, + "loss": 
0.45436567068099976, + "step": 4277 + }, + { + "epoch": 1.2509138762977043, + "grad_norm": 1.5793052704561465, + "learning_rate": 6.743861328699438e-06, + "loss": 0.5298944115638733, + "step": 4278 + }, + { + "epoch": 1.2512063167129697, + "grad_norm": 1.3504092629468785, + "learning_rate": 6.7392904801922055e-06, + "loss": 0.49393707513809204, + "step": 4279 + }, + { + "epoch": 1.251498757128235, + "grad_norm": 1.4852717426676887, + "learning_rate": 6.734720393855998e-06, + "loss": 0.5540947318077087, + "step": 4280 + }, + { + "epoch": 1.2517911975435005, + "grad_norm": 1.4330918355062934, + "learning_rate": 6.730151070759043e-06, + "loss": 0.47406166791915894, + "step": 4281 + }, + { + "epoch": 1.2520836379587659, + "grad_norm": 1.5653956712736337, + "learning_rate": 6.725582511969397e-06, + "loss": 0.46885907649993896, + "step": 4282 + }, + { + "epoch": 1.2523760783740312, + "grad_norm": 1.7710771095422673, + "learning_rate": 6.721014718554931e-06, + "loss": 0.537517786026001, + "step": 4283 + }, + { + "epoch": 1.2526685187892967, + "grad_norm": 1.5323701554592244, + "learning_rate": 6.716447691583336e-06, + "loss": 0.514340341091156, + "step": 4284 + }, + { + "epoch": 1.252960959204562, + "grad_norm": 1.6716715067641383, + "learning_rate": 6.711881432122129e-06, + "loss": 0.5696117281913757, + "step": 4285 + }, + { + "epoch": 1.2532533996198274, + "grad_norm": 1.632492076185155, + "learning_rate": 6.707315941238645e-06, + "loss": 0.5620799660682678, + "step": 4286 + }, + { + "epoch": 1.253545840035093, + "grad_norm": 1.7721487037647632, + "learning_rate": 6.702751220000039e-06, + "loss": 0.4832923412322998, + "step": 4287 + }, + { + "epoch": 1.2538382804503583, + "grad_norm": 1.7195688873272827, + "learning_rate": 6.698187269473289e-06, + "loss": 0.6608176231384277, + "step": 4288 + }, + { + "epoch": 1.2541307208656236, + "grad_norm": 1.67536250359078, + "learning_rate": 6.69362409072519e-06, + "loss": 0.6002779006958008, + "step": 4289 + }, + { + "epoch": 
1.2544231612808892, + "grad_norm": 1.5859756058231869, + "learning_rate": 6.689061684822357e-06, + "loss": 0.49898988008499146, + "step": 4290 + }, + { + "epoch": 1.2547156016961545, + "grad_norm": 1.908707186131175, + "learning_rate": 6.684500052831222e-06, + "loss": 0.5887055397033691, + "step": 4291 + }, + { + "epoch": 1.2550080421114198, + "grad_norm": 1.7680049519728702, + "learning_rate": 6.679939195818043e-06, + "loss": 0.6494714617729187, + "step": 4292 + }, + { + "epoch": 1.2553004825266851, + "grad_norm": 2.320887096811341, + "learning_rate": 6.67537911484889e-06, + "loss": 0.5708397626876831, + "step": 4293 + }, + { + "epoch": 1.2555929229419505, + "grad_norm": 1.4472817266256797, + "learning_rate": 6.670819810989656e-06, + "loss": 0.40412014722824097, + "step": 4294 + }, + { + "epoch": 1.255885363357216, + "grad_norm": 1.675200347061479, + "learning_rate": 6.666261285306048e-06, + "loss": 0.5141078233718872, + "step": 4295 + }, + { + "epoch": 1.2561778037724813, + "grad_norm": 1.8039877813287382, + "learning_rate": 6.661703538863595e-06, + "loss": 0.6463406085968018, + "step": 4296 + }, + { + "epoch": 1.2564702441877467, + "grad_norm": 1.5123528456732447, + "learning_rate": 6.657146572727643e-06, + "loss": 0.5809177160263062, + "step": 4297 + }, + { + "epoch": 1.2567626846030122, + "grad_norm": 1.6628802038143384, + "learning_rate": 6.652590387963354e-06, + "loss": 0.5124412775039673, + "step": 4298 + }, + { + "epoch": 1.2570551250182775, + "grad_norm": 1.8011842610745197, + "learning_rate": 6.64803498563571e-06, + "loss": 0.5399736762046814, + "step": 4299 + }, + { + "epoch": 1.2573475654335429, + "grad_norm": 1.4403786785249715, + "learning_rate": 6.6434803668095095e-06, + "loss": 0.548133373260498, + "step": 4300 + }, + { + "epoch": 1.2576400058488084, + "grad_norm": 1.7736401224051406, + "learning_rate": 6.638926532549364e-06, + "loss": 0.45056310296058655, + "step": 4301 + }, + { + "epoch": 1.2579324462640737, + "grad_norm": 2.434184879977136, + 
"learning_rate": 6.634373483919705e-06, + "loss": 0.5191814303398132, + "step": 4302 + }, + { + "epoch": 1.258224886679339, + "grad_norm": 1.4188278481806091, + "learning_rate": 6.62982122198478e-06, + "loss": 0.41939109563827515, + "step": 4303 + }, + { + "epoch": 1.2585173270946044, + "grad_norm": 1.6631261031278954, + "learning_rate": 6.625269747808655e-06, + "loss": 0.6535190939903259, + "step": 4304 + }, + { + "epoch": 1.2588097675098697, + "grad_norm": 1.7210614964326925, + "learning_rate": 6.620719062455207e-06, + "loss": 0.6282539367675781, + "step": 4305 + }, + { + "epoch": 1.2591022079251353, + "grad_norm": 1.5686327106153548, + "learning_rate": 6.616169166988133e-06, + "loss": 0.5378686189651489, + "step": 4306 + }, + { + "epoch": 1.2593946483404006, + "grad_norm": 1.80292094791683, + "learning_rate": 6.611620062470942e-06, + "loss": 0.5278643369674683, + "step": 4307 + }, + { + "epoch": 1.259687088755666, + "grad_norm": 1.5211478183195457, + "learning_rate": 6.607071749966958e-06, + "loss": 0.5578285455703735, + "step": 4308 + }, + { + "epoch": 1.2599795291709315, + "grad_norm": 1.7646090466366875, + "learning_rate": 6.602524230539324e-06, + "loss": 0.6452580094337463, + "step": 4309 + }, + { + "epoch": 1.2602719695861968, + "grad_norm": 1.7812547970338353, + "learning_rate": 6.597977505250992e-06, + "loss": 0.6133028268814087, + "step": 4310 + }, + { + "epoch": 1.2605644100014621, + "grad_norm": 1.552230597230507, + "learning_rate": 6.5934315751647345e-06, + "loss": 0.4930221140384674, + "step": 4311 + }, + { + "epoch": 1.2608568504167277, + "grad_norm": 2.197359143106273, + "learning_rate": 6.588886441343136e-06, + "loss": 0.48653531074523926, + "step": 4312 + }, + { + "epoch": 1.261149290831993, + "grad_norm": 1.851387133095935, + "learning_rate": 6.5843421048485915e-06, + "loss": 0.6594399213790894, + "step": 4313 + }, + { + "epoch": 1.2614417312472583, + "grad_norm": 1.66909694599425, + "learning_rate": 6.579798566743314e-06, + "loss": 
0.5164401531219482, + "step": 4314 + }, + { + "epoch": 1.2617341716625239, + "grad_norm": 1.7484363064869977, + "learning_rate": 6.5752558280893245e-06, + "loss": 0.6338971853256226, + "step": 4315 + }, + { + "epoch": 1.2620266120777892, + "grad_norm": 1.7526913055276123, + "learning_rate": 6.570713889948461e-06, + "loss": 0.5301859974861145, + "step": 4316 + }, + { + "epoch": 1.2623190524930545, + "grad_norm": 1.5016995868339762, + "learning_rate": 6.566172753382376e-06, + "loss": 0.4572887420654297, + "step": 4317 + }, + { + "epoch": 1.2626114929083199, + "grad_norm": 1.5874066468532555, + "learning_rate": 6.561632419452532e-06, + "loss": 0.5235984325408936, + "step": 4318 + }, + { + "epoch": 1.2629039333235852, + "grad_norm": 1.5456604836068861, + "learning_rate": 6.557092889220206e-06, + "loss": 0.586036205291748, + "step": 4319 + }, + { + "epoch": 1.2631963737388507, + "grad_norm": 1.6865403223453492, + "learning_rate": 6.5525541637464855e-06, + "loss": 0.4728356599807739, + "step": 4320 + }, + { + "epoch": 1.263488814154116, + "grad_norm": 1.5435862254535146, + "learning_rate": 6.548016244092265e-06, + "loss": 0.4932190179824829, + "step": 4321 + }, + { + "epoch": 1.2637812545693814, + "grad_norm": 1.6817765339416926, + "learning_rate": 6.543479131318259e-06, + "loss": 0.525676429271698, + "step": 4322 + }, + { + "epoch": 1.264073694984647, + "grad_norm": 1.4602981048339732, + "learning_rate": 6.538942826484991e-06, + "loss": 0.5462610721588135, + "step": 4323 + }, + { + "epoch": 1.2643661353999123, + "grad_norm": 1.6170865165049584, + "learning_rate": 6.534407330652792e-06, + "loss": 0.5391229391098022, + "step": 4324 + }, + { + "epoch": 1.2646585758151776, + "grad_norm": 1.7047610503615187, + "learning_rate": 6.529872644881811e-06, + "loss": 0.5361309051513672, + "step": 4325 + }, + { + "epoch": 1.2649510162304431, + "grad_norm": 1.7296167923882715, + "learning_rate": 6.525338770232001e-06, + "loss": 0.5692390203475952, + "step": 4326 + }, + { + "epoch": 
1.2652434566457085, + "grad_norm": 1.7314833561159049, + "learning_rate": 6.520805707763125e-06, + "loss": 0.5337555408477783, + "step": 4327 + }, + { + "epoch": 1.2655358970609738, + "grad_norm": 1.5538338127930955, + "learning_rate": 6.5162734585347605e-06, + "loss": 0.604168176651001, + "step": 4328 + }, + { + "epoch": 1.2658283374762394, + "grad_norm": 1.621069176676038, + "learning_rate": 6.5117420236062955e-06, + "loss": 0.5404821038246155, + "step": 4329 + }, + { + "epoch": 1.2661207778915047, + "grad_norm": 1.8779165644410452, + "learning_rate": 6.507211404036922e-06, + "loss": 0.6097038388252258, + "step": 4330 + }, + { + "epoch": 1.26641321830677, + "grad_norm": 1.41106750899854, + "learning_rate": 6.50268160088565e-06, + "loss": 0.44309180974960327, + "step": 4331 + }, + { + "epoch": 1.2667056587220353, + "grad_norm": 1.633689199912191, + "learning_rate": 6.498152615211286e-06, + "loss": 0.5703015923500061, + "step": 4332 + }, + { + "epoch": 1.2669980991373007, + "grad_norm": 1.9239494523704173, + "learning_rate": 6.4936244480724575e-06, + "loss": 0.5745347738265991, + "step": 4333 + }, + { + "epoch": 1.2672905395525662, + "grad_norm": 1.7558467932702122, + "learning_rate": 6.489097100527595e-06, + "loss": 0.6611922979354858, + "step": 4334 + }, + { + "epoch": 1.2675829799678315, + "grad_norm": 1.373367301388142, + "learning_rate": 6.484570573634939e-06, + "loss": 0.4560534358024597, + "step": 4335 + }, + { + "epoch": 1.2678754203830969, + "grad_norm": 1.3735982195225196, + "learning_rate": 6.480044868452535e-06, + "loss": 0.3765673041343689, + "step": 4336 + }, + { + "epoch": 1.2681678607983624, + "grad_norm": 1.631255659187599, + "learning_rate": 6.475519986038246e-06, + "loss": 0.6471004486083984, + "step": 4337 + }, + { + "epoch": 1.2684603012136277, + "grad_norm": 1.6199016829966775, + "learning_rate": 6.4709959274497284e-06, + "loss": 0.5639084577560425, + "step": 4338 + }, + { + "epoch": 1.268752741628893, + "grad_norm": 1.6880087227037737, + 
"learning_rate": 6.4664726937444545e-06, + "loss": 0.6367507576942444, + "step": 4339 + }, + { + "epoch": 1.2690451820441586, + "grad_norm": 2.0302420653268958, + "learning_rate": 6.4619502859797055e-06, + "loss": 0.6803586483001709, + "step": 4340 + }, + { + "epoch": 1.269337622459424, + "grad_norm": 1.7398101139995543, + "learning_rate": 6.457428705212565e-06, + "loss": 0.49068397283554077, + "step": 4341 + }, + { + "epoch": 1.2696300628746893, + "grad_norm": 1.8759736386903334, + "learning_rate": 6.4529079524999296e-06, + "loss": 0.616880476474762, + "step": 4342 + }, + { + "epoch": 1.2699225032899546, + "grad_norm": 1.3483643409763457, + "learning_rate": 6.448388028898489e-06, + "loss": 0.45614945888519287, + "step": 4343 + }, + { + "epoch": 1.27021494370522, + "grad_norm": 1.4554785032074153, + "learning_rate": 6.443868935464754e-06, + "loss": 0.49267178773880005, + "step": 4344 + }, + { + "epoch": 1.2705073841204855, + "grad_norm": 1.6269409722468795, + "learning_rate": 6.439350673255033e-06, + "loss": 0.5169225335121155, + "step": 4345 + }, + { + "epoch": 1.2707998245357508, + "grad_norm": 1.4955295461512919, + "learning_rate": 6.434833243325442e-06, + "loss": 0.4999169111251831, + "step": 4346 + }, + { + "epoch": 1.2710922649510161, + "grad_norm": 1.6243334237328435, + "learning_rate": 6.430316646731906e-06, + "loss": 0.6282567977905273, + "step": 4347 + }, + { + "epoch": 1.2713847053662817, + "grad_norm": 1.6085299245102849, + "learning_rate": 6.425800884530151e-06, + "loss": 0.5007494688034058, + "step": 4348 + }, + { + "epoch": 1.271677145781547, + "grad_norm": 1.656568917278449, + "learning_rate": 6.421285957775705e-06, + "loss": 0.5178118944168091, + "step": 4349 + }, + { + "epoch": 1.2719695861968123, + "grad_norm": 1.560370266514351, + "learning_rate": 6.4167718675239075e-06, + "loss": 0.5473636388778687, + "step": 4350 + }, + { + "epoch": 1.2722620266120779, + "grad_norm": 1.6953423126666767, + "learning_rate": 6.4122586148299004e-06, + "loss": 
0.5863620042800903, + "step": 4351 + }, + { + "epoch": 1.2725544670273432, + "grad_norm": 1.8607908969719156, + "learning_rate": 6.407746200748628e-06, + "loss": 0.5301654934883118, + "step": 4352 + }, + { + "epoch": 1.2728469074426085, + "grad_norm": 1.6932378497792755, + "learning_rate": 6.403234626334842e-06, + "loss": 0.5856075286865234, + "step": 4353 + }, + { + "epoch": 1.273139347857874, + "grad_norm": 1.678003179838639, + "learning_rate": 6.39872389264309e-06, + "loss": 0.49686455726623535, + "step": 4354 + }, + { + "epoch": 1.2734317882731394, + "grad_norm": 1.4854139308295418, + "learning_rate": 6.394214000727734e-06, + "loss": 0.5032684803009033, + "step": 4355 + }, + { + "epoch": 1.2737242286884047, + "grad_norm": 1.8801294667488437, + "learning_rate": 6.389704951642931e-06, + "loss": 0.6855330467224121, + "step": 4356 + }, + { + "epoch": 1.27401666910367, + "grad_norm": 1.479367610859775, + "learning_rate": 6.385196746442644e-06, + "loss": 0.5333864688873291, + "step": 4357 + }, + { + "epoch": 1.2743091095189354, + "grad_norm": 1.5944305875728124, + "learning_rate": 6.380689386180641e-06, + "loss": 0.5597629547119141, + "step": 4358 + }, + { + "epoch": 1.274601549934201, + "grad_norm": 1.467403558865203, + "learning_rate": 6.376182871910488e-06, + "loss": 0.4576488137245178, + "step": 4359 + }, + { + "epoch": 1.2748939903494663, + "grad_norm": 1.7247772731373485, + "learning_rate": 6.371677204685555e-06, + "loss": 0.45165061950683594, + "step": 4360 + }, + { + "epoch": 1.2751864307647316, + "grad_norm": 1.5415632861050979, + "learning_rate": 6.367172385559014e-06, + "loss": 0.5451514720916748, + "step": 4361 + }, + { + "epoch": 1.2754788711799971, + "grad_norm": 1.874618224476165, + "learning_rate": 6.362668415583841e-06, + "loss": 0.6141163110733032, + "step": 4362 + }, + { + "epoch": 1.2757713115952625, + "grad_norm": 1.6869879622469415, + "learning_rate": 6.358165295812809e-06, + "loss": 0.5156669020652771, + "step": 4363 + }, + { + "epoch": 
1.2760637520105278, + "grad_norm": 1.8328178355603366, + "learning_rate": 6.3536630272984974e-06, + "loss": 0.41485118865966797, + "step": 4364 + }, + { + "epoch": 1.2763561924257933, + "grad_norm": 1.546563271256682, + "learning_rate": 6.3491616110932845e-06, + "loss": 0.386514276266098, + "step": 4365 + }, + { + "epoch": 1.2766486328410587, + "grad_norm": 1.472426766767245, + "learning_rate": 6.344661048249345e-06, + "loss": 0.5620483160018921, + "step": 4366 + }, + { + "epoch": 1.276941073256324, + "grad_norm": 1.6328857080628636, + "learning_rate": 6.340161339818662e-06, + "loss": 0.4910007119178772, + "step": 4367 + }, + { + "epoch": 1.2772335136715895, + "grad_norm": 1.3312787841228058, + "learning_rate": 6.335662486853014e-06, + "loss": 0.4628123939037323, + "step": 4368 + }, + { + "epoch": 1.2775259540868549, + "grad_norm": 1.7576669653081538, + "learning_rate": 6.331164490403978e-06, + "loss": 0.5129125118255615, + "step": 4369 + }, + { + "epoch": 1.2778183945021202, + "grad_norm": 1.3282548492081792, + "learning_rate": 6.326667351522939e-06, + "loss": 0.45091521739959717, + "step": 4370 + }, + { + "epoch": 1.2781108349173855, + "grad_norm": 1.4312089210542207, + "learning_rate": 6.322171071261071e-06, + "loss": 0.4914324879646301, + "step": 4371 + }, + { + "epoch": 1.2784032753326509, + "grad_norm": 1.7409991660962885, + "learning_rate": 6.317675650669353e-06, + "loss": 0.6361461877822876, + "step": 4372 + }, + { + "epoch": 1.2786957157479164, + "grad_norm": 1.6196651007639755, + "learning_rate": 6.313181090798561e-06, + "loss": 0.4251636564731598, + "step": 4373 + }, + { + "epoch": 1.2789881561631817, + "grad_norm": 1.7204832108380748, + "learning_rate": 6.308687392699275e-06, + "loss": 0.5605714321136475, + "step": 4374 + }, + { + "epoch": 1.279280596578447, + "grad_norm": 1.5898129202606366, + "learning_rate": 6.304194557421867e-06, + "loss": 0.5366392731666565, + "step": 4375 + }, + { + "epoch": 1.2795730369937126, + "grad_norm": 1.9084263306328586, + 
"learning_rate": 6.299702586016512e-06, + "loss": 0.5501587986946106, + "step": 4376 + }, + { + "epoch": 1.279865477408978, + "grad_norm": 1.856477952130892, + "learning_rate": 6.295211479533177e-06, + "loss": 0.6145694851875305, + "step": 4377 + }, + { + "epoch": 1.2801579178242433, + "grad_norm": 1.9271512769721166, + "learning_rate": 6.2907212390216335e-06, + "loss": 0.5921984910964966, + "step": 4378 + }, + { + "epoch": 1.2804503582395088, + "grad_norm": 1.5061577707687395, + "learning_rate": 6.286231865531447e-06, + "loss": 0.4376833140850067, + "step": 4379 + }, + { + "epoch": 1.2807427986547741, + "grad_norm": 1.5348932565255202, + "learning_rate": 6.281743360111983e-06, + "loss": 0.5141662955284119, + "step": 4380 + }, + { + "epoch": 1.2810352390700395, + "grad_norm": 1.700541758244486, + "learning_rate": 6.2772557238124025e-06, + "loss": 0.7065848112106323, + "step": 4381 + }, + { + "epoch": 1.2813276794853048, + "grad_norm": 1.500203661604044, + "learning_rate": 6.272768957681659e-06, + "loss": 0.5662813186645508, + "step": 4382 + }, + { + "epoch": 1.2816201199005701, + "grad_norm": 1.5006210101215816, + "learning_rate": 6.268283062768512e-06, + "loss": 0.46340662240982056, + "step": 4383 + }, + { + "epoch": 1.2819125603158357, + "grad_norm": 1.5406586553103667, + "learning_rate": 6.263798040121508e-06, + "loss": 0.5258422493934631, + "step": 4384 + }, + { + "epoch": 1.282205000731101, + "grad_norm": 1.8313859097442655, + "learning_rate": 6.2593138907889965e-06, + "loss": 0.5586943030357361, + "step": 4385 + }, + { + "epoch": 1.2824974411463663, + "grad_norm": 1.707661958872181, + "learning_rate": 6.254830615819116e-06, + "loss": 0.5224723815917969, + "step": 4386 + }, + { + "epoch": 1.2827898815616319, + "grad_norm": 1.8755820352841006, + "learning_rate": 6.250348216259812e-06, + "loss": 0.6092125177383423, + "step": 4387 + }, + { + "epoch": 1.2830823219768972, + "grad_norm": 1.6601692047393128, + "learning_rate": 6.245866693158813e-06, + "loss": 
0.5582839250564575, + "step": 4388 + }, + { + "epoch": 1.2833747623921625, + "grad_norm": 1.529218817283274, + "learning_rate": 6.241386047563649e-06, + "loss": 0.6074620485305786, + "step": 4389 + }, + { + "epoch": 1.283667202807428, + "grad_norm": 1.3747332990929297, + "learning_rate": 6.236906280521646e-06, + "loss": 0.6247550845146179, + "step": 4390 + }, + { + "epoch": 1.2839596432226934, + "grad_norm": 1.6645308511195784, + "learning_rate": 6.232427393079919e-06, + "loss": 0.5325940847396851, + "step": 4391 + }, + { + "epoch": 1.2842520836379587, + "grad_norm": 1.5279900789464966, + "learning_rate": 6.227949386285379e-06, + "loss": 0.5082288980484009, + "step": 4392 + }, + { + "epoch": 1.2845445240532243, + "grad_norm": 1.587332587045442, + "learning_rate": 6.223472261184738e-06, + "loss": 0.5704036355018616, + "step": 4393 + }, + { + "epoch": 1.2848369644684896, + "grad_norm": 1.7646477307813349, + "learning_rate": 6.218996018824492e-06, + "loss": 0.5301543474197388, + "step": 4394 + }, + { + "epoch": 1.285129404883755, + "grad_norm": 1.6829663682000435, + "learning_rate": 6.21452066025094e-06, + "loss": 0.48660725355148315, + "step": 4395 + }, + { + "epoch": 1.2854218452990203, + "grad_norm": 1.7324467857194032, + "learning_rate": 6.210046186510168e-06, + "loss": 0.5744560956954956, + "step": 4396 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.6645302463411007, + "learning_rate": 6.205572598648055e-06, + "loss": 0.5714898109436035, + "step": 4397 + }, + { + "epoch": 1.2860067261295511, + "grad_norm": 1.8166911532739076, + "learning_rate": 6.201099897710277e-06, + "loss": 0.6616571545600891, + "step": 4398 + }, + { + "epoch": 1.2862991665448165, + "grad_norm": 1.75450880953695, + "learning_rate": 6.1966280847423e-06, + "loss": 0.5552959442138672, + "step": 4399 + }, + { + "epoch": 1.2865916069600818, + "grad_norm": 1.6738534376194054, + "learning_rate": 6.192157160789382e-06, + "loss": 0.5544919967651367, + "step": 4400 + }, + { + "epoch": 
1.2868840473753473, + "grad_norm": 1.6448049553355306, + "learning_rate": 6.18768712689658e-06, + "loss": 0.5914726853370667, + "step": 4401 + }, + { + "epoch": 1.2871764877906127, + "grad_norm": 1.76025336575331, + "learning_rate": 6.183217984108729e-06, + "loss": 0.47191259264945984, + "step": 4402 + }, + { + "epoch": 1.287468928205878, + "grad_norm": 1.690038062727397, + "learning_rate": 6.178749733470468e-06, + "loss": 0.6479181051254272, + "step": 4403 + }, + { + "epoch": 1.2877613686211435, + "grad_norm": 1.5093061541159978, + "learning_rate": 6.174282376026225e-06, + "loss": 0.42491137981414795, + "step": 4404 + }, + { + "epoch": 1.2880538090364089, + "grad_norm": 1.5952968160469727, + "learning_rate": 6.169815912820214e-06, + "loss": 0.6037728786468506, + "step": 4405 + }, + { + "epoch": 1.2883462494516742, + "grad_norm": 1.6035701682484467, + "learning_rate": 6.165350344896446e-06, + "loss": 0.4979787766933441, + "step": 4406 + }, + { + "epoch": 1.2886386898669397, + "grad_norm": 1.800062229580063, + "learning_rate": 6.160885673298722e-06, + "loss": 0.5863564014434814, + "step": 4407 + }, + { + "epoch": 1.288931130282205, + "grad_norm": 1.735193401842224, + "learning_rate": 6.156421899070628e-06, + "loss": 0.6516878008842468, + "step": 4408 + }, + { + "epoch": 1.2892235706974704, + "grad_norm": 1.3644068122534347, + "learning_rate": 6.151959023255545e-06, + "loss": 0.45655903220176697, + "step": 4409 + }, + { + "epoch": 1.2895160111127357, + "grad_norm": 1.5401566996811273, + "learning_rate": 6.147497046896644e-06, + "loss": 0.4751289486885071, + "step": 4410 + }, + { + "epoch": 1.289808451528001, + "grad_norm": 1.6902527178920421, + "learning_rate": 6.1430359710368845e-06, + "loss": 0.48472684621810913, + "step": 4411 + }, + { + "epoch": 1.2901008919432666, + "grad_norm": 1.5473669029252384, + "learning_rate": 6.138575796719017e-06, + "loss": 0.5014214515686035, + "step": 4412 + }, + { + "epoch": 1.290393332358532, + "grad_norm": 1.7827106404845192, + 
"learning_rate": 6.134116524985581e-06, + "loss": 0.5979991555213928, + "step": 4413 + }, + { + "epoch": 1.2906857727737973, + "grad_norm": 1.325839826079579, + "learning_rate": 6.129658156878899e-06, + "loss": 0.4651130437850952, + "step": 4414 + }, + { + "epoch": 1.2909782131890628, + "grad_norm": 1.7806648175874917, + "learning_rate": 6.125200693441092e-06, + "loss": 0.5938215255737305, + "step": 4415 + }, + { + "epoch": 1.2912706536043281, + "grad_norm": 1.5490961027602033, + "learning_rate": 6.1207441357140626e-06, + "loss": 0.4893927574157715, + "step": 4416 + }, + { + "epoch": 1.2915630940195935, + "grad_norm": 1.7524993955466766, + "learning_rate": 6.116288484739507e-06, + "loss": 0.5546435713768005, + "step": 4417 + }, + { + "epoch": 1.291855534434859, + "grad_norm": 1.8413981048239587, + "learning_rate": 6.111833741558905e-06, + "loss": 0.545367419719696, + "step": 4418 + }, + { + "epoch": 1.2921479748501243, + "grad_norm": 1.4120684443774227, + "learning_rate": 6.1073799072135245e-06, + "loss": 0.47479283809661865, + "step": 4419 + }, + { + "epoch": 1.2924404152653897, + "grad_norm": 1.6721044710471762, + "learning_rate": 6.102926982744423e-06, + "loss": 0.5109270215034485, + "step": 4420 + }, + { + "epoch": 1.292732855680655, + "grad_norm": 1.774842272860347, + "learning_rate": 6.098474969192445e-06, + "loss": 0.5862404108047485, + "step": 4421 + }, + { + "epoch": 1.2930252960959203, + "grad_norm": 1.5821200459355214, + "learning_rate": 6.09402386759822e-06, + "loss": 0.5031660795211792, + "step": 4422 + }, + { + "epoch": 1.2933177365111859, + "grad_norm": 1.7397846198854208, + "learning_rate": 6.089573679002168e-06, + "loss": 0.47179776430130005, + "step": 4423 + }, + { + "epoch": 1.2936101769264512, + "grad_norm": 1.5340233803824985, + "learning_rate": 6.085124404444495e-06, + "loss": 0.45889902114868164, + "step": 4424 + }, + { + "epoch": 1.2939026173417165, + "grad_norm": 1.5550814946749143, + "learning_rate": 6.080676044965188e-06, + "loss": 
0.49759042263031006, + "step": 4425 + }, + { + "epoch": 1.294195057756982, + "grad_norm": 1.9841525065569887, + "learning_rate": 6.076228601604024e-06, + "loss": 0.5980732440948486, + "step": 4426 + }, + { + "epoch": 1.2944874981722474, + "grad_norm": 1.6256180215634828, + "learning_rate": 6.07178207540057e-06, + "loss": 0.6167548894882202, + "step": 4427 + }, + { + "epoch": 1.2947799385875127, + "grad_norm": 1.7343822678821683, + "learning_rate": 6.067336467394169e-06, + "loss": 0.5632568597793579, + "step": 4428 + }, + { + "epoch": 1.2950723790027783, + "grad_norm": 1.713926568632917, + "learning_rate": 6.062891778623961e-06, + "loss": 0.5521456003189087, + "step": 4429 + }, + { + "epoch": 1.2953648194180436, + "grad_norm": 1.4514202434870498, + "learning_rate": 6.058448010128861e-06, + "loss": 0.5916576385498047, + "step": 4430 + }, + { + "epoch": 1.295657259833309, + "grad_norm": 1.4200773171635346, + "learning_rate": 6.054005162947571e-06, + "loss": 0.546825647354126, + "step": 4431 + }, + { + "epoch": 1.2959497002485745, + "grad_norm": 1.903586469303659, + "learning_rate": 6.049563238118584e-06, + "loss": 0.5704302787780762, + "step": 4432 + }, + { + "epoch": 1.2962421406638398, + "grad_norm": 1.6923235048512564, + "learning_rate": 6.0451222366801706e-06, + "loss": 0.5791710615158081, + "step": 4433 + }, + { + "epoch": 1.2965345810791051, + "grad_norm": 1.5242567102891653, + "learning_rate": 6.040682159670389e-06, + "loss": 0.41179752349853516, + "step": 4434 + }, + { + "epoch": 1.2968270214943705, + "grad_norm": 1.7120079687188825, + "learning_rate": 6.03624300812708e-06, + "loss": 0.5213680267333984, + "step": 4435 + }, + { + "epoch": 1.2971194619096358, + "grad_norm": 1.6198208396506975, + "learning_rate": 6.0318047830878675e-06, + "loss": 0.4917318522930145, + "step": 4436 + }, + { + "epoch": 1.2974119023249013, + "grad_norm": 1.9301576881874427, + "learning_rate": 6.027367485590159e-06, + "loss": 0.6347956657409668, + "step": 4437 + }, + { + "epoch": 
1.2977043427401667, + "grad_norm": 1.454096730257314, + "learning_rate": 6.022931116671147e-06, + "loss": 0.5263427495956421, + "step": 4438 + }, + { + "epoch": 1.297996783155432, + "grad_norm": 1.3982615348649814, + "learning_rate": 6.018495677367806e-06, + "loss": 0.5686784982681274, + "step": 4439 + }, + { + "epoch": 1.2982892235706975, + "grad_norm": 1.6986790860575087, + "learning_rate": 6.0140611687168934e-06, + "loss": 0.576974630355835, + "step": 4440 + }, + { + "epoch": 1.2985816639859629, + "grad_norm": 1.7183954732732796, + "learning_rate": 6.009627591754946e-06, + "loss": 0.5375877618789673, + "step": 4441 + }, + { + "epoch": 1.2988741044012282, + "grad_norm": 1.7026702794952187, + "learning_rate": 6.005194947518287e-06, + "loss": 0.6106576919555664, + "step": 4442 + }, + { + "epoch": 1.2991665448164937, + "grad_norm": 1.6076086367802058, + "learning_rate": 6.000763237043021e-06, + "loss": 0.475483238697052, + "step": 4443 + }, + { + "epoch": 1.299458985231759, + "grad_norm": 1.7568326021636087, + "learning_rate": 5.9963324613650335e-06, + "loss": 0.5819226503372192, + "step": 4444 + }, + { + "epoch": 1.2997514256470244, + "grad_norm": 1.6384408260054233, + "learning_rate": 5.991902621519988e-06, + "loss": 0.6394410133361816, + "step": 4445 + }, + { + "epoch": 1.30004386606229, + "grad_norm": 1.653615111391099, + "learning_rate": 5.987473718543338e-06, + "loss": 0.48502016067504883, + "step": 4446 + }, + { + "epoch": 1.3003363064775553, + "grad_norm": 1.5217151928427126, + "learning_rate": 5.983045753470308e-06, + "loss": 0.5782333612442017, + "step": 4447 + }, + { + "epoch": 1.3006287468928206, + "grad_norm": 1.8358895387455052, + "learning_rate": 5.97861872733591e-06, + "loss": 0.5498893857002258, + "step": 4448 + }, + { + "epoch": 1.300921187308086, + "grad_norm": 1.5773905938706185, + "learning_rate": 5.974192641174934e-06, + "loss": 0.47757571935653687, + "step": 4449 + }, + { + "epoch": 1.3012136277233513, + "grad_norm": 1.751650457738534, + 
"learning_rate": 5.96976749602195e-06, + "loss": 0.5401994585990906, + "step": 4450 + }, + { + "epoch": 1.3015060681386168, + "grad_norm": 1.7445816604225337, + "learning_rate": 5.965343292911309e-06, + "loss": 0.5818814635276794, + "step": 4451 + }, + { + "epoch": 1.3017985085538821, + "grad_norm": 1.890298335476633, + "learning_rate": 5.9609200328771465e-06, + "loss": 0.524645984172821, + "step": 4452 + }, + { + "epoch": 1.3020909489691475, + "grad_norm": 1.6124004265504417, + "learning_rate": 5.956497716953365e-06, + "loss": 0.46523183584213257, + "step": 4453 + }, + { + "epoch": 1.302383389384413, + "grad_norm": 1.6328139064911342, + "learning_rate": 5.952076346173657e-06, + "loss": 0.6066159009933472, + "step": 4454 + }, + { + "epoch": 1.3026758297996783, + "grad_norm": 1.5743831575113747, + "learning_rate": 5.947655921571491e-06, + "loss": 0.48635774850845337, + "step": 4455 + }, + { + "epoch": 1.3029682702149437, + "grad_norm": 1.7296441740948125, + "learning_rate": 5.943236444180116e-06, + "loss": 0.5159435868263245, + "step": 4456 + }, + { + "epoch": 1.3032607106302092, + "grad_norm": 1.43545214825073, + "learning_rate": 5.938817915032558e-06, + "loss": 0.5566878914833069, + "step": 4457 + }, + { + "epoch": 1.3035531510454745, + "grad_norm": 1.5736652583628634, + "learning_rate": 5.934400335161618e-06, + "loss": 0.46998029947280884, + "step": 4458 + }, + { + "epoch": 1.3038455914607399, + "grad_norm": 1.7808256717613173, + "learning_rate": 5.92998370559988e-06, + "loss": 0.5554553270339966, + "step": 4459 + }, + { + "epoch": 1.3041380318760052, + "grad_norm": 1.7335497855414168, + "learning_rate": 5.925568027379704e-06, + "loss": 0.5659651756286621, + "step": 4460 + }, + { + "epoch": 1.3044304722912705, + "grad_norm": 1.4784849199972236, + "learning_rate": 5.921153301533229e-06, + "loss": 0.5105445981025696, + "step": 4461 + }, + { + "epoch": 1.304722912706536, + "grad_norm": 1.6833489269681376, + "learning_rate": 5.91673952909237e-06, + "loss": 
0.5255740284919739, + "step": 4462 + }, + { + "epoch": 1.3050153531218014, + "grad_norm": 1.6388447853221406, + "learning_rate": 5.912326711088821e-06, + "loss": 0.5691270232200623, + "step": 4463 + }, + { + "epoch": 1.3053077935370667, + "grad_norm": 1.64945916767282, + "learning_rate": 5.907914848554048e-06, + "loss": 0.5783474445343018, + "step": 4464 + }, + { + "epoch": 1.3056002339523323, + "grad_norm": 1.631334603802349, + "learning_rate": 5.903503942519299e-06, + "loss": 0.6305002570152283, + "step": 4465 + }, + { + "epoch": 1.3058926743675976, + "grad_norm": 1.9357776829199835, + "learning_rate": 5.8990939940156e-06, + "loss": 0.6465631723403931, + "step": 4466 + }, + { + "epoch": 1.306185114782863, + "grad_norm": 1.8264406193491898, + "learning_rate": 5.8946850040737434e-06, + "loss": 0.4883456230163574, + "step": 4467 + }, + { + "epoch": 1.3064775551981285, + "grad_norm": 1.3902013367704193, + "learning_rate": 5.890276973724305e-06, + "loss": 0.4896056056022644, + "step": 4468 + }, + { + "epoch": 1.3067699956133938, + "grad_norm": 1.6292986861573446, + "learning_rate": 5.885869903997638e-06, + "loss": 0.603757917881012, + "step": 4469 + }, + { + "epoch": 1.3070624360286591, + "grad_norm": 1.6368879465310389, + "learning_rate": 5.881463795923866e-06, + "loss": 0.5412129163742065, + "step": 4470 + }, + { + "epoch": 1.3073548764439247, + "grad_norm": 1.576979548849775, + "learning_rate": 5.877058650532891e-06, + "loss": 0.5255335569381714, + "step": 4471 + }, + { + "epoch": 1.30764731685919, + "grad_norm": 1.861250264495057, + "learning_rate": 5.87265446885439e-06, + "loss": 0.5855039358139038, + "step": 4472 + }, + { + "epoch": 1.3079397572744553, + "grad_norm": 1.7387082626664492, + "learning_rate": 5.868251251917811e-06, + "loss": 0.5763603448867798, + "step": 4473 + }, + { + "epoch": 1.3082321976897207, + "grad_norm": 1.7494976398773932, + "learning_rate": 5.86384900075238e-06, + "loss": 0.5148910880088806, + "step": 4474 + }, + { + "epoch": 
1.308524638104986, + "grad_norm": 1.726220320494232, + "learning_rate": 5.859447716387097e-06, + "loss": 0.6387143135070801, + "step": 4475 + }, + { + "epoch": 1.3088170785202515, + "grad_norm": 1.6421362434800872, + "learning_rate": 5.855047399850735e-06, + "loss": 0.5492211580276489, + "step": 4476 + }, + { + "epoch": 1.3091095189355169, + "grad_norm": 1.748321310864673, + "learning_rate": 5.850648052171843e-06, + "loss": 0.5715115070343018, + "step": 4477 + }, + { + "epoch": 1.3094019593507822, + "grad_norm": 1.8948603499593957, + "learning_rate": 5.8462496743787385e-06, + "loss": 0.6295989155769348, + "step": 4478 + }, + { + "epoch": 1.3096943997660477, + "grad_norm": 1.6169983680834699, + "learning_rate": 5.841852267499518e-06, + "loss": 0.5843105316162109, + "step": 4479 + }, + { + "epoch": 1.309986840181313, + "grad_norm": 1.443044009123256, + "learning_rate": 5.837455832562049e-06, + "loss": 0.43283605575561523, + "step": 4480 + }, + { + "epoch": 1.3102792805965784, + "grad_norm": 1.6217104179487012, + "learning_rate": 5.8330603705939684e-06, + "loss": 0.6115404367446899, + "step": 4481 + }, + { + "epoch": 1.310571721011844, + "grad_norm": 1.2325386929467517, + "learning_rate": 5.828665882622692e-06, + "loss": 0.4274179935455322, + "step": 4482 + }, + { + "epoch": 1.3108641614271093, + "grad_norm": 1.3722363792161896, + "learning_rate": 5.824272369675403e-06, + "loss": 0.4385778307914734, + "step": 4483 + }, + { + "epoch": 1.3111566018423746, + "grad_norm": 1.939305382555819, + "learning_rate": 5.819879832779058e-06, + "loss": 0.6310205459594727, + "step": 4484 + }, + { + "epoch": 1.3114490422576401, + "grad_norm": 1.5511013635003787, + "learning_rate": 5.815488272960388e-06, + "loss": 0.6309192180633545, + "step": 4485 + }, + { + "epoch": 1.3117414826729055, + "grad_norm": 1.8051032087296774, + "learning_rate": 5.811097691245895e-06, + "loss": 0.4751497507095337, + "step": 4486 + }, + { + "epoch": 1.3120339230881708, + "grad_norm": 1.5897893613027336, + 
"learning_rate": 5.806708088661846e-06, + "loss": 0.5540175437927246, + "step": 4487 + }, + { + "epoch": 1.3123263635034361, + "grad_norm": 1.924801228279098, + "learning_rate": 5.802319466234283e-06, + "loss": 0.5533273816108704, + "step": 4488 + }, + { + "epoch": 1.3126188039187014, + "grad_norm": 1.5486991099512135, + "learning_rate": 5.797931824989023e-06, + "loss": 0.463643878698349, + "step": 4489 + }, + { + "epoch": 1.312911244333967, + "grad_norm": 1.9073169839874196, + "learning_rate": 5.79354516595165e-06, + "loss": 0.5990232229232788, + "step": 4490 + }, + { + "epoch": 1.3132036847492323, + "grad_norm": 1.7681103257151853, + "learning_rate": 5.789159490147518e-06, + "loss": 0.5569760799407959, + "step": 4491 + }, + { + "epoch": 1.3134961251644977, + "grad_norm": 1.598897244778613, + "learning_rate": 5.784774798601755e-06, + "loss": 0.5016749501228333, + "step": 4492 + }, + { + "epoch": 1.3137885655797632, + "grad_norm": 1.8830720070455038, + "learning_rate": 5.780391092339253e-06, + "loss": 0.5624934434890747, + "step": 4493 + }, + { + "epoch": 1.3140810059950285, + "grad_norm": 2.146444811832683, + "learning_rate": 5.776008372384676e-06, + "loss": 0.7445797920227051, + "step": 4494 + }, + { + "epoch": 1.3143734464102939, + "grad_norm": 1.9276650555591395, + "learning_rate": 5.771626639762461e-06, + "loss": 0.5849495530128479, + "step": 4495 + }, + { + "epoch": 1.3146658868255594, + "grad_norm": 1.6679644602081254, + "learning_rate": 5.767245895496809e-06, + "loss": 0.5672163367271423, + "step": 4496 + }, + { + "epoch": 1.3149583272408247, + "grad_norm": 1.4482015307125622, + "learning_rate": 5.762866140611698e-06, + "loss": 0.5278276801109314, + "step": 4497 + }, + { + "epoch": 1.31525076765609, + "grad_norm": 1.8273800354421317, + "learning_rate": 5.7584873761308615e-06, + "loss": 0.54908686876297, + "step": 4498 + }, + { + "epoch": 1.3155432080713554, + "grad_norm": 1.7592605115208164, + "learning_rate": 5.754109603077811e-06, + "loss": 
0.5257589817047119, + "step": 4499 + }, + { + "epoch": 1.3158356484866207, + "grad_norm": 1.4910358958486878, + "learning_rate": 5.749732822475825e-06, + "loss": 0.5744988918304443, + "step": 4500 + }, + { + "epoch": 1.3161280889018863, + "grad_norm": 1.4827754689170145, + "learning_rate": 5.74535703534795e-06, + "loss": 0.5186365246772766, + "step": 4501 + }, + { + "epoch": 1.3164205293171516, + "grad_norm": 1.6539527720112557, + "learning_rate": 5.740982242716999e-06, + "loss": 0.53574538230896, + "step": 4502 + }, + { + "epoch": 1.316712969732417, + "grad_norm": 1.5347054109635063, + "learning_rate": 5.736608445605555e-06, + "loss": 0.6087717413902283, + "step": 4503 + }, + { + "epoch": 1.3170054101476825, + "grad_norm": 1.5413257189374059, + "learning_rate": 5.732235645035964e-06, + "loss": 0.5132769346237183, + "step": 4504 + }, + { + "epoch": 1.3172978505629478, + "grad_norm": 1.6361856291197476, + "learning_rate": 5.727863842030342e-06, + "loss": 0.588458776473999, + "step": 4505 + }, + { + "epoch": 1.3175902909782131, + "grad_norm": 1.6129388653597692, + "learning_rate": 5.723493037610572e-06, + "loss": 0.5154894590377808, + "step": 4506 + }, + { + "epoch": 1.3178827313934787, + "grad_norm": 1.5507002889867831, + "learning_rate": 5.719123232798304e-06, + "loss": 0.586688220500946, + "step": 4507 + }, + { + "epoch": 1.318175171808744, + "grad_norm": 1.8125403251714918, + "learning_rate": 5.714754428614956e-06, + "loss": 0.4948856830596924, + "step": 4508 + }, + { + "epoch": 1.3184676122240093, + "grad_norm": 1.5128350944665496, + "learning_rate": 5.7103866260817005e-06, + "loss": 0.6179821491241455, + "step": 4509 + }, + { + "epoch": 1.3187600526392749, + "grad_norm": 1.876290206668384, + "learning_rate": 5.7060198262194914e-06, + "loss": 0.5865011811256409, + "step": 4510 + }, + { + "epoch": 1.3190524930545402, + "grad_norm": 1.660419141577327, + "learning_rate": 5.701654030049038e-06, + "loss": 0.519783079624176, + "step": 4511 + }, + { + "epoch": 
1.3193449334698055, + "grad_norm": 1.5035780556155738, + "learning_rate": 5.697289238590822e-06, + "loss": 0.4238147437572479, + "step": 4512 + }, + { + "epoch": 1.3196373738850709, + "grad_norm": 1.6350345014151721, + "learning_rate": 5.6929254528650855e-06, + "loss": 0.5931107997894287, + "step": 4513 + }, + { + "epoch": 1.3199298143003362, + "grad_norm": 1.7485415603348589, + "learning_rate": 5.688562673891837e-06, + "loss": 0.7454524040222168, + "step": 4514 + }, + { + "epoch": 1.3202222547156017, + "grad_norm": 1.6756127294636487, + "learning_rate": 5.684200902690848e-06, + "loss": 0.5909554362297058, + "step": 4515 + }, + { + "epoch": 1.320514695130867, + "grad_norm": 1.449068353866628, + "learning_rate": 5.67984014028166e-06, + "loss": 0.5059943199157715, + "step": 4516 + }, + { + "epoch": 1.3208071355461324, + "grad_norm": 1.3855018310443914, + "learning_rate": 5.675480387683572e-06, + "loss": 0.4387373924255371, + "step": 4517 + }, + { + "epoch": 1.321099575961398, + "grad_norm": 1.6368288915875209, + "learning_rate": 5.671121645915648e-06, + "loss": 0.6452310681343079, + "step": 4518 + }, + { + "epoch": 1.3213920163766633, + "grad_norm": 1.4569471180570228, + "learning_rate": 5.666763915996725e-06, + "loss": 0.5629088282585144, + "step": 4519 + }, + { + "epoch": 1.3216844567919286, + "grad_norm": 1.6108062624448902, + "learning_rate": 5.662407198945386e-06, + "loss": 0.6442849636077881, + "step": 4520 + }, + { + "epoch": 1.3219768972071941, + "grad_norm": 1.4707356833436183, + "learning_rate": 5.6580514957799894e-06, + "loss": 0.5330031514167786, + "step": 4521 + }, + { + "epoch": 1.3222693376224595, + "grad_norm": 1.4396348923376052, + "learning_rate": 5.6536968075186575e-06, + "loss": 0.471035361289978, + "step": 4522 + }, + { + "epoch": 1.3225617780377248, + "grad_norm": 1.5589169874424196, + "learning_rate": 5.649343135179271e-06, + "loss": 0.5675650835037231, + "step": 4523 + }, + { + "epoch": 1.3228542184529903, + "grad_norm": 1.6961906881686575, + 
"learning_rate": 5.644990479779473e-06, + "loss": 0.5458093881607056, + "step": 4524 + }, + { + "epoch": 1.3231466588682557, + "grad_norm": 1.5690712646364733, + "learning_rate": 5.640638842336672e-06, + "loss": 0.5625189542770386, + "step": 4525 + }, + { + "epoch": 1.323439099283521, + "grad_norm": 1.778677748743509, + "learning_rate": 5.636288223868038e-06, + "loss": 0.5868214964866638, + "step": 4526 + }, + { + "epoch": 1.3237315396987863, + "grad_norm": 1.6502123203157841, + "learning_rate": 5.631938625390498e-06, + "loss": 0.5340765714645386, + "step": 4527 + }, + { + "epoch": 1.3240239801140516, + "grad_norm": 1.4463169385647288, + "learning_rate": 5.627590047920747e-06, + "loss": 0.4487069845199585, + "step": 4528 + }, + { + "epoch": 1.3243164205293172, + "grad_norm": 1.5750183859940412, + "learning_rate": 5.623242492475237e-06, + "loss": 0.4246913194656372, + "step": 4529 + }, + { + "epoch": 1.3246088609445825, + "grad_norm": 1.6537085849345186, + "learning_rate": 5.618895960070188e-06, + "loss": 0.49904564023017883, + "step": 4530 + }, + { + "epoch": 1.3249013013598478, + "grad_norm": 1.6201874773916152, + "learning_rate": 5.614550451721566e-06, + "loss": 0.5506085157394409, + "step": 4531 + }, + { + "epoch": 1.3251937417751134, + "grad_norm": 1.6929750939693964, + "learning_rate": 5.610205968445111e-06, + "loss": 0.4861884117126465, + "step": 4532 + }, + { + "epoch": 1.3254861821903787, + "grad_norm": 1.5616728357477914, + "learning_rate": 5.605862511256322e-06, + "loss": 0.5639146566390991, + "step": 4533 + }, + { + "epoch": 1.325778622605644, + "grad_norm": 1.3747626231277423, + "learning_rate": 5.601520081170455e-06, + "loss": 0.43305879831314087, + "step": 4534 + }, + { + "epoch": 1.3260710630209096, + "grad_norm": 1.4728588464752952, + "learning_rate": 5.597178679202524e-06, + "loss": 0.4820408821105957, + "step": 4535 + }, + { + "epoch": 1.326363503436175, + "grad_norm": 2.092875019342334, + "learning_rate": 5.592838306367307e-06, + "loss": 
0.5601707100868225, + "step": 4536 + }, + { + "epoch": 1.3266559438514403, + "grad_norm": 1.6269012393440097, + "learning_rate": 5.588498963679339e-06, + "loss": 0.5655055046081543, + "step": 4537 + }, + { + "epoch": 1.3269483842667056, + "grad_norm": 1.871556737283143, + "learning_rate": 5.584160652152917e-06, + "loss": 0.5425975322723389, + "step": 4538 + }, + { + "epoch": 1.327240824681971, + "grad_norm": 1.5388263554547548, + "learning_rate": 5.579823372802098e-06, + "loss": 0.607103168964386, + "step": 4539 + }, + { + "epoch": 1.3275332650972365, + "grad_norm": 1.6396827179367406, + "learning_rate": 5.575487126640686e-06, + "loss": 0.6011538505554199, + "step": 4540 + }, + { + "epoch": 1.3278257055125018, + "grad_norm": 1.6364470669862505, + "learning_rate": 5.571151914682258e-06, + "loss": 0.5333601236343384, + "step": 4541 + }, + { + "epoch": 1.3281181459277671, + "grad_norm": 1.7756177203838306, + "learning_rate": 5.566817737940142e-06, + "loss": 0.576410174369812, + "step": 4542 + }, + { + "epoch": 1.3284105863430327, + "grad_norm": 1.8060302167235907, + "learning_rate": 5.562484597427425e-06, + "loss": 0.506458044052124, + "step": 4543 + }, + { + "epoch": 1.328703026758298, + "grad_norm": 2.0174061298696975, + "learning_rate": 5.558152494156955e-06, + "loss": 0.5893718004226685, + "step": 4544 + }, + { + "epoch": 1.3289954671735633, + "grad_norm": 1.6979483029237916, + "learning_rate": 5.55382142914133e-06, + "loss": 0.508120059967041, + "step": 4545 + }, + { + "epoch": 1.3292879075888289, + "grad_norm": 1.5737735987577735, + "learning_rate": 5.5494914033929126e-06, + "loss": 0.6103616952896118, + "step": 4546 + }, + { + "epoch": 1.3295803480040942, + "grad_norm": 1.7304904972315491, + "learning_rate": 5.545162417923822e-06, + "loss": 0.5290235280990601, + "step": 4547 + }, + { + "epoch": 1.3298727884193595, + "grad_norm": 1.5350904839753017, + "learning_rate": 5.540834473745929e-06, + "loss": 0.5729631185531616, + "step": 4548 + }, + { + "epoch": 
1.330165228834625, + "grad_norm": 1.5574358916011883, + "learning_rate": 5.536507571870866e-06, + "loss": 0.48720547556877136, + "step": 4549 + }, + { + "epoch": 1.3304576692498904, + "grad_norm": 1.5393587740053045, + "learning_rate": 5.532181713310023e-06, + "loss": 0.4987955689430237, + "step": 4550 + }, + { + "epoch": 1.3307501096651557, + "grad_norm": 1.3126988702980638, + "learning_rate": 5.527856899074536e-06, + "loss": 0.4002467393875122, + "step": 4551 + }, + { + "epoch": 1.331042550080421, + "grad_norm": 2.0947575410388866, + "learning_rate": 5.523533130175308e-06, + "loss": 0.7435724139213562, + "step": 4552 + }, + { + "epoch": 1.3313349904956864, + "grad_norm": 1.541726198150986, + "learning_rate": 5.519210407622993e-06, + "loss": 0.34711340069770813, + "step": 4553 + }, + { + "epoch": 1.331627430910952, + "grad_norm": 1.6396721749099359, + "learning_rate": 5.514888732428003e-06, + "loss": 0.4749720096588135, + "step": 4554 + }, + { + "epoch": 1.3319198713262173, + "grad_norm": 1.7586628740577253, + "learning_rate": 5.5105681056005e-06, + "loss": 0.5818741321563721, + "step": 4555 + }, + { + "epoch": 1.3322123117414826, + "grad_norm": 1.7095504305078453, + "learning_rate": 5.506248528150407e-06, + "loss": 0.5715004801750183, + "step": 4556 + }, + { + "epoch": 1.3325047521567481, + "grad_norm": 1.7722621684818736, + "learning_rate": 5.501930001087399e-06, + "loss": 0.5465661287307739, + "step": 4557 + }, + { + "epoch": 1.3327971925720135, + "grad_norm": 1.7620411170921917, + "learning_rate": 5.4976125254209035e-06, + "loss": 0.6324847936630249, + "step": 4558 + }, + { + "epoch": 1.3330896329872788, + "grad_norm": 1.4165701736936904, + "learning_rate": 5.493296102160105e-06, + "loss": 0.4616294503211975, + "step": 4559 + }, + { + "epoch": 1.3333820734025443, + "grad_norm": 1.6922106714814378, + "learning_rate": 5.488980732313942e-06, + "loss": 0.5187079310417175, + "step": 4560 + }, + { + "epoch": 1.3336745138178097, + "grad_norm": 1.8396067182286635, + 
"learning_rate": 5.484666416891109e-06, + "loss": 0.6120654344558716, + "step": 4561 + }, + { + "epoch": 1.333966954233075, + "grad_norm": 1.6878860661661148, + "learning_rate": 5.480353156900044e-06, + "loss": 0.6171379685401917, + "step": 4562 + }, + { + "epoch": 1.3342593946483405, + "grad_norm": 1.510636167770684, + "learning_rate": 5.4760409533489475e-06, + "loss": 0.4690072536468506, + "step": 4563 + }, + { + "epoch": 1.3345518350636059, + "grad_norm": 1.5961764389633983, + "learning_rate": 5.471729807245773e-06, + "loss": 0.511309802532196, + "step": 4564 + }, + { + "epoch": 1.3348442754788712, + "grad_norm": 1.6355911684199975, + "learning_rate": 5.467419719598223e-06, + "loss": 0.5657862424850464, + "step": 4565 + }, + { + "epoch": 1.3351367158941365, + "grad_norm": 1.7641189489668823, + "learning_rate": 5.4631106914137555e-06, + "loss": 0.4263400733470917, + "step": 4566 + }, + { + "epoch": 1.3354291563094018, + "grad_norm": 1.8179548841156754, + "learning_rate": 5.458802723699579e-06, + "loss": 0.6275177001953125, + "step": 4567 + }, + { + "epoch": 1.3357215967246674, + "grad_norm": 1.6668120373290058, + "learning_rate": 5.454495817462655e-06, + "loss": 0.3857421278953552, + "step": 4568 + }, + { + "epoch": 1.3360140371399327, + "grad_norm": 1.7165178528012586, + "learning_rate": 5.450189973709697e-06, + "loss": 0.5834560394287109, + "step": 4569 + }, + { + "epoch": 1.336306477555198, + "grad_norm": 1.6632572235317495, + "learning_rate": 5.445885193447169e-06, + "loss": 0.6165010929107666, + "step": 4570 + }, + { + "epoch": 1.3365989179704636, + "grad_norm": 1.7470412065212853, + "learning_rate": 5.441581477681288e-06, + "loss": 0.6034595966339111, + "step": 4571 + }, + { + "epoch": 1.336891358385729, + "grad_norm": 1.740024112758077, + "learning_rate": 5.43727882741802e-06, + "loss": 0.570164144039154, + "step": 4572 + }, + { + "epoch": 1.3371837988009942, + "grad_norm": 1.4917354928366209, + "learning_rate": 5.432977243663089e-06, + "loss": 
0.5369169116020203, + "step": 4573 + }, + { + "epoch": 1.3374762392162598, + "grad_norm": 1.7875464183853407, + "learning_rate": 5.428676727421954e-06, + "loss": 0.5624364614486694, + "step": 4574 + }, + { + "epoch": 1.3377686796315251, + "grad_norm": 1.517348885410251, + "learning_rate": 5.424377279699842e-06, + "loss": 0.5002127885818481, + "step": 4575 + }, + { + "epoch": 1.3380611200467905, + "grad_norm": 1.7071888960959534, + "learning_rate": 5.42007890150172e-06, + "loss": 0.5998499393463135, + "step": 4576 + }, + { + "epoch": 1.3383535604620558, + "grad_norm": 1.7074905497433162, + "learning_rate": 5.415781593832307e-06, + "loss": 0.5988572835922241, + "step": 4577 + }, + { + "epoch": 1.338646000877321, + "grad_norm": 1.6551550553396004, + "learning_rate": 5.411485357696075e-06, + "loss": 0.5202064514160156, + "step": 4578 + }, + { + "epoch": 1.3389384412925867, + "grad_norm": 2.519364812628366, + "learning_rate": 5.407190194097241e-06, + "loss": 0.5246714949607849, + "step": 4579 + }, + { + "epoch": 1.339230881707852, + "grad_norm": 1.5907571805696734, + "learning_rate": 5.4028961040397765e-06, + "loss": 0.5998588800430298, + "step": 4580 + }, + { + "epoch": 1.3395233221231173, + "grad_norm": 1.7851321190756844, + "learning_rate": 5.3986030885273945e-06, + "loss": 0.5971418023109436, + "step": 4581 + }, + { + "epoch": 1.3398157625383829, + "grad_norm": 1.5857061971181772, + "learning_rate": 5.3943111485635644e-06, + "loss": 0.4638952910900116, + "step": 4582 + }, + { + "epoch": 1.3401082029536482, + "grad_norm": 1.5981773831835344, + "learning_rate": 5.390020285151502e-06, + "loss": 0.5007182955741882, + "step": 4583 + }, + { + "epoch": 1.3404006433689135, + "grad_norm": 1.610643010141743, + "learning_rate": 5.385730499294171e-06, + "loss": 0.5013964772224426, + "step": 4584 + }, + { + "epoch": 1.340693083784179, + "grad_norm": 1.6360724667305655, + "learning_rate": 5.381441791994276e-06, + "loss": 0.5699980854988098, + "step": 4585 + }, + { + "epoch": 
1.3409855241994444, + "grad_norm": 1.6423818252193456, + "learning_rate": 5.377154164254283e-06, + "loss": 0.5326210260391235, + "step": 4586 + }, + { + "epoch": 1.3412779646147097, + "grad_norm": 1.5111806674915849, + "learning_rate": 5.372867617076395e-06, + "loss": 0.6065158843994141, + "step": 4587 + }, + { + "epoch": 1.3415704050299753, + "grad_norm": 1.356022290658006, + "learning_rate": 5.368582151462569e-06, + "loss": 0.48427143692970276, + "step": 4588 + }, + { + "epoch": 1.3418628454452406, + "grad_norm": 1.4868111001385538, + "learning_rate": 5.364297768414505e-06, + "loss": 0.5755994915962219, + "step": 4589 + }, + { + "epoch": 1.342155285860506, + "grad_norm": 1.4690268021295017, + "learning_rate": 5.360014468933652e-06, + "loss": 0.4959644377231598, + "step": 4590 + }, + { + "epoch": 1.3424477262757712, + "grad_norm": 1.5383458553689457, + "learning_rate": 5.355732254021205e-06, + "loss": 0.5374274253845215, + "step": 4591 + }, + { + "epoch": 1.3427401666910366, + "grad_norm": 1.6286753609495908, + "learning_rate": 5.351451124678106e-06, + "loss": 0.5875111818313599, + "step": 4592 + }, + { + "epoch": 1.3430326071063021, + "grad_norm": 1.7964496178319949, + "learning_rate": 5.347171081905045e-06, + "loss": 0.5230692028999329, + "step": 4593 + }, + { + "epoch": 1.3433250475215675, + "grad_norm": 1.424672908012482, + "learning_rate": 5.342892126702453e-06, + "loss": 0.4624518156051636, + "step": 4594 + }, + { + "epoch": 1.3436174879368328, + "grad_norm": 1.9140370650793175, + "learning_rate": 5.3386142600705134e-06, + "loss": 0.5141074061393738, + "step": 4595 + }, + { + "epoch": 1.3439099283520983, + "grad_norm": 1.6249918744835086, + "learning_rate": 5.334337483009147e-06, + "loss": 0.4655565023422241, + "step": 4596 + }, + { + "epoch": 1.3442023687673637, + "grad_norm": 1.6516547156710706, + "learning_rate": 5.330061796518025e-06, + "loss": 0.6135094165802002, + "step": 4597 + }, + { + "epoch": 1.344494809182629, + "grad_norm": 1.595543646054287, + 
"learning_rate": 5.325787201596563e-06, + "loss": 0.5865254402160645, + "step": 4598 + }, + { + "epoch": 1.3447872495978945, + "grad_norm": 1.8032344885262006, + "learning_rate": 5.321513699243924e-06, + "loss": 0.5290840268135071, + "step": 4599 + }, + { + "epoch": 1.3450796900131599, + "grad_norm": 1.5294052976370318, + "learning_rate": 5.317241290459012e-06, + "loss": 0.554675817489624, + "step": 4600 + }, + { + "epoch": 1.3453721304284252, + "grad_norm": 1.499219614332531, + "learning_rate": 5.312969976240479e-06, + "loss": 0.5033853650093079, + "step": 4601 + }, + { + "epoch": 1.3456645708436907, + "grad_norm": 1.8108264508032192, + "learning_rate": 5.308699757586713e-06, + "loss": 0.44666093587875366, + "step": 4602 + }, + { + "epoch": 1.345957011258956, + "grad_norm": 1.5332559280539126, + "learning_rate": 5.304430635495856e-06, + "loss": 0.5447900891304016, + "step": 4603 + }, + { + "epoch": 1.3462494516742214, + "grad_norm": 1.507503116151542, + "learning_rate": 5.30016261096579e-06, + "loss": 0.4425917863845825, + "step": 4604 + }, + { + "epoch": 1.3465418920894867, + "grad_norm": 1.508411296889156, + "learning_rate": 5.295895684994137e-06, + "loss": 0.4411497712135315, + "step": 4605 + }, + { + "epoch": 1.346834332504752, + "grad_norm": 1.537668383754579, + "learning_rate": 5.291629858578271e-06, + "loss": 0.5577414631843567, + "step": 4606 + }, + { + "epoch": 1.3471267729200176, + "grad_norm": 1.7128549715372505, + "learning_rate": 5.287365132715293e-06, + "loss": 0.4754186272621155, + "step": 4607 + }, + { + "epoch": 1.347419213335283, + "grad_norm": 1.6521724702121328, + "learning_rate": 5.283101508402063e-06, + "loss": 0.5582431554794312, + "step": 4608 + }, + { + "epoch": 1.3477116537505482, + "grad_norm": 1.7476811492664892, + "learning_rate": 5.2788389866351755e-06, + "loss": 0.5552654266357422, + "step": 4609 + }, + { + "epoch": 1.3480040941658138, + "grad_norm": 1.8662632335270106, + "learning_rate": 5.2745775684109705e-06, + "loss": 
0.5776556730270386, + "step": 4610 + }, + { + "epoch": 1.3482965345810791, + "grad_norm": 1.7735552141557176, + "learning_rate": 5.270317254725528e-06, + "loss": 0.5859286785125732, + "step": 4611 + }, + { + "epoch": 1.3485889749963444, + "grad_norm": 1.5182169678473143, + "learning_rate": 5.2660580465746694e-06, + "loss": 0.5914887189865112, + "step": 4612 + }, + { + "epoch": 1.34888141541161, + "grad_norm": 1.6371325039607922, + "learning_rate": 5.261799944953956e-06, + "loss": 0.43669426441192627, + "step": 4613 + }, + { + "epoch": 1.3491738558268753, + "grad_norm": 1.718792113074269, + "learning_rate": 5.2575429508587e-06, + "loss": 0.473773717880249, + "step": 4614 + }, + { + "epoch": 1.3494662962421407, + "grad_norm": 1.7451807781202082, + "learning_rate": 5.253287065283949e-06, + "loss": 0.5011228919029236, + "step": 4615 + }, + { + "epoch": 1.349758736657406, + "grad_norm": 1.6598931266775088, + "learning_rate": 5.249032289224483e-06, + "loss": 0.5839254856109619, + "step": 4616 + }, + { + "epoch": 1.3500511770726713, + "grad_norm": 1.7262514320572941, + "learning_rate": 5.244778623674831e-06, + "loss": 0.5375077128410339, + "step": 4617 + }, + { + "epoch": 1.3503436174879369, + "grad_norm": 1.4572654878782452, + "learning_rate": 5.240526069629265e-06, + "loss": 0.49445679783821106, + "step": 4618 + }, + { + "epoch": 1.3506360579032022, + "grad_norm": 1.5263979209526246, + "learning_rate": 5.236274628081792e-06, + "loss": 0.5369694828987122, + "step": 4619 + }, + { + "epoch": 1.3509284983184675, + "grad_norm": 1.8018674546255473, + "learning_rate": 5.23202430002616e-06, + "loss": 0.6017554402351379, + "step": 4620 + }, + { + "epoch": 1.351220938733733, + "grad_norm": 1.9428924144840352, + "learning_rate": 5.227775086455859e-06, + "loss": 0.5380403995513916, + "step": 4621 + }, + { + "epoch": 1.3515133791489984, + "grad_norm": 1.6665289001084298, + "learning_rate": 5.223526988364116e-06, + "loss": 0.5650593042373657, + "step": 4622 + }, + { + "epoch": 
1.3518058195642637, + "grad_norm": 1.5672489406384107, + "learning_rate": 5.219280006743897e-06, + "loss": 0.5572884678840637, + "step": 4623 + }, + { + "epoch": 1.3520982599795293, + "grad_norm": 1.839257774768153, + "learning_rate": 5.21503414258791e-06, + "loss": 0.5304458141326904, + "step": 4624 + }, + { + "epoch": 1.3523907003947946, + "grad_norm": 1.8264084905380675, + "learning_rate": 5.2107893968886005e-06, + "loss": 0.6702588796615601, + "step": 4625 + }, + { + "epoch": 1.35268314081006, + "grad_norm": 1.5301776431109881, + "learning_rate": 5.206545770638152e-06, + "loss": 0.4607279300689697, + "step": 4626 + }, + { + "epoch": 1.3529755812253255, + "grad_norm": 1.4702386368708713, + "learning_rate": 5.202303264828482e-06, + "loss": 0.5759040713310242, + "step": 4627 + }, + { + "epoch": 1.3532680216405908, + "grad_norm": 1.6340224609334149, + "learning_rate": 5.198061880451253e-06, + "loss": 0.446469783782959, + "step": 4628 + }, + { + "epoch": 1.3535604620558561, + "grad_norm": 1.6416831158378962, + "learning_rate": 5.193821618497864e-06, + "loss": 0.4869040846824646, + "step": 4629 + }, + { + "epoch": 1.3538529024711214, + "grad_norm": 1.59588454548975, + "learning_rate": 5.189582479959449e-06, + "loss": 0.5153477191925049, + "step": 4630 + }, + { + "epoch": 1.3541453428863868, + "grad_norm": 1.6964185114911852, + "learning_rate": 5.185344465826883e-06, + "loss": 0.4958652853965759, + "step": 4631 + }, + { + "epoch": 1.3544377833016523, + "grad_norm": 1.544404184800908, + "learning_rate": 5.1811075770907715e-06, + "loss": 0.5314347743988037, + "step": 4632 + }, + { + "epoch": 1.3547302237169176, + "grad_norm": 1.6488125019330604, + "learning_rate": 5.176871814741466e-06, + "loss": 0.5366088151931763, + "step": 4633 + }, + { + "epoch": 1.355022664132183, + "grad_norm": 1.7011582339400138, + "learning_rate": 5.172637179769049e-06, + "loss": 0.6239185929298401, + "step": 4634 + }, + { + "epoch": 1.3553151045474485, + "grad_norm": 1.8789833552926098, + 
"learning_rate": 5.168403673163341e-06, + "loss": 0.5516507625579834, + "step": 4635 + }, + { + "epoch": 1.3556075449627139, + "grad_norm": 1.6420696506744512, + "learning_rate": 5.164171295913898e-06, + "loss": 0.5859683156013489, + "step": 4636 + }, + { + "epoch": 1.3558999853779792, + "grad_norm": 1.6138084463921514, + "learning_rate": 5.159940049010015e-06, + "loss": 0.5913225412368774, + "step": 4637 + }, + { + "epoch": 1.3561924257932447, + "grad_norm": 1.690951404825549, + "learning_rate": 5.155709933440714e-06, + "loss": 0.650983989238739, + "step": 4638 + }, + { + "epoch": 1.35648486620851, + "grad_norm": 1.7360324268029201, + "learning_rate": 5.151480950194762e-06, + "loss": 0.5631625652313232, + "step": 4639 + }, + { + "epoch": 1.3567773066237754, + "grad_norm": 1.9305214623229574, + "learning_rate": 5.147253100260659e-06, + "loss": 0.48153650760650635, + "step": 4640 + }, + { + "epoch": 1.357069747039041, + "grad_norm": 1.382159174171422, + "learning_rate": 5.143026384626637e-06, + "loss": 0.43598422408103943, + "step": 4641 + }, + { + "epoch": 1.3573621874543063, + "grad_norm": 1.5586949144187017, + "learning_rate": 5.138800804280668e-06, + "loss": 0.5323987007141113, + "step": 4642 + }, + { + "epoch": 1.3576546278695716, + "grad_norm": 1.739858834969472, + "learning_rate": 5.134576360210454e-06, + "loss": 0.5386587977409363, + "step": 4643 + }, + { + "epoch": 1.357947068284837, + "grad_norm": 1.7229356194902612, + "learning_rate": 5.130353053403434e-06, + "loss": 0.4913867115974426, + "step": 4644 + }, + { + "epoch": 1.3582395087001022, + "grad_norm": 2.681042611993396, + "learning_rate": 5.12613088484678e-06, + "loss": 0.6516048908233643, + "step": 4645 + }, + { + "epoch": 1.3585319491153678, + "grad_norm": 1.7863407962771196, + "learning_rate": 5.121909855527398e-06, + "loss": 0.5290599465370178, + "step": 4646 + }, + { + "epoch": 1.3588243895306331, + "grad_norm": 1.992281323100596, + "learning_rate": 5.117689966431927e-06, + "loss": 
0.7909928560256958, + "step": 4647 + }, + { + "epoch": 1.3591168299458984, + "grad_norm": 1.7798386890797042, + "learning_rate": 5.113471218546746e-06, + "loss": 0.4751276969909668, + "step": 4648 + }, + { + "epoch": 1.359409270361164, + "grad_norm": 1.3934486662021524, + "learning_rate": 5.109253612857954e-06, + "loss": 0.4542301893234253, + "step": 4649 + }, + { + "epoch": 1.3597017107764293, + "grad_norm": 1.6724566490890436, + "learning_rate": 5.105037150351393e-06, + "loss": 0.5355349779129028, + "step": 4650 + }, + { + "epoch": 1.3599941511916946, + "grad_norm": 1.7131391763754547, + "learning_rate": 5.100821832012637e-06, + "loss": 0.4994719326496124, + "step": 4651 + }, + { + "epoch": 1.3602865916069602, + "grad_norm": 1.7061763475820229, + "learning_rate": 5.096607658826989e-06, + "loss": 0.6171674728393555, + "step": 4652 + }, + { + "epoch": 1.3605790320222255, + "grad_norm": 1.6851325839422124, + "learning_rate": 5.092394631779487e-06, + "loss": 0.5386878252029419, + "step": 4653 + }, + { + "epoch": 1.3608714724374908, + "grad_norm": 1.4863597978488459, + "learning_rate": 5.088182751854903e-06, + "loss": 0.4495810270309448, + "step": 4654 + }, + { + "epoch": 1.3611639128527562, + "grad_norm": 1.560829764762291, + "learning_rate": 5.083972020037735e-06, + "loss": 0.5540642142295837, + "step": 4655 + }, + { + "epoch": 1.3614563532680215, + "grad_norm": 1.7743988570673719, + "learning_rate": 5.079762437312219e-06, + "loss": 0.6020554900169373, + "step": 4656 + }, + { + "epoch": 1.361748793683287, + "grad_norm": 1.5410143370370128, + "learning_rate": 5.075554004662316e-06, + "loss": 0.47981250286102295, + "step": 4657 + }, + { + "epoch": 1.3620412340985524, + "grad_norm": 1.6809006565320033, + "learning_rate": 5.071346723071724e-06, + "loss": 0.6206443905830383, + "step": 4658 + }, + { + "epoch": 1.3623336745138177, + "grad_norm": 1.2946163710464256, + "learning_rate": 5.067140593523869e-06, + "loss": 0.46899446845054626, + "step": 4659 + }, + { + "epoch": 
1.3626261149290833, + "grad_norm": 1.3692435027739418, + "learning_rate": 5.062935617001912e-06, + "loss": 0.5695985555648804, + "step": 4660 + }, + { + "epoch": 1.3629185553443486, + "grad_norm": 1.5567765237338644, + "learning_rate": 5.058731794488732e-06, + "loss": 0.5524671077728271, + "step": 4661 + }, + { + "epoch": 1.363210995759614, + "grad_norm": 1.5953543121744755, + "learning_rate": 5.054529126966953e-06, + "loss": 0.4655245244503021, + "step": 4662 + }, + { + "epoch": 1.3635034361748795, + "grad_norm": 1.6197588686677031, + "learning_rate": 5.050327615418921e-06, + "loss": 0.5617693662643433, + "step": 4663 + }, + { + "epoch": 1.3637958765901448, + "grad_norm": 1.515126796303483, + "learning_rate": 5.046127260826714e-06, + "loss": 0.52044677734375, + "step": 4664 + }, + { + "epoch": 1.3640883170054101, + "grad_norm": 1.6797173356320934, + "learning_rate": 5.041928064172139e-06, + "loss": 0.4567520022392273, + "step": 4665 + }, + { + "epoch": 1.3643807574206757, + "grad_norm": 1.5794296901996336, + "learning_rate": 5.037730026436736e-06, + "loss": 0.5942729711532593, + "step": 4666 + }, + { + "epoch": 1.364673197835941, + "grad_norm": 1.6501244665537385, + "learning_rate": 5.033533148601766e-06, + "loss": 0.3824811279773712, + "step": 4667 + }, + { + "epoch": 1.3649656382512063, + "grad_norm": 1.4770402468740385, + "learning_rate": 5.029337431648227e-06, + "loss": 0.4710771441459656, + "step": 4668 + }, + { + "epoch": 1.3652580786664716, + "grad_norm": 1.5059979846835174, + "learning_rate": 5.02514287655684e-06, + "loss": 0.6617978811264038, + "step": 4669 + }, + { + "epoch": 1.365550519081737, + "grad_norm": 1.5829629132621983, + "learning_rate": 5.020949484308058e-06, + "loss": 0.5237355828285217, + "step": 4670 + }, + { + "epoch": 1.3658429594970025, + "grad_norm": 1.4158253094169178, + "learning_rate": 5.016757255882065e-06, + "loss": 0.4544803500175476, + "step": 4671 + }, + { + "epoch": 1.3661353999122678, + "grad_norm": 1.8761810485620272, + 
"learning_rate": 5.012566192258763e-06, + "loss": 0.5854490399360657, + "step": 4672 + }, + { + "epoch": 1.3664278403275332, + "grad_norm": 1.902502544434852, + "learning_rate": 5.008376294417787e-06, + "loss": 0.6275635361671448, + "step": 4673 + }, + { + "epoch": 1.3667202807427987, + "grad_norm": 1.6133596882151136, + "learning_rate": 5.004187563338504e-06, + "loss": 0.5160082578659058, + "step": 4674 + }, + { + "epoch": 1.367012721158064, + "grad_norm": 1.439845673979846, + "learning_rate": 5.000000000000003e-06, + "loss": 0.5203640460968018, + "step": 4675 + }, + { + "epoch": 1.3673051615733294, + "grad_norm": 2.025079516078861, + "learning_rate": 4.9958136053811e-06, + "loss": 0.6836066246032715, + "step": 4676 + }, + { + "epoch": 1.367597601988595, + "grad_norm": 1.5727820508513324, + "learning_rate": 4.991628380460343e-06, + "loss": 0.5566641092300415, + "step": 4677 + }, + { + "epoch": 1.3678900424038603, + "grad_norm": 1.643119627925769, + "learning_rate": 4.9874443262159984e-06, + "loss": 0.5618000030517578, + "step": 4678 + }, + { + "epoch": 1.3681824828191256, + "grad_norm": 1.4054605482949574, + "learning_rate": 4.983261443626068e-06, + "loss": 0.4605063796043396, + "step": 4679 + }, + { + "epoch": 1.3684749232343911, + "grad_norm": 1.7557732951775291, + "learning_rate": 4.97907973366827e-06, + "loss": 0.48282021284103394, + "step": 4680 + }, + { + "epoch": 1.3687673636496565, + "grad_norm": 1.467194830130128, + "learning_rate": 4.974899197320059e-06, + "loss": 0.42356133460998535, + "step": 4681 + }, + { + "epoch": 1.3690598040649218, + "grad_norm": 1.3266470239270218, + "learning_rate": 4.97071983555861e-06, + "loss": 0.459377646446228, + "step": 4682 + }, + { + "epoch": 1.369352244480187, + "grad_norm": 1.9278413810039654, + "learning_rate": 4.966541649360819e-06, + "loss": 0.5539775490760803, + "step": 4683 + }, + { + "epoch": 1.3696446848954524, + "grad_norm": 1.7014699336581571, + "learning_rate": 4.962364639703311e-06, + "loss": 
0.5593239068984985, + "step": 4684 + }, + { + "epoch": 1.369937125310718, + "grad_norm": 1.8333805174527635, + "learning_rate": 4.958188807562441e-06, + "loss": 0.5425251722335815, + "step": 4685 + }, + { + "epoch": 1.3702295657259833, + "grad_norm": 1.564182289934299, + "learning_rate": 4.954014153914282e-06, + "loss": 0.5183289051055908, + "step": 4686 + }, + { + "epoch": 1.3705220061412486, + "grad_norm": 1.6834251116472225, + "learning_rate": 4.9498406797346345e-06, + "loss": 0.5278980731964111, + "step": 4687 + }, + { + "epoch": 1.3708144465565142, + "grad_norm": 1.6861784833580373, + "learning_rate": 4.9456683859990185e-06, + "loss": 0.4857858419418335, + "step": 4688 + }, + { + "epoch": 1.3711068869717795, + "grad_norm": 1.4955733852507764, + "learning_rate": 4.94149727368269e-06, + "loss": 0.4889591336250305, + "step": 4689 + }, + { + "epoch": 1.3713993273870448, + "grad_norm": 2.1119376280699105, + "learning_rate": 4.937327343760617e-06, + "loss": 0.5475220680236816, + "step": 4690 + }, + { + "epoch": 1.3716917678023104, + "grad_norm": 1.8065068083746048, + "learning_rate": 4.933158597207501e-06, + "loss": 0.5794380903244019, + "step": 4691 + }, + { + "epoch": 1.3719842082175757, + "grad_norm": 1.5916906211687458, + "learning_rate": 4.928991034997752e-06, + "loss": 0.42212024331092834, + "step": 4692 + }, + { + "epoch": 1.372276648632841, + "grad_norm": 1.8447627986814241, + "learning_rate": 4.924824658105516e-06, + "loss": 0.6091631054878235, + "step": 4693 + }, + { + "epoch": 1.3725690890481064, + "grad_norm": 1.8839419484958528, + "learning_rate": 4.9206594675046595e-06, + "loss": 0.544279158115387, + "step": 4694 + }, + { + "epoch": 1.3728615294633717, + "grad_norm": 1.4361678658463186, + "learning_rate": 4.916495464168768e-06, + "loss": 0.46237099170684814, + "step": 4695 + }, + { + "epoch": 1.3731539698786372, + "grad_norm": 1.5990237040506552, + "learning_rate": 4.912332649071154e-06, + "loss": 0.5615352392196655, + "step": 4696 + }, + { + "epoch": 
1.3734464102939026, + "grad_norm": 1.7554295249178744, + "learning_rate": 4.90817102318485e-06, + "loss": 0.5552200078964233, + "step": 4697 + }, + { + "epoch": 1.373738850709168, + "grad_norm": 1.798510214490848, + "learning_rate": 4.904010587482612e-06, + "loss": 0.5466557741165161, + "step": 4698 + }, + { + "epoch": 1.3740312911244335, + "grad_norm": 1.8536275815794498, + "learning_rate": 4.8998513429369135e-06, + "loss": 0.6131544709205627, + "step": 4699 + }, + { + "epoch": 1.3743237315396988, + "grad_norm": 1.7671899353023186, + "learning_rate": 4.895693290519954e-06, + "loss": 0.5264796018600464, + "step": 4700 + }, + { + "epoch": 1.374616171954964, + "grad_norm": 1.6582809024037055, + "learning_rate": 4.891536431203653e-06, + "loss": 0.5179097652435303, + "step": 4701 + }, + { + "epoch": 1.3749086123702297, + "grad_norm": 1.7203915102871608, + "learning_rate": 4.887380765959655e-06, + "loss": 0.46007782220840454, + "step": 4702 + }, + { + "epoch": 1.375201052785495, + "grad_norm": 1.3949646851760964, + "learning_rate": 4.8832262957593145e-06, + "loss": 0.48182815313339233, + "step": 4703 + }, + { + "epoch": 1.3754934932007603, + "grad_norm": 1.6488295590740498, + "learning_rate": 4.879073021573717e-06, + "loss": 0.5334529280662537, + "step": 4704 + }, + { + "epoch": 1.3757859336160259, + "grad_norm": 1.824410831192183, + "learning_rate": 4.874920944373665e-06, + "loss": 0.5984899997711182, + "step": 4705 + }, + { + "epoch": 1.3760783740312912, + "grad_norm": 1.633539262172952, + "learning_rate": 4.870770065129681e-06, + "loss": 0.46676474809646606, + "step": 4706 + }, + { + "epoch": 1.3763708144465565, + "grad_norm": 1.6766360321424407, + "learning_rate": 4.866620384812008e-06, + "loss": 0.4608241617679596, + "step": 4707 + }, + { + "epoch": 1.3766632548618218, + "grad_norm": 1.6783484732888503, + "learning_rate": 4.862471904390609e-06, + "loss": 0.5877207517623901, + "step": 4708 + }, + { + "epoch": 1.3769556952770872, + "grad_norm": 1.9194747868225221, + 
"learning_rate": 4.858324624835164e-06, + "loss": 0.5243252515792847, + "step": 4709 + }, + { + "epoch": 1.3772481356923527, + "grad_norm": 1.7326979192308607, + "learning_rate": 4.854178547115078e-06, + "loss": 0.528606653213501, + "step": 4710 + }, + { + "epoch": 1.377540576107618, + "grad_norm": 1.761919042167513, + "learning_rate": 4.850033672199469e-06, + "loss": 0.46468549966812134, + "step": 4711 + }, + { + "epoch": 1.3778330165228834, + "grad_norm": 1.5919653348557072, + "learning_rate": 4.8458900010571765e-06, + "loss": 0.5368300676345825, + "step": 4712 + }, + { + "epoch": 1.378125456938149, + "grad_norm": 1.6462148743894651, + "learning_rate": 4.8417475346567635e-06, + "loss": 0.5156906843185425, + "step": 4713 + }, + { + "epoch": 1.3784178973534142, + "grad_norm": 1.718628393460986, + "learning_rate": 4.837606273966496e-06, + "loss": 0.5899196863174438, + "step": 4714 + }, + { + "epoch": 1.3787103377686796, + "grad_norm": 1.6725614455419595, + "learning_rate": 4.833466219954376e-06, + "loss": 0.5820844769477844, + "step": 4715 + }, + { + "epoch": 1.3790027781839451, + "grad_norm": 1.5883271974734077, + "learning_rate": 4.829327373588113e-06, + "loss": 0.4926246404647827, + "step": 4716 + }, + { + "epoch": 1.3792952185992104, + "grad_norm": 1.5404696535835014, + "learning_rate": 4.825189735835138e-06, + "loss": 0.5417006611824036, + "step": 4717 + }, + { + "epoch": 1.3795876590144758, + "grad_norm": 1.5296186550545692, + "learning_rate": 4.821053307662599e-06, + "loss": 0.4130229949951172, + "step": 4718 + }, + { + "epoch": 1.3798800994297413, + "grad_norm": 1.279729123751172, + "learning_rate": 4.8169180900373615e-06, + "loss": 0.4553627371788025, + "step": 4719 + }, + { + "epoch": 1.3801725398450067, + "grad_norm": 1.3535233614920503, + "learning_rate": 4.812784083926005e-06, + "loss": 0.523567259311676, + "step": 4720 + }, + { + "epoch": 1.380464980260272, + "grad_norm": 1.585136917164004, + "learning_rate": 4.808651290294832e-06, + "loss": 
0.4643239378929138, + "step": 4721 + }, + { + "epoch": 1.3807574206755373, + "grad_norm": 1.4443352165881056, + "learning_rate": 4.804519710109856e-06, + "loss": 0.4631537199020386, + "step": 4722 + }, + { + "epoch": 1.3810498610908026, + "grad_norm": 1.9168786498716517, + "learning_rate": 4.8003893443368075e-06, + "loss": 0.5304736495018005, + "step": 4723 + }, + { + "epoch": 1.3813423015060682, + "grad_norm": 1.7679231174871453, + "learning_rate": 4.79626019394114e-06, + "loss": 0.4357796907424927, + "step": 4724 + }, + { + "epoch": 1.3816347419213335, + "grad_norm": 1.9313439900637919, + "learning_rate": 4.7921322598880095e-06, + "loss": 0.6693407297134399, + "step": 4725 + }, + { + "epoch": 1.3819271823365988, + "grad_norm": 1.614277655310262, + "learning_rate": 4.788005543142299e-06, + "loss": 0.5333320498466492, + "step": 4726 + }, + { + "epoch": 1.3822196227518644, + "grad_norm": 1.900002017358812, + "learning_rate": 4.783880044668603e-06, + "loss": 0.5782167911529541, + "step": 4727 + }, + { + "epoch": 1.3825120631671297, + "grad_norm": 1.8216810622231216, + "learning_rate": 4.779755765431231e-06, + "loss": 0.581318199634552, + "step": 4728 + }, + { + "epoch": 1.382804503582395, + "grad_norm": 1.6899321824779212, + "learning_rate": 4.775632706394211e-06, + "loss": 0.5812945365905762, + "step": 4729 + }, + { + "epoch": 1.3830969439976606, + "grad_norm": 1.7981132988330288, + "learning_rate": 4.771510868521279e-06, + "loss": 0.460615873336792, + "step": 4730 + }, + { + "epoch": 1.383389384412926, + "grad_norm": 1.8316112888726737, + "learning_rate": 4.767390252775894e-06, + "loss": 0.5934186577796936, + "step": 4731 + }, + { + "epoch": 1.3836818248281912, + "grad_norm": 1.6355522234245776, + "learning_rate": 4.763270860121222e-06, + "loss": 0.4928584098815918, + "step": 4732 + }, + { + "epoch": 1.3839742652434566, + "grad_norm": 1.6231538800234695, + "learning_rate": 4.759152691520146e-06, + "loss": 0.505489706993103, + "step": 4733 + }, + { + "epoch": 
1.3842667056587221, + "grad_norm": 1.5771553081820557, + "learning_rate": 4.755035747935264e-06, + "loss": 0.5679354667663574, + "step": 4734 + }, + { + "epoch": 1.3845591460739874, + "grad_norm": 1.7096467723863036, + "learning_rate": 4.750920030328889e-06, + "loss": 0.5744746923446655, + "step": 4735 + }, + { + "epoch": 1.3848515864892528, + "grad_norm": 1.6483531613381477, + "learning_rate": 4.7468055396630395e-06, + "loss": 0.4953685402870178, + "step": 4736 + }, + { + "epoch": 1.385144026904518, + "grad_norm": 1.8803927120396235, + "learning_rate": 4.742692276899454e-06, + "loss": 0.6083461046218872, + "step": 4737 + }, + { + "epoch": 1.3854364673197836, + "grad_norm": 1.5633925902592396, + "learning_rate": 4.738580242999584e-06, + "loss": 0.4980735778808594, + "step": 4738 + }, + { + "epoch": 1.385728907735049, + "grad_norm": 1.4499409145464446, + "learning_rate": 4.734469438924594e-06, + "loss": 0.46363019943237305, + "step": 4739 + }, + { + "epoch": 1.3860213481503143, + "grad_norm": 1.818813219831182, + "learning_rate": 4.730359865635355e-06, + "loss": 0.5946298837661743, + "step": 4740 + }, + { + "epoch": 1.3863137885655799, + "grad_norm": 1.6327330611392554, + "learning_rate": 4.726251524092459e-06, + "loss": 0.5630123615264893, + "step": 4741 + }, + { + "epoch": 1.3866062289808452, + "grad_norm": 1.5382056004014089, + "learning_rate": 4.7221444152562045e-06, + "loss": 0.5353481769561768, + "step": 4742 + }, + { + "epoch": 1.3868986693961105, + "grad_norm": 1.7585652476725264, + "learning_rate": 4.718038540086602e-06, + "loss": 0.5170711874961853, + "step": 4743 + }, + { + "epoch": 1.387191109811376, + "grad_norm": 1.8043747351160766, + "learning_rate": 4.713933899543377e-06, + "loss": 0.600492000579834, + "step": 4744 + }, + { + "epoch": 1.3874835502266414, + "grad_norm": 1.5446435468278237, + "learning_rate": 4.709830494585962e-06, + "loss": 0.5291938781738281, + "step": 4745 + }, + { + "epoch": 1.3877759906419067, + "grad_norm": 1.658022225410227, + 
"learning_rate": 4.7057283261735055e-06, + "loss": 0.5664317011833191, + "step": 4746 + }, + { + "epoch": 1.388068431057172, + "grad_norm": 1.8477945736694077, + "learning_rate": 4.701627395264866e-06, + "loss": 0.606655478477478, + "step": 4747 + }, + { + "epoch": 1.3883608714724374, + "grad_norm": 1.5930247770190467, + "learning_rate": 4.697527702818604e-06, + "loss": 0.6160893440246582, + "step": 4748 + }, + { + "epoch": 1.388653311887703, + "grad_norm": 1.510283707012234, + "learning_rate": 4.693429249793002e-06, + "loss": 0.45944249629974365, + "step": 4749 + }, + { + "epoch": 1.3889457523029682, + "grad_norm": 1.7369442621234958, + "learning_rate": 4.689332037146049e-06, + "loss": 0.5737302303314209, + "step": 4750 + }, + { + "epoch": 1.3892381927182336, + "grad_norm": 1.7885159565933124, + "learning_rate": 4.685236065835443e-06, + "loss": 0.4075150787830353, + "step": 4751 + }, + { + "epoch": 1.3895306331334991, + "grad_norm": 1.7699683741602097, + "learning_rate": 4.681141336818592e-06, + "loss": 0.5832744836807251, + "step": 4752 + }, + { + "epoch": 1.3898230735487644, + "grad_norm": 1.6617741591328279, + "learning_rate": 4.6770478510526155e-06, + "loss": 0.5444560647010803, + "step": 4753 + }, + { + "epoch": 1.3901155139640298, + "grad_norm": 1.5343212819990357, + "learning_rate": 4.672955609494339e-06, + "loss": 0.6087433695793152, + "step": 4754 + }, + { + "epoch": 1.3904079543792953, + "grad_norm": 1.3783003966189016, + "learning_rate": 4.6688646131002995e-06, + "loss": 0.3781468868255615, + "step": 4755 + }, + { + "epoch": 1.3907003947945606, + "grad_norm": 2.0008130334792953, + "learning_rate": 4.664774862826742e-06, + "loss": 0.43719804286956787, + "step": 4756 + }, + { + "epoch": 1.390992835209826, + "grad_norm": 1.7926138812382992, + "learning_rate": 4.660686359629623e-06, + "loss": 0.550011932849884, + "step": 4757 + }, + { + "epoch": 1.3912852756250915, + "grad_norm": 1.670816081047031, + "learning_rate": 4.656599104464607e-06, + "loss": 
0.6060909032821655, + "step": 4758 + }, + { + "epoch": 1.3915777160403568, + "grad_norm": 1.727898538684726, + "learning_rate": 4.652513098287058e-06, + "loss": 0.5169791579246521, + "step": 4759 + }, + { + "epoch": 1.3918701564556222, + "grad_norm": 1.667801698839589, + "learning_rate": 4.6484283420520594e-06, + "loss": 0.43063026666641235, + "step": 4760 + }, + { + "epoch": 1.3921625968708875, + "grad_norm": 1.6770983664766483, + "learning_rate": 4.644344836714397e-06, + "loss": 0.5426993370056152, + "step": 4761 + }, + { + "epoch": 1.3924550372861528, + "grad_norm": 1.7220159777866155, + "learning_rate": 4.6402625832285665e-06, + "loss": 0.5260995030403137, + "step": 4762 + }, + { + "epoch": 1.3927474777014184, + "grad_norm": 1.791130103339175, + "learning_rate": 4.63618158254877e-06, + "loss": 0.5206680297851562, + "step": 4763 + }, + { + "epoch": 1.3930399181166837, + "grad_norm": 1.8800757395074672, + "learning_rate": 4.632101835628912e-06, + "loss": 0.5250430703163147, + "step": 4764 + }, + { + "epoch": 1.393332358531949, + "grad_norm": 1.5663601185417966, + "learning_rate": 4.628023343422616e-06, + "loss": 0.5409445762634277, + "step": 4765 + }, + { + "epoch": 1.3936247989472146, + "grad_norm": 1.6199099812994435, + "learning_rate": 4.6239461068832056e-06, + "loss": 0.4676284193992615, + "step": 4766 + }, + { + "epoch": 1.39391723936248, + "grad_norm": 1.6644750420264167, + "learning_rate": 4.6198701269637014e-06, + "loss": 0.6019079089164734, + "step": 4767 + }, + { + "epoch": 1.3942096797777452, + "grad_norm": 1.6721679687151758, + "learning_rate": 4.615795404616844e-06, + "loss": 0.5434615612030029, + "step": 4768 + }, + { + "epoch": 1.3945021201930108, + "grad_norm": 1.8615818009836036, + "learning_rate": 4.611721940795074e-06, + "loss": 0.5817157030105591, + "step": 4769 + }, + { + "epoch": 1.3947945606082761, + "grad_norm": 1.7318982025014367, + "learning_rate": 4.607649736450539e-06, + "loss": 0.5601100921630859, + "step": 4770 + }, + { + "epoch": 
1.3950870010235414, + "grad_norm": 1.8105361405271991, + "learning_rate": 4.6035787925350915e-06, + "loss": 0.5955039262771606, + "step": 4771 + }, + { + "epoch": 1.3953794414388068, + "grad_norm": 1.735716832820506, + "learning_rate": 4.5995091100002905e-06, + "loss": 0.47491732239723206, + "step": 4772 + }, + { + "epoch": 1.3956718818540723, + "grad_norm": 1.7916635810918338, + "learning_rate": 4.595440689797402e-06, + "loss": 0.5451281070709229, + "step": 4773 + }, + { + "epoch": 1.3959643222693376, + "grad_norm": 1.5652511418689858, + "learning_rate": 4.591373532877389e-06, + "loss": 0.3973035514354706, + "step": 4774 + }, + { + "epoch": 1.396256762684603, + "grad_norm": 1.6712606601404056, + "learning_rate": 4.587307640190929e-06, + "loss": 0.604694128036499, + "step": 4775 + }, + { + "epoch": 1.3965492030998683, + "grad_norm": 1.3684363761943823, + "learning_rate": 4.583243012688397e-06, + "loss": 0.4120032489299774, + "step": 4776 + }, + { + "epoch": 1.3968416435151338, + "grad_norm": 1.5200379644064634, + "learning_rate": 4.579179651319878e-06, + "loss": 0.4864089787006378, + "step": 4777 + }, + { + "epoch": 1.3971340839303992, + "grad_norm": 1.7660999886821023, + "learning_rate": 4.57511755703516e-06, + "loss": 0.5774982571601868, + "step": 4778 + }, + { + "epoch": 1.3974265243456645, + "grad_norm": 1.7243096372475708, + "learning_rate": 4.571056730783725e-06, + "loss": 0.48220688104629517, + "step": 4779 + }, + { + "epoch": 1.39771896476093, + "grad_norm": 1.4235878512993427, + "learning_rate": 4.566997173514771e-06, + "loss": 0.4636304974555969, + "step": 4780 + }, + { + "epoch": 1.3980114051761954, + "grad_norm": 1.3469561341500977, + "learning_rate": 4.562938886177194e-06, + "loss": 0.500522792339325, + "step": 4781 + }, + { + "epoch": 1.3983038455914607, + "grad_norm": 1.8391525606302594, + "learning_rate": 4.558881869719595e-06, + "loss": 0.5322657823562622, + "step": 4782 + }, + { + "epoch": 1.3985962860067263, + "grad_norm": 1.8673725266705359, + 
"learning_rate": 4.554826125090276e-06, + "loss": 0.5013759136199951, + "step": 4783 + }, + { + "epoch": 1.3988887264219916, + "grad_norm": 1.5888002392216285, + "learning_rate": 4.550771653237242e-06, + "loss": 0.4261836111545563, + "step": 4784 + }, + { + "epoch": 1.399181166837257, + "grad_norm": 1.6811392186782483, + "learning_rate": 4.546718455108205e-06, + "loss": 0.6181522607803345, + "step": 4785 + }, + { + "epoch": 1.3994736072525222, + "grad_norm": 1.7420663714537028, + "learning_rate": 4.54266653165057e-06, + "loss": 0.6267478466033936, + "step": 4786 + }, + { + "epoch": 1.3997660476677876, + "grad_norm": 1.841391700351839, + "learning_rate": 4.5386158838114535e-06, + "loss": 0.5382452607154846, + "step": 4787 + }, + { + "epoch": 1.400058488083053, + "grad_norm": 1.5361116059310378, + "learning_rate": 4.534566512537668e-06, + "loss": 0.5973625183105469, + "step": 4788 + }, + { + "epoch": 1.4003509284983184, + "grad_norm": 1.7115299901221885, + "learning_rate": 4.530518418775734e-06, + "loss": 0.57401442527771, + "step": 4789 + }, + { + "epoch": 1.4006433689135838, + "grad_norm": 1.7539136213830773, + "learning_rate": 4.52647160347186e-06, + "loss": 0.5712965726852417, + "step": 4790 + }, + { + "epoch": 1.4009358093288493, + "grad_norm": 1.7324506482257287, + "learning_rate": 4.52242606757197e-06, + "loss": 0.5678268671035767, + "step": 4791 + }, + { + "epoch": 1.4012282497441146, + "grad_norm": 1.8696367540913243, + "learning_rate": 4.518381812021682e-06, + "loss": 0.4798399806022644, + "step": 4792 + }, + { + "epoch": 1.40152069015938, + "grad_norm": 1.570253187142898, + "learning_rate": 4.514338837766317e-06, + "loss": 0.48918360471725464, + "step": 4793 + }, + { + "epoch": 1.4018131305746455, + "grad_norm": 1.4711408699123494, + "learning_rate": 4.510297145750894e-06, + "loss": 0.47836846113204956, + "step": 4794 + }, + { + "epoch": 1.4021055709899108, + "grad_norm": 1.6409652265079098, + "learning_rate": 4.506256736920136e-06, + "loss": 
0.4956067204475403, + "step": 4795 + }, + { + "epoch": 1.4023980114051762, + "grad_norm": 1.6571409914414528, + "learning_rate": 4.502217612218463e-06, + "loss": 0.39146924018859863, + "step": 4796 + }, + { + "epoch": 1.4026904518204417, + "grad_norm": 1.6190957574837974, + "learning_rate": 4.498179772589998e-06, + "loss": 0.46657800674438477, + "step": 4797 + }, + { + "epoch": 1.402982892235707, + "grad_norm": 1.5760103505209448, + "learning_rate": 4.4941432189785574e-06, + "loss": 0.4949738383293152, + "step": 4798 + }, + { + "epoch": 1.4032753326509724, + "grad_norm": 1.882895838026707, + "learning_rate": 4.490107952327663e-06, + "loss": 0.5256912708282471, + "step": 4799 + }, + { + "epoch": 1.4035677730662377, + "grad_norm": 1.7128737744359326, + "learning_rate": 4.486073973580539e-06, + "loss": 0.38139551877975464, + "step": 4800 + }, + { + "epoch": 1.403860213481503, + "grad_norm": 1.8140605273544137, + "learning_rate": 4.482041283680095e-06, + "loss": 0.5014597177505493, + "step": 4801 + }, + { + "epoch": 1.4041526538967686, + "grad_norm": 1.8595922924331247, + "learning_rate": 4.478009883568951e-06, + "loss": 0.5497276186943054, + "step": 4802 + }, + { + "epoch": 1.404445094312034, + "grad_norm": 2.0532585085438524, + "learning_rate": 4.473979774189422e-06, + "loss": 0.6098340749740601, + "step": 4803 + }, + { + "epoch": 1.4047375347272992, + "grad_norm": 1.7416135071315817, + "learning_rate": 4.469950956483522e-06, + "loss": 0.40206801891326904, + "step": 4804 + }, + { + "epoch": 1.4050299751425648, + "grad_norm": 1.5567497019384768, + "learning_rate": 4.465923431392962e-06, + "loss": 0.5362050533294678, + "step": 4805 + }, + { + "epoch": 1.40532241555783, + "grad_norm": 1.6896555289921489, + "learning_rate": 4.461897199859153e-06, + "loss": 0.5688962936401367, + "step": 4806 + }, + { + "epoch": 1.4056148559730954, + "grad_norm": 2.0519988466480723, + "learning_rate": 4.457872262823202e-06, + "loss": 0.5270779132843018, + "step": 4807 + }, + { + "epoch": 
1.405907296388361, + "grad_norm": 1.9613398978608871, + "learning_rate": 4.453848621225913e-06, + "loss": 0.5656974911689758, + "step": 4808 + }, + { + "epoch": 1.4061997368036263, + "grad_norm": 1.517853308784437, + "learning_rate": 4.449826276007786e-06, + "loss": 0.44072896242141724, + "step": 4809 + }, + { + "epoch": 1.4064921772188916, + "grad_norm": 1.642033723460973, + "learning_rate": 4.445805228109022e-06, + "loss": 0.5851765871047974, + "step": 4810 + }, + { + "epoch": 1.406784617634157, + "grad_norm": 1.71031586004946, + "learning_rate": 4.441785478469519e-06, + "loss": 0.6174030303955078, + "step": 4811 + }, + { + "epoch": 1.4070770580494225, + "grad_norm": 1.5609662983326855, + "learning_rate": 4.437767028028863e-06, + "loss": 0.542346715927124, + "step": 4812 + }, + { + "epoch": 1.4073694984646878, + "grad_norm": 1.855237193625426, + "learning_rate": 4.433749877726345e-06, + "loss": 0.4964073598384857, + "step": 4813 + }, + { + "epoch": 1.4076619388799532, + "grad_norm": 1.798693836443108, + "learning_rate": 4.429734028500951e-06, + "loss": 0.5309566259384155, + "step": 4814 + }, + { + "epoch": 1.4079543792952185, + "grad_norm": 1.7569401782763947, + "learning_rate": 4.425719481291359e-06, + "loss": 0.5799233913421631, + "step": 4815 + }, + { + "epoch": 1.408246819710484, + "grad_norm": 1.6640340310451727, + "learning_rate": 4.4217062370359456e-06, + "loss": 0.37344229221343994, + "step": 4816 + }, + { + "epoch": 1.4085392601257494, + "grad_norm": 1.9633336456325348, + "learning_rate": 4.417694296672783e-06, + "loss": 0.5752555727958679, + "step": 4817 + }, + { + "epoch": 1.4088317005410147, + "grad_norm": 1.8625982582112681, + "learning_rate": 4.413683661139638e-06, + "loss": 0.61701500415802, + "step": 4818 + }, + { + "epoch": 1.4091241409562802, + "grad_norm": 1.6641617857653193, + "learning_rate": 4.409674331373972e-06, + "loss": 0.4163259267807007, + "step": 4819 + }, + { + "epoch": 1.4094165813715456, + "grad_norm": 1.4025408210631873, + 
"learning_rate": 4.40566630831294e-06, + "loss": 0.46583253145217896, + "step": 4820 + }, + { + "epoch": 1.409709021786811, + "grad_norm": 1.739036857290848, + "learning_rate": 4.401659592893396e-06, + "loss": 0.5230617523193359, + "step": 4821 + }, + { + "epoch": 1.4100014622020764, + "grad_norm": 1.7435910389535008, + "learning_rate": 4.397654186051887e-06, + "loss": 0.6351375579833984, + "step": 4822 + }, + { + "epoch": 1.4102939026173418, + "grad_norm": 1.6526547277716674, + "learning_rate": 4.3936500887246445e-06, + "loss": 0.5895766615867615, + "step": 4823 + }, + { + "epoch": 1.410586343032607, + "grad_norm": 1.7357556256264726, + "learning_rate": 4.389647301847607e-06, + "loss": 0.49772539734840393, + "step": 4824 + }, + { + "epoch": 1.4108787834478724, + "grad_norm": 1.6867136550948763, + "learning_rate": 4.385645826356402e-06, + "loss": 0.593197226524353, + "step": 4825 + }, + { + "epoch": 1.4111712238631378, + "grad_norm": 1.497358571958903, + "learning_rate": 4.381645663186348e-06, + "loss": 0.4971385598182678, + "step": 4826 + }, + { + "epoch": 1.4114636642784033, + "grad_norm": 1.772016135609381, + "learning_rate": 4.3776468132724605e-06, + "loss": 0.5452263951301575, + "step": 4827 + }, + { + "epoch": 1.4117561046936686, + "grad_norm": 1.9896815505139207, + "learning_rate": 4.373649277549446e-06, + "loss": 0.6085976362228394, + "step": 4828 + }, + { + "epoch": 1.412048545108934, + "grad_norm": 1.4346670326917912, + "learning_rate": 4.369653056951705e-06, + "loss": 0.5594700574874878, + "step": 4829 + }, + { + "epoch": 1.4123409855241995, + "grad_norm": 1.6570477364640872, + "learning_rate": 4.365658152413328e-06, + "loss": 0.5099719166755676, + "step": 4830 + }, + { + "epoch": 1.4126334259394648, + "grad_norm": 1.557110878077197, + "learning_rate": 4.3616645648681e-06, + "loss": 0.5683532953262329, + "step": 4831 + }, + { + "epoch": 1.4129258663547302, + "grad_norm": 1.9307182018155977, + "learning_rate": 4.3576722952495e-06, + "loss": 
0.5311406850814819, + "step": 4832 + }, + { + "epoch": 1.4132183067699957, + "grad_norm": 1.6214149336480879, + "learning_rate": 4.353681344490693e-06, + "loss": 0.5299100875854492, + "step": 4833 + }, + { + "epoch": 1.413510747185261, + "grad_norm": 1.6883675181677418, + "learning_rate": 4.349691713524546e-06, + "loss": 0.5531362891197205, + "step": 4834 + }, + { + "epoch": 1.4138031876005264, + "grad_norm": 1.7469666557337236, + "learning_rate": 4.345703403283603e-06, + "loss": 0.5315259099006653, + "step": 4835 + }, + { + "epoch": 1.414095628015792, + "grad_norm": 2.0019997249517645, + "learning_rate": 4.341716414700112e-06, + "loss": 0.583083987236023, + "step": 4836 + }, + { + "epoch": 1.4143880684310572, + "grad_norm": 1.680867008867613, + "learning_rate": 4.337730748706005e-06, + "loss": 0.5273857116699219, + "step": 4837 + }, + { + "epoch": 1.4146805088463226, + "grad_norm": 1.6688598484210682, + "learning_rate": 4.333746406232908e-06, + "loss": 0.4903373718261719, + "step": 4838 + }, + { + "epoch": 1.414972949261588, + "grad_norm": 1.4926269811940354, + "learning_rate": 4.329763388212134e-06, + "loss": 0.5807479619979858, + "step": 4839 + }, + { + "epoch": 1.4152653896768532, + "grad_norm": 1.6552276273685866, + "learning_rate": 4.325781695574695e-06, + "loss": 0.5613743662834167, + "step": 4840 + }, + { + "epoch": 1.4155578300921188, + "grad_norm": 1.6028157865716284, + "learning_rate": 4.321801329251286e-06, + "loss": 0.5801016092300415, + "step": 4841 + }, + { + "epoch": 1.415850270507384, + "grad_norm": 1.6267997915866552, + "learning_rate": 4.3178222901722956e-06, + "loss": 0.6412584781646729, + "step": 4842 + }, + { + "epoch": 1.4161427109226494, + "grad_norm": 1.7251596479619187, + "learning_rate": 4.313844579267793e-06, + "loss": 0.5687737464904785, + "step": 4843 + }, + { + "epoch": 1.416435151337915, + "grad_norm": 1.6343964176323358, + "learning_rate": 4.309868197467548e-06, + "loss": 0.5668497085571289, + "step": 4844 + }, + { + "epoch": 
1.4167275917531803, + "grad_norm": 1.811368112437045, + "learning_rate": 4.305893145701015e-06, + "loss": 0.5814717411994934, + "step": 4845 + }, + { + "epoch": 1.4170200321684456, + "grad_norm": 1.9246707148702022, + "learning_rate": 4.301919424897339e-06, + "loss": 0.5974467992782593, + "step": 4846 + }, + { + "epoch": 1.4173124725837112, + "grad_norm": 1.5643373795961777, + "learning_rate": 4.297947035985351e-06, + "loss": 0.48333030939102173, + "step": 4847 + }, + { + "epoch": 1.4176049129989765, + "grad_norm": 1.7102352976297683, + "learning_rate": 4.293975979893576e-06, + "loss": 0.5851039886474609, + "step": 4848 + }, + { + "epoch": 1.4178973534142418, + "grad_norm": 1.4778659468844006, + "learning_rate": 4.290006257550221e-06, + "loss": 0.5510480403900146, + "step": 4849 + }, + { + "epoch": 1.4181897938295072, + "grad_norm": 1.6670833236483533, + "learning_rate": 4.286037869883187e-06, + "loss": 0.6053529977798462, + "step": 4850 + }, + { + "epoch": 1.4184822342447727, + "grad_norm": 1.5745047113214952, + "learning_rate": 4.282070817820059e-06, + "loss": 0.471671462059021, + "step": 4851 + }, + { + "epoch": 1.418774674660038, + "grad_norm": 1.6834167266574704, + "learning_rate": 4.278105102288113e-06, + "loss": 0.4864043593406677, + "step": 4852 + }, + { + "epoch": 1.4190671150753034, + "grad_norm": 1.7275065448049989, + "learning_rate": 4.274140724214311e-06, + "loss": 0.6283255815505981, + "step": 4853 + }, + { + "epoch": 1.4193595554905687, + "grad_norm": 1.7634272907173199, + "learning_rate": 4.270177684525299e-06, + "loss": 0.4990651607513428, + "step": 4854 + }, + { + "epoch": 1.4196519959058342, + "grad_norm": 1.6718595783894241, + "learning_rate": 4.2662159841474145e-06, + "loss": 0.6053239703178406, + "step": 4855 + }, + { + "epoch": 1.4199444363210996, + "grad_norm": 1.541217587678611, + "learning_rate": 4.262255624006683e-06, + "loss": 0.45790988206863403, + "step": 4856 + }, + { + "epoch": 1.420236876736365, + "grad_norm": 1.5408074963828202, + 
"learning_rate": 4.2582966050288125e-06, + "loss": 0.49944519996643066, + "step": 4857 + }, + { + "epoch": 1.4205293171516304, + "grad_norm": 1.7145691587216874, + "learning_rate": 4.2543389281392e-06, + "loss": 0.5365482568740845, + "step": 4858 + }, + { + "epoch": 1.4208217575668958, + "grad_norm": 1.709871732141181, + "learning_rate": 4.2503825942629285e-06, + "loss": 0.7763599157333374, + "step": 4859 + }, + { + "epoch": 1.421114197982161, + "grad_norm": 1.6376653647841246, + "learning_rate": 4.246427604324768e-06, + "loss": 0.6125203371047974, + "step": 4860 + }, + { + "epoch": 1.4214066383974266, + "grad_norm": 1.8190946758346407, + "learning_rate": 4.242473959249172e-06, + "loss": 0.6634939312934875, + "step": 4861 + }, + { + "epoch": 1.421699078812692, + "grad_norm": 1.607723662080485, + "learning_rate": 4.238521659960283e-06, + "loss": 0.5117735862731934, + "step": 4862 + }, + { + "epoch": 1.4219915192279573, + "grad_norm": 1.6860730867984624, + "learning_rate": 4.234570707381925e-06, + "loss": 0.5700962543487549, + "step": 4863 + }, + { + "epoch": 1.4222839596432226, + "grad_norm": 1.5634193566609638, + "learning_rate": 4.23062110243761e-06, + "loss": 0.5443791151046753, + "step": 4864 + }, + { + "epoch": 1.422576400058488, + "grad_norm": 1.4504951290152908, + "learning_rate": 4.226672846050538e-06, + "loss": 0.5474614500999451, + "step": 4865 + }, + { + "epoch": 1.4228688404737535, + "grad_norm": 1.9578528314343135, + "learning_rate": 4.222725939143582e-06, + "loss": 0.5938940048217773, + "step": 4866 + }, + { + "epoch": 1.4231612808890188, + "grad_norm": 1.720980371359197, + "learning_rate": 4.21878038263931e-06, + "loss": 0.5010229349136353, + "step": 4867 + }, + { + "epoch": 1.4234537213042842, + "grad_norm": 1.8142108741121714, + "learning_rate": 4.214836177459975e-06, + "loss": 0.5186876058578491, + "step": 4868 + }, + { + "epoch": 1.4237461617195497, + "grad_norm": 1.6608706852165134, + "learning_rate": 4.210893324527507e-06, + "loss": 
0.5998060703277588, + "step": 4869 + }, + { + "epoch": 1.424038602134815, + "grad_norm": 1.9807145100005583, + "learning_rate": 4.206951824763528e-06, + "loss": 0.5127147436141968, + "step": 4870 + }, + { + "epoch": 1.4243310425500804, + "grad_norm": 1.4194980170815183, + "learning_rate": 4.203011679089336e-06, + "loss": 0.5134439468383789, + "step": 4871 + }, + { + "epoch": 1.424623482965346, + "grad_norm": 1.728900083762804, + "learning_rate": 4.199072888425919e-06, + "loss": 0.6244111657142639, + "step": 4872 + }, + { + "epoch": 1.4249159233806112, + "grad_norm": 1.6442803911967188, + "learning_rate": 4.195135453693944e-06, + "loss": 0.4431127905845642, + "step": 4873 + }, + { + "epoch": 1.4252083637958766, + "grad_norm": 1.7030697753848931, + "learning_rate": 4.191199375813761e-06, + "loss": 0.6479794979095459, + "step": 4874 + }, + { + "epoch": 1.4255008042111421, + "grad_norm": 2.04011086867295, + "learning_rate": 4.187264655705407e-06, + "loss": 0.6386070847511292, + "step": 4875 + }, + { + "epoch": 1.4257932446264074, + "grad_norm": 1.6039579455905961, + "learning_rate": 4.183331294288603e-06, + "loss": 0.5201597213745117, + "step": 4876 + }, + { + "epoch": 1.4260856850416728, + "grad_norm": 1.7232164566002766, + "learning_rate": 4.179399292482737e-06, + "loss": 0.46355581283569336, + "step": 4877 + }, + { + "epoch": 1.426378125456938, + "grad_norm": 2.2615584884797975, + "learning_rate": 4.175468651206898e-06, + "loss": 0.5360985398292542, + "step": 4878 + }, + { + "epoch": 1.4266705658722034, + "grad_norm": 1.552480099700309, + "learning_rate": 4.171539371379847e-06, + "loss": 0.5545670390129089, + "step": 4879 + }, + { + "epoch": 1.426963006287469, + "grad_norm": 1.4276797255790008, + "learning_rate": 4.167611453920031e-06, + "loss": 0.445978581905365, + "step": 4880 + }, + { + "epoch": 1.4272554467027343, + "grad_norm": 1.7199888948749738, + "learning_rate": 4.163684899745576e-06, + "loss": 0.5242947340011597, + "step": 4881 + }, + { + "epoch": 
1.4275478871179996, + "grad_norm": 1.7383193525416518, + "learning_rate": 4.15975970977429e-06, + "loss": 0.5544728636741638, + "step": 4882 + }, + { + "epoch": 1.4278403275332652, + "grad_norm": 2.073499174067984, + "learning_rate": 4.1558358849236626e-06, + "loss": 0.5400837063789368, + "step": 4883 + }, + { + "epoch": 1.4281327679485305, + "grad_norm": 1.6385411261569034, + "learning_rate": 4.151913426110864e-06, + "loss": 0.5201395153999329, + "step": 4884 + }, + { + "epoch": 1.4284252083637958, + "grad_norm": 1.7888379069815619, + "learning_rate": 4.147992334252745e-06, + "loss": 0.4414210319519043, + "step": 4885 + }, + { + "epoch": 1.4287176487790614, + "grad_norm": 1.7818076981346203, + "learning_rate": 4.144072610265838e-06, + "loss": 0.6590272188186646, + "step": 4886 + }, + { + "epoch": 1.4290100891943267, + "grad_norm": 1.4800084296243576, + "learning_rate": 4.140154255066356e-06, + "loss": 0.4734429717063904, + "step": 4887 + }, + { + "epoch": 1.429302529609592, + "grad_norm": 1.5398179955798732, + "learning_rate": 4.136237269570186e-06, + "loss": 0.45204073190689087, + "step": 4888 + }, + { + "epoch": 1.4295949700248574, + "grad_norm": 1.6199970278575915, + "learning_rate": 4.132321654692901e-06, + "loss": 0.6570174694061279, + "step": 4889 + }, + { + "epoch": 1.429887410440123, + "grad_norm": 1.7926483421459931, + "learning_rate": 4.128407411349754e-06, + "loss": 0.5159077644348145, + "step": 4890 + }, + { + "epoch": 1.4301798508553882, + "grad_norm": 1.603963849008659, + "learning_rate": 4.124494540455674e-06, + "loss": 0.5778994560241699, + "step": 4891 + }, + { + "epoch": 1.4304722912706536, + "grad_norm": 1.4954754441376699, + "learning_rate": 4.120583042925273e-06, + "loss": 0.4740722179412842, + "step": 4892 + }, + { + "epoch": 1.430764731685919, + "grad_norm": 1.4416066465695618, + "learning_rate": 4.116672919672837e-06, + "loss": 0.5561014413833618, + "step": 4893 + }, + { + "epoch": 1.4310571721011844, + "grad_norm": 1.5040800316270475, + 
"learning_rate": 4.112764171612335e-06, + "loss": 0.4834856688976288, + "step": 4894 + }, + { + "epoch": 1.4313496125164498, + "grad_norm": 1.691313354112802, + "learning_rate": 4.108856799657412e-06, + "loss": 0.5565547943115234, + "step": 4895 + }, + { + "epoch": 1.431642052931715, + "grad_norm": 1.8883359305911547, + "learning_rate": 4.104950804721395e-06, + "loss": 0.5401065349578857, + "step": 4896 + }, + { + "epoch": 1.4319344933469806, + "grad_norm": 1.3793655379788223, + "learning_rate": 4.101046187717284e-06, + "loss": 0.4792686700820923, + "step": 4897 + }, + { + "epoch": 1.432226933762246, + "grad_norm": 1.5922549032476903, + "learning_rate": 4.097142949557764e-06, + "loss": 0.5255981683731079, + "step": 4898 + }, + { + "epoch": 1.4325193741775113, + "grad_norm": 1.614736024187036, + "learning_rate": 4.093241091155187e-06, + "loss": 0.5535293817520142, + "step": 4899 + }, + { + "epoch": 1.4328118145927768, + "grad_norm": 1.8976199736566215, + "learning_rate": 4.089340613421589e-06, + "loss": 0.5235373973846436, + "step": 4900 + }, + { + "epoch": 1.4331042550080422, + "grad_norm": 1.8120415147677507, + "learning_rate": 4.085441517268687e-06, + "loss": 0.5538134574890137, + "step": 4901 + }, + { + "epoch": 1.4333966954233075, + "grad_norm": 1.5442149105119904, + "learning_rate": 4.081543803607869e-06, + "loss": 0.5394395589828491, + "step": 4902 + }, + { + "epoch": 1.4336891358385728, + "grad_norm": 1.6068663887611208, + "learning_rate": 4.077647473350201e-06, + "loss": 0.522742509841919, + "step": 4903 + }, + { + "epoch": 1.4339815762538382, + "grad_norm": 1.6377229499845016, + "learning_rate": 4.073752527406429e-06, + "loss": 0.559830367565155, + "step": 4904 + }, + { + "epoch": 1.4342740166691037, + "grad_norm": 1.7578675965544384, + "learning_rate": 4.069858966686971e-06, + "loss": 0.42535799741744995, + "step": 4905 + }, + { + "epoch": 1.434566457084369, + "grad_norm": 1.7745987719575682, + "learning_rate": 4.065966792101924e-06, + "loss": 
0.6075177192687988, + "step": 4906 + }, + { + "epoch": 1.4348588974996344, + "grad_norm": 1.7444570198074862, + "learning_rate": 4.06207600456106e-06, + "loss": 0.5010570883750916, + "step": 4907 + }, + { + "epoch": 1.4351513379149, + "grad_norm": 1.621587467371749, + "learning_rate": 4.058186604973826e-06, + "loss": 0.571307897567749, + "step": 4908 + }, + { + "epoch": 1.4354437783301652, + "grad_norm": 1.643170818508206, + "learning_rate": 4.0542985942493505e-06, + "loss": 0.4918866455554962, + "step": 4909 + }, + { + "epoch": 1.4357362187454306, + "grad_norm": 1.8933520643034856, + "learning_rate": 4.050411973296425e-06, + "loss": 0.6588176488876343, + "step": 4910 + }, + { + "epoch": 1.436028659160696, + "grad_norm": 1.9180926902562168, + "learning_rate": 4.046526743023526e-06, + "loss": 0.7341527938842773, + "step": 4911 + }, + { + "epoch": 1.4363210995759614, + "grad_norm": 1.7782521784505012, + "learning_rate": 4.042642904338801e-06, + "loss": 0.5233849287033081, + "step": 4912 + }, + { + "epoch": 1.4366135399912268, + "grad_norm": 1.6182742405882007, + "learning_rate": 4.038760458150079e-06, + "loss": 0.5144373178482056, + "step": 4913 + }, + { + "epoch": 1.4369059804064923, + "grad_norm": 1.55901993468911, + "learning_rate": 4.034879405364853e-06, + "loss": 0.4520954489707947, + "step": 4914 + }, + { + "epoch": 1.4371984208217576, + "grad_norm": 1.6208081934978835, + "learning_rate": 4.030999746890295e-06, + "loss": 0.5632743835449219, + "step": 4915 + }, + { + "epoch": 1.437490861237023, + "grad_norm": 1.5950473237167822, + "learning_rate": 4.027121483633257e-06, + "loss": 0.49681180715560913, + "step": 4916 + }, + { + "epoch": 1.4377833016522883, + "grad_norm": 1.684721295445507, + "learning_rate": 4.023244616500257e-06, + "loss": 0.5182398557662964, + "step": 4917 + }, + { + "epoch": 1.4380757420675536, + "grad_norm": 1.6044294787301046, + "learning_rate": 4.019369146397493e-06, + "loss": 0.5686701536178589, + "step": 4918 + }, + { + "epoch": 
1.4383681824828192, + "grad_norm": 1.682926006912085, + "learning_rate": 4.015495074230823e-06, + "loss": 0.5668520927429199, + "step": 4919 + }, + { + "epoch": 1.4386606228980845, + "grad_norm": 1.556828511748538, + "learning_rate": 4.011622400905794e-06, + "loss": 0.4511116147041321, + "step": 4920 + }, + { + "epoch": 1.4389530633133498, + "grad_norm": 1.677757503686359, + "learning_rate": 4.007751127327618e-06, + "loss": 0.4736326336860657, + "step": 4921 + }, + { + "epoch": 1.4392455037286154, + "grad_norm": 1.68287466179835, + "learning_rate": 4.003881254401183e-06, + "loss": 0.5705248117446899, + "step": 4922 + }, + { + "epoch": 1.4395379441438807, + "grad_norm": 1.4732853876066263, + "learning_rate": 4.000012783031047e-06, + "loss": 0.45527490973472595, + "step": 4923 + }, + { + "epoch": 1.439830384559146, + "grad_norm": 1.5504418192282816, + "learning_rate": 3.996145714121444e-06, + "loss": 0.4926735758781433, + "step": 4924 + }, + { + "epoch": 1.4401228249744116, + "grad_norm": 1.523617382800049, + "learning_rate": 3.992280048576276e-06, + "loss": 0.42700374126434326, + "step": 4925 + }, + { + "epoch": 1.440415265389677, + "grad_norm": 1.6783270187790582, + "learning_rate": 3.988415787299118e-06, + "loss": 0.5833145976066589, + "step": 4926 + }, + { + "epoch": 1.4407077058049422, + "grad_norm": 1.70461399954195, + "learning_rate": 3.98455293119322e-06, + "loss": 0.5290282964706421, + "step": 4927 + }, + { + "epoch": 1.4410001462202076, + "grad_norm": 1.9146871710495363, + "learning_rate": 3.9806914811614984e-06, + "loss": 0.4489266872406006, + "step": 4928 + }, + { + "epoch": 1.441292586635473, + "grad_norm": 1.9109717939773812, + "learning_rate": 3.97683143810655e-06, + "loss": 0.5630865097045898, + "step": 4929 + }, + { + "epoch": 1.4415850270507384, + "grad_norm": 1.6030492821452516, + "learning_rate": 3.972972802930627e-06, + "loss": 0.5962105989456177, + "step": 4930 + }, + { + "epoch": 1.4418774674660038, + "grad_norm": 1.789368844700869, + 
"learning_rate": 3.9691155765356674e-06, + "loss": 0.6059410572052002, + "step": 4931 + }, + { + "epoch": 1.442169907881269, + "grad_norm": 1.6894490985884645, + "learning_rate": 3.965259759823272e-06, + "loss": 0.5476605296134949, + "step": 4932 + }, + { + "epoch": 1.4424623482965346, + "grad_norm": 1.7561171676767597, + "learning_rate": 3.961405353694716e-06, + "loss": 0.70278000831604, + "step": 4933 + }, + { + "epoch": 1.4427547887118, + "grad_norm": 1.6884311650773163, + "learning_rate": 3.9575523590509445e-06, + "loss": 0.5838963389396667, + "step": 4934 + }, + { + "epoch": 1.4430472291270653, + "grad_norm": 1.536536052995308, + "learning_rate": 3.95370077679257e-06, + "loss": 0.508273720741272, + "step": 4935 + }, + { + "epoch": 1.4433396695423308, + "grad_norm": 1.4692622152510404, + "learning_rate": 3.949850607819876e-06, + "loss": 0.5053583383560181, + "step": 4936 + }, + { + "epoch": 1.4436321099575962, + "grad_norm": 1.5754477318406401, + "learning_rate": 3.946001853032818e-06, + "loss": 0.5729954242706299, + "step": 4937 + }, + { + "epoch": 1.4439245503728615, + "grad_norm": 1.833619886253515, + "learning_rate": 3.942154513331018e-06, + "loss": 0.5261870622634888, + "step": 4938 + }, + { + "epoch": 1.444216990788127, + "grad_norm": 1.3956467871190747, + "learning_rate": 3.9383085896137675e-06, + "loss": 0.34802311658859253, + "step": 4939 + }, + { + "epoch": 1.4445094312033924, + "grad_norm": 1.8896307306874633, + "learning_rate": 3.934464082780032e-06, + "loss": 0.48302024602890015, + "step": 4940 + }, + { + "epoch": 1.4448018716186577, + "grad_norm": 1.8507631130251807, + "learning_rate": 3.930620993728434e-06, + "loss": 0.6649061441421509, + "step": 4941 + }, + { + "epoch": 1.445094312033923, + "grad_norm": 1.705526500334542, + "learning_rate": 3.926779323357278e-06, + "loss": 0.5945848822593689, + "step": 4942 + }, + { + "epoch": 1.4453867524491884, + "grad_norm": 1.5476382055190478, + "learning_rate": 3.922939072564528e-06, + "loss": 
0.4783032536506653, + "step": 4943 + }, + { + "epoch": 1.445679192864454, + "grad_norm": 1.6453487782833462, + "learning_rate": 3.919100242247821e-06, + "loss": 0.4619516134262085, + "step": 4944 + }, + { + "epoch": 1.4459716332797192, + "grad_norm": 1.5327149597771257, + "learning_rate": 3.915262833304461e-06, + "loss": 0.5652358531951904, + "step": 4945 + }, + { + "epoch": 1.4462640736949846, + "grad_norm": 1.4734419470243802, + "learning_rate": 3.911426846631416e-06, + "loss": 0.4523610472679138, + "step": 4946 + }, + { + "epoch": 1.44655651411025, + "grad_norm": 1.5670101583017915, + "learning_rate": 3.9075922831253276e-06, + "loss": 0.4914482831954956, + "step": 4947 + }, + { + "epoch": 1.4468489545255154, + "grad_norm": 1.7113071980283088, + "learning_rate": 3.9037591436825005e-06, + "loss": 0.4060005247592926, + "step": 4948 + }, + { + "epoch": 1.4471413949407808, + "grad_norm": 1.9320743237560347, + "learning_rate": 3.899927429198908e-06, + "loss": 0.49987125396728516, + "step": 4949 + }, + { + "epoch": 1.4474338353560463, + "grad_norm": 2.0596677045202036, + "learning_rate": 3.896097140570189e-06, + "loss": 0.6205358505249023, + "step": 4950 + }, + { + "epoch": 1.4477262757713116, + "grad_norm": 1.7670476784744638, + "learning_rate": 3.892268278691651e-06, + "loss": 0.5302955508232117, + "step": 4951 + }, + { + "epoch": 1.448018716186577, + "grad_norm": 1.7962585212488547, + "learning_rate": 3.888440844458272e-06, + "loss": 0.5225962400436401, + "step": 4952 + }, + { + "epoch": 1.4483111566018425, + "grad_norm": 1.8247561425410785, + "learning_rate": 3.884614838764682e-06, + "loss": 0.5030089616775513, + "step": 4953 + }, + { + "epoch": 1.4486035970171078, + "grad_norm": 1.8999355010605985, + "learning_rate": 3.880790262505192e-06, + "loss": 0.6060030460357666, + "step": 4954 + }, + { + "epoch": 1.4488960374323732, + "grad_norm": 1.8229751812699673, + "learning_rate": 3.8769671165737725e-06, + "loss": 0.5244846343994141, + "step": 4955 + }, + { + "epoch": 
1.4491884778476385, + "grad_norm": 1.4616444667042836, + "learning_rate": 3.873145401864061e-06, + "loss": 0.46979671716690063, + "step": 4956 + }, + { + "epoch": 1.4494809182629038, + "grad_norm": 1.8452052569073554, + "learning_rate": 3.8693251192693596e-06, + "loss": 0.5201131105422974, + "step": 4957 + }, + { + "epoch": 1.4497733586781694, + "grad_norm": 1.679443447217904, + "learning_rate": 3.865506269682638e-06, + "loss": 0.5124838352203369, + "step": 4958 + }, + { + "epoch": 1.4500657990934347, + "grad_norm": 1.830132365627518, + "learning_rate": 3.861688853996525e-06, + "loss": 0.5613473653793335, + "step": 4959 + }, + { + "epoch": 1.4503582395087, + "grad_norm": 1.5976816836472583, + "learning_rate": 3.857872873103322e-06, + "loss": 0.46196621656417847, + "step": 4960 + }, + { + "epoch": 1.4506506799239656, + "grad_norm": 1.9393165963504067, + "learning_rate": 3.8540583278949905e-06, + "loss": 0.6427509784698486, + "step": 4961 + }, + { + "epoch": 1.450943120339231, + "grad_norm": 1.7485862700938968, + "learning_rate": 3.850245219263157e-06, + "loss": 0.6306381821632385, + "step": 4962 + }, + { + "epoch": 1.4512355607544962, + "grad_norm": 1.5645194602237047, + "learning_rate": 3.846433548099114e-06, + "loss": 0.46638673543930054, + "step": 4963 + }, + { + "epoch": 1.4515280011697618, + "grad_norm": 1.5360842567610604, + "learning_rate": 3.842623315293814e-06, + "loss": 0.4950143098831177, + "step": 4964 + }, + { + "epoch": 1.451820441585027, + "grad_norm": 1.5810107141405056, + "learning_rate": 3.838814521737875e-06, + "loss": 0.45698249340057373, + "step": 4965 + }, + { + "epoch": 1.4521128820002924, + "grad_norm": 1.6457012436395508, + "learning_rate": 3.8350071683215814e-06, + "loss": 0.6068260669708252, + "step": 4966 + }, + { + "epoch": 1.4524053224155578, + "grad_norm": 1.8188775401166803, + "learning_rate": 3.831201255934879e-06, + "loss": 0.5264104008674622, + "step": 4967 + }, + { + "epoch": 1.4526977628308233, + "grad_norm": 1.6372667669239498, 
+ "learning_rate": 3.827396785467375e-06, + "loss": 0.5198315978050232, + "step": 4968 + }, + { + "epoch": 1.4529902032460886, + "grad_norm": 1.6294906688066837, + "learning_rate": 3.823593757808342e-06, + "loss": 0.504194438457489, + "step": 4969 + }, + { + "epoch": 1.453282643661354, + "grad_norm": 1.6016674444230832, + "learning_rate": 3.819792173846717e-06, + "loss": 0.5018986463546753, + "step": 4970 + }, + { + "epoch": 1.4535750840766193, + "grad_norm": 1.6893120935929504, + "learning_rate": 3.8159920344710936e-06, + "loss": 0.4847358465194702, + "step": 4971 + }, + { + "epoch": 1.4538675244918848, + "grad_norm": 1.6703483014148515, + "learning_rate": 3.812193340569733e-06, + "loss": 0.547623872756958, + "step": 4972 + }, + { + "epoch": 1.4541599649071502, + "grad_norm": 2.1389235560975615, + "learning_rate": 3.8083960930305562e-06, + "loss": 0.534354031085968, + "step": 4973 + }, + { + "epoch": 1.4544524053224155, + "grad_norm": 1.788418032061747, + "learning_rate": 3.8046002927411506e-06, + "loss": 0.6123033165931702, + "step": 4974 + }, + { + "epoch": 1.454744845737681, + "grad_norm": 1.6087574153138633, + "learning_rate": 3.8008059405887553e-06, + "loss": 0.5222622752189636, + "step": 4975 + }, + { + "epoch": 1.4550372861529464, + "grad_norm": 1.684901707974216, + "learning_rate": 3.7970130374602785e-06, + "loss": 0.5568759441375732, + "step": 4976 + }, + { + "epoch": 1.4553297265682117, + "grad_norm": 1.7459991230210548, + "learning_rate": 3.7932215842422903e-06, + "loss": 0.5458661317825317, + "step": 4977 + }, + { + "epoch": 1.4556221669834772, + "grad_norm": 1.6216302867008319, + "learning_rate": 3.789431581821019e-06, + "loss": 0.48293566703796387, + "step": 4978 + }, + { + "epoch": 1.4559146073987426, + "grad_norm": 1.893470262052562, + "learning_rate": 3.7856430310823546e-06, + "loss": 0.647431492805481, + "step": 4979 + }, + { + "epoch": 1.456207047814008, + "grad_norm": 1.6735249045743477, + "learning_rate": 3.7818559329118475e-06, + "loss": 
0.48039543628692627, + "step": 4980 + }, + { + "epoch": 1.4564994882292732, + "grad_norm": 1.6704036620696165, + "learning_rate": 3.7780702881947084e-06, + "loss": 0.6705803871154785, + "step": 4981 + }, + { + "epoch": 1.4567919286445385, + "grad_norm": 1.7404901320645014, + "learning_rate": 3.7742860978158103e-06, + "loss": 0.564405083656311, + "step": 4982 + }, + { + "epoch": 1.457084369059804, + "grad_norm": 1.7081222209997355, + "learning_rate": 3.7705033626596844e-06, + "loss": 0.5208612084388733, + "step": 4983 + }, + { + "epoch": 1.4573768094750694, + "grad_norm": 1.909829427679328, + "learning_rate": 3.766722083610521e-06, + "loss": 0.6230732202529907, + "step": 4984 + }, + { + "epoch": 1.4576692498903348, + "grad_norm": 1.6601663066885601, + "learning_rate": 3.7629422615521747e-06, + "loss": 0.5741504430770874, + "step": 4985 + }, + { + "epoch": 1.4579616903056003, + "grad_norm": 1.584208244849031, + "learning_rate": 3.75916389736815e-06, + "loss": 0.5321571826934814, + "step": 4986 + }, + { + "epoch": 1.4582541307208656, + "grad_norm": 1.95685306597155, + "learning_rate": 3.7553869919416186e-06, + "loss": 0.6367009878158569, + "step": 4987 + }, + { + "epoch": 1.458546571136131, + "grad_norm": 1.5904913997392975, + "learning_rate": 3.75161154615541e-06, + "loss": 0.5736235976219177, + "step": 4988 + }, + { + "epoch": 1.4588390115513965, + "grad_norm": 2.0157501917439866, + "learning_rate": 3.7478375608920127e-06, + "loss": 0.5799358487129211, + "step": 4989 + }, + { + "epoch": 1.4591314519666618, + "grad_norm": 1.7515991790236536, + "learning_rate": 3.7440650370335675e-06, + "loss": 0.6065561771392822, + "step": 4990 + }, + { + "epoch": 1.4594238923819272, + "grad_norm": 1.4583944256149548, + "learning_rate": 3.740293975461886e-06, + "loss": 0.5182442665100098, + "step": 4991 + }, + { + "epoch": 1.4597163327971927, + "grad_norm": 1.6877116508095484, + "learning_rate": 3.736524377058429e-06, + "loss": 0.5065605640411377, + "step": 4992 + }, + { + "epoch": 
1.460008773212458, + "grad_norm": 1.5024812411134352, + "learning_rate": 3.7327562427043163e-06, + "loss": 0.44326460361480713, + "step": 4993 + }, + { + "epoch": 1.4603012136277234, + "grad_norm": 1.9166701258714811, + "learning_rate": 3.7289895732803306e-06, + "loss": 0.6192547082901001, + "step": 4994 + }, + { + "epoch": 1.4605936540429887, + "grad_norm": 1.794387571688338, + "learning_rate": 3.725224369666899e-06, + "loss": 0.5487738847732544, + "step": 4995 + }, + { + "epoch": 1.460886094458254, + "grad_norm": 1.922772286834415, + "learning_rate": 3.7214606327441203e-06, + "loss": 0.558982253074646, + "step": 4996 + }, + { + "epoch": 1.4611785348735196, + "grad_norm": 1.770836311904495, + "learning_rate": 3.717698363391744e-06, + "loss": 0.5277853012084961, + "step": 4997 + }, + { + "epoch": 1.461470975288785, + "grad_norm": 1.7748123557502546, + "learning_rate": 3.7139375624891795e-06, + "loss": 0.6561184525489807, + "step": 4998 + }, + { + "epoch": 1.4617634157040502, + "grad_norm": 1.5647900159041126, + "learning_rate": 3.710178230915489e-06, + "loss": 0.46555888652801514, + "step": 4999 + }, + { + "epoch": 1.4620558561193158, + "grad_norm": 1.7414970962586886, + "learning_rate": 3.706420369549394e-06, + "loss": 0.5808060765266418, + "step": 5000 + }, + { + "epoch": 1.462348296534581, + "grad_norm": 1.442227314234909, + "learning_rate": 3.7026639792692722e-06, + "loss": 0.5407893061637878, + "step": 5001 + }, + { + "epoch": 1.4626407369498464, + "grad_norm": 2.580423891920115, + "learning_rate": 3.6989090609531574e-06, + "loss": 0.538393497467041, + "step": 5002 + }, + { + "epoch": 1.462933177365112, + "grad_norm": 1.8751864874321293, + "learning_rate": 3.6951556154787373e-06, + "loss": 0.530704140663147, + "step": 5003 + }, + { + "epoch": 1.4632256177803773, + "grad_norm": 1.4470439364888814, + "learning_rate": 3.691403643723359e-06, + "loss": 0.43352627754211426, + "step": 5004 + }, + { + "epoch": 1.4635180581956426, + "grad_norm": 1.6573279039642985, + 
"learning_rate": 3.687653146564025e-06, + "loss": 0.6047205924987793, + "step": 5005 + }, + { + "epoch": 1.463810498610908, + "grad_norm": 1.6556697002732312, + "learning_rate": 3.6839041248773857e-06, + "loss": 0.44708865880966187, + "step": 5006 + }, + { + "epoch": 1.4641029390261735, + "grad_norm": 1.6445747944839355, + "learning_rate": 3.680156579539753e-06, + "loss": 0.5653451681137085, + "step": 5007 + }, + { + "epoch": 1.4643953794414388, + "grad_norm": 1.750839565103172, + "learning_rate": 3.6764105114270966e-06, + "loss": 0.49293750524520874, + "step": 5008 + }, + { + "epoch": 1.4646878198567042, + "grad_norm": 1.7691390827672615, + "learning_rate": 3.672665921415034e-06, + "loss": 0.5761851072311401, + "step": 5009 + }, + { + "epoch": 1.4649802602719695, + "grad_norm": 1.7025752756263197, + "learning_rate": 3.668922810378841e-06, + "loss": 0.5188437700271606, + "step": 5010 + }, + { + "epoch": 1.465272700687235, + "grad_norm": 1.7765263620108804, + "learning_rate": 3.6651811791934476e-06, + "loss": 0.5159400701522827, + "step": 5011 + }, + { + "epoch": 1.4655651411025004, + "grad_norm": 1.4463295265937102, + "learning_rate": 3.6614410287334377e-06, + "loss": 0.478866845369339, + "step": 5012 + }, + { + "epoch": 1.4658575815177657, + "grad_norm": 1.6006806590634375, + "learning_rate": 3.6577023598730486e-06, + "loss": 0.5509926080703735, + "step": 5013 + }, + { + "epoch": 1.4661500219330312, + "grad_norm": 1.5613591503777215, + "learning_rate": 3.6539651734861705e-06, + "loss": 0.4872981309890747, + "step": 5014 + }, + { + "epoch": 1.4664424623482966, + "grad_norm": 1.4569843282992687, + "learning_rate": 3.6502294704463493e-06, + "loss": 0.47478264570236206, + "step": 5015 + }, + { + "epoch": 1.4667349027635619, + "grad_norm": 1.765955621655722, + "learning_rate": 3.646495251626785e-06, + "loss": 0.5140335559844971, + "step": 5016 + }, + { + "epoch": 1.4670273431788274, + "grad_norm": 1.5785594027919339, + "learning_rate": 3.6427625179003223e-06, + "loss": 
0.41033172607421875, + "step": 5017 + }, + { + "epoch": 1.4673197835940928, + "grad_norm": 1.7731644033346952, + "learning_rate": 3.639031270139468e-06, + "loss": 0.4290558099746704, + "step": 5018 + }, + { + "epoch": 1.467612224009358, + "grad_norm": 1.8964888989060893, + "learning_rate": 3.635301509216379e-06, + "loss": 0.5903435349464417, + "step": 5019 + }, + { + "epoch": 1.4679046644246234, + "grad_norm": 1.7302589846174075, + "learning_rate": 3.6315732360028655e-06, + "loss": 0.6410748958587646, + "step": 5020 + }, + { + "epoch": 1.4681971048398887, + "grad_norm": 1.584781169707585, + "learning_rate": 3.6278464513703858e-06, + "loss": 0.5499910712242126, + "step": 5021 + }, + { + "epoch": 1.4684895452551543, + "grad_norm": 1.4876234400926511, + "learning_rate": 3.624121156190056e-06, + "loss": 0.4980154037475586, + "step": 5022 + }, + { + "epoch": 1.4687819856704196, + "grad_norm": 1.7622618315552074, + "learning_rate": 3.6203973513326395e-06, + "loss": 0.5910995006561279, + "step": 5023 + }, + { + "epoch": 1.469074426085685, + "grad_norm": 1.837302229581672, + "learning_rate": 3.6166750376685534e-06, + "loss": 0.6003058552742004, + "step": 5024 + }, + { + "epoch": 1.4693668665009505, + "grad_norm": 2.0086634437416215, + "learning_rate": 3.6129542160678655e-06, + "loss": 0.5655561685562134, + "step": 5025 + }, + { + "epoch": 1.4696593069162158, + "grad_norm": 1.6720399704395428, + "learning_rate": 3.609234887400297e-06, + "loss": 0.713152289390564, + "step": 5026 + }, + { + "epoch": 1.4699517473314812, + "grad_norm": 1.3619130802184511, + "learning_rate": 3.605517052535219e-06, + "loss": 0.41018784046173096, + "step": 5027 + }, + { + "epoch": 1.4702441877467467, + "grad_norm": 1.7429761856148576, + "learning_rate": 3.6018007123416486e-06, + "loss": 0.5852759480476379, + "step": 5028 + }, + { + "epoch": 1.470536628162012, + "grad_norm": 1.6763203292398523, + "learning_rate": 3.598085867688259e-06, + "loss": 0.5942279696464539, + "step": 5029 + }, + { + 
"epoch": 1.4708290685772774, + "grad_norm": 1.5957062749275768, + "learning_rate": 3.594372519443374e-06, + "loss": 0.6265639662742615, + "step": 5030 + }, + { + "epoch": 1.471121508992543, + "grad_norm": 1.6944518172910965, + "learning_rate": 3.5906606684749668e-06, + "loss": 0.4539163112640381, + "step": 5031 + }, + { + "epoch": 1.4714139494078082, + "grad_norm": 1.8810670575321342, + "learning_rate": 3.586950315650658e-06, + "loss": 0.5682815909385681, + "step": 5032 + }, + { + "epoch": 1.4717063898230736, + "grad_norm": 1.5382985580447415, + "learning_rate": 3.583241461837721e-06, + "loss": 0.5188582539558411, + "step": 5033 + }, + { + "epoch": 1.4719988302383389, + "grad_norm": 1.923705094705072, + "learning_rate": 3.5795341079030777e-06, + "loss": 0.501958966255188, + "step": 5034 + }, + { + "epoch": 1.4722912706536042, + "grad_norm": 1.769758245215022, + "learning_rate": 3.5758282547132995e-06, + "loss": 0.5748735666275024, + "step": 5035 + }, + { + "epoch": 1.4725837110688698, + "grad_norm": 1.720811530645175, + "learning_rate": 3.5721239031346067e-06, + "loss": 0.5796875357627869, + "step": 5036 + }, + { + "epoch": 1.472876151484135, + "grad_norm": 1.7760443740240528, + "learning_rate": 3.56842105403287e-06, + "loss": 0.457103431224823, + "step": 5037 + }, + { + "epoch": 1.4731685918994004, + "grad_norm": 1.607843165834991, + "learning_rate": 3.564719708273607e-06, + "loss": 0.5300487875938416, + "step": 5038 + }, + { + "epoch": 1.473461032314666, + "grad_norm": 1.7877129065541937, + "learning_rate": 3.5610198667219886e-06, + "loss": 0.48143619298934937, + "step": 5039 + }, + { + "epoch": 1.4737534727299313, + "grad_norm": 1.9171325817627416, + "learning_rate": 3.557321530242824e-06, + "loss": 0.5523685216903687, + "step": 5040 + }, + { + "epoch": 1.4740459131451966, + "grad_norm": 1.7367077785146405, + "learning_rate": 3.5536246997005785e-06, + "loss": 0.5820931196212769, + "step": 5041 + }, + { + "epoch": 1.4743383535604622, + "grad_norm": 
1.6717570524697325, + "learning_rate": 3.5499293759593656e-06, + "loss": 0.6287394762039185, + "step": 5042 + }, + { + "epoch": 1.4746307939757275, + "grad_norm": 1.737914835396703, + "learning_rate": 3.5462355598829433e-06, + "loss": 0.4621254801750183, + "step": 5043 + }, + { + "epoch": 1.4749232343909928, + "grad_norm": 1.687652415457897, + "learning_rate": 3.5425432523347205e-06, + "loss": 0.5571160316467285, + "step": 5044 + }, + { + "epoch": 1.4752156748062581, + "grad_norm": 1.716802557057107, + "learning_rate": 3.5388524541777492e-06, + "loss": 0.4135715365409851, + "step": 5045 + }, + { + "epoch": 1.4755081152215237, + "grad_norm": 1.868527213017395, + "learning_rate": 3.535163166274733e-06, + "loss": 0.524153470993042, + "step": 5046 + }, + { + "epoch": 1.475800555636789, + "grad_norm": 1.9441558365554423, + "learning_rate": 3.5314753894880205e-06, + "loss": 0.6330267786979675, + "step": 5047 + }, + { + "epoch": 1.4760929960520544, + "grad_norm": 1.7270524835767156, + "learning_rate": 3.527789124679605e-06, + "loss": 0.46210330724716187, + "step": 5048 + }, + { + "epoch": 1.4763854364673197, + "grad_norm": 1.8799684878196978, + "learning_rate": 3.524104372711131e-06, + "loss": 0.49293309450149536, + "step": 5049 + }, + { + "epoch": 1.4766778768825852, + "grad_norm": 1.7601042593478657, + "learning_rate": 3.520421134443889e-06, + "loss": 0.6196815967559814, + "step": 5050 + }, + { + "epoch": 1.4769703172978506, + "grad_norm": 1.568738566408146, + "learning_rate": 3.5167394107388064e-06, + "loss": 0.42622530460357666, + "step": 5051 + }, + { + "epoch": 1.4772627577131159, + "grad_norm": 1.6087834768838942, + "learning_rate": 3.513059202456468e-06, + "loss": 0.4475107491016388, + "step": 5052 + }, + { + "epoch": 1.4775551981283814, + "grad_norm": 1.549049360877832, + "learning_rate": 3.5093805104571e-06, + "loss": 0.4295683205127716, + "step": 5053 + }, + { + "epoch": 1.4778476385436468, + "grad_norm": 1.512499491264911, + "learning_rate": 
3.505703335600573e-06, + "loss": 0.5331642627716064, + "step": 5054 + }, + { + "epoch": 1.478140078958912, + "grad_norm": 1.7125050045051866, + "learning_rate": 3.5020276787464058e-06, + "loss": 0.5615599155426025, + "step": 5055 + }, + { + "epoch": 1.4784325193741776, + "grad_norm": 1.470462641632426, + "learning_rate": 3.4983535407537618e-06, + "loss": 0.5611366033554077, + "step": 5056 + }, + { + "epoch": 1.478724959789443, + "grad_norm": 2.0861134690908325, + "learning_rate": 3.494680922481445e-06, + "loss": 0.5891577005386353, + "step": 5057 + }, + { + "epoch": 1.4790174002047083, + "grad_norm": 1.981139638659905, + "learning_rate": 3.491009824787911e-06, + "loss": 0.5583761930465698, + "step": 5058 + }, + { + "epoch": 1.4793098406199736, + "grad_norm": 1.5020288470897978, + "learning_rate": 3.4873402485312548e-06, + "loss": 0.5001339912414551, + "step": 5059 + }, + { + "epoch": 1.479602281035239, + "grad_norm": 1.445341864944132, + "learning_rate": 3.4836721945692175e-06, + "loss": 0.5050641894340515, + "step": 5060 + }, + { + "epoch": 1.4798947214505045, + "grad_norm": 1.5825314066620513, + "learning_rate": 3.4800056637591885e-06, + "loss": 0.5377815365791321, + "step": 5061 + }, + { + "epoch": 1.4801871618657698, + "grad_norm": 1.6490614330323619, + "learning_rate": 3.4763406569581892e-06, + "loss": 0.5517662763595581, + "step": 5062 + }, + { + "epoch": 1.4804796022810351, + "grad_norm": 1.7535356829599726, + "learning_rate": 3.4726771750228984e-06, + "loss": 0.5908320546150208, + "step": 5063 + }, + { + "epoch": 1.4807720426963007, + "grad_norm": 1.640782634903257, + "learning_rate": 3.4690152188096293e-06, + "loss": 0.5169299840927124, + "step": 5064 + }, + { + "epoch": 1.481064483111566, + "grad_norm": 1.5566091974805318, + "learning_rate": 3.4653547891743457e-06, + "loss": 0.6198064088821411, + "step": 5065 + }, + { + "epoch": 1.4813569235268313, + "grad_norm": 1.7822104060368598, + "learning_rate": 3.4616958869726436e-06, + "loss": 0.4971558153629303, 
+ "step": 5066 + }, + { + "epoch": 1.481649363942097, + "grad_norm": 1.8117473020924466, + "learning_rate": 3.4580385130597794e-06, + "loss": 0.556640088558197, + "step": 5067 + }, + { + "epoch": 1.4819418043573622, + "grad_norm": 1.7297037385384992, + "learning_rate": 3.4543826682906358e-06, + "loss": 0.5336956977844238, + "step": 5068 + }, + { + "epoch": 1.4822342447726276, + "grad_norm": 1.8723627634024749, + "learning_rate": 3.4507283535197454e-06, + "loss": 0.5185145735740662, + "step": 5069 + }, + { + "epoch": 1.482526685187893, + "grad_norm": 1.5962927751585108, + "learning_rate": 3.447075569601287e-06, + "loss": 0.5460748672485352, + "step": 5070 + }, + { + "epoch": 1.4828191256031584, + "grad_norm": 1.7486536420516579, + "learning_rate": 3.4434243173890667e-06, + "loss": 0.5860699415206909, + "step": 5071 + }, + { + "epoch": 1.4831115660184238, + "grad_norm": 1.5377337582646984, + "learning_rate": 3.4397745977365482e-06, + "loss": 0.5818450450897217, + "step": 5072 + }, + { + "epoch": 1.483404006433689, + "grad_norm": 1.6591511763241749, + "learning_rate": 3.4361264114968316e-06, + "loss": 0.4205876588821411, + "step": 5073 + }, + { + "epoch": 1.4836964468489544, + "grad_norm": 1.6097740909701606, + "learning_rate": 3.4324797595226567e-06, + "loss": 0.5503501892089844, + "step": 5074 + }, + { + "epoch": 1.48398888726422, + "grad_norm": 1.7613851561474803, + "learning_rate": 3.4288346426664063e-06, + "loss": 0.5388503074645996, + "step": 5075 + }, + { + "epoch": 1.4842813276794853, + "grad_norm": 1.5726280695427581, + "learning_rate": 3.4251910617801054e-06, + "loss": 0.5866841673851013, + "step": 5076 + }, + { + "epoch": 1.4845737680947506, + "grad_norm": 1.7063663913828162, + "learning_rate": 3.4215490177154176e-06, + "loss": 0.5377970337867737, + "step": 5077 + }, + { + "epoch": 1.4848662085100162, + "grad_norm": 2.013961516297246, + "learning_rate": 3.41790851132365e-06, + "loss": 0.6311028003692627, + "step": 5078 + }, + { + "epoch": 
1.4851586489252815, + "grad_norm": 1.7100175604987324, + "learning_rate": 3.414269543455747e-06, + "loss": 0.5226441621780396, + "step": 5079 + }, + { + "epoch": 1.4854510893405468, + "grad_norm": 1.73285658375087, + "learning_rate": 3.410632114962298e-06, + "loss": 0.6306775212287903, + "step": 5080 + }, + { + "epoch": 1.4857435297558124, + "grad_norm": 1.8061194998201888, + "learning_rate": 3.406996226693531e-06, + "loss": 0.5432136058807373, + "step": 5081 + }, + { + "epoch": 1.4860359701710777, + "grad_norm": 1.564250952291821, + "learning_rate": 3.403361879499305e-06, + "loss": 0.4218754470348358, + "step": 5082 + }, + { + "epoch": 1.486328410586343, + "grad_norm": 1.7436245532279955, + "learning_rate": 3.3997290742291335e-06, + "loss": 0.5121650099754333, + "step": 5083 + }, + { + "epoch": 1.4866208510016083, + "grad_norm": 1.713174617853516, + "learning_rate": 3.39609781173216e-06, + "loss": 0.5489382743835449, + "step": 5084 + }, + { + "epoch": 1.486913291416874, + "grad_norm": 1.7492646537049668, + "learning_rate": 3.3924680928571694e-06, + "loss": 0.4190993309020996, + "step": 5085 + }, + { + "epoch": 1.4872057318321392, + "grad_norm": 2.012504952292692, + "learning_rate": 3.388839918452589e-06, + "loss": 0.5927796363830566, + "step": 5086 + }, + { + "epoch": 1.4874981722474045, + "grad_norm": 1.5385674447124333, + "learning_rate": 3.3852132893664803e-06, + "loss": 0.43746429681777954, + "step": 5087 + }, + { + "epoch": 1.4877906126626699, + "grad_norm": 1.592965785800762, + "learning_rate": 3.381588206446548e-06, + "loss": 0.41599413752555847, + "step": 5088 + }, + { + "epoch": 1.4880830530779354, + "grad_norm": 1.640030018717508, + "learning_rate": 3.3779646705401305e-06, + "loss": 0.5803484320640564, + "step": 5089 + }, + { + "epoch": 1.4883754934932008, + "grad_norm": 1.6162932555816476, + "learning_rate": 3.3743426824942082e-06, + "loss": 0.5277384519577026, + "step": 5090 + }, + { + "epoch": 1.488667933908466, + "grad_norm": 1.5149011711130314, + 
"learning_rate": 3.370722243155401e-06, + "loss": 0.5842317342758179, + "step": 5091 + }, + { + "epoch": 1.4889603743237316, + "grad_norm": 1.8602157485440332, + "learning_rate": 3.367103353369965e-06, + "loss": 0.5394416451454163, + "step": 5092 + }, + { + "epoch": 1.489252814738997, + "grad_norm": 1.6652727466684587, + "learning_rate": 3.3634860139837877e-06, + "loss": 0.5457144975662231, + "step": 5093 + }, + { + "epoch": 1.4895452551542623, + "grad_norm": 1.6270719194791377, + "learning_rate": 3.3598702258424044e-06, + "loss": 0.49552473425865173, + "step": 5094 + }, + { + "epoch": 1.4898376955695278, + "grad_norm": 1.8756044563450258, + "learning_rate": 3.3562559897909842e-06, + "loss": 0.5922214984893799, + "step": 5095 + }, + { + "epoch": 1.4901301359847932, + "grad_norm": 1.6902952443841357, + "learning_rate": 3.35264330667433e-06, + "loss": 0.5844507217407227, + "step": 5096 + }, + { + "epoch": 1.4904225764000585, + "grad_norm": 1.6441848915551236, + "learning_rate": 3.3490321773368872e-06, + "loss": 0.5096029043197632, + "step": 5097 + }, + { + "epoch": 1.4907150168153238, + "grad_norm": 1.8296617417124132, + "learning_rate": 3.345422602622734e-06, + "loss": 0.6343984603881836, + "step": 5098 + }, + { + "epoch": 1.4910074572305891, + "grad_norm": 1.7032992920741425, + "learning_rate": 3.3418145833755875e-06, + "loss": 0.5319832563400269, + "step": 5099 + }, + { + "epoch": 1.4912998976458547, + "grad_norm": 1.8127365107062148, + "learning_rate": 3.3382081204388006e-06, + "loss": 0.6453676819801331, + "step": 5100 + }, + { + "epoch": 1.49159233806112, + "grad_norm": 1.7068058578414038, + "learning_rate": 3.33460321465536e-06, + "loss": 0.5129305720329285, + "step": 5101 + }, + { + "epoch": 1.4918847784763853, + "grad_norm": 1.7103748262888143, + "learning_rate": 3.3309998668678912e-06, + "loss": 0.5680958032608032, + "step": 5102 + }, + { + "epoch": 1.492177218891651, + "grad_norm": 1.654140366409291, + "learning_rate": 3.32739807791866e-06, + "loss": 
0.5959445834159851, + "step": 5103 + }, + { + "epoch": 1.4924696593069162, + "grad_norm": 1.5546485584978795, + "learning_rate": 3.3237978486495536e-06, + "loss": 0.5549102425575256, + "step": 5104 + }, + { + "epoch": 1.4927620997221815, + "grad_norm": 1.5522771682213525, + "learning_rate": 3.3201991799021084e-06, + "loss": 0.4219816327095032, + "step": 5105 + }, + { + "epoch": 1.493054540137447, + "grad_norm": 1.8150814493123832, + "learning_rate": 3.3166020725174906e-06, + "loss": 0.46013522148132324, + "step": 5106 + }, + { + "epoch": 1.4933469805527124, + "grad_norm": 2.2057259724068885, + "learning_rate": 3.3130065273365033e-06, + "loss": 0.6013174057006836, + "step": 5107 + }, + { + "epoch": 1.4936394209679777, + "grad_norm": 1.9081850485789635, + "learning_rate": 3.3094125451995827e-06, + "loss": 0.7097996473312378, + "step": 5108 + }, + { + "epoch": 1.4939318613832433, + "grad_norm": 1.6725604100107134, + "learning_rate": 3.305820126946799e-06, + "loss": 0.6704884767532349, + "step": 5109 + }, + { + "epoch": 1.4942243017985086, + "grad_norm": 1.735486744932862, + "learning_rate": 3.3022292734178605e-06, + "loss": 0.5211119651794434, + "step": 5110 + }, + { + "epoch": 1.494516742213774, + "grad_norm": 1.7718418689676594, + "learning_rate": 3.2986399854521065e-06, + "loss": 0.5830427408218384, + "step": 5111 + }, + { + "epoch": 1.4948091826290393, + "grad_norm": 1.574048881929475, + "learning_rate": 3.2950522638885106e-06, + "loss": 0.5647883415222168, + "step": 5112 + }, + { + "epoch": 1.4951016230443046, + "grad_norm": 1.3783682279274316, + "learning_rate": 3.2914661095656807e-06, + "loss": 0.46678125858306885, + "step": 5113 + }, + { + "epoch": 1.4953940634595702, + "grad_norm": 1.768460226758459, + "learning_rate": 3.287881523321863e-06, + "loss": 0.5391934514045715, + "step": 5114 + }, + { + "epoch": 1.4956865038748355, + "grad_norm": 1.532723290545503, + "learning_rate": 3.284298505994926e-06, + "loss": 0.4039243459701538, + "step": 5115 + }, + { + 
"epoch": 1.4959789442901008, + "grad_norm": 1.8718379114919181, + "learning_rate": 3.2807170584223802e-06, + "loss": 0.6187412738800049, + "step": 5116 + }, + { + "epoch": 1.4962713847053664, + "grad_norm": 1.730072311160077, + "learning_rate": 3.277137181441369e-06, + "loss": 0.5165137648582458, + "step": 5117 + }, + { + "epoch": 1.4965638251206317, + "grad_norm": 1.7402216150888872, + "learning_rate": 3.273558875888665e-06, + "loss": 0.6315420866012573, + "step": 5118 + }, + { + "epoch": 1.496856265535897, + "grad_norm": 1.6811341442796868, + "learning_rate": 3.269982142600677e-06, + "loss": 0.5522993206977844, + "step": 5119 + }, + { + "epoch": 1.4971487059511626, + "grad_norm": 1.8103742244487522, + "learning_rate": 3.266406982413444e-06, + "loss": 0.5751636028289795, + "step": 5120 + }, + { + "epoch": 1.4974411463664279, + "grad_norm": 1.8346826868047423, + "learning_rate": 3.262833396162637e-06, + "loss": 0.5552358031272888, + "step": 5121 + }, + { + "epoch": 1.4977335867816932, + "grad_norm": 1.4553347230926987, + "learning_rate": 3.259261384683562e-06, + "loss": 0.4971257150173187, + "step": 5122 + }, + { + "epoch": 1.4980260271969585, + "grad_norm": 1.7328825599332134, + "learning_rate": 3.2556909488111533e-06, + "loss": 0.3803454637527466, + "step": 5123 + }, + { + "epoch": 1.498318467612224, + "grad_norm": 1.7448185442015292, + "learning_rate": 3.25212208937998e-06, + "loss": 0.45348531007766724, + "step": 5124 + }, + { + "epoch": 1.4986109080274894, + "grad_norm": 1.6593501166731528, + "learning_rate": 3.2485548072242403e-06, + "loss": 0.4839708209037781, + "step": 5125 + }, + { + "epoch": 1.4989033484427547, + "grad_norm": 1.7004886969570365, + "learning_rate": 3.244989103177768e-06, + "loss": 0.4743500351905823, + "step": 5126 + }, + { + "epoch": 1.49919578885802, + "grad_norm": 1.7042585723205583, + "learning_rate": 3.241424978074018e-06, + "loss": 0.558182954788208, + "step": 5127 + }, + { + "epoch": 1.4994882292732856, + "grad_norm": 
1.5886443982701122, + "learning_rate": 3.2378624327460874e-06, + "loss": 0.41309911012649536, + "step": 5128 + }, + { + "epoch": 1.499780669688551, + "grad_norm": 1.7452725700601364, + "learning_rate": 3.2343014680266984e-06, + "loss": 0.5627751350402832, + "step": 5129 + }, + { + "epoch": 1.5000731101038163, + "grad_norm": 1.8911076385977756, + "learning_rate": 3.230742084748204e-06, + "loss": 0.5374714732170105, + "step": 5130 + }, + { + "epoch": 1.5003655505190818, + "grad_norm": 1.7659792305895352, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.4264039993286133, + "step": 5131 + }, + { + "epoch": 1.5006579909343472, + "grad_norm": 1.8312136055327797, + "learning_rate": 3.223628065841472e-06, + "loss": 0.44204217195510864, + "step": 5132 + }, + { + "epoch": 1.5009504313496125, + "grad_norm": 1.6892686547824762, + "learning_rate": 3.220073431876092e-06, + "loss": 0.5322041511535645, + "step": 5133 + }, + { + "epoch": 1.501242871764878, + "grad_norm": 1.6801975106342348, + "learning_rate": 3.216520382677324e-06, + "loss": 0.4741417169570923, + "step": 5134 + }, + { + "epoch": 1.5015353121801431, + "grad_norm": 1.9712166683153383, + "learning_rate": 3.212968919075672e-06, + "loss": 0.7069851756095886, + "step": 5135 + }, + { + "epoch": 1.5018277525954087, + "grad_norm": 1.6644566597906936, + "learning_rate": 3.2094190419012694e-06, + "loss": 0.6049044132232666, + "step": 5136 + }, + { + "epoch": 1.5021201930106742, + "grad_norm": 1.6420500389509403, + "learning_rate": 3.2058707519838817e-06, + "loss": 0.556586503982544, + "step": 5137 + }, + { + "epoch": 1.5024126334259393, + "grad_norm": 1.4612168804015682, + "learning_rate": 3.202324050152894e-06, + "loss": 0.46489936113357544, + "step": 5138 + }, + { + "epoch": 1.5027050738412049, + "grad_norm": 1.6808104719845611, + "learning_rate": 3.1987789372373292e-06, + "loss": 0.5332333445549011, + "step": 5139 + }, + { + "epoch": 1.5029975142564702, + "grad_norm": 1.5897163584111842, + "learning_rate": 
3.1952354140658346e-06, + "loss": 0.5547586679458618, + "step": 5140 + }, + { + "epoch": 1.5032899546717355, + "grad_norm": 1.7343008366786887, + "learning_rate": 3.1916934814666858e-06, + "loss": 0.5500372648239136, + "step": 5141 + }, + { + "epoch": 1.503582395087001, + "grad_norm": 1.6657659858957796, + "learning_rate": 3.1881531402677934e-06, + "loss": 0.5065571069717407, + "step": 5142 + }, + { + "epoch": 1.5038748355022664, + "grad_norm": 2.106659003681642, + "learning_rate": 3.1846143912966887e-06, + "loss": 0.5942833423614502, + "step": 5143 + }, + { + "epoch": 1.5041672759175317, + "grad_norm": 1.5318136638727409, + "learning_rate": 3.181077235380531e-06, + "loss": 0.4089720547199249, + "step": 5144 + }, + { + "epoch": 1.5044597163327973, + "grad_norm": 1.959628279475518, + "learning_rate": 3.1775416733461107e-06, + "loss": 0.5360317230224609, + "step": 5145 + }, + { + "epoch": 1.5047521567480626, + "grad_norm": 1.8497642502339247, + "learning_rate": 3.174007706019845e-06, + "loss": 0.5403856635093689, + "step": 5146 + }, + { + "epoch": 1.505044597163328, + "grad_norm": 1.583723666722825, + "learning_rate": 3.1704753342277727e-06, + "loss": 0.5377147197723389, + "step": 5147 + }, + { + "epoch": 1.5053370375785935, + "grad_norm": 1.908833197627838, + "learning_rate": 3.166944558795567e-06, + "loss": 0.49888312816619873, + "step": 5148 + }, + { + "epoch": 1.5056294779938586, + "grad_norm": 1.604723023798687, + "learning_rate": 3.1634153805485245e-06, + "loss": 0.5105957984924316, + "step": 5149 + }, + { + "epoch": 1.5059219184091241, + "grad_norm": 1.530550544138999, + "learning_rate": 3.1598878003115694e-06, + "loss": 0.5653882026672363, + "step": 5150 + }, + { + "epoch": 1.5062143588243895, + "grad_norm": 1.7528922447010231, + "learning_rate": 3.1563618189092536e-06, + "loss": 0.5293145179748535, + "step": 5151 + }, + { + "epoch": 1.5065067992396548, + "grad_norm": 1.7049104339852403, + "learning_rate": 3.1528374371657524e-06, + "loss": 0.5852463841438293, 
+ "step": 5152 + }, + { + "epoch": 1.5067992396549204, + "grad_norm": 1.7074372465536334, + "learning_rate": 3.1493146559048683e-06, + "loss": 0.5986759662628174, + "step": 5153 + }, + { + "epoch": 1.5070916800701857, + "grad_norm": 1.8410699226798701, + "learning_rate": 3.1457934759500298e-06, + "loss": 0.6363133788108826, + "step": 5154 + }, + { + "epoch": 1.507384120485451, + "grad_norm": 1.7703119171725752, + "learning_rate": 3.1422738981242927e-06, + "loss": 0.4757901430130005, + "step": 5155 + }, + { + "epoch": 1.5076765609007166, + "grad_norm": 1.8042941675603332, + "learning_rate": 3.1387559232503374e-06, + "loss": 0.7614980936050415, + "step": 5156 + }, + { + "epoch": 1.5079690013159819, + "grad_norm": 1.8353916940267578, + "learning_rate": 3.13523955215047e-06, + "loss": 0.5739883184432983, + "step": 5157 + }, + { + "epoch": 1.5082614417312472, + "grad_norm": 1.6405466984899346, + "learning_rate": 3.131724785646616e-06, + "loss": 0.5893388390541077, + "step": 5158 + }, + { + "epoch": 1.5085538821465128, + "grad_norm": 1.4613031069188664, + "learning_rate": 3.1282116245603333e-06, + "loss": 0.5809957981109619, + "step": 5159 + }, + { + "epoch": 1.508846322561778, + "grad_norm": 1.558509757762028, + "learning_rate": 3.124700069712803e-06, + "loss": 0.5651090741157532, + "step": 5160 + }, + { + "epoch": 1.5091387629770434, + "grad_norm": 1.5870160926102073, + "learning_rate": 3.1211901219248273e-06, + "loss": 0.3736303448677063, + "step": 5161 + }, + { + "epoch": 1.509431203392309, + "grad_norm": 1.744264206007829, + "learning_rate": 3.117681782016838e-06, + "loss": 0.5501068234443665, + "step": 5162 + }, + { + "epoch": 1.509723643807574, + "grad_norm": 1.7377852819958348, + "learning_rate": 3.1141750508088865e-06, + "loss": 0.6210630536079407, + "step": 5163 + }, + { + "epoch": 1.5100160842228396, + "grad_norm": 1.5741938339988393, + "learning_rate": 3.110669929120651e-06, + "loss": 0.5722042322158813, + "step": 5164 + }, + { + "epoch": 1.510308524638105, + 
"grad_norm": 1.617906406413033, + "learning_rate": 3.107166417771431e-06, + "loss": 0.5813776254653931, + "step": 5165 + }, + { + "epoch": 1.5106009650533703, + "grad_norm": 1.5816945478856634, + "learning_rate": 3.1036645175801515e-06, + "loss": 0.4911368787288666, + "step": 5166 + }, + { + "epoch": 1.5108934054686358, + "grad_norm": 1.5812988749732655, + "learning_rate": 3.100164229365361e-06, + "loss": 0.5136172771453857, + "step": 5167 + }, + { + "epoch": 1.5111858458839011, + "grad_norm": 1.7202185949801794, + "learning_rate": 3.096665553945234e-06, + "loss": 0.5746543407440186, + "step": 5168 + }, + { + "epoch": 1.5114782862991665, + "grad_norm": 1.8577610332100818, + "learning_rate": 3.0931684921375572e-06, + "loss": 0.4949193000793457, + "step": 5169 + }, + { + "epoch": 1.511770726714432, + "grad_norm": 1.6744220879324234, + "learning_rate": 3.089673044759751e-06, + "loss": 0.5732932090759277, + "step": 5170 + }, + { + "epoch": 1.5120631671296973, + "grad_norm": 1.5865659073822531, + "learning_rate": 3.086179212628855e-06, + "loss": 0.5329696536064148, + "step": 5171 + }, + { + "epoch": 1.5123556075449627, + "grad_norm": 1.7970382860153173, + "learning_rate": 3.082686996561531e-06, + "loss": 0.631770670413971, + "step": 5172 + }, + { + "epoch": 1.5126480479602282, + "grad_norm": 1.5998021767601671, + "learning_rate": 3.0791963973740646e-06, + "loss": 0.5183405876159668, + "step": 5173 + }, + { + "epoch": 1.5129404883754933, + "grad_norm": 1.7133603210505308, + "learning_rate": 3.075707415882361e-06, + "loss": 0.5616034269332886, + "step": 5174 + }, + { + "epoch": 1.5132329287907589, + "grad_norm": 1.5912245556380846, + "learning_rate": 3.0722200529019477e-06, + "loss": 0.48513877391815186, + "step": 5175 + }, + { + "epoch": 1.5135253692060244, + "grad_norm": 1.574805808870548, + "learning_rate": 3.068734309247976e-06, + "loss": 0.5226399898529053, + "step": 5176 + }, + { + "epoch": 1.5138178096212895, + "grad_norm": 1.592402045128277, + "learning_rate": 
3.0652501857352167e-06, + "loss": 0.48817533254623413, + "step": 5177 + }, + { + "epoch": 1.514110250036555, + "grad_norm": 1.5523305292465257, + "learning_rate": 3.061767683178063e-06, + "loss": 0.4163327217102051, + "step": 5178 + }, + { + "epoch": 1.5144026904518204, + "grad_norm": 1.6254224030737643, + "learning_rate": 3.058286802390531e-06, + "loss": 0.5984256267547607, + "step": 5179 + }, + { + "epoch": 1.5146951308670857, + "grad_norm": 1.8006518354372911, + "learning_rate": 3.054807544186249e-06, + "loss": 0.47233515977859497, + "step": 5180 + }, + { + "epoch": 1.5149875712823513, + "grad_norm": 1.6896342506826862, + "learning_rate": 3.0513299093784766e-06, + "loss": 0.5545482635498047, + "step": 5181 + }, + { + "epoch": 1.5152800116976166, + "grad_norm": 1.5925171354605219, + "learning_rate": 3.047853898780089e-06, + "loss": 0.46200019121170044, + "step": 5182 + }, + { + "epoch": 1.515572452112882, + "grad_norm": 1.7986358499610187, + "learning_rate": 3.0443795132035824e-06, + "loss": 0.6146235466003418, + "step": 5183 + }, + { + "epoch": 1.5158648925281475, + "grad_norm": 1.6180210942837954, + "learning_rate": 3.040906753461075e-06, + "loss": 0.5653461217880249, + "step": 5184 + }, + { + "epoch": 1.5161573329434128, + "grad_norm": 1.7782122645526974, + "learning_rate": 3.0374356203643008e-06, + "loss": 0.6514929533004761, + "step": 5185 + }, + { + "epoch": 1.5164497733586781, + "grad_norm": 1.6488410817366923, + "learning_rate": 3.033966114724618e-06, + "loss": 0.48213401436805725, + "step": 5186 + }, + { + "epoch": 1.5167422137739437, + "grad_norm": 1.8810893536328739, + "learning_rate": 3.0304982373530013e-06, + "loss": 0.4935530424118042, + "step": 5187 + }, + { + "epoch": 1.5170346541892088, + "grad_norm": 1.9406636249591702, + "learning_rate": 3.0270319890600465e-06, + "loss": 0.6435343027114868, + "step": 5188 + }, + { + "epoch": 1.5173270946044743, + "grad_norm": 1.4722259236044228, + "learning_rate": 3.0235673706559675e-06, + "loss": 
0.49350717663764954, + "step": 5189 + }, + { + "epoch": 1.5176195350197397, + "grad_norm": 1.636152242750681, + "learning_rate": 3.0201043829506015e-06, + "loss": 0.4745938777923584, + "step": 5190 + }, + { + "epoch": 1.517911975435005, + "grad_norm": 1.747247707841839, + "learning_rate": 3.0166430267533944e-06, + "loss": 0.5867031812667847, + "step": 5191 + }, + { + "epoch": 1.5182044158502705, + "grad_norm": 2.0836038611604275, + "learning_rate": 3.01318330287342e-06, + "loss": 0.5477231740951538, + "step": 5192 + }, + { + "epoch": 1.5184968562655359, + "grad_norm": 1.5825293698408722, + "learning_rate": 3.0097252121193687e-06, + "loss": 0.5788818597793579, + "step": 5193 + }, + { + "epoch": 1.5187892966808012, + "grad_norm": 1.5819522244244852, + "learning_rate": 3.0062687552995475e-06, + "loss": 0.4967714548110962, + "step": 5194 + }, + { + "epoch": 1.5190817370960668, + "grad_norm": 1.810354148695448, + "learning_rate": 3.002813933221882e-06, + "loss": 0.6427319645881653, + "step": 5195 + }, + { + "epoch": 1.519374177511332, + "grad_norm": 1.7324283900525337, + "learning_rate": 2.999360746693916e-06, + "loss": 0.5615307688713074, + "step": 5196 + }, + { + "epoch": 1.5196666179265974, + "grad_norm": 1.8017068269121923, + "learning_rate": 2.9959091965228102e-06, + "loss": 0.6646313667297363, + "step": 5197 + }, + { + "epoch": 1.519959058341863, + "grad_norm": 1.4648905848591907, + "learning_rate": 2.9924592835153454e-06, + "loss": 0.47536247968673706, + "step": 5198 + }, + { + "epoch": 1.5202514987571283, + "grad_norm": 1.701001149097395, + "learning_rate": 2.9890110084779157e-06, + "loss": 0.5850256681442261, + "step": 5199 + }, + { + "epoch": 1.5205439391723936, + "grad_norm": 1.6650942638342863, + "learning_rate": 2.985564372216536e-06, + "loss": 0.5724887251853943, + "step": 5200 + }, + { + "epoch": 1.5208363795876592, + "grad_norm": 1.6379341688791944, + "learning_rate": 2.9821193755368383e-06, + "loss": 0.5052510499954224, + "step": 5201 + }, + { + 
"epoch": 1.5211288200029243, + "grad_norm": 1.5270508750040293, + "learning_rate": 2.9786760192440644e-06, + "loss": 0.439144492149353, + "step": 5202 + }, + { + "epoch": 1.5214212604181898, + "grad_norm": 1.624134940512823, + "learning_rate": 2.97523430414308e-06, + "loss": 0.4560511112213135, + "step": 5203 + }, + { + "epoch": 1.5217137008334551, + "grad_norm": 1.9447169329839864, + "learning_rate": 2.9717942310383664e-06, + "loss": 0.6848068237304688, + "step": 5204 + }, + { + "epoch": 1.5220061412487205, + "grad_norm": 1.5338251170475576, + "learning_rate": 2.9683558007340184e-06, + "loss": 0.5541313886642456, + "step": 5205 + }, + { + "epoch": 1.522298581663986, + "grad_norm": 1.4921475223936211, + "learning_rate": 2.964919014033749e-06, + "loss": 0.5117338299751282, + "step": 5206 + }, + { + "epoch": 1.5225910220792513, + "grad_norm": 1.8454970950489444, + "learning_rate": 2.9614838717408866e-06, + "loss": 0.5164151191711426, + "step": 5207 + }, + { + "epoch": 1.5228834624945167, + "grad_norm": 1.6612213438595136, + "learning_rate": 2.9580503746583744e-06, + "loss": 0.5461020469665527, + "step": 5208 + }, + { + "epoch": 1.5231759029097822, + "grad_norm": 1.580589085309813, + "learning_rate": 2.9546185235887705e-06, + "loss": 0.4265401065349579, + "step": 5209 + }, + { + "epoch": 1.5234683433250475, + "grad_norm": 1.822483254200033, + "learning_rate": 2.9511883193342505e-06, + "loss": 0.47372496128082275, + "step": 5210 + }, + { + "epoch": 1.5237607837403129, + "grad_norm": 1.5409548150660597, + "learning_rate": 2.9477597626966036e-06, + "loss": 0.43951019644737244, + "step": 5211 + }, + { + "epoch": 1.5240532241555784, + "grad_norm": 2.1038432849237862, + "learning_rate": 2.9443328544772343e-06, + "loss": 0.6514073610305786, + "step": 5212 + }, + { + "epoch": 1.5243456645708435, + "grad_norm": 1.6794879789857167, + "learning_rate": 2.940907595477164e-06, + "loss": 0.523013710975647, + "step": 5213 + }, + { + "epoch": 1.524638104986109, + "grad_norm": 
1.6399154124434079, + "learning_rate": 2.9374839864970194e-06, + "loss": 0.4945281744003296, + "step": 5214 + }, + { + "epoch": 1.5249305454013746, + "grad_norm": 1.83414324289986, + "learning_rate": 2.9340620283370525e-06, + "loss": 0.5768609046936035, + "step": 5215 + }, + { + "epoch": 1.5252229858166397, + "grad_norm": 1.7611799606025424, + "learning_rate": 2.930641721797125e-06, + "loss": 0.45644205808639526, + "step": 5216 + }, + { + "epoch": 1.5255154262319053, + "grad_norm": 1.5932175762441756, + "learning_rate": 2.92722306767671e-06, + "loss": 0.590227484703064, + "step": 5217 + }, + { + "epoch": 1.5258078666471706, + "grad_norm": 1.8078838529845034, + "learning_rate": 2.9238060667749014e-06, + "loss": 0.5618122816085815, + "step": 5218 + }, + { + "epoch": 1.526100307062436, + "grad_norm": 1.9135498575527394, + "learning_rate": 2.9203907198904027e-06, + "loss": 0.6431877613067627, + "step": 5219 + }, + { + "epoch": 1.5263927474777015, + "grad_norm": 1.5548470750003383, + "learning_rate": 2.916977027821527e-06, + "loss": 0.5019941329956055, + "step": 5220 + }, + { + "epoch": 1.5266851878929668, + "grad_norm": 1.9013308084843434, + "learning_rate": 2.913564991366209e-06, + "loss": 0.5413016080856323, + "step": 5221 + }, + { + "epoch": 1.5269776283082321, + "grad_norm": 1.6880920277336984, + "learning_rate": 2.9101546113219846e-06, + "loss": 0.6546905636787415, + "step": 5222 + }, + { + "epoch": 1.5272700687234977, + "grad_norm": 1.7013707157233615, + "learning_rate": 2.906745888486013e-06, + "loss": 0.5689815878868103, + "step": 5223 + }, + { + "epoch": 1.527562509138763, + "grad_norm": 1.8369848799419313, + "learning_rate": 2.9033388236550632e-06, + "loss": 0.5134810209274292, + "step": 5224 + }, + { + "epoch": 1.5278549495540283, + "grad_norm": 1.4280052174004847, + "learning_rate": 2.8999334176255143e-06, + "loss": 0.4880787134170532, + "step": 5225 + }, + { + "epoch": 1.528147389969294, + "grad_norm": 1.8292283637694566, + "learning_rate": 
2.89652967119336e-06, + "loss": 0.4345950782299042, + "step": 5226 + }, + { + "epoch": 1.528439830384559, + "grad_norm": 1.724451812949585, + "learning_rate": 2.893127585154205e-06, + "loss": 0.43327242136001587, + "step": 5227 + }, + { + "epoch": 1.5287322707998245, + "grad_norm": 1.780345207484487, + "learning_rate": 2.889727160303266e-06, + "loss": 0.6423674821853638, + "step": 5228 + }, + { + "epoch": 1.5290247112150899, + "grad_norm": 1.5540524492201802, + "learning_rate": 2.886328397435374e-06, + "loss": 0.5263554453849792, + "step": 5229 + }, + { + "epoch": 1.5293171516303552, + "grad_norm": 1.6433428703006638, + "learning_rate": 2.882931297344965e-06, + "loss": 0.4111948013305664, + "step": 5230 + }, + { + "epoch": 1.5296095920456207, + "grad_norm": 1.804627326985323, + "learning_rate": 2.8795358608260936e-06, + "loss": 0.43803131580352783, + "step": 5231 + }, + { + "epoch": 1.529902032460886, + "grad_norm": 1.5504311785369362, + "learning_rate": 2.8761420886724223e-06, + "loss": 0.4708956778049469, + "step": 5232 + }, + { + "epoch": 1.5301944728761514, + "grad_norm": 1.7185936460565197, + "learning_rate": 2.8727499816772265e-06, + "loss": 0.5268635749816895, + "step": 5233 + }, + { + "epoch": 1.530486913291417, + "grad_norm": 1.6977720322438927, + "learning_rate": 2.869359540633385e-06, + "loss": 0.5092788934707642, + "step": 5234 + }, + { + "epoch": 1.5307793537066823, + "grad_norm": 1.630735809850627, + "learning_rate": 2.8659707663333958e-06, + "loss": 0.4603293836116791, + "step": 5235 + }, + { + "epoch": 1.5310717941219476, + "grad_norm": 1.7857705195277582, + "learning_rate": 2.8625836595693646e-06, + "loss": 0.545462965965271, + "step": 5236 + }, + { + "epoch": 1.5313642345372132, + "grad_norm": 1.6146415057105645, + "learning_rate": 2.8591982211330073e-06, + "loss": 0.511603832244873, + "step": 5237 + }, + { + "epoch": 1.5316566749524785, + "grad_norm": 1.7935851159627383, + "learning_rate": 2.8558144518156485e-06, + "loss": 0.5076707601547241, + 
"step": 5238 + }, + { + "epoch": 1.5319491153677438, + "grad_norm": 1.7012818042378361, + "learning_rate": 2.852432352408224e-06, + "loss": 0.5923745632171631, + "step": 5239 + }, + { + "epoch": 1.5322415557830094, + "grad_norm": 1.8251553548092714, + "learning_rate": 2.849051923701279e-06, + "loss": 0.5588465332984924, + "step": 5240 + }, + { + "epoch": 1.5325339961982745, + "grad_norm": 1.6493521356208132, + "learning_rate": 2.845673166484969e-06, + "loss": 0.6681923270225525, + "step": 5241 + }, + { + "epoch": 1.53282643661354, + "grad_norm": 1.8683876960783266, + "learning_rate": 2.8422960815490564e-06, + "loss": 0.5702543258666992, + "step": 5242 + }, + { + "epoch": 1.5331188770288053, + "grad_norm": 1.8090012581479555, + "learning_rate": 2.8389206696829165e-06, + "loss": 0.5401744842529297, + "step": 5243 + }, + { + "epoch": 1.5334113174440707, + "grad_norm": 1.6641276436242072, + "learning_rate": 2.8355469316755324e-06, + "loss": 0.43371906876564026, + "step": 5244 + }, + { + "epoch": 1.5337037578593362, + "grad_norm": 1.6323739542625777, + "learning_rate": 2.8321748683154893e-06, + "loss": 0.5598163604736328, + "step": 5245 + }, + { + "epoch": 1.5339961982746015, + "grad_norm": 1.8330291281030966, + "learning_rate": 2.8288044803909896e-06, + "loss": 0.5836831331253052, + "step": 5246 + }, + { + "epoch": 1.5342886386898669, + "grad_norm": 1.6637462764959579, + "learning_rate": 2.8254357686898404e-06, + "loss": 0.5308898687362671, + "step": 5247 + }, + { + "epoch": 1.5345810791051324, + "grad_norm": 1.7589253104867197, + "learning_rate": 2.822068733999459e-06, + "loss": 0.6104828119277954, + "step": 5248 + }, + { + "epoch": 1.5348735195203977, + "grad_norm": 1.9266285032289332, + "learning_rate": 2.8187033771068685e-06, + "loss": 0.48373985290527344, + "step": 5249 + }, + { + "epoch": 1.535165959935663, + "grad_norm": 1.745809860715047, + "learning_rate": 2.8153396987987e-06, + "loss": 0.5213532447814941, + "step": 5250 + }, + { + "epoch": 1.5354584003509286, 
+ "grad_norm": 1.7052291407432676, + "learning_rate": 2.811977699861195e-06, + "loss": 0.5241051912307739, + "step": 5251 + }, + { + "epoch": 1.5357508407661937, + "grad_norm": 1.54399807563896, + "learning_rate": 2.8086173810801974e-06, + "loss": 0.48321712017059326, + "step": 5252 + }, + { + "epoch": 1.5360432811814593, + "grad_norm": 1.831716416150244, + "learning_rate": 2.8052587432411626e-06, + "loss": 0.5352765917778015, + "step": 5253 + }, + { + "epoch": 1.5363357215967248, + "grad_norm": 1.7051244593885417, + "learning_rate": 2.8019017871291522e-06, + "loss": 0.5402188301086426, + "step": 5254 + }, + { + "epoch": 1.53662816201199, + "grad_norm": 1.5780940900489064, + "learning_rate": 2.798546513528837e-06, + "loss": 0.4398813545703888, + "step": 5255 + }, + { + "epoch": 1.5369206024272555, + "grad_norm": 1.6682503262337565, + "learning_rate": 2.7951929232244855e-06, + "loss": 0.5661803483963013, + "step": 5256 + }, + { + "epoch": 1.5372130428425208, + "grad_norm": 1.9389870116334766, + "learning_rate": 2.791841016999982e-06, + "loss": 0.5051732063293457, + "step": 5257 + }, + { + "epoch": 1.5375054832577861, + "grad_norm": 1.7323475801875265, + "learning_rate": 2.788490795638815e-06, + "loss": 0.5712389945983887, + "step": 5258 + }, + { + "epoch": 1.5377979236730517, + "grad_norm": 1.7189716580722423, + "learning_rate": 2.7851422599240773e-06, + "loss": 0.6257319450378418, + "step": 5259 + }, + { + "epoch": 1.538090364088317, + "grad_norm": 1.7862483931054027, + "learning_rate": 2.7817954106384704e-06, + "loss": 0.5788396596908569, + "step": 5260 + }, + { + "epoch": 1.5383828045035823, + "grad_norm": 1.508089974245087, + "learning_rate": 2.7784502485642985e-06, + "loss": 0.37253260612487793, + "step": 5261 + }, + { + "epoch": 1.5386752449188479, + "grad_norm": 2.206166372523085, + "learning_rate": 2.7751067744834726e-06, + "loss": 0.6547001004219055, + "step": 5262 + }, + { + "epoch": 1.5389676853341132, + "grad_norm": 1.551783656656575, + "learning_rate": 
2.77176498917751e-06, + "loss": 0.510914146900177, + "step": 5263 + }, + { + "epoch": 1.5392601257493785, + "grad_norm": 1.731638922465708, + "learning_rate": 2.7684248934275327e-06, + "loss": 0.4387754201889038, + "step": 5264 + }, + { + "epoch": 1.539552566164644, + "grad_norm": 1.573259655998941, + "learning_rate": 2.765086488014268e-06, + "loss": 0.5640195608139038, + "step": 5265 + }, + { + "epoch": 1.5398450065799092, + "grad_norm": 2.3327619392306684, + "learning_rate": 2.7617497737180508e-06, + "loss": 0.5780993103981018, + "step": 5266 + }, + { + "epoch": 1.5401374469951747, + "grad_norm": 1.7296077762304434, + "learning_rate": 2.758414751318813e-06, + "loss": 0.5190057158470154, + "step": 5267 + }, + { + "epoch": 1.54042988741044, + "grad_norm": 1.6180118608432006, + "learning_rate": 2.7550814215960964e-06, + "loss": 0.4204869270324707, + "step": 5268 + }, + { + "epoch": 1.5407223278257054, + "grad_norm": 1.5345717637092124, + "learning_rate": 2.7517497853290477e-06, + "loss": 0.5649294853210449, + "step": 5269 + }, + { + "epoch": 1.541014768240971, + "grad_norm": 1.8541084629609554, + "learning_rate": 2.748419843296416e-06, + "loss": 0.49142545461654663, + "step": 5270 + }, + { + "epoch": 1.5413072086562363, + "grad_norm": 2.006144774477858, + "learning_rate": 2.745091596276557e-06, + "loss": 0.483539879322052, + "step": 5271 + }, + { + "epoch": 1.5415996490715016, + "grad_norm": 1.8772157933692841, + "learning_rate": 2.7417650450474253e-06, + "loss": 0.5400283336639404, + "step": 5272 + }, + { + "epoch": 1.5418920894867671, + "grad_norm": 1.6915167892784866, + "learning_rate": 2.7384401903865844e-06, + "loss": 0.5490765571594238, + "step": 5273 + }, + { + "epoch": 1.5421845299020325, + "grad_norm": 2.267512124400057, + "learning_rate": 2.7351170330711975e-06, + "loss": 0.5434873700141907, + "step": 5274 + }, + { + "epoch": 1.5424769703172978, + "grad_norm": 1.8064402200670897, + "learning_rate": 2.7317955738780333e-06, + "loss": 0.6195025444030762, + 
"step": 5275 + }, + { + "epoch": 1.5427694107325634, + "grad_norm": 1.6751288499310806, + "learning_rate": 2.728475813583462e-06, + "loss": 0.5552260875701904, + "step": 5276 + }, + { + "epoch": 1.5430618511478287, + "grad_norm": 1.8146552227089312, + "learning_rate": 2.725157752963461e-06, + "loss": 0.5430501699447632, + "step": 5277 + }, + { + "epoch": 1.543354291563094, + "grad_norm": 2.1339271947469047, + "learning_rate": 2.7218413927936006e-06, + "loss": 0.633337676525116, + "step": 5278 + }, + { + "epoch": 1.5436467319783596, + "grad_norm": 1.6483089945499043, + "learning_rate": 2.718526733849062e-06, + "loss": 0.4974183738231659, + "step": 5279 + }, + { + "epoch": 1.5439391723936247, + "grad_norm": 2.06701718299293, + "learning_rate": 2.715213776904628e-06, + "loss": 0.5840449929237366, + "step": 5280 + }, + { + "epoch": 1.5442316128088902, + "grad_norm": 1.480832016038464, + "learning_rate": 2.7119025227346807e-06, + "loss": 0.4684101343154907, + "step": 5281 + }, + { + "epoch": 1.5445240532241555, + "grad_norm": 1.5849030043466241, + "learning_rate": 2.7085929721132078e-06, + "loss": 0.48402637243270874, + "step": 5282 + }, + { + "epoch": 1.5448164936394209, + "grad_norm": 1.6449199299919448, + "learning_rate": 2.7052851258137936e-06, + "loss": 0.6122831106185913, + "step": 5283 + }, + { + "epoch": 1.5451089340546864, + "grad_norm": 1.6951661547391625, + "learning_rate": 2.701978984609629e-06, + "loss": 0.5731217861175537, + "step": 5284 + }, + { + "epoch": 1.5454013744699517, + "grad_norm": 1.869052563685483, + "learning_rate": 2.6986745492735044e-06, + "loss": 0.5610803961753845, + "step": 5285 + }, + { + "epoch": 1.545693814885217, + "grad_norm": 1.4190791359210344, + "learning_rate": 2.695371820577811e-06, + "loss": 0.46112626791000366, + "step": 5286 + }, + { + "epoch": 1.5459862553004826, + "grad_norm": 2.1150576387004247, + "learning_rate": 2.692070799294542e-06, + "loss": 0.5368741154670715, + "step": 5287 + }, + { + "epoch": 1.546278695715748, + 
"grad_norm": 1.905327182706658, + "learning_rate": 2.688771486195293e-06, + "loss": 0.5991438627243042, + "step": 5288 + }, + { + "epoch": 1.5465711361310133, + "grad_norm": 1.9084615434749013, + "learning_rate": 2.685473882051254e-06, + "loss": 0.5751149654388428, + "step": 5289 + }, + { + "epoch": 1.5468635765462788, + "grad_norm": 2.0751264575493247, + "learning_rate": 2.682177987633221e-06, + "loss": 0.6055437326431274, + "step": 5290 + }, + { + "epoch": 1.547156016961544, + "grad_norm": 1.8883429200709412, + "learning_rate": 2.6788838037115916e-06, + "loss": 0.6009221076965332, + "step": 5291 + }, + { + "epoch": 1.5474484573768095, + "grad_norm": 1.8170478309101001, + "learning_rate": 2.6755913310563585e-06, + "loss": 0.6071531772613525, + "step": 5292 + }, + { + "epoch": 1.547740897792075, + "grad_norm": 1.4851824864906211, + "learning_rate": 2.6723005704371164e-06, + "loss": 0.4102080464363098, + "step": 5293 + }, + { + "epoch": 1.5480333382073401, + "grad_norm": 1.861843061560023, + "learning_rate": 2.6690115226230663e-06, + "loss": 0.48021870851516724, + "step": 5294 + }, + { + "epoch": 1.5483257786226057, + "grad_norm": 1.916351154521063, + "learning_rate": 2.665724188382999e-06, + "loss": 0.4893236458301544, + "step": 5295 + }, + { + "epoch": 1.548618219037871, + "grad_norm": 1.611822755629755, + "learning_rate": 2.6624385684853095e-06, + "loss": 0.6365019083023071, + "step": 5296 + }, + { + "epoch": 1.5489106594531363, + "grad_norm": 1.8901541843584413, + "learning_rate": 2.659154663697995e-06, + "loss": 0.46510767936706543, + "step": 5297 + }, + { + "epoch": 1.5492030998684019, + "grad_norm": 1.4887188273793392, + "learning_rate": 2.655872474788641e-06, + "loss": 0.4355175495147705, + "step": 5298 + }, + { + "epoch": 1.5494955402836672, + "grad_norm": 1.3536753107928572, + "learning_rate": 2.6525920025244432e-06, + "loss": 0.5180836915969849, + "step": 5299 + }, + { + "epoch": 1.5497879806989325, + "grad_norm": 1.9072335806805663, + "learning_rate": 
2.6493132476721927e-06, + "loss": 0.5597968101501465, + "step": 5300 + }, + { + "epoch": 1.550080421114198, + "grad_norm": 1.7134796878533993, + "learning_rate": 2.646036210998276e-06, + "loss": 0.6581016778945923, + "step": 5301 + }, + { + "epoch": 1.5503728615294634, + "grad_norm": 1.8671635537156963, + "learning_rate": 2.642760893268684e-06, + "loss": 0.4875848889350891, + "step": 5302 + }, + { + "epoch": 1.5506653019447287, + "grad_norm": 1.571897962721608, + "learning_rate": 2.639487295248999e-06, + "loss": 0.4410843253135681, + "step": 5303 + }, + { + "epoch": 1.5509577423599943, + "grad_norm": 1.8113376757557438, + "learning_rate": 2.6362154177044076e-06, + "loss": 0.5829580426216125, + "step": 5304 + }, + { + "epoch": 1.5512501827752594, + "grad_norm": 1.6979805053981243, + "learning_rate": 2.6329452613996886e-06, + "loss": 0.6281459927558899, + "step": 5305 + }, + { + "epoch": 1.551542623190525, + "grad_norm": 1.6778942363253981, + "learning_rate": 2.629676827099222e-06, + "loss": 0.525640606880188, + "step": 5306 + }, + { + "epoch": 1.5518350636057903, + "grad_norm": 1.710219412838542, + "learning_rate": 2.626410115566985e-06, + "loss": 0.5219406485557556, + "step": 5307 + }, + { + "epoch": 1.5521275040210556, + "grad_norm": 1.7812622188686809, + "learning_rate": 2.623145127566555e-06, + "loss": 0.5120927691459656, + "step": 5308 + }, + { + "epoch": 1.5524199444363211, + "grad_norm": 1.856533490372594, + "learning_rate": 2.6198818638610967e-06, + "loss": 0.586410641670227, + "step": 5309 + }, + { + "epoch": 1.5527123848515865, + "grad_norm": 1.726189213717832, + "learning_rate": 2.6166203252133803e-06, + "loss": 0.5014485120773315, + "step": 5310 + }, + { + "epoch": 1.5530048252668518, + "grad_norm": 1.7251785105103856, + "learning_rate": 2.6133605123857707e-06, + "loss": 0.5087070465087891, + "step": 5311 + }, + { + "epoch": 1.5532972656821173, + "grad_norm": 1.9411711444593984, + "learning_rate": 2.610102426140231e-06, + "loss": 0.5829774737358093, + 
"step": 5312 + }, + { + "epoch": 1.5535897060973827, + "grad_norm": 1.9403338817582965, + "learning_rate": 2.6068460672383166e-06, + "loss": 0.5273870229721069, + "step": 5313 + }, + { + "epoch": 1.553882146512648, + "grad_norm": 1.6781304796241345, + "learning_rate": 2.603591436441183e-06, + "loss": 0.528778076171875, + "step": 5314 + }, + { + "epoch": 1.5541745869279135, + "grad_norm": 1.6477790459502455, + "learning_rate": 2.600338534509581e-06, + "loss": 0.4914259612560272, + "step": 5315 + }, + { + "epoch": 1.5544670273431789, + "grad_norm": 1.5838952242674544, + "learning_rate": 2.597087362203855e-06, + "loss": 0.48063480854034424, + "step": 5316 + }, + { + "epoch": 1.5547594677584442, + "grad_norm": 1.6948007690415343, + "learning_rate": 2.593837920283949e-06, + "loss": 0.4406088888645172, + "step": 5317 + }, + { + "epoch": 1.5550519081737098, + "grad_norm": 1.5839061375343884, + "learning_rate": 2.590590209509398e-06, + "loss": 0.5027159452438354, + "step": 5318 + }, + { + "epoch": 1.5553443485889749, + "grad_norm": 1.447462212774582, + "learning_rate": 2.5873442306393357e-06, + "loss": 0.3894188404083252, + "step": 5319 + }, + { + "epoch": 1.5556367890042404, + "grad_norm": 1.8834380096125083, + "learning_rate": 2.584099984432492e-06, + "loss": 0.5393104553222656, + "step": 5320 + }, + { + "epoch": 1.5559292294195057, + "grad_norm": 1.640256381642302, + "learning_rate": 2.580857471647186e-06, + "loss": 0.5701737999916077, + "step": 5321 + }, + { + "epoch": 1.556221669834771, + "grad_norm": 1.9050066043706444, + "learning_rate": 2.577616693041336e-06, + "loss": 0.6173145174980164, + "step": 5322 + }, + { + "epoch": 1.5565141102500366, + "grad_norm": 1.718666562714064, + "learning_rate": 2.5743776493724548e-06, + "loss": 0.534600555896759, + "step": 5323 + }, + { + "epoch": 1.556806550665302, + "grad_norm": 1.7258193752543447, + "learning_rate": 2.571140341397651e-06, + "loss": 0.5205268859863281, + "step": 5324 + }, + { + "epoch": 1.5570989910805673, + 
"grad_norm": 1.9160383524514086, + "learning_rate": 2.5679047698736224e-06, + "loss": 0.5631835460662842, + "step": 5325 + }, + { + "epoch": 1.5573914314958328, + "grad_norm": 1.786367865175988, + "learning_rate": 2.564670935556667e-06, + "loss": 0.5855015516281128, + "step": 5326 + }, + { + "epoch": 1.5576838719110981, + "grad_norm": 1.538967985462843, + "learning_rate": 2.5614388392026735e-06, + "loss": 0.5219928026199341, + "step": 5327 + }, + { + "epoch": 1.5579763123263635, + "grad_norm": 1.6118392863192783, + "learning_rate": 2.5582084815671225e-06, + "loss": 0.50178462266922, + "step": 5328 + }, + { + "epoch": 1.558268752741629, + "grad_norm": 1.65351304969076, + "learning_rate": 2.554979863405094e-06, + "loss": 0.643866777420044, + "step": 5329 + }, + { + "epoch": 1.5585611931568941, + "grad_norm": 1.6117676019433484, + "learning_rate": 2.5517529854712543e-06, + "loss": 0.4976714849472046, + "step": 5330 + }, + { + "epoch": 1.5588536335721597, + "grad_norm": 1.6012275122207043, + "learning_rate": 2.5485278485198716e-06, + "loss": 0.47352612018585205, + "step": 5331 + }, + { + "epoch": 1.5591460739874252, + "grad_norm": 1.5967917267320113, + "learning_rate": 2.5453044533047955e-06, + "loss": 0.6319230794906616, + "step": 5332 + }, + { + "epoch": 1.5594385144026903, + "grad_norm": 1.9005541524381997, + "learning_rate": 2.5420828005794786e-06, + "loss": 0.724555253982544, + "step": 5333 + }, + { + "epoch": 1.5597309548179559, + "grad_norm": 1.987695201205215, + "learning_rate": 2.5388628910969625e-06, + "loss": 0.6235928535461426, + "step": 5334 + }, + { + "epoch": 1.5600233952332212, + "grad_norm": 1.9501926966829706, + "learning_rate": 2.5356447256098805e-06, + "loss": 0.47880417108535767, + "step": 5335 + }, + { + "epoch": 1.5603158356484865, + "grad_norm": 1.451114547860928, + "learning_rate": 2.53242830487046e-06, + "loss": 0.3986828029155731, + "step": 5336 + }, + { + "epoch": 1.560608276063752, + "grad_norm": 1.747029246487311, + "learning_rate": 
2.529213629630519e-06, + "loss": 0.515389084815979, + "step": 5337 + }, + { + "epoch": 1.5609007164790174, + "grad_norm": 1.4773319281213657, + "learning_rate": 2.52600070064147e-06, + "loss": 0.611845076084137, + "step": 5338 + }, + { + "epoch": 1.5611931568942827, + "grad_norm": 1.4758258492307896, + "learning_rate": 2.522789518654314e-06, + "loss": 0.4417461156845093, + "step": 5339 + }, + { + "epoch": 1.5614855973095483, + "grad_norm": 1.819505142519117, + "learning_rate": 2.519580084419646e-06, + "loss": 0.5082979798316956, + "step": 5340 + }, + { + "epoch": 1.5617780377248136, + "grad_norm": 1.6547823991622836, + "learning_rate": 2.516372398687652e-06, + "loss": 0.4535973072052002, + "step": 5341 + }, + { + "epoch": 1.562070478140079, + "grad_norm": 1.5836674832459754, + "learning_rate": 2.513166462208111e-06, + "loss": 0.5528950095176697, + "step": 5342 + }, + { + "epoch": 1.5623629185553445, + "grad_norm": 1.9642626952112248, + "learning_rate": 2.5099622757303865e-06, + "loss": 0.6272662281990051, + "step": 5343 + }, + { + "epoch": 1.5626553589706096, + "grad_norm": 1.6065246572629583, + "learning_rate": 2.506759840003439e-06, + "loss": 0.602135181427002, + "step": 5344 + }, + { + "epoch": 1.5629477993858751, + "grad_norm": 1.6289588222907745, + "learning_rate": 2.5035591557758197e-06, + "loss": 0.6336733102798462, + "step": 5345 + }, + { + "epoch": 1.5632402398011405, + "grad_norm": 1.6487862192612195, + "learning_rate": 2.500360223795668e-06, + "loss": 0.5819063186645508, + "step": 5346 + }, + { + "epoch": 1.5635326802164058, + "grad_norm": 1.9625665043715836, + "learning_rate": 2.4971630448107166e-06, + "loss": 0.6384624242782593, + "step": 5347 + }, + { + "epoch": 1.5638251206316713, + "grad_norm": 1.7408709214756897, + "learning_rate": 2.493967619568285e-06, + "loss": 0.5495754480361938, + "step": 5348 + }, + { + "epoch": 1.5641175610469367, + "grad_norm": 1.7544921790911043, + "learning_rate": 2.490773948815284e-06, + "loss": 0.5661545395851135, + 
"step": 5349 + }, + { + "epoch": 1.564410001462202, + "grad_norm": 1.6122536544450556, + "learning_rate": 2.487582033298217e-06, + "loss": 0.47731083631515503, + "step": 5350 + }, + { + "epoch": 1.5647024418774675, + "grad_norm": 1.6660059461046859, + "learning_rate": 2.4843918737631724e-06, + "loss": 0.5081999897956848, + "step": 5351 + }, + { + "epoch": 1.5649948822927329, + "grad_norm": 1.7409567692793637, + "learning_rate": 2.481203470955832e-06, + "loss": 0.4803314208984375, + "step": 5352 + }, + { + "epoch": 1.5652873227079982, + "grad_norm": 1.5751543533365946, + "learning_rate": 2.4780168256214687e-06, + "loss": 0.5049692392349243, + "step": 5353 + }, + { + "epoch": 1.5655797631232637, + "grad_norm": 1.5980094392584046, + "learning_rate": 2.4748319385049346e-06, + "loss": 0.46404945850372314, + "step": 5354 + }, + { + "epoch": 1.565872203538529, + "grad_norm": 1.8809652221147528, + "learning_rate": 2.471648810350681e-06, + "loss": 0.426737904548645, + "step": 5355 + }, + { + "epoch": 1.5661646439537944, + "grad_norm": 1.8658447876398343, + "learning_rate": 2.4684674419027445e-06, + "loss": 0.511459231376648, + "step": 5356 + }, + { + "epoch": 1.56645708436906, + "grad_norm": 1.6030611377734088, + "learning_rate": 2.4652878339047516e-06, + "loss": 0.5199254155158997, + "step": 5357 + }, + { + "epoch": 1.566749524784325, + "grad_norm": 1.8647690278368902, + "learning_rate": 2.4621099870999156e-06, + "loss": 0.6220999360084534, + "step": 5358 + }, + { + "epoch": 1.5670419651995906, + "grad_norm": 1.6243824818203765, + "learning_rate": 2.4589339022310386e-06, + "loss": 0.598499059677124, + "step": 5359 + }, + { + "epoch": 1.567334405614856, + "grad_norm": 1.6070369897776633, + "learning_rate": 2.455759580040512e-06, + "loss": 0.4726351499557495, + "step": 5360 + }, + { + "epoch": 1.5676268460301213, + "grad_norm": 1.5276631939356082, + "learning_rate": 2.452587021270314e-06, + "loss": 0.4492379426956177, + "step": 5361 + }, + { + "epoch": 1.5679192864453868, + 
"grad_norm": 1.5322598639207448, + "learning_rate": 2.4494162266620105e-06, + "loss": 0.46546655893325806, + "step": 5362 + }, + { + "epoch": 1.5682117268606521, + "grad_norm": 1.5784589531224524, + "learning_rate": 2.446247196956756e-06, + "loss": 0.45048198103904724, + "step": 5363 + }, + { + "epoch": 1.5685041672759175, + "grad_norm": 1.7001549698958467, + "learning_rate": 2.4430799328952935e-06, + "loss": 0.543383002281189, + "step": 5364 + }, + { + "epoch": 1.568796607691183, + "grad_norm": 1.881054972907132, + "learning_rate": 2.4399144352179484e-06, + "loss": 0.560661256313324, + "step": 5365 + }, + { + "epoch": 1.5690890481064483, + "grad_norm": 1.7380225532335671, + "learning_rate": 2.4367507046646367e-06, + "loss": 0.4915887117385864, + "step": 5366 + }, + { + "epoch": 1.5693814885217137, + "grad_norm": 3.6756946542988396, + "learning_rate": 2.433588741974863e-06, + "loss": 0.576668918132782, + "step": 5367 + }, + { + "epoch": 1.5696739289369792, + "grad_norm": 1.9696979271734443, + "learning_rate": 2.4304285478877134e-06, + "loss": 0.615422248840332, + "step": 5368 + }, + { + "epoch": 1.5699663693522443, + "grad_norm": 1.7262412669866045, + "learning_rate": 2.4272701231418706e-06, + "loss": 0.505649209022522, + "step": 5369 + }, + { + "epoch": 1.5702588097675099, + "grad_norm": 1.6721925296757776, + "learning_rate": 2.424113468475593e-06, + "loss": 0.4803265929222107, + "step": 5370 + }, + { + "epoch": 1.5705512501827754, + "grad_norm": 1.5546849518292136, + "learning_rate": 2.4209585846267293e-06, + "loss": 0.43251073360443115, + "step": 5371 + }, + { + "epoch": 1.5708436905980405, + "grad_norm": 1.517432850414526, + "learning_rate": 2.417805472332716e-06, + "loss": 0.6021081209182739, + "step": 5372 + }, + { + "epoch": 1.571136131013306, + "grad_norm": 1.5438721648404399, + "learning_rate": 2.414654132330575e-06, + "loss": 0.5236715078353882, + "step": 5373 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 1.7272971424194805, + "learning_rate": 
2.4115045653569092e-06, + "loss": 0.45632290840148926, + "step": 5374 + }, + { + "epoch": 1.5717210118438367, + "grad_norm": 1.51681371819029, + "learning_rate": 2.408356772147912e-06, + "loss": 0.5745086669921875, + "step": 5375 + }, + { + "epoch": 1.5720134522591023, + "grad_norm": 1.7235832219181546, + "learning_rate": 2.405210753439361e-06, + "loss": 0.6032901406288147, + "step": 5376 + }, + { + "epoch": 1.5723058926743676, + "grad_norm": 1.9887425059975659, + "learning_rate": 2.40206650996662e-06, + "loss": 0.579899787902832, + "step": 5377 + }, + { + "epoch": 1.572598333089633, + "grad_norm": 1.84593228973349, + "learning_rate": 2.3989240424646355e-06, + "loss": 0.5920897722244263, + "step": 5378 + }, + { + "epoch": 1.5728907735048985, + "grad_norm": 1.6814027292095717, + "learning_rate": 2.395783351667941e-06, + "loss": 0.5080469846725464, + "step": 5379 + }, + { + "epoch": 1.5731832139201638, + "grad_norm": 1.6852885660534134, + "learning_rate": 2.392644438310654e-06, + "loss": 0.6438730955123901, + "step": 5380 + }, + { + "epoch": 1.5734756543354291, + "grad_norm": 1.5835392817230642, + "learning_rate": 2.389507303126475e-06, + "loss": 0.6496621370315552, + "step": 5381 + }, + { + "epoch": 1.5737680947506947, + "grad_norm": 2.056471050614057, + "learning_rate": 2.3863719468486925e-06, + "loss": 0.5780459642410278, + "step": 5382 + }, + { + "epoch": 1.5740605351659598, + "grad_norm": 1.6854861118133662, + "learning_rate": 2.3832383702101747e-06, + "loss": 0.47817176580429077, + "step": 5383 + }, + { + "epoch": 1.5743529755812253, + "grad_norm": 1.8294128359408837, + "learning_rate": 2.3801065739433816e-06, + "loss": 0.565629243850708, + "step": 5384 + }, + { + "epoch": 1.5746454159964907, + "grad_norm": 1.6612699899563574, + "learning_rate": 2.376976558780343e-06, + "loss": 0.6291453838348389, + "step": 5385 + }, + { + "epoch": 1.574937856411756, + "grad_norm": 1.538236610732314, + "learning_rate": 2.3738483254526856e-06, + "loss": 0.5309170484542847, + 
"step": 5386 + }, + { + "epoch": 1.5752302968270215, + "grad_norm": 1.5901478294831086, + "learning_rate": 2.370721874691614e-06, + "loss": 0.36860692501068115, + "step": 5387 + }, + { + "epoch": 1.5755227372422869, + "grad_norm": 1.4970687777761233, + "learning_rate": 2.3675972072279172e-06, + "loss": 0.4871997833251953, + "step": 5388 + }, + { + "epoch": 1.5758151776575522, + "grad_norm": 1.7243858787556505, + "learning_rate": 2.3644743237919674e-06, + "loss": 0.5318939685821533, + "step": 5389 + }, + { + "epoch": 1.5761076180728177, + "grad_norm": 1.6509311118620078, + "learning_rate": 2.3613532251137205e-06, + "loss": 0.5851289629936218, + "step": 5390 + }, + { + "epoch": 1.576400058488083, + "grad_norm": 1.7554122423009038, + "learning_rate": 2.358233911922713e-06, + "loss": 0.5535321235656738, + "step": 5391 + }, + { + "epoch": 1.5766924989033484, + "grad_norm": 1.6614076147074466, + "learning_rate": 2.3551163849480664e-06, + "loss": 0.5443980693817139, + "step": 5392 + }, + { + "epoch": 1.576984939318614, + "grad_norm": 1.7236213464789372, + "learning_rate": 2.352000644918483e-06, + "loss": 0.6381241083145142, + "step": 5393 + }, + { + "epoch": 1.5772773797338793, + "grad_norm": 1.7284545309348427, + "learning_rate": 2.348886692562248e-06, + "loss": 0.5710772275924683, + "step": 5394 + }, + { + "epoch": 1.5775698201491446, + "grad_norm": 1.5430684665624785, + "learning_rate": 2.3457745286072307e-06, + "loss": 0.5507428050041199, + "step": 5395 + }, + { + "epoch": 1.5778622605644101, + "grad_norm": 1.4206197407713899, + "learning_rate": 2.342664153780878e-06, + "loss": 0.4475744366645813, + "step": 5396 + }, + { + "epoch": 1.5781547009796753, + "grad_norm": 1.636583588423456, + "learning_rate": 2.339555568810221e-06, + "loss": 0.5237560868263245, + "step": 5397 + }, + { + "epoch": 1.5784471413949408, + "grad_norm": 1.8224385271688819, + "learning_rate": 2.3364487744218735e-06, + "loss": 0.513353705406189, + "step": 5398 + }, + { + "epoch": 1.5787395818102061, 
+ "grad_norm": 1.7286392562782233, + "learning_rate": 2.3333437713420305e-06, + "loss": 0.5986731052398682, + "step": 5399 + }, + { + "epoch": 1.5790320222254715, + "grad_norm": 1.5907081834202914, + "learning_rate": 2.330240560296466e-06, + "loss": 0.5834506750106812, + "step": 5400 + }, + { + "epoch": 1.579324462640737, + "grad_norm": 1.4316449017872799, + "learning_rate": 2.3271391420105384e-06, + "loss": 0.4756021499633789, + "step": 5401 + }, + { + "epoch": 1.5796169030560023, + "grad_norm": 1.828748410964233, + "learning_rate": 2.3240395172091847e-06, + "loss": 0.5524263978004456, + "step": 5402 + }, + { + "epoch": 1.5799093434712677, + "grad_norm": 1.7797701447484084, + "learning_rate": 2.320941686616922e-06, + "loss": 0.5689926743507385, + "step": 5403 + }, + { + "epoch": 1.5802017838865332, + "grad_norm": 2.079791124123793, + "learning_rate": 2.317845650957852e-06, + "loss": 0.5737600326538086, + "step": 5404 + }, + { + "epoch": 1.5804942243017985, + "grad_norm": 2.1591480990218406, + "learning_rate": 2.314751410955652e-06, + "loss": 0.585626482963562, + "step": 5405 + }, + { + "epoch": 1.5807866647170639, + "grad_norm": 1.3475179143489473, + "learning_rate": 2.3116589673335833e-06, + "loss": 0.4410518407821655, + "step": 5406 + }, + { + "epoch": 1.5810791051323294, + "grad_norm": 1.4002471500541231, + "learning_rate": 2.308568320814487e-06, + "loss": 0.49071764945983887, + "step": 5407 + }, + { + "epoch": 1.5813715455475945, + "grad_norm": 1.7384943405251394, + "learning_rate": 2.3054794721207796e-06, + "loss": 0.5332186818122864, + "step": 5408 + }, + { + "epoch": 1.58166398596286, + "grad_norm": 1.672632129609112, + "learning_rate": 2.3023924219744607e-06, + "loss": 0.4655637741088867, + "step": 5409 + }, + { + "epoch": 1.5819564263781256, + "grad_norm": 1.8700821530052487, + "learning_rate": 2.2993071710971115e-06, + "loss": 0.4226027727127075, + "step": 5410 + }, + { + "epoch": 1.5822488667933907, + "grad_norm": 1.662889108823369, + "learning_rate": 
2.2962237202098903e-06, + "loss": 0.5582948923110962, + "step": 5411 + }, + { + "epoch": 1.5825413072086563, + "grad_norm": 1.9177043486104604, + "learning_rate": 2.293142070033535e-06, + "loss": 0.6695314645767212, + "step": 5412 + }, + { + "epoch": 1.5828337476239216, + "grad_norm": 1.3346239854361734, + "learning_rate": 2.2900622212883617e-06, + "loss": 0.39315858483314514, + "step": 5413 + }, + { + "epoch": 1.583126188039187, + "grad_norm": 1.6781692583647863, + "learning_rate": 2.2869841746942666e-06, + "loss": 0.5034759044647217, + "step": 5414 + }, + { + "epoch": 1.5834186284544525, + "grad_norm": 1.9091862181504, + "learning_rate": 2.2839079309707256e-06, + "loss": 0.6739548444747925, + "step": 5415 + }, + { + "epoch": 1.5837110688697178, + "grad_norm": 1.700292089346711, + "learning_rate": 2.2808334908367914e-06, + "loss": 0.4091438949108124, + "step": 5416 + }, + { + "epoch": 1.5840035092849831, + "grad_norm": 1.9132208987373394, + "learning_rate": 2.277760855011094e-06, + "loss": 0.5543409585952759, + "step": 5417 + }, + { + "epoch": 1.5842959497002487, + "grad_norm": 1.5448108643055853, + "learning_rate": 2.2746900242118487e-06, + "loss": 0.44680702686309814, + "step": 5418 + }, + { + "epoch": 1.584588390115514, + "grad_norm": 1.812422444695138, + "learning_rate": 2.271620999156837e-06, + "loss": 0.604156494140625, + "step": 5419 + }, + { + "epoch": 1.5848808305307793, + "grad_norm": 1.7746704953171426, + "learning_rate": 2.268553780563427e-06, + "loss": 0.6055774688720703, + "step": 5420 + }, + { + "epoch": 1.5851732709460449, + "grad_norm": 1.6413153541100303, + "learning_rate": 2.265488369148563e-06, + "loss": 0.5826502442359924, + "step": 5421 + }, + { + "epoch": 1.58546571136131, + "grad_norm": 1.6438604610732335, + "learning_rate": 2.2624247656287658e-06, + "loss": 0.61782306432724, + "step": 5422 + }, + { + "epoch": 1.5857581517765755, + "grad_norm": 1.6412325546038886, + "learning_rate": 2.2593629707201348e-06, + "loss": 0.5561526417732239, + 
"step": 5423 + }, + { + "epoch": 1.5860505921918409, + "grad_norm": 1.934339107757701, + "learning_rate": 2.2563029851383447e-06, + "loss": 0.6122138500213623, + "step": 5424 + }, + { + "epoch": 1.5863430326071062, + "grad_norm": 1.7721974769204, + "learning_rate": 2.2532448095986504e-06, + "loss": 0.5694067478179932, + "step": 5425 + }, + { + "epoch": 1.5866354730223717, + "grad_norm": 2.0424311158796145, + "learning_rate": 2.2501884448158804e-06, + "loss": 0.5243874788284302, + "step": 5426 + }, + { + "epoch": 1.586927913437637, + "grad_norm": 1.8166715080001115, + "learning_rate": 2.2471338915044414e-06, + "loss": 0.5144485831260681, + "step": 5427 + }, + { + "epoch": 1.5872203538529024, + "grad_norm": 1.853424108367526, + "learning_rate": 2.244081150378318e-06, + "loss": 0.5013881325721741, + "step": 5428 + }, + { + "epoch": 1.587512794268168, + "grad_norm": 1.7554305935150418, + "learning_rate": 2.2410302221510704e-06, + "loss": 0.45199382305145264, + "step": 5429 + }, + { + "epoch": 1.5878052346834333, + "grad_norm": 1.7321007114143003, + "learning_rate": 2.2379811075358315e-06, + "loss": 0.4699060022830963, + "step": 5430 + }, + { + "epoch": 1.5880976750986986, + "grad_norm": 1.6542253790144112, + "learning_rate": 2.234933807245314e-06, + "loss": 0.6530928611755371, + "step": 5431 + }, + { + "epoch": 1.5883901155139641, + "grad_norm": 2.1734435533671337, + "learning_rate": 2.2318883219918075e-06, + "loss": 0.653563380241394, + "step": 5432 + }, + { + "epoch": 1.5886825559292295, + "grad_norm": 1.6977334736027891, + "learning_rate": 2.2288446524871743e-06, + "loss": 0.5283595323562622, + "step": 5433 + }, + { + "epoch": 1.5889749963444948, + "grad_norm": 1.8947978942641126, + "learning_rate": 2.2258027994428543e-06, + "loss": 0.4382442831993103, + "step": 5434 + }, + { + "epoch": 1.5892674367597603, + "grad_norm": 1.5530659953902877, + "learning_rate": 2.2227627635698624e-06, + "loss": 0.427448570728302, + "step": 5435 + }, + { + "epoch": 1.5895598771750254, 
+ "grad_norm": 1.9154573086486193, + "learning_rate": 2.2197245455787875e-06, + "loss": 0.5794345140457153, + "step": 5436 + }, + { + "epoch": 1.589852317590291, + "grad_norm": 1.7112908574959096, + "learning_rate": 2.2166881461797953e-06, + "loss": 0.4996277987957001, + "step": 5437 + }, + { + "epoch": 1.5901447580055563, + "grad_norm": 1.6191576283665394, + "learning_rate": 2.213653566082625e-06, + "loss": 0.580248236656189, + "step": 5438 + }, + { + "epoch": 1.5904371984208217, + "grad_norm": 1.7775881602556973, + "learning_rate": 2.210620805996594e-06, + "loss": 0.5173758864402771, + "step": 5439 + }, + { + "epoch": 1.5907296388360872, + "grad_norm": 1.9899233221127093, + "learning_rate": 2.2075898666305908e-06, + "loss": 0.5336873531341553, + "step": 5440 + }, + { + "epoch": 1.5910220792513525, + "grad_norm": 1.6076505919691177, + "learning_rate": 2.204560748693074e-06, + "loss": 0.44921910762786865, + "step": 5441 + }, + { + "epoch": 1.5913145196666179, + "grad_norm": 1.6059480320951056, + "learning_rate": 2.201533452892086e-06, + "loss": 0.46475526690483093, + "step": 5442 + }, + { + "epoch": 1.5916069600818834, + "grad_norm": 1.9029311381102771, + "learning_rate": 2.1985079799352383e-06, + "loss": 0.6213991045951843, + "step": 5443 + }, + { + "epoch": 1.5918994004971487, + "grad_norm": 1.7215123021526133, + "learning_rate": 2.1954843305297138e-06, + "loss": 0.5271334648132324, + "step": 5444 + }, + { + "epoch": 1.592191840912414, + "grad_norm": 2.0224569757299333, + "learning_rate": 2.192462505382277e-06, + "loss": 0.6957610249519348, + "step": 5445 + }, + { + "epoch": 1.5924842813276796, + "grad_norm": 1.474394106198892, + "learning_rate": 2.1894425051992587e-06, + "loss": 0.4935681223869324, + "step": 5446 + }, + { + "epoch": 1.5927767217429447, + "grad_norm": 1.8779887346615283, + "learning_rate": 2.1864243306865663e-06, + "loss": 0.7389976978302002, + "step": 5447 + }, + { + "epoch": 1.5930691621582103, + "grad_norm": 1.6663547791548505, + 
"learning_rate": 2.183407982549679e-06, + "loss": 0.4711039662361145, + "step": 5448 + }, + { + "epoch": 1.5933616025734758, + "grad_norm": 1.7966119865723598, + "learning_rate": 2.180393461493654e-06, + "loss": 0.5640024542808533, + "step": 5449 + }, + { + "epoch": 1.593654042988741, + "grad_norm": 1.8836976245237465, + "learning_rate": 2.1773807682231095e-06, + "loss": 0.5471343994140625, + "step": 5450 + }, + { + "epoch": 1.5939464834040065, + "grad_norm": 1.948314000978572, + "learning_rate": 2.1743699034422483e-06, + "loss": 0.4971361756324768, + "step": 5451 + }, + { + "epoch": 1.5942389238192718, + "grad_norm": 1.816359724345797, + "learning_rate": 2.1713608678548414e-06, + "loss": 0.6338681578636169, + "step": 5452 + }, + { + "epoch": 1.5945313642345371, + "grad_norm": 1.5944974938870278, + "learning_rate": 2.168353662164233e-06, + "loss": 0.5218038558959961, + "step": 5453 + }, + { + "epoch": 1.5948238046498027, + "grad_norm": 1.4835669226996004, + "learning_rate": 2.165348287073339e-06, + "loss": 0.44414108991622925, + "step": 5454 + }, + { + "epoch": 1.595116245065068, + "grad_norm": 1.741912524884312, + "learning_rate": 2.162344743284647e-06, + "loss": 0.5994665622711182, + "step": 5455 + }, + { + "epoch": 1.5954086854803333, + "grad_norm": 1.8002543813503216, + "learning_rate": 2.159343031500217e-06, + "loss": 0.6745023727416992, + "step": 5456 + }, + { + "epoch": 1.5957011258955989, + "grad_norm": 1.8583415901166598, + "learning_rate": 2.1563431524216825e-06, + "loss": 0.4678364396095276, + "step": 5457 + }, + { + "epoch": 1.5959935663108642, + "grad_norm": 1.5733048792098263, + "learning_rate": 2.1533451067502464e-06, + "loss": 0.5792031288146973, + "step": 5458 + }, + { + "epoch": 1.5962860067261295, + "grad_norm": 1.575360514250564, + "learning_rate": 2.1503488951866822e-06, + "loss": 0.48152512311935425, + "step": 5459 + }, + { + "epoch": 1.596578447141395, + "grad_norm": 1.6753593421486697, + "learning_rate": 2.147354518431339e-06, + "loss": 
0.4407780170440674, + "step": 5460 + }, + { + "epoch": 1.5968708875566602, + "grad_norm": 1.5845476508430212, + "learning_rate": 2.1443619771841308e-06, + "loss": 0.41062241792678833, + "step": 5461 + }, + { + "epoch": 1.5971633279719257, + "grad_norm": 1.6329985009235597, + "learning_rate": 2.1413712721445478e-06, + "loss": 0.4564778208732605, + "step": 5462 + }, + { + "epoch": 1.597455768387191, + "grad_norm": 1.7819738842734478, + "learning_rate": 2.1383824040116474e-06, + "loss": 0.4347888231277466, + "step": 5463 + }, + { + "epoch": 1.5977482088024564, + "grad_norm": 1.7547086253653914, + "learning_rate": 2.1353953734840615e-06, + "loss": 0.574216902256012, + "step": 5464 + }, + { + "epoch": 1.598040649217722, + "grad_norm": 1.5449681232026575, + "learning_rate": 2.1324101812599884e-06, + "loss": 0.46540650725364685, + "step": 5465 + }, + { + "epoch": 1.5983330896329873, + "grad_norm": 1.7330971380509632, + "learning_rate": 2.129426828037201e-06, + "loss": 0.5446870923042297, + "step": 5466 + }, + { + "epoch": 1.5986255300482526, + "grad_norm": 1.5387720739202952, + "learning_rate": 2.126445314513038e-06, + "loss": 0.5442406535148621, + "step": 5467 + }, + { + "epoch": 1.5989179704635181, + "grad_norm": 1.5552673745283687, + "learning_rate": 2.1234656413844114e-06, + "loss": 0.48960334062576294, + "step": 5468 + }, + { + "epoch": 1.5992104108787835, + "grad_norm": 1.6554781479614895, + "learning_rate": 2.1204878093477998e-06, + "loss": 0.5053935647010803, + "step": 5469 + }, + { + "epoch": 1.5995028512940488, + "grad_norm": 1.9853825289751812, + "learning_rate": 2.117511819099256e-06, + "loss": 0.5984711647033691, + "step": 5470 + }, + { + "epoch": 1.5997952917093143, + "grad_norm": 1.7887732493049897, + "learning_rate": 2.1145376713344e-06, + "loss": 0.6060935258865356, + "step": 5471 + }, + { + "epoch": 1.6000877321245797, + "grad_norm": 1.7731884284372257, + "learning_rate": 2.111565366748416e-06, + "loss": 0.5640311241149902, + "step": 5472 + }, + { + 
"epoch": 1.600380172539845, + "grad_norm": 1.4780823569090165, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.5127131342887878, + "step": 5473 + }, + { + "epoch": 1.6006726129551105, + "grad_norm": 1.7137118890776333, + "learning_rate": 2.1056262898916747e-06, + "loss": 0.5630159378051758, + "step": 5474 + }, + { + "epoch": 1.6009650533703756, + "grad_norm": 1.6419339983794916, + "learning_rate": 2.1026595190091403e-06, + "loss": 0.4511195421218872, + "step": 5475 + }, + { + "epoch": 1.6012574937856412, + "grad_norm": 1.5933389134682139, + "learning_rate": 2.099694594081927e-06, + "loss": 0.47073638439178467, + "step": 5476 + }, + { + "epoch": 1.6015499342009065, + "grad_norm": 1.7678159005173808, + "learning_rate": 2.0967315158030675e-06, + "loss": 0.47757452726364136, + "step": 5477 + }, + { + "epoch": 1.6018423746161718, + "grad_norm": 1.612539233178663, + "learning_rate": 2.093770284865164e-06, + "loss": 0.4703200161457062, + "step": 5478 + }, + { + "epoch": 1.6021348150314374, + "grad_norm": 1.7112390228319339, + "learning_rate": 2.090810901960385e-06, + "loss": 0.47457796335220337, + "step": 5479 + }, + { + "epoch": 1.6024272554467027, + "grad_norm": 1.6069409002673796, + "learning_rate": 2.087853367780469e-06, + "loss": 0.4907105267047882, + "step": 5480 + }, + { + "epoch": 1.602719695861968, + "grad_norm": 1.8859078577608002, + "learning_rate": 2.0848976830167224e-06, + "loss": 0.5329782962799072, + "step": 5481 + }, + { + "epoch": 1.6030121362772336, + "grad_norm": 1.8407304692969428, + "learning_rate": 2.0819438483600197e-06, + "loss": 0.45858579874038696, + "step": 5482 + }, + { + "epoch": 1.603304576692499, + "grad_norm": 1.7103287599993058, + "learning_rate": 2.0789918645007977e-06, + "loss": 0.47545814514160156, + "step": 5483 + }, + { + "epoch": 1.6035970171077643, + "grad_norm": 1.7521375813446352, + "learning_rate": 2.076041732129066e-06, + "loss": 0.5482660531997681, + "step": 5484 + }, + { + "epoch": 1.6038894575230298, + "grad_norm": 
1.650951498750666, + "learning_rate": 2.0730934519344025e-06, + "loss": 0.5252633094787598, + "step": 5485 + }, + { + "epoch": 1.604181897938295, + "grad_norm": 2.7727108215969882, + "learning_rate": 2.0701470246059472e-06, + "loss": 0.5400367379188538, + "step": 5486 + }, + { + "epoch": 1.6044743383535605, + "grad_norm": 1.5423948281806983, + "learning_rate": 2.0672024508324107e-06, + "loss": 0.4788953363895416, + "step": 5487 + }, + { + "epoch": 1.604766778768826, + "grad_norm": 1.6092306606930025, + "learning_rate": 2.0642597313020685e-06, + "loss": 0.5430850982666016, + "step": 5488 + }, + { + "epoch": 1.6050592191840911, + "grad_norm": 1.8683302543522238, + "learning_rate": 2.061318866702765e-06, + "loss": 0.5833520293235779, + "step": 5489 + }, + { + "epoch": 1.6053516595993567, + "grad_norm": 1.7369107165445012, + "learning_rate": 2.058379857721908e-06, + "loss": 0.5854958295822144, + "step": 5490 + }, + { + "epoch": 1.605644100014622, + "grad_norm": 1.6603772170749127, + "learning_rate": 2.0554427050464742e-06, + "loss": 0.5577352643013, + "step": 5491 + }, + { + "epoch": 1.6059365404298873, + "grad_norm": 1.6757677840410201, + "learning_rate": 2.052507409363004e-06, + "loss": 0.5328816175460815, + "step": 5492 + }, + { + "epoch": 1.6062289808451529, + "grad_norm": 1.7643397031335737, + "learning_rate": 2.0495739713576046e-06, + "loss": 0.5606744289398193, + "step": 5493 + }, + { + "epoch": 1.6065214212604182, + "grad_norm": 1.7836115172074085, + "learning_rate": 2.0466423917159526e-06, + "loss": 0.541358470916748, + "step": 5494 + }, + { + "epoch": 1.6068138616756835, + "grad_norm": 2.1455011977132714, + "learning_rate": 2.0437126711232826e-06, + "loss": 0.6578946709632874, + "step": 5495 + }, + { + "epoch": 1.607106302090949, + "grad_norm": 1.9512378226148355, + "learning_rate": 2.0407848102644002e-06, + "loss": 0.5967978239059448, + "step": 5496 + }, + { + "epoch": 1.6073987425062144, + "grad_norm": 1.623105883994405, + "learning_rate": 
2.037858809823675e-06, + "loss": 0.46947693824768066, + "step": 5497 + }, + { + "epoch": 1.6076911829214797, + "grad_norm": 1.5763151196056784, + "learning_rate": 2.0349346704850436e-06, + "loss": 0.5014760494232178, + "step": 5498 + }, + { + "epoch": 1.6079836233367453, + "grad_norm": 1.5417734514532708, + "learning_rate": 2.0320123929320033e-06, + "loss": 0.4399675726890564, + "step": 5499 + }, + { + "epoch": 1.6082760637520104, + "grad_norm": 1.8719036359624468, + "learning_rate": 2.0290919778476214e-06, + "loss": 0.4729107618331909, + "step": 5500 + }, + { + "epoch": 1.608568504167276, + "grad_norm": 1.5894079730285777, + "learning_rate": 2.0261734259145248e-06, + "loss": 0.5669134259223938, + "step": 5501 + }, + { + "epoch": 1.6088609445825413, + "grad_norm": 1.554035864612711, + "learning_rate": 2.0232567378149082e-06, + "loss": 0.4200817942619324, + "step": 5502 + }, + { + "epoch": 1.6091533849978066, + "grad_norm": 1.8154865090092227, + "learning_rate": 2.0203419142305303e-06, + "loss": 0.6057849526405334, + "step": 5503 + }, + { + "epoch": 1.6094458254130721, + "grad_norm": 1.7156552575659618, + "learning_rate": 2.017428955842713e-06, + "loss": 0.5644170045852661, + "step": 5504 + }, + { + "epoch": 1.6097382658283375, + "grad_norm": 1.9102243104698693, + "learning_rate": 2.014517863332345e-06, + "loss": 0.6368730068206787, + "step": 5505 + }, + { + "epoch": 1.6100307062436028, + "grad_norm": 1.5712918255487374, + "learning_rate": 2.0116086373798704e-06, + "loss": 0.4829355478286743, + "step": 5506 + }, + { + "epoch": 1.6103231466588683, + "grad_norm": 1.642541904242283, + "learning_rate": 2.0087012786653072e-06, + "loss": 0.5604796409606934, + "step": 5507 + }, + { + "epoch": 1.6106155870741337, + "grad_norm": 1.8591393596163848, + "learning_rate": 2.005795787868232e-06, + "loss": 0.5594274997711182, + "step": 5508 + }, + { + "epoch": 1.610908027489399, + "grad_norm": 1.607362999733334, + "learning_rate": 2.0028921656677857e-06, + "loss": 
0.5553449988365173, + "step": 5509 + }, + { + "epoch": 1.6112004679046645, + "grad_norm": 1.7968941470299316, + "learning_rate": 1.999990412742673e-06, + "loss": 0.5056631565093994, + "step": 5510 + }, + { + "epoch": 1.6114929083199299, + "grad_norm": 1.5654499452702673, + "learning_rate": 1.9970905297711606e-06, + "loss": 0.432037353515625, + "step": 5511 + }, + { + "epoch": 1.6117853487351952, + "grad_norm": 1.6991047972494284, + "learning_rate": 1.9941925174310773e-06, + "loss": 0.5152974128723145, + "step": 5512 + }, + { + "epoch": 1.6120777891504607, + "grad_norm": 2.1530610582321015, + "learning_rate": 1.9912963763998185e-06, + "loss": 0.59015291929245, + "step": 5513 + }, + { + "epoch": 1.6123702295657258, + "grad_norm": 2.024675130869183, + "learning_rate": 1.9884021073543368e-06, + "loss": 0.564031720161438, + "step": 5514 + }, + { + "epoch": 1.6126626699809914, + "grad_norm": 1.560415326953441, + "learning_rate": 1.985509710971152e-06, + "loss": 0.5930228233337402, + "step": 5515 + }, + { + "epoch": 1.6129551103962567, + "grad_norm": 1.6853261691368011, + "learning_rate": 1.9826191879263446e-06, + "loss": 0.540229082107544, + "step": 5516 + }, + { + "epoch": 1.613247550811522, + "grad_norm": 1.3918983021829734, + "learning_rate": 1.9797305388955547e-06, + "loss": 0.5473166704177856, + "step": 5517 + }, + { + "epoch": 1.6135399912267876, + "grad_norm": 1.888180196247059, + "learning_rate": 1.976843764553986e-06, + "loss": 0.5814535617828369, + "step": 5518 + }, + { + "epoch": 1.613832431642053, + "grad_norm": 1.556089571926902, + "learning_rate": 1.973958865576403e-06, + "loss": 0.4892576038837433, + "step": 5519 + }, + { + "epoch": 1.6141248720573182, + "grad_norm": 2.0461002845877454, + "learning_rate": 1.97107584263714e-06, + "loss": 0.5416869521141052, + "step": 5520 + }, + { + "epoch": 1.6144173124725838, + "grad_norm": 1.9685372161480885, + "learning_rate": 1.9681946964100807e-06, + "loss": 0.5956105589866638, + "step": 5521 + }, + { + "epoch": 
1.6147097528878491, + "grad_norm": 1.7885384988170376, + "learning_rate": 1.9653154275686782e-06, + "loss": 0.5722565650939941, + "step": 5522 + }, + { + "epoch": 1.6150021933031145, + "grad_norm": 1.7917880328936266, + "learning_rate": 1.962438036785942e-06, + "loss": 0.3984888195991516, + "step": 5523 + }, + { + "epoch": 1.61529463371838, + "grad_norm": 1.6334267618118792, + "learning_rate": 1.959562524734445e-06, + "loss": 0.601211428642273, + "step": 5524 + }, + { + "epoch": 1.615587074133645, + "grad_norm": 1.8080265301577823, + "learning_rate": 1.9566888920863247e-06, + "loss": 0.4803691506385803, + "step": 5525 + }, + { + "epoch": 1.6158795145489107, + "grad_norm": 1.7017865626810558, + "learning_rate": 1.9538171395132688e-06, + "loss": 0.6914256811141968, + "step": 5526 + }, + { + "epoch": 1.6161719549641762, + "grad_norm": 1.6511977253132817, + "learning_rate": 1.950947267686536e-06, + "loss": 0.49076569080352783, + "step": 5527 + }, + { + "epoch": 1.6164643953794413, + "grad_norm": 1.747888743558531, + "learning_rate": 1.9480792772769384e-06, + "loss": 0.45781368017196655, + "step": 5528 + }, + { + "epoch": 1.6167568357947069, + "grad_norm": 1.6564602036044371, + "learning_rate": 1.9452131689548547e-06, + "loss": 0.5257985591888428, + "step": 5529 + }, + { + "epoch": 1.6170492762099722, + "grad_norm": 1.6440311298220422, + "learning_rate": 1.9423489433902186e-06, + "loss": 0.4170517921447754, + "step": 5530 + }, + { + "epoch": 1.6173417166252375, + "grad_norm": 2.0566981290938386, + "learning_rate": 1.939486601252525e-06, + "loss": 0.5612319707870483, + "step": 5531 + }, + { + "epoch": 1.617634157040503, + "grad_norm": 1.6971941543602107, + "learning_rate": 1.93662614321083e-06, + "loss": 0.4543185234069824, + "step": 5532 + }, + { + "epoch": 1.6179265974557684, + "grad_norm": 1.8366998724664239, + "learning_rate": 1.933767569933749e-06, + "loss": 0.5506256222724915, + "step": 5533 + }, + { + "epoch": 1.6182190378710337, + "grad_norm": 1.9975995427991473, 
+ "learning_rate": 1.930910882089454e-06, + "loss": 0.5411139130592346, + "step": 5534 + }, + { + "epoch": 1.6185114782862993, + "grad_norm": 1.5549343206880035, + "learning_rate": 1.9280560803456794e-06, + "loss": 0.5332196950912476, + "step": 5535 + }, + { + "epoch": 1.6188039187015646, + "grad_norm": 1.6015028384804206, + "learning_rate": 1.92520316536972e-06, + "loss": 0.5159808993339539, + "step": 5536 + }, + { + "epoch": 1.61909635911683, + "grad_norm": 1.6182793083642761, + "learning_rate": 1.9223521378284227e-06, + "loss": 0.5483378767967224, + "step": 5537 + }, + { + "epoch": 1.6193887995320955, + "grad_norm": 1.748287896704832, + "learning_rate": 1.9195029983882008e-06, + "loss": 0.6451961994171143, + "step": 5538 + }, + { + "epoch": 1.6196812399473606, + "grad_norm": 1.799346834276764, + "learning_rate": 1.9166557477150227e-06, + "loss": 0.5904289484024048, + "step": 5539 + }, + { + "epoch": 1.6199736803626261, + "grad_norm": 1.7892510148499787, + "learning_rate": 1.9138103864744164e-06, + "loss": 0.6688845753669739, + "step": 5540 + }, + { + "epoch": 1.6202661207778914, + "grad_norm": 2.050448659373089, + "learning_rate": 1.910966915331467e-06, + "loss": 0.6299211382865906, + "step": 5541 + }, + { + "epoch": 1.6205585611931568, + "grad_norm": 1.7426964698819027, + "learning_rate": 1.908125334950819e-06, + "loss": 0.5502864122390747, + "step": 5542 + }, + { + "epoch": 1.6208510016084223, + "grad_norm": 1.7235241145346871, + "learning_rate": 1.905285645996674e-06, + "loss": 0.5332865118980408, + "step": 5543 + }, + { + "epoch": 1.6211434420236877, + "grad_norm": 1.6383658575716402, + "learning_rate": 1.9024478491327936e-06, + "loss": 0.43371304869651794, + "step": 5544 + }, + { + "epoch": 1.621435882438953, + "grad_norm": 1.6217362880484214, + "learning_rate": 1.8996119450224936e-06, + "loss": 0.6992501616477966, + "step": 5545 + }, + { + "epoch": 1.6217283228542185, + "grad_norm": 1.6128570910751827, + "learning_rate": 1.8967779343286507e-06, + "loss": 
0.46558254957199097, + "step": 5546 + }, + { + "epoch": 1.6220207632694839, + "grad_norm": 1.8944422030763228, + "learning_rate": 1.8939458177136994e-06, + "loss": 0.48943620920181274, + "step": 5547 + }, + { + "epoch": 1.6223132036847492, + "grad_norm": 1.7707340805275773, + "learning_rate": 1.8911155958396256e-06, + "loss": 0.6041419506072998, + "step": 5548 + }, + { + "epoch": 1.6226056441000147, + "grad_norm": 1.9134744412177116, + "learning_rate": 1.8882872693679787e-06, + "loss": 0.5695589780807495, + "step": 5549 + }, + { + "epoch": 1.62289808451528, + "grad_norm": 1.5970321508292495, + "learning_rate": 1.8854608389598616e-06, + "loss": 0.45147764682769775, + "step": 5550 + }, + { + "epoch": 1.6231905249305454, + "grad_norm": 1.6145559649024994, + "learning_rate": 1.8826363052759367e-06, + "loss": 0.45560893416404724, + "step": 5551 + }, + { + "epoch": 1.623482965345811, + "grad_norm": 2.0039561405471855, + "learning_rate": 1.8798136689764213e-06, + "loss": 0.5714661478996277, + "step": 5552 + }, + { + "epoch": 1.623775405761076, + "grad_norm": 1.963169578207157, + "learning_rate": 1.8769929307210889e-06, + "loss": 0.6074325442314148, + "step": 5553 + }, + { + "epoch": 1.6240678461763416, + "grad_norm": 1.8912554250379197, + "learning_rate": 1.8741740911692708e-06, + "loss": 0.5406322479248047, + "step": 5554 + }, + { + "epoch": 1.624360286591607, + "grad_norm": 1.6201303288999382, + "learning_rate": 1.8713571509798524e-06, + "loss": 0.6098664999008179, + "step": 5555 + }, + { + "epoch": 1.6246527270068722, + "grad_norm": 1.5939995677707521, + "learning_rate": 1.8685421108112778e-06, + "loss": 0.4424859881401062, + "step": 5556 + }, + { + "epoch": 1.6249451674221378, + "grad_norm": 1.939602482231334, + "learning_rate": 1.8657289713215442e-06, + "loss": 0.5893913507461548, + "step": 5557 + }, + { + "epoch": 1.6252376078374031, + "grad_norm": 1.7158163961672592, + "learning_rate": 1.862917733168208e-06, + "loss": 0.5462610125541687, + "step": 5558 + }, + { + 
"epoch": 1.6255300482526684, + "grad_norm": 1.6886650692922842, + "learning_rate": 1.8601083970083766e-06, + "loss": 0.6044303178787231, + "step": 5559 + }, + { + "epoch": 1.625822488667934, + "grad_norm": 1.9164005939081288, + "learning_rate": 1.857300963498715e-06, + "loss": 0.4110264778137207, + "step": 5560 + }, + { + "epoch": 1.6261149290831993, + "grad_norm": 1.863152431496286, + "learning_rate": 1.8544954332954445e-06, + "loss": 0.5640783309936523, + "step": 5561 + }, + { + "epoch": 1.6264073694984647, + "grad_norm": 1.7604141548514003, + "learning_rate": 1.851691807054342e-06, + "loss": 0.43247851729393005, + "step": 5562 + }, + { + "epoch": 1.6266998099137302, + "grad_norm": 1.5534888737518595, + "learning_rate": 1.8488900854307367e-06, + "loss": 0.4909735918045044, + "step": 5563 + }, + { + "epoch": 1.6269922503289953, + "grad_norm": 2.0368143734269983, + "learning_rate": 1.8460902690795135e-06, + "loss": 0.5705426335334778, + "step": 5564 + }, + { + "epoch": 1.6272846907442609, + "grad_norm": 1.7974229709801126, + "learning_rate": 1.8432923586551144e-06, + "loss": 0.6455575823783875, + "step": 5565 + }, + { + "epoch": 1.6275771311595264, + "grad_norm": 1.5854644304225498, + "learning_rate": 1.8404963548115318e-06, + "loss": 0.4156351089477539, + "step": 5566 + }, + { + "epoch": 1.6278695715747915, + "grad_norm": 1.5858505218040218, + "learning_rate": 1.8377022582023163e-06, + "loss": 0.5497896075248718, + "step": 5567 + }, + { + "epoch": 1.628162011990057, + "grad_norm": 1.5247094519796704, + "learning_rate": 1.8349100694805711e-06, + "loss": 0.5237758159637451, + "step": 5568 + }, + { + "epoch": 1.6284544524053224, + "grad_norm": 1.8340347816856337, + "learning_rate": 1.832119789298954e-06, + "loss": 0.5140771865844727, + "step": 5569 + }, + { + "epoch": 1.6287468928205877, + "grad_norm": 2.04051717357991, + "learning_rate": 1.8293314183096721e-06, + "loss": 0.5942349433898926, + "step": 5570 + }, + { + "epoch": 1.6290393332358533, + "grad_norm": 
1.7868670881272706, + "learning_rate": 1.8265449571644933e-06, + "loss": 0.6316613554954529, + "step": 5571 + }, + { + "epoch": 1.6293317736511186, + "grad_norm": 1.7168155291178147, + "learning_rate": 1.823760406514735e-06, + "loss": 0.4789954423904419, + "step": 5572 + }, + { + "epoch": 1.629624214066384, + "grad_norm": 1.67674259516067, + "learning_rate": 1.8209777670112706e-06, + "loss": 0.596744179725647, + "step": 5573 + }, + { + "epoch": 1.6299166544816495, + "grad_norm": 1.7162317239554103, + "learning_rate": 1.8181970393045223e-06, + "loss": 0.5785890817642212, + "step": 5574 + }, + { + "epoch": 1.6302090948969148, + "grad_norm": 1.72408279785472, + "learning_rate": 1.8154182240444706e-06, + "loss": 0.5399461388587952, + "step": 5575 + }, + { + "epoch": 1.6305015353121801, + "grad_norm": 1.789842505433769, + "learning_rate": 1.812641321880645e-06, + "loss": 0.5251961946487427, + "step": 5576 + }, + { + "epoch": 1.6307939757274457, + "grad_norm": 1.5658430659550284, + "learning_rate": 1.8098663334621314e-06, + "loss": 0.6094855070114136, + "step": 5577 + }, + { + "epoch": 1.6310864161427108, + "grad_norm": 1.7839781318616403, + "learning_rate": 1.8070932594375656e-06, + "loss": 0.5586157441139221, + "step": 5578 + }, + { + "epoch": 1.6313788565579763, + "grad_norm": 1.6074136925381057, + "learning_rate": 1.804322100455136e-06, + "loss": 0.5572035312652588, + "step": 5579 + }, + { + "epoch": 1.6316712969732416, + "grad_norm": 1.8419935059375991, + "learning_rate": 1.801552857162585e-06, + "loss": 0.5567929148674011, + "step": 5580 + }, + { + "epoch": 1.631963737388507, + "grad_norm": 1.6633256712541593, + "learning_rate": 1.79878553020721e-06, + "loss": 0.4823629558086395, + "step": 5581 + }, + { + "epoch": 1.6322561778037725, + "grad_norm": 1.6367496398860508, + "learning_rate": 1.7960201202358495e-06, + "loss": 0.52935791015625, + "step": 5582 + }, + { + "epoch": 1.6325486182190379, + "grad_norm": 1.612437469487566, + "learning_rate": 
1.7932566278949049e-06, + "loss": 0.5486055016517639, + "step": 5583 + }, + { + "epoch": 1.6328410586343032, + "grad_norm": 1.638977663987494, + "learning_rate": 1.7904950538303256e-06, + "loss": 0.5606030225753784, + "step": 5584 + }, + { + "epoch": 1.6331334990495687, + "grad_norm": 1.5693957426770746, + "learning_rate": 1.7877353986876134e-06, + "loss": 0.5394873023033142, + "step": 5585 + }, + { + "epoch": 1.633425939464834, + "grad_norm": 1.7113121312436326, + "learning_rate": 1.7849776631118198e-06, + "loss": 0.6015416383743286, + "step": 5586 + }, + { + "epoch": 1.6337183798800994, + "grad_norm": 1.308708455891742, + "learning_rate": 1.7822218477475496e-06, + "loss": 0.3476119041442871, + "step": 5587 + }, + { + "epoch": 1.634010820295365, + "grad_norm": 1.6872606261874499, + "learning_rate": 1.7794679532389569e-06, + "loss": 0.43062901496887207, + "step": 5588 + }, + { + "epoch": 1.6343032607106303, + "grad_norm": 1.5715829289628913, + "learning_rate": 1.7767159802297497e-06, + "loss": 0.5267136096954346, + "step": 5589 + }, + { + "epoch": 1.6345957011258956, + "grad_norm": 1.5843839322860915, + "learning_rate": 1.7739659293631828e-06, + "loss": 0.40477365255355835, + "step": 5590 + }, + { + "epoch": 1.6348881415411611, + "grad_norm": 1.5464703907052304, + "learning_rate": 1.7712178012820657e-06, + "loss": 0.5166594386100769, + "step": 5591 + }, + { + "epoch": 1.6351805819564262, + "grad_norm": 1.8725681057880097, + "learning_rate": 1.768471596628757e-06, + "loss": 0.577332615852356, + "step": 5592 + }, + { + "epoch": 1.6354730223716918, + "grad_norm": 1.9361068668488919, + "learning_rate": 1.7657273160451626e-06, + "loss": 0.6265558004379272, + "step": 5593 + }, + { + "epoch": 1.6357654627869571, + "grad_norm": 1.359788014623014, + "learning_rate": 1.7629849601727422e-06, + "loss": 0.46483689546585083, + "step": 5594 + }, + { + "epoch": 1.6360579032022224, + "grad_norm": 1.763922790196176, + "learning_rate": 1.760244529652504e-06, + "loss": 
0.5217114090919495, + "step": 5595 + }, + { + "epoch": 1.636350343617488, + "grad_norm": 1.6050490395737056, + "learning_rate": 1.7575060251250098e-06, + "loss": 0.40754032135009766, + "step": 5596 + }, + { + "epoch": 1.6366427840327533, + "grad_norm": 1.8321306870013994, + "learning_rate": 1.7547694472303677e-06, + "loss": 0.5153856873512268, + "step": 5597 + }, + { + "epoch": 1.6369352244480186, + "grad_norm": 1.7719174136737381, + "learning_rate": 1.7520347966082352e-06, + "loss": 0.47374534606933594, + "step": 5598 + }, + { + "epoch": 1.6372276648632842, + "grad_norm": 1.85303064846871, + "learning_rate": 1.7493020738978205e-06, + "loss": 0.375232070684433, + "step": 5599 + }, + { + "epoch": 1.6375201052785495, + "grad_norm": 1.6844665277996391, + "learning_rate": 1.746571279737884e-06, + "loss": 0.5731218457221985, + "step": 5600 + }, + { + "epoch": 1.6378125456938148, + "grad_norm": 1.7604017420749336, + "learning_rate": 1.7438424147667267e-06, + "loss": 0.4908478260040283, + "step": 5601 + }, + { + "epoch": 1.6381049861090804, + "grad_norm": 1.759771030770569, + "learning_rate": 1.741115479622205e-06, + "loss": 0.6114420890808105, + "step": 5602 + }, + { + "epoch": 1.6383974265243455, + "grad_norm": 1.6860969538693165, + "learning_rate": 1.738390474941727e-06, + "loss": 0.6207842826843262, + "step": 5603 + }, + { + "epoch": 1.638689866939611, + "grad_norm": 1.8627303036453442, + "learning_rate": 1.7356674013622431e-06, + "loss": 0.4745057225227356, + "step": 5604 + }, + { + "epoch": 1.6389823073548766, + "grad_norm": 1.7448279838579288, + "learning_rate": 1.7329462595202573e-06, + "loss": 0.5501791834831238, + "step": 5605 + }, + { + "epoch": 1.6392747477701417, + "grad_norm": 1.5723514930569527, + "learning_rate": 1.7302270500518181e-06, + "loss": 0.5497169494628906, + "step": 5606 + }, + { + "epoch": 1.6395671881854073, + "grad_norm": 1.5582550082102102, + "learning_rate": 1.7275097735925239e-06, + "loss": 0.4439499080181122, + "step": 5607 + }, + { + 
"epoch": 1.6398596286006726, + "grad_norm": 1.5421642594165323, + "learning_rate": 1.7247944307775245e-06, + "loss": 0.5869239568710327, + "step": 5608 + }, + { + "epoch": 1.640152069015938, + "grad_norm": 1.8811101308859866, + "learning_rate": 1.722081022241512e-06, + "loss": 0.6979252099990845, + "step": 5609 + }, + { + "epoch": 1.6404445094312035, + "grad_norm": 1.6052357503320651, + "learning_rate": 1.719369548618729e-06, + "loss": 0.43291550874710083, + "step": 5610 + }, + { + "epoch": 1.6407369498464688, + "grad_norm": 1.9414841639869573, + "learning_rate": 1.7166600105429676e-06, + "loss": 0.5670255422592163, + "step": 5611 + }, + { + "epoch": 1.6410293902617341, + "grad_norm": 1.8236286685742322, + "learning_rate": 1.7139524086475679e-06, + "loss": 0.5956759452819824, + "step": 5612 + }, + { + "epoch": 1.6413218306769997, + "grad_norm": 1.8379778243010318, + "learning_rate": 1.71124674356541e-06, + "loss": 0.624202311038971, + "step": 5613 + }, + { + "epoch": 1.641614271092265, + "grad_norm": 1.546976601945301, + "learning_rate": 1.7085430159289295e-06, + "loss": 0.5394845604896545, + "step": 5614 + }, + { + "epoch": 1.6419067115075303, + "grad_norm": 1.689395226298913, + "learning_rate": 1.7058412263701063e-06, + "loss": 0.5320364236831665, + "step": 5615 + }, + { + "epoch": 1.6421991519227959, + "grad_norm": 1.719103506089404, + "learning_rate": 1.7031413755204673e-06, + "loss": 0.5889087915420532, + "step": 5616 + }, + { + "epoch": 1.642491592338061, + "grad_norm": 2.1274597082343103, + "learning_rate": 1.7004434640110857e-06, + "loss": 0.63529372215271, + "step": 5617 + }, + { + "epoch": 1.6427840327533265, + "grad_norm": 1.4153766033649497, + "learning_rate": 1.6977474924725823e-06, + "loss": 0.48696887493133545, + "step": 5618 + }, + { + "epoch": 1.6430764731685918, + "grad_norm": 1.592412158520241, + "learning_rate": 1.6950534615351234e-06, + "loss": 0.5998564958572388, + "step": 5619 + }, + { + "epoch": 1.6433689135838572, + "grad_norm": 
2.0156836237169142, + "learning_rate": 1.6923613718284237e-06, + "loss": 0.5256673693656921, + "step": 5620 + }, + { + "epoch": 1.6436613539991227, + "grad_norm": 1.6125826678096948, + "learning_rate": 1.6896712239817425e-06, + "loss": 0.4609792232513428, + "step": 5621 + }, + { + "epoch": 1.643953794414388, + "grad_norm": 1.6601641389435113, + "learning_rate": 1.6869830186238846e-06, + "loss": 0.6816249489784241, + "step": 5622 + }, + { + "epoch": 1.6442462348296534, + "grad_norm": 1.7917988570441037, + "learning_rate": 1.6842967563832036e-06, + "loss": 0.4622993767261505, + "step": 5623 + }, + { + "epoch": 1.644538675244919, + "grad_norm": 1.7039192593388794, + "learning_rate": 1.6816124378875942e-06, + "loss": 0.5089092254638672, + "step": 5624 + }, + { + "epoch": 1.6448311156601843, + "grad_norm": 1.804753905417491, + "learning_rate": 1.6789300637645e-06, + "loss": 0.49178463220596313, + "step": 5625 + }, + { + "epoch": 1.6451235560754496, + "grad_norm": 1.5823189990427826, + "learning_rate": 1.676249634640912e-06, + "loss": 0.4943847358226776, + "step": 5626 + }, + { + "epoch": 1.6454159964907151, + "grad_norm": 1.368767643177251, + "learning_rate": 1.6735711511433606e-06, + "loss": 0.38509243726730347, + "step": 5627 + }, + { + "epoch": 1.6457084369059805, + "grad_norm": 1.8368901340386043, + "learning_rate": 1.6708946138979288e-06, + "loss": 0.4765651822090149, + "step": 5628 + }, + { + "epoch": 1.6460008773212458, + "grad_norm": 1.9577136986762462, + "learning_rate": 1.6682200235302383e-06, + "loss": 0.5667406916618347, + "step": 5629 + }, + { + "epoch": 1.6462933177365113, + "grad_norm": 1.7936789387136831, + "learning_rate": 1.66554738066546e-06, + "loss": 0.702905535697937, + "step": 5630 + }, + { + "epoch": 1.6465857581517764, + "grad_norm": 1.8218045279879265, + "learning_rate": 1.6628766859283064e-06, + "loss": 0.5056663155555725, + "step": 5631 + }, + { + "epoch": 1.646878198567042, + "grad_norm": 1.5059989898819282, + "learning_rate": 
1.660207939943037e-06, + "loss": 0.3949700593948364, + "step": 5632 + }, + { + "epoch": 1.6471706389823073, + "grad_norm": 1.8365180821647582, + "learning_rate": 1.6575411433334553e-06, + "loss": 0.5562522411346436, + "step": 5633 + }, + { + "epoch": 1.6474630793975726, + "grad_norm": 1.853282636299631, + "learning_rate": 1.6548762967229104e-06, + "loss": 0.5046012997627258, + "step": 5634 + }, + { + "epoch": 1.6477555198128382, + "grad_norm": 1.873405273649113, + "learning_rate": 1.6522134007342894e-06, + "loss": 0.510586678981781, + "step": 5635 + }, + { + "epoch": 1.6480479602281035, + "grad_norm": 1.7244709678320052, + "learning_rate": 1.649552455990031e-06, + "loss": 0.5587502717971802, + "step": 5636 + }, + { + "epoch": 1.6483404006433688, + "grad_norm": 1.4244703013642708, + "learning_rate": 1.6468934631121147e-06, + "loss": 0.4774302840232849, + "step": 5637 + }, + { + "epoch": 1.6486328410586344, + "grad_norm": 1.7840577383362874, + "learning_rate": 1.644236422722063e-06, + "loss": 0.5969966650009155, + "step": 5638 + }, + { + "epoch": 1.6489252814738997, + "grad_norm": 1.9166026144170052, + "learning_rate": 1.6415813354409438e-06, + "loss": 0.6344267129898071, + "step": 5639 + }, + { + "epoch": 1.649217721889165, + "grad_norm": 1.6455866581497667, + "learning_rate": 1.638928201889367e-06, + "loss": 0.5252394676208496, + "step": 5640 + }, + { + "epoch": 1.6495101623044306, + "grad_norm": 1.7211145798255698, + "learning_rate": 1.636277022687488e-06, + "loss": 0.5092496871948242, + "step": 5641 + }, + { + "epoch": 1.6498026027196957, + "grad_norm": 1.797167898340461, + "learning_rate": 1.633627798455002e-06, + "loss": 0.6530938148498535, + "step": 5642 + }, + { + "epoch": 1.6500950431349612, + "grad_norm": 1.9009398203220143, + "learning_rate": 1.6309805298111492e-06, + "loss": 0.5152128338813782, + "step": 5643 + }, + { + "epoch": 1.6503874835502268, + "grad_norm": 1.4233567646508596, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.4670040011405945, 
+ "step": 5644 + }, + { + "epoch": 1.650679923965492, + "grad_norm": 1.718468492793745, + "learning_rate": 1.625691861764024e-06, + "loss": 0.47373896837234497, + "step": 5645 + }, + { + "epoch": 1.6509723643807575, + "grad_norm": 1.7609405530750961, + "learning_rate": 1.6230504635969413e-06, + "loss": 0.44277217984199524, + "step": 5646 + }, + { + "epoch": 1.6512648047960228, + "grad_norm": 1.9781946361433098, + "learning_rate": 1.6204110234908798e-06, + "loss": 0.6000313758850098, + "step": 5647 + }, + { + "epoch": 1.651557245211288, + "grad_norm": 1.7535547863968097, + "learning_rate": 1.6177735420627939e-06, + "loss": 0.5950880646705627, + "step": 5648 + }, + { + "epoch": 1.6518496856265537, + "grad_norm": 1.6566653852646755, + "learning_rate": 1.6151380199291767e-06, + "loss": 0.6308536529541016, + "step": 5649 + }, + { + "epoch": 1.652142126041819, + "grad_norm": 1.6807098070808562, + "learning_rate": 1.6125044577060667e-06, + "loss": 0.619708240032196, + "step": 5650 + }, + { + "epoch": 1.6524345664570843, + "grad_norm": 1.5330415165514617, + "learning_rate": 1.6098728560090438e-06, + "loss": 0.4912105202674866, + "step": 5651 + }, + { + "epoch": 1.6527270068723499, + "grad_norm": 1.6147329529882195, + "learning_rate": 1.607243215453227e-06, + "loss": 0.5078046917915344, + "step": 5652 + }, + { + "epoch": 1.6530194472876152, + "grad_norm": 1.562057881587638, + "learning_rate": 1.60461553665328e-06, + "loss": 0.4845188856124878, + "step": 5653 + }, + { + "epoch": 1.6533118877028805, + "grad_norm": 1.7846716530712952, + "learning_rate": 1.6019898202234075e-06, + "loss": 0.6323055028915405, + "step": 5654 + }, + { + "epoch": 1.653604328118146, + "grad_norm": 1.6998274797888833, + "learning_rate": 1.5993660667773524e-06, + "loss": 0.5700039863586426, + "step": 5655 + }, + { + "epoch": 1.6538967685334112, + "grad_norm": 1.567032216624363, + "learning_rate": 1.596744276928406e-06, + "loss": 0.5415322780609131, + "step": 5656 + }, + { + "epoch": 1.6541892089486767, 
+ "grad_norm": 1.5705725451812174, + "learning_rate": 1.5941244512893894e-06, + "loss": 0.47339457273483276, + "step": 5657 + }, + { + "epoch": 1.654481649363942, + "grad_norm": 1.7467509306260278, + "learning_rate": 1.5915065904726735e-06, + "loss": 0.5391967296600342, + "step": 5658 + }, + { + "epoch": 1.6547740897792074, + "grad_norm": 1.4866752002795596, + "learning_rate": 1.5888906950901683e-06, + "loss": 0.4832335114479065, + "step": 5659 + }, + { + "epoch": 1.655066530194473, + "grad_norm": 1.6779111992749078, + "learning_rate": 1.5862767657533217e-06, + "loss": 0.4539526104927063, + "step": 5660 + }, + { + "epoch": 1.6553589706097382, + "grad_norm": 1.6528644114250834, + "learning_rate": 1.583664803073125e-06, + "loss": 0.5261383056640625, + "step": 5661 + }, + { + "epoch": 1.6556514110250036, + "grad_norm": 1.497445031852123, + "learning_rate": 1.5810548076601096e-06, + "loss": 0.44060665369033813, + "step": 5662 + }, + { + "epoch": 1.6559438514402691, + "grad_norm": 1.5492116356252563, + "learning_rate": 1.578446780124344e-06, + "loss": 0.4202715754508972, + "step": 5663 + }, + { + "epoch": 1.6562362918555344, + "grad_norm": 1.6539664834530166, + "learning_rate": 1.57584072107544e-06, + "loss": 0.4736124873161316, + "step": 5664 + }, + { + "epoch": 1.6565287322707998, + "grad_norm": 1.71312353367257, + "learning_rate": 1.5732366311225466e-06, + "loss": 0.46696585416793823, + "step": 5665 + }, + { + "epoch": 1.6568211726860653, + "grad_norm": 1.9879711761174368, + "learning_rate": 1.570634510874356e-06, + "loss": 0.566236138343811, + "step": 5666 + }, + { + "epoch": 1.6571136131013307, + "grad_norm": 1.8521270783851422, + "learning_rate": 1.568034360939098e-06, + "loss": 0.5486587882041931, + "step": 5667 + }, + { + "epoch": 1.657406053516596, + "grad_norm": 1.7248593346342498, + "learning_rate": 1.5654361819245423e-06, + "loss": 0.49735748767852783, + "step": 5668 + }, + { + "epoch": 1.6576984939318615, + "grad_norm": 1.7146086057727925, + 
"learning_rate": 1.562839974437993e-06, + "loss": 0.6306062340736389, + "step": 5669 + }, + { + "epoch": 1.6579909343471266, + "grad_norm": 1.524921725608077, + "learning_rate": 1.5602457390863e-06, + "loss": 0.5062750577926636, + "step": 5670 + }, + { + "epoch": 1.6582833747623922, + "grad_norm": 1.511094812843301, + "learning_rate": 1.5576534764758522e-06, + "loss": 0.5037271976470947, + "step": 5671 + }, + { + "epoch": 1.6585758151776575, + "grad_norm": 1.5731242321067331, + "learning_rate": 1.5550631872125743e-06, + "loss": 0.5749099254608154, + "step": 5672 + }, + { + "epoch": 1.6588682555929228, + "grad_norm": 1.7434864310127793, + "learning_rate": 1.5524748719019312e-06, + "loss": 0.5241814255714417, + "step": 5673 + }, + { + "epoch": 1.6591606960081884, + "grad_norm": 1.6599956977784314, + "learning_rate": 1.5498885311489243e-06, + "loss": 0.5410301685333252, + "step": 5674 + }, + { + "epoch": 1.6594531364234537, + "grad_norm": 1.6011154372945764, + "learning_rate": 1.5473041655580956e-06, + "loss": 0.6363968253135681, + "step": 5675 + }, + { + "epoch": 1.659745576838719, + "grad_norm": 1.7262614943682328, + "learning_rate": 1.5447217757335264e-06, + "loss": 0.5476758480072021, + "step": 5676 + }, + { + "epoch": 1.6600380172539846, + "grad_norm": 2.1890795829733465, + "learning_rate": 1.5421413622788328e-06, + "loss": 0.5375553369522095, + "step": 5677 + }, + { + "epoch": 1.66033045766925, + "grad_norm": 1.6882335328441256, + "learning_rate": 1.53956292579717e-06, + "loss": 0.6227232217788696, + "step": 5678 + }, + { + "epoch": 1.6606228980845152, + "grad_norm": 1.6718724900526085, + "learning_rate": 1.5369864668912327e-06, + "loss": 0.5981070399284363, + "step": 5679 + }, + { + "epoch": 1.6609153384997808, + "grad_norm": 1.6935493861773532, + "learning_rate": 1.5344119861632535e-06, + "loss": 0.5535466074943542, + "step": 5680 + }, + { + "epoch": 1.661207778915046, + "grad_norm": 1.5610811166933365, + "learning_rate": 1.5318394842150009e-06, + "loss": 
0.5562780499458313, + "step": 5681 + }, + { + "epoch": 1.6615002193303114, + "grad_norm": 2.8725866349660683, + "learning_rate": 1.5292689616477808e-06, + "loss": 0.5937552452087402, + "step": 5682 + }, + { + "epoch": 1.661792659745577, + "grad_norm": 1.578580766715999, + "learning_rate": 1.526700419062439e-06, + "loss": 0.5154576301574707, + "step": 5683 + }, + { + "epoch": 1.662085100160842, + "grad_norm": 1.691867883182686, + "learning_rate": 1.5241338570593557e-06, + "loss": 0.6887973546981812, + "step": 5684 + }, + { + "epoch": 1.6623775405761076, + "grad_norm": 1.6575847736482525, + "learning_rate": 1.5215692762384481e-06, + "loss": 0.5365385413169861, + "step": 5685 + }, + { + "epoch": 1.662669980991373, + "grad_norm": 1.8209681086307343, + "learning_rate": 1.519006677199173e-06, + "loss": 0.5906165838241577, + "step": 5686 + }, + { + "epoch": 1.6629624214066383, + "grad_norm": 1.5861180854035228, + "learning_rate": 1.5164460605405252e-06, + "loss": 0.5752634406089783, + "step": 5687 + }, + { + "epoch": 1.6632548618219039, + "grad_norm": 1.5319119666926662, + "learning_rate": 1.5138874268610259e-06, + "loss": 0.6265667676925659, + "step": 5688 + }, + { + "epoch": 1.6635473022371692, + "grad_norm": 1.9588164368468703, + "learning_rate": 1.5113307767587449e-06, + "loss": 0.5032769441604614, + "step": 5689 + }, + { + "epoch": 1.6638397426524345, + "grad_norm": 1.799708728422752, + "learning_rate": 1.5087761108312837e-06, + "loss": 0.5740037560462952, + "step": 5690 + }, + { + "epoch": 1.6641321830677, + "grad_norm": 1.7236751091410876, + "learning_rate": 1.5062234296757782e-06, + "loss": 0.5745523571968079, + "step": 5691 + }, + { + "epoch": 1.6644246234829654, + "grad_norm": 1.4898534105858117, + "learning_rate": 1.5036727338889035e-06, + "loss": 0.4448510408401489, + "step": 5692 + }, + { + "epoch": 1.6647170638982307, + "grad_norm": 1.7482222251428332, + "learning_rate": 1.5011240240668678e-06, + "loss": 0.5142196416854858, + "step": 5693 + }, + { + "epoch": 
1.6650095043134963, + "grad_norm": 1.484675519827308, + "learning_rate": 1.4985773008054184e-06, + "loss": 0.3317479193210602, + "step": 5694 + }, + { + "epoch": 1.6653019447287614, + "grad_norm": 1.9471413048062423, + "learning_rate": 1.4960325646998353e-06, + "loss": 0.5721619129180908, + "step": 5695 + }, + { + "epoch": 1.665594385144027, + "grad_norm": 1.7546330993452042, + "learning_rate": 1.4934898163449341e-06, + "loss": 0.4937021732330322, + "step": 5696 + }, + { + "epoch": 1.6658868255592922, + "grad_norm": 1.9665727915679332, + "learning_rate": 1.4909490563350694e-06, + "loss": 0.6414870023727417, + "step": 5697 + }, + { + "epoch": 1.6661792659745576, + "grad_norm": 2.144217430639584, + "learning_rate": 1.4884102852641258e-06, + "loss": 0.6265281438827515, + "step": 5698 + }, + { + "epoch": 1.6664717063898231, + "grad_norm": 1.9171094003176723, + "learning_rate": 1.48587350372553e-06, + "loss": 0.5908917784690857, + "step": 5699 + }, + { + "epoch": 1.6667641468050884, + "grad_norm": 1.5794126433874063, + "learning_rate": 1.4833387123122334e-06, + "loss": 0.6098382472991943, + "step": 5700 + }, + { + "epoch": 1.6670565872203538, + "grad_norm": 1.7930907914682574, + "learning_rate": 1.4808059116167306e-06, + "loss": 0.5106536746025085, + "step": 5701 + }, + { + "epoch": 1.6673490276356193, + "grad_norm": 1.9054848074745216, + "learning_rate": 1.4782751022310481e-06, + "loss": 0.5548620820045471, + "step": 5702 + }, + { + "epoch": 1.6676414680508846, + "grad_norm": 1.5332665904029121, + "learning_rate": 1.4757462847467475e-06, + "loss": 0.4596245288848877, + "step": 5703 + }, + { + "epoch": 1.66793390846615, + "grad_norm": 1.98056012220508, + "learning_rate": 1.4732194597549244e-06, + "loss": 0.6000612378120422, + "step": 5704 + }, + { + "epoch": 1.6682263488814155, + "grad_norm": 1.53003821533968, + "learning_rate": 1.4706946278462097e-06, + "loss": 0.5522277355194092, + "step": 5705 + }, + { + "epoch": 1.6685187892966808, + "grad_norm": 1.741978737228361, 
+ "learning_rate": 1.468171789610766e-06, + "loss": 0.4765724837779999, + "step": 5706 + }, + { + "epoch": 1.6688112297119462, + "grad_norm": 1.737640693413614, + "learning_rate": 1.4656509456382927e-06, + "loss": 0.564188539981842, + "step": 5707 + }, + { + "epoch": 1.6691036701272117, + "grad_norm": 1.8586603218062736, + "learning_rate": 1.4631320965180208e-06, + "loss": 0.4910390377044678, + "step": 5708 + }, + { + "epoch": 1.6693961105424768, + "grad_norm": 1.8232002468100077, + "learning_rate": 1.4606152428387166e-06, + "loss": 0.5992041826248169, + "step": 5709 + }, + { + "epoch": 1.6696885509577424, + "grad_norm": 1.7676126822410316, + "learning_rate": 1.4581003851886811e-06, + "loss": 0.4873291850090027, + "step": 5710 + }, + { + "epoch": 1.6699809913730077, + "grad_norm": 1.7366762629360202, + "learning_rate": 1.4555875241557426e-06, + "loss": 0.6487013101577759, + "step": 5711 + }, + { + "epoch": 1.670273431788273, + "grad_norm": 1.5436242943423213, + "learning_rate": 1.4530766603272695e-06, + "loss": 0.4624609351158142, + "step": 5712 + }, + { + "epoch": 1.6705658722035386, + "grad_norm": 1.8279691880026145, + "learning_rate": 1.4505677942901609e-06, + "loss": 0.5765592455863953, + "step": 5713 + }, + { + "epoch": 1.670858312618804, + "grad_norm": 1.6171369345434061, + "learning_rate": 1.4480609266308488e-06, + "loss": 0.6730339527130127, + "step": 5714 + }, + { + "epoch": 1.6711507530340692, + "grad_norm": 1.7616383708984635, + "learning_rate": 1.445556057935299e-06, + "loss": 0.6381770372390747, + "step": 5715 + }, + { + "epoch": 1.6714431934493348, + "grad_norm": 1.8270298461203718, + "learning_rate": 1.4430531887890076e-06, + "loss": 0.6236029863357544, + "step": 5716 + }, + { + "epoch": 1.6717356338646001, + "grad_norm": 1.8837930420569144, + "learning_rate": 1.4405523197770076e-06, + "loss": 0.521639347076416, + "step": 5717 + }, + { + "epoch": 1.6720280742798654, + "grad_norm": 1.651464369232987, + "learning_rate": 1.4380534514838596e-06, + 
"loss": 0.5912468433380127, + "step": 5718 + }, + { + "epoch": 1.672320514695131, + "grad_norm": 1.7062190862435904, + "learning_rate": 1.4355565844936602e-06, + "loss": 0.5533329248428345, + "step": 5719 + }, + { + "epoch": 1.672612955110396, + "grad_norm": 1.8368834191923704, + "learning_rate": 1.4330617193900365e-06, + "loss": 0.5901006460189819, + "step": 5720 + }, + { + "epoch": 1.6729053955256616, + "grad_norm": 1.7501848609248272, + "learning_rate": 1.4305688567561503e-06, + "loss": 0.5083344578742981, + "step": 5721 + }, + { + "epoch": 1.6731978359409272, + "grad_norm": 1.359271864269329, + "learning_rate": 1.4280779971746894e-06, + "loss": 0.4443317651748657, + "step": 5722 + }, + { + "epoch": 1.6734902763561923, + "grad_norm": 1.931591797384203, + "learning_rate": 1.4255891412278778e-06, + "loss": 0.6355078220367432, + "step": 5723 + }, + { + "epoch": 1.6737827167714578, + "grad_norm": 1.751894451134603, + "learning_rate": 1.423102289497471e-06, + "loss": 0.5403381586074829, + "step": 5724 + }, + { + "epoch": 1.6740751571867232, + "grad_norm": 1.7703719298346523, + "learning_rate": 1.4206174425647556e-06, + "loss": 0.5272151231765747, + "step": 5725 + }, + { + "epoch": 1.6743675976019885, + "grad_norm": 1.6453865684399975, + "learning_rate": 1.41813460101055e-06, + "loss": 0.6750346422195435, + "step": 5726 + }, + { + "epoch": 1.674660038017254, + "grad_norm": 1.7471006401236766, + "learning_rate": 1.4156537654152026e-06, + "loss": 0.552655816078186, + "step": 5727 + }, + { + "epoch": 1.6749524784325194, + "grad_norm": 1.5245393452927156, + "learning_rate": 1.4131749363585933e-06, + "loss": 0.3947732448577881, + "step": 5728 + }, + { + "epoch": 1.6752449188477847, + "grad_norm": 1.4115886314708204, + "learning_rate": 1.4106981144201337e-06, + "loss": 0.4910270571708679, + "step": 5729 + }, + { + "epoch": 1.6755373592630503, + "grad_norm": 1.9268335112778272, + "learning_rate": 1.408223300178767e-06, + "loss": 0.5717943906784058, + "step": 5730 + }, + { + 
"epoch": 1.6758297996783156, + "grad_norm": 1.6782914146067396, + "learning_rate": 1.4057504942129652e-06, + "loss": 0.4993055462837219, + "step": 5731 + }, + { + "epoch": 1.676122240093581, + "grad_norm": 1.851203153701759, + "learning_rate": 1.4032796971007322e-06, + "loss": 0.4772619605064392, + "step": 5732 + }, + { + "epoch": 1.6764146805088465, + "grad_norm": 1.6250346930838577, + "learning_rate": 1.400810909419601e-06, + "loss": 0.3824518322944641, + "step": 5733 + }, + { + "epoch": 1.6767071209241116, + "grad_norm": 1.815105841906862, + "learning_rate": 1.398344131746634e-06, + "loss": 0.5302368402481079, + "step": 5734 + }, + { + "epoch": 1.676999561339377, + "grad_norm": 1.738439871277175, + "learning_rate": 1.3958793646584279e-06, + "loss": 0.5776697397232056, + "step": 5735 + }, + { + "epoch": 1.6772920017546424, + "grad_norm": 1.7943869884408015, + "learning_rate": 1.3934166087311063e-06, + "loss": 0.53890061378479, + "step": 5736 + }, + { + "epoch": 1.6775844421699078, + "grad_norm": 1.71821325954837, + "learning_rate": 1.3909558645403243e-06, + "loss": 0.47210827469825745, + "step": 5737 + }, + { + "epoch": 1.6778768825851733, + "grad_norm": 1.8689211559459666, + "learning_rate": 1.388497132661264e-06, + "loss": 0.6020913124084473, + "step": 5738 + }, + { + "epoch": 1.6781693230004386, + "grad_norm": 1.8612626444994878, + "learning_rate": 1.3860404136686411e-06, + "loss": 0.4244590997695923, + "step": 5739 + }, + { + "epoch": 1.678461763415704, + "grad_norm": 1.8281285744352933, + "learning_rate": 1.3835857081366965e-06, + "loss": 0.5969624519348145, + "step": 5740 + }, + { + "epoch": 1.6787542038309695, + "grad_norm": 1.774375495611947, + "learning_rate": 1.3811330166392057e-06, + "loss": 0.6573030352592468, + "step": 5741 + }, + { + "epoch": 1.6790466442462348, + "grad_norm": 1.641770470616675, + "learning_rate": 1.3786823397494675e-06, + "loss": 0.4251132905483246, + "step": 5742 + }, + { + "epoch": 1.6793390846615002, + "grad_norm": 
1.504067842347657, + "learning_rate": 1.3762336780403163e-06, + "loss": 0.5555700659751892, + "step": 5743 + }, + { + "epoch": 1.6796315250767657, + "grad_norm": 2.1042602241156128, + "learning_rate": 1.3737870320841073e-06, + "loss": 0.5651364326477051, + "step": 5744 + }, + { + "epoch": 1.679923965492031, + "grad_norm": 1.7135873981198582, + "learning_rate": 1.371342402452731e-06, + "loss": 0.6283698678016663, + "step": 5745 + }, + { + "epoch": 1.6802164059072964, + "grad_norm": 1.4720135811876174, + "learning_rate": 1.3688997897176037e-06, + "loss": 0.47864413261413574, + "step": 5746 + }, + { + "epoch": 1.680508846322562, + "grad_norm": 1.9268157997034314, + "learning_rate": 1.366459194449674e-06, + "loss": 0.6254131197929382, + "step": 5747 + }, + { + "epoch": 1.680801286737827, + "grad_norm": 1.4626288934383243, + "learning_rate": 1.364020617219415e-06, + "loss": 0.35147637128829956, + "step": 5748 + }, + { + "epoch": 1.6810937271530926, + "grad_norm": 1.9627536321629568, + "learning_rate": 1.3615840585968287e-06, + "loss": 0.6126410961151123, + "step": 5749 + }, + { + "epoch": 1.681386167568358, + "grad_norm": 1.7732443093164585, + "learning_rate": 1.359149519151447e-06, + "loss": 0.5807974338531494, + "step": 5750 + }, + { + "epoch": 1.6816786079836232, + "grad_norm": 1.5024396656291241, + "learning_rate": 1.3567169994523277e-06, + "loss": 0.5033349990844727, + "step": 5751 + }, + { + "epoch": 1.6819710483988888, + "grad_norm": 1.5694630419560385, + "learning_rate": 1.3542865000680604e-06, + "loss": 0.47656023502349854, + "step": 5752 + }, + { + "epoch": 1.682263488814154, + "grad_norm": 1.6495545571688441, + "learning_rate": 1.3518580215667542e-06, + "loss": 0.5137293338775635, + "step": 5753 + }, + { + "epoch": 1.6825559292294194, + "grad_norm": 1.5950800812601422, + "learning_rate": 1.3494315645160539e-06, + "loss": 0.4636800289154053, + "step": 5754 + }, + { + "epoch": 1.682848369644685, + "grad_norm": 1.937366310273075, + "learning_rate": 
1.3470071294831289e-06, + "loss": 0.5825523138046265, + "step": 5755 + }, + { + "epoch": 1.6831408100599503, + "grad_norm": 1.798274160020649, + "learning_rate": 1.344584717034677e-06, + "loss": 0.49282288551330566, + "step": 5756 + }, + { + "epoch": 1.6834332504752156, + "grad_norm": 1.6393172330125654, + "learning_rate": 1.3421643277369211e-06, + "loss": 0.5551935434341431, + "step": 5757 + }, + { + "epoch": 1.6837256908904812, + "grad_norm": 1.7844394306187494, + "learning_rate": 1.339745962155613e-06, + "loss": 0.6423832178115845, + "step": 5758 + }, + { + "epoch": 1.6840181313057463, + "grad_norm": 1.7061841923170233, + "learning_rate": 1.3373296208560316e-06, + "loss": 0.6178075671195984, + "step": 5759 + }, + { + "epoch": 1.6843105717210118, + "grad_norm": 1.7099687312543272, + "learning_rate": 1.3349153044029816e-06, + "loss": 0.6781176328659058, + "step": 5760 + }, + { + "epoch": 1.6846030121362774, + "grad_norm": 1.837996192806761, + "learning_rate": 1.332503013360794e-06, + "loss": 0.6511910557746887, + "step": 5761 + }, + { + "epoch": 1.6848954525515425, + "grad_norm": 1.3861294384859772, + "learning_rate": 1.3300927482933279e-06, + "loss": 0.4980696141719818, + "step": 5762 + }, + { + "epoch": 1.685187892966808, + "grad_norm": 1.6100202697936232, + "learning_rate": 1.3276845097639702e-06, + "loss": 0.49176928400993347, + "step": 5763 + }, + { + "epoch": 1.6854803333820734, + "grad_norm": 1.893755486996651, + "learning_rate": 1.3252782983356272e-06, + "loss": 0.5198799967765808, + "step": 5764 + }, + { + "epoch": 1.6857727737973387, + "grad_norm": 1.4629269004624288, + "learning_rate": 1.322874114570739e-06, + "loss": 0.5058869123458862, + "step": 5765 + }, + { + "epoch": 1.6860652142126042, + "grad_norm": 1.7429625548536576, + "learning_rate": 1.3204719590312698e-06, + "loss": 0.46573105454444885, + "step": 5766 + }, + { + "epoch": 1.6863576546278696, + "grad_norm": 1.702952537068074, + "learning_rate": 1.3180718322787067e-06, + "loss": 
0.5033260583877563, + "step": 5767 + }, + { + "epoch": 1.686650095043135, + "grad_norm": 1.4816614067920655, + "learning_rate": 1.3156737348740655e-06, + "loss": 0.5306515693664551, + "step": 5768 + }, + { + "epoch": 1.6869425354584004, + "grad_norm": 1.5781671263541353, + "learning_rate": 1.313277667377888e-06, + "loss": 0.45660221576690674, + "step": 5769 + }, + { + "epoch": 1.6872349758736658, + "grad_norm": 1.8450654821638361, + "learning_rate": 1.3108836303502392e-06, + "loss": 0.5353757739067078, + "step": 5770 + }, + { + "epoch": 1.687527416288931, + "grad_norm": 1.719850536675987, + "learning_rate": 1.3084916243507118e-06, + "loss": 0.5415239930152893, + "step": 5771 + }, + { + "epoch": 1.6878198567041967, + "grad_norm": 1.7512787251419633, + "learning_rate": 1.3061016499384217e-06, + "loss": 0.5860229730606079, + "step": 5772 + }, + { + "epoch": 1.6881122971194618, + "grad_norm": 1.7755386406909037, + "learning_rate": 1.3037137076720107e-06, + "loss": 0.5524891018867493, + "step": 5773 + }, + { + "epoch": 1.6884047375347273, + "grad_norm": 1.9368287931198411, + "learning_rate": 1.3013277981096484e-06, + "loss": 0.5557498931884766, + "step": 5774 + }, + { + "epoch": 1.6886971779499926, + "grad_norm": 1.6092314723070207, + "learning_rate": 1.2989439218090227e-06, + "loss": 0.45877397060394287, + "step": 5775 + }, + { + "epoch": 1.688989618365258, + "grad_norm": 1.587163397973365, + "learning_rate": 1.2965620793273515e-06, + "loss": 0.5310335159301758, + "step": 5776 + }, + { + "epoch": 1.6892820587805235, + "grad_norm": 1.4561579735469703, + "learning_rate": 1.294182271221377e-06, + "loss": 0.4855915904045105, + "step": 5777 + }, + { + "epoch": 1.6895744991957888, + "grad_norm": 1.5707049211364605, + "learning_rate": 1.2918044980473643e-06, + "loss": 0.6070747971534729, + "step": 5778 + }, + { + "epoch": 1.6898669396110542, + "grad_norm": 1.6739979529118527, + "learning_rate": 1.2894287603611033e-06, + "loss": 0.5108609795570374, + "step": 5779 + }, + { + 
"epoch": 1.6901593800263197, + "grad_norm": 1.7647615756485302, + "learning_rate": 1.2870550587179087e-06, + "loss": 0.49141189455986023, + "step": 5780 + }, + { + "epoch": 1.690451820441585, + "grad_norm": 1.4176033732152467, + "learning_rate": 1.2846833936726178e-06, + "loss": 0.4239678382873535, + "step": 5781 + }, + { + "epoch": 1.6907442608568504, + "grad_norm": 1.8431022697656632, + "learning_rate": 1.2823137657795948e-06, + "loss": 0.6348937153816223, + "step": 5782 + }, + { + "epoch": 1.691036701272116, + "grad_norm": 1.4853353146024342, + "learning_rate": 1.2799461755927233e-06, + "loss": 0.4561845064163208, + "step": 5783 + }, + { + "epoch": 1.6913291416873812, + "grad_norm": 2.1521785942560197, + "learning_rate": 1.2775806236654153e-06, + "loss": 0.5663880109786987, + "step": 5784 + }, + { + "epoch": 1.6916215821026466, + "grad_norm": 1.5729874297711008, + "learning_rate": 1.275217110550604e-06, + "loss": 0.5200550556182861, + "step": 5785 + }, + { + "epoch": 1.6919140225179121, + "grad_norm": 1.7072937541006934, + "learning_rate": 1.2728556368007461e-06, + "loss": 0.5401214361190796, + "step": 5786 + }, + { + "epoch": 1.6922064629331772, + "grad_norm": 2.0089316276908917, + "learning_rate": 1.2704962029678202e-06, + "loss": 0.5409752130508423, + "step": 5787 + }, + { + "epoch": 1.6924989033484428, + "grad_norm": 1.6551832796501305, + "learning_rate": 1.2681388096033298e-06, + "loss": 0.46215158700942993, + "step": 5788 + }, + { + "epoch": 1.692791343763708, + "grad_norm": 1.508586050733543, + "learning_rate": 1.2657834572583027e-06, + "loss": 0.44687867164611816, + "step": 5789 + }, + { + "epoch": 1.6930837841789734, + "grad_norm": 1.5112474922130816, + "learning_rate": 1.2634301464832877e-06, + "loss": 0.47882723808288574, + "step": 5790 + }, + { + "epoch": 1.693376224594239, + "grad_norm": 2.0838428918534264, + "learning_rate": 1.2610788778283567e-06, + "loss": 0.6108201742172241, + "step": 5791 + }, + { + "epoch": 1.6936686650095043, + "grad_norm": 
1.4370335670353505, + "learning_rate": 1.2587296518431036e-06, + "loss": 0.45024657249450684, + "step": 5792 + }, + { + "epoch": 1.6939611054247696, + "grad_norm": 2.263053324487421, + "learning_rate": 1.256382469076648e-06, + "loss": 0.6746254563331604, + "step": 5793 + }, + { + "epoch": 1.6942535458400352, + "grad_norm": 1.7423805800598553, + "learning_rate": 1.2540373300776264e-06, + "loss": 0.6439248323440552, + "step": 5794 + }, + { + "epoch": 1.6945459862553005, + "grad_norm": 1.5101648188878154, + "learning_rate": 1.251694235394204e-06, + "loss": 0.467510461807251, + "step": 5795 + }, + { + "epoch": 1.6948384266705658, + "grad_norm": 2.0083904845815117, + "learning_rate": 1.2493531855740626e-06, + "loss": 0.5509516596794128, + "step": 5796 + }, + { + "epoch": 1.6951308670858314, + "grad_norm": 1.6291523574406077, + "learning_rate": 1.247014181164412e-06, + "loss": 0.49178194999694824, + "step": 5797 + }, + { + "epoch": 1.6954233075010965, + "grad_norm": 1.6626228068208797, + "learning_rate": 1.2446772227119753e-06, + "loss": 0.4825005531311035, + "step": 5798 + }, + { + "epoch": 1.695715747916362, + "grad_norm": 1.7609982762736733, + "learning_rate": 1.242342310763005e-06, + "loss": 0.7441064715385437, + "step": 5799 + }, + { + "epoch": 1.6960081883316276, + "grad_norm": 1.6166055740202077, + "learning_rate": 1.2400094458632717e-06, + "loss": 0.5020110011100769, + "step": 5800 + }, + { + "epoch": 1.6963006287468927, + "grad_norm": 1.6328086702132818, + "learning_rate": 1.237678628558069e-06, + "loss": 0.5439830422401428, + "step": 5801 + }, + { + "epoch": 1.6965930691621582, + "grad_norm": 2.0549338843530136, + "learning_rate": 1.235349859392211e-06, + "loss": 0.6235179901123047, + "step": 5802 + }, + { + "epoch": 1.6968855095774236, + "grad_norm": 1.7141848290041162, + "learning_rate": 1.2330231389100323e-06, + "loss": 0.6176612377166748, + "step": 5803 + }, + { + "epoch": 1.697177949992689, + "grad_norm": 1.7381769122607003, + "learning_rate": 
1.2306984676553924e-06, + "loss": 0.5956840515136719, + "step": 5804 + }, + { + "epoch": 1.6974703904079544, + "grad_norm": 1.6857909163061566, + "learning_rate": 1.2283758461716667e-06, + "loss": 0.5025947690010071, + "step": 5805 + }, + { + "epoch": 1.6977628308232198, + "grad_norm": 1.507035347865144, + "learning_rate": 1.2260552750017551e-06, + "loss": 0.5772436857223511, + "step": 5806 + }, + { + "epoch": 1.698055271238485, + "grad_norm": 1.6665432076063584, + "learning_rate": 1.223736754688075e-06, + "loss": 0.4336615204811096, + "step": 5807 + }, + { + "epoch": 1.6983477116537506, + "grad_norm": 1.695081220374435, + "learning_rate": 1.221420285772572e-06, + "loss": 0.5697668790817261, + "step": 5808 + }, + { + "epoch": 1.698640152069016, + "grad_norm": 1.8545475442236217, + "learning_rate": 1.2191058687966995e-06, + "loss": 0.4966861605644226, + "step": 5809 + }, + { + "epoch": 1.6989325924842813, + "grad_norm": 1.777484506048346, + "learning_rate": 1.2167935043014411e-06, + "loss": 0.5805951952934265, + "step": 5810 + }, + { + "epoch": 1.6992250328995469, + "grad_norm": 1.6055305498040644, + "learning_rate": 1.2144831928272994e-06, + "loss": 0.4669906497001648, + "step": 5811 + }, + { + "epoch": 1.699517473314812, + "grad_norm": 1.7730179282571827, + "learning_rate": 1.212174934914294e-06, + "loss": 0.5630965828895569, + "step": 5812 + }, + { + "epoch": 1.6998099137300775, + "grad_norm": 1.7272395334456936, + "learning_rate": 1.2098687311019663e-06, + "loss": 0.5345104932785034, + "step": 5813 + }, + { + "epoch": 1.7001023541453428, + "grad_norm": 1.9547814584710963, + "learning_rate": 1.207564581929378e-06, + "loss": 0.5760249495506287, + "step": 5814 + }, + { + "epoch": 1.7003947945606082, + "grad_norm": 1.563397994600299, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.506635308265686, + "step": 5815 + }, + { + "epoch": 1.7006872349758737, + "grad_norm": 1.792775034126629, + "learning_rate": 1.2029624496572622e-06, + "loss": 0.5107032656669617, + 
"step": 5816 + }, + { + "epoch": 1.700979675391139, + "grad_norm": 1.5891211780153636, + "learning_rate": 1.2006644676334557e-06, + "loss": 0.5888187885284424, + "step": 5817 + }, + { + "epoch": 1.7012721158064044, + "grad_norm": 1.8008314810247776, + "learning_rate": 1.1983685424008285e-06, + "loss": 0.5326075553894043, + "step": 5818 + }, + { + "epoch": 1.70156455622167, + "grad_norm": 1.6515493940564925, + "learning_rate": 1.1960746744960417e-06, + "loss": 0.5097993612289429, + "step": 5819 + }, + { + "epoch": 1.7018569966369352, + "grad_norm": 1.6532256911128915, + "learning_rate": 1.1937828644552696e-06, + "loss": 0.6001093983650208, + "step": 5820 + }, + { + "epoch": 1.7021494370522006, + "grad_norm": 1.7728326525757572, + "learning_rate": 1.1914931128142072e-06, + "loss": 0.513684093952179, + "step": 5821 + }, + { + "epoch": 1.7024418774674661, + "grad_norm": 1.6118848482453871, + "learning_rate": 1.189205420108076e-06, + "loss": 0.4688597321510315, + "step": 5822 + }, + { + "epoch": 1.7027343178827314, + "grad_norm": 1.6755720349462948, + "learning_rate": 1.1869197868716075e-06, + "loss": 0.4537498354911804, + "step": 5823 + }, + { + "epoch": 1.7030267582979968, + "grad_norm": 1.625108439053771, + "learning_rate": 1.1846362136390531e-06, + "loss": 0.43031078577041626, + "step": 5824 + }, + { + "epoch": 1.7033191987132623, + "grad_norm": 1.9244406734438975, + "learning_rate": 1.182354700944187e-06, + "loss": 0.5139330625534058, + "step": 5825 + }, + { + "epoch": 1.7036116391285274, + "grad_norm": 1.7897651312393703, + "learning_rate": 1.180075249320296e-06, + "loss": 0.6542010307312012, + "step": 5826 + }, + { + "epoch": 1.703904079543793, + "grad_norm": 1.6365189888188503, + "learning_rate": 1.1777978593001903e-06, + "loss": 0.5371676087379456, + "step": 5827 + }, + { + "epoch": 1.7041965199590583, + "grad_norm": 1.6793659914593386, + "learning_rate": 1.1755225314161967e-06, + "loss": 0.47583359479904175, + "step": 5828 + }, + { + "epoch": 
1.7044889603743236, + "grad_norm": 1.7363884838234833, + "learning_rate": 1.173249266200156e-06, + "loss": 0.5471247434616089, + "step": 5829 + }, + { + "epoch": 1.7047814007895892, + "grad_norm": 1.850508925320166, + "learning_rate": 1.1709780641834323e-06, + "loss": 0.5095713138580322, + "step": 5830 + }, + { + "epoch": 1.7050738412048545, + "grad_norm": 1.5373790027628114, + "learning_rate": 1.1687089258969041e-06, + "loss": 0.41944777965545654, + "step": 5831 + }, + { + "epoch": 1.7053662816201198, + "grad_norm": 1.5434472143224902, + "learning_rate": 1.1664418518709697e-06, + "loss": 0.42380404472351074, + "step": 5832 + }, + { + "epoch": 1.7056587220353854, + "grad_norm": 1.8798510100106, + "learning_rate": 1.1641768426355427e-06, + "loss": 0.5688038468360901, + "step": 5833 + }, + { + "epoch": 1.7059511624506507, + "grad_norm": 1.6396391570153137, + "learning_rate": 1.1619138987200562e-06, + "loss": 0.5432788133621216, + "step": 5834 + }, + { + "epoch": 1.706243602865916, + "grad_norm": 1.699260651340017, + "learning_rate": 1.1596530206534606e-06, + "loss": 0.5408512949943542, + "step": 5835 + }, + { + "epoch": 1.7065360432811816, + "grad_norm": 1.5364052920051108, + "learning_rate": 1.1573942089642198e-06, + "loss": 0.5149247646331787, + "step": 5836 + }, + { + "epoch": 1.7068284836964467, + "grad_norm": 1.6490213140214325, + "learning_rate": 1.1551374641803193e-06, + "loss": 0.36905592679977417, + "step": 5837 + }, + { + "epoch": 1.7071209241117122, + "grad_norm": 1.7960598101415164, + "learning_rate": 1.152882786829259e-06, + "loss": 0.5370720624923706, + "step": 5838 + }, + { + "epoch": 1.7074133645269778, + "grad_norm": 1.5874644037104577, + "learning_rate": 1.1506301774380578e-06, + "loss": 0.4535629153251648, + "step": 5839 + }, + { + "epoch": 1.7077058049422429, + "grad_norm": 1.79916689116012, + "learning_rate": 1.1483796365332455e-06, + "loss": 0.5456075668334961, + "step": 5840 + }, + { + "epoch": 1.7079982453575084, + "grad_norm": 
1.4286640626946725, + "learning_rate": 1.1461311646408756e-06, + "loss": 0.5884554386138916, + "step": 5841 + }, + { + "epoch": 1.7082906857727738, + "grad_norm": 1.6397329737807809, + "learning_rate": 1.1438847622865125e-06, + "loss": 0.605168879032135, + "step": 5842 + }, + { + "epoch": 1.708583126188039, + "grad_norm": 1.5178839829112376, + "learning_rate": 1.14164042999524e-06, + "loss": 0.43739163875579834, + "step": 5843 + }, + { + "epoch": 1.7088755666033046, + "grad_norm": 1.46949260133067, + "learning_rate": 1.1393981682916578e-06, + "loss": 0.4508574306964874, + "step": 5844 + }, + { + "epoch": 1.70916800701857, + "grad_norm": 1.822138537734332, + "learning_rate": 1.1371579776998798e-06, + "loss": 0.5918034315109253, + "step": 5845 + }, + { + "epoch": 1.7094604474338353, + "grad_norm": 2.0746386130567873, + "learning_rate": 1.1349198587435373e-06, + "loss": 0.5668582320213318, + "step": 5846 + }, + { + "epoch": 1.7097528878491008, + "grad_norm": 1.9282537614980426, + "learning_rate": 1.1326838119457784e-06, + "loss": 0.6374846696853638, + "step": 5847 + }, + { + "epoch": 1.7100453282643662, + "grad_norm": 1.8903891011788552, + "learning_rate": 1.130449837829264e-06, + "loss": 0.5074985027313232, + "step": 5848 + }, + { + "epoch": 1.7103377686796315, + "grad_norm": 1.5190057242638555, + "learning_rate": 1.1282179369161717e-06, + "loss": 0.5012484788894653, + "step": 5849 + }, + { + "epoch": 1.710630209094897, + "grad_norm": 1.632090745734556, + "learning_rate": 1.1259881097281977e-06, + "loss": 0.4417869746685028, + "step": 5850 + }, + { + "epoch": 1.7109226495101622, + "grad_norm": 1.8294483106085377, + "learning_rate": 1.1237603567865452e-06, + "loss": 0.6032637357711792, + "step": 5851 + }, + { + "epoch": 1.7112150899254277, + "grad_norm": 1.688609377749929, + "learning_rate": 1.121534678611942e-06, + "loss": 0.5790234804153442, + "step": 5852 + }, + { + "epoch": 1.711507530340693, + "grad_norm": 1.591889646615377, + "learning_rate": 
1.1193110757246251e-06, + "loss": 0.5436397194862366, + "step": 5853 + }, + { + "epoch": 1.7117999707559584, + "grad_norm": 1.7117652881589365, + "learning_rate": 1.11708954864435e-06, + "loss": 0.5088083744049072, + "step": 5854 + }, + { + "epoch": 1.712092411171224, + "grad_norm": 1.889980799223528, + "learning_rate": 1.1148700978903826e-06, + "loss": 0.5907719135284424, + "step": 5855 + }, + { + "epoch": 1.7123848515864892, + "grad_norm": 1.567722853126729, + "learning_rate": 1.1126527239815078e-06, + "loss": 0.4744384288787842, + "step": 5856 + }, + { + "epoch": 1.7126772920017546, + "grad_norm": 1.9312865174889629, + "learning_rate": 1.110437427436023e-06, + "loss": 0.6644346714019775, + "step": 5857 + }, + { + "epoch": 1.71296973241702, + "grad_norm": 1.6765623554239069, + "learning_rate": 1.10822420877174e-06, + "loss": 0.4926042854785919, + "step": 5858 + }, + { + "epoch": 1.7132621728322854, + "grad_norm": 1.6722485452227753, + "learning_rate": 1.1060130685059845e-06, + "loss": 0.47684335708618164, + "step": 5859 + }, + { + "epoch": 1.7135546132475508, + "grad_norm": 1.8519560247307543, + "learning_rate": 1.1038040071555988e-06, + "loss": 0.5574014186859131, + "step": 5860 + }, + { + "epoch": 1.7138470536628163, + "grad_norm": 1.737717748392033, + "learning_rate": 1.101597025236939e-06, + "loss": 0.6276485323905945, + "step": 5861 + }, + { + "epoch": 1.7141394940780816, + "grad_norm": 1.7853097232505406, + "learning_rate": 1.099392123265869e-06, + "loss": 0.558611273765564, + "step": 5862 + }, + { + "epoch": 1.714431934493347, + "grad_norm": 1.8318989515664625, + "learning_rate": 1.097189301757773e-06, + "loss": 0.5561566948890686, + "step": 5863 + }, + { + "epoch": 1.7147243749086125, + "grad_norm": 1.7772127580066208, + "learning_rate": 1.094988561227548e-06, + "loss": 0.5360273122787476, + "step": 5864 + }, + { + "epoch": 1.7150168153238776, + "grad_norm": 1.9869672499266697, + "learning_rate": 1.0927899021896038e-06, + "loss": 0.5572026968002319, + 
"step": 5865 + }, + { + "epoch": 1.7153092557391432, + "grad_norm": 1.696631763346783, + "learning_rate": 1.0905933251578626e-06, + "loss": 0.4593105912208557, + "step": 5866 + }, + { + "epoch": 1.7156016961544085, + "grad_norm": 1.7954251083275348, + "learning_rate": 1.0883988306457627e-06, + "loss": 0.5017558336257935, + "step": 5867 + }, + { + "epoch": 1.7158941365696738, + "grad_norm": 1.6294086582619267, + "learning_rate": 1.0862064191662524e-06, + "loss": 0.4982030391693115, + "step": 5868 + }, + { + "epoch": 1.7161865769849394, + "grad_norm": 1.5832146918310879, + "learning_rate": 1.0840160912317943e-06, + "loss": 0.5563114881515503, + "step": 5869 + }, + { + "epoch": 1.7164790174002047, + "grad_norm": 1.6522408781609719, + "learning_rate": 1.0818278473543652e-06, + "loss": 0.4817348122596741, + "step": 5870 + }, + { + "epoch": 1.71677145781547, + "grad_norm": 1.6923338540004815, + "learning_rate": 1.079641688045453e-06, + "loss": 0.47907108068466187, + "step": 5871 + }, + { + "epoch": 1.7170638982307356, + "grad_norm": 1.985278987997586, + "learning_rate": 1.0774576138160596e-06, + "loss": 0.6158252954483032, + "step": 5872 + }, + { + "epoch": 1.717356338646001, + "grad_norm": 1.712800633970283, + "learning_rate": 1.0752756251767015e-06, + "loss": 0.5336505174636841, + "step": 5873 + }, + { + "epoch": 1.7176487790612662, + "grad_norm": 1.6889119428738892, + "learning_rate": 1.0730957226374006e-06, + "loss": 0.5806115865707397, + "step": 5874 + }, + { + "epoch": 1.7179412194765318, + "grad_norm": 1.7163109676688793, + "learning_rate": 1.070917906707698e-06, + "loss": 0.3701411485671997, + "step": 5875 + }, + { + "epoch": 1.7182336598917969, + "grad_norm": 1.5519162070562529, + "learning_rate": 1.0687421778966445e-06, + "loss": 0.5779517292976379, + "step": 5876 + }, + { + "epoch": 1.7185261003070624, + "grad_norm": 1.5444011974555767, + "learning_rate": 1.0665685367128041e-06, + "loss": 0.43965232372283936, + "step": 5877 + }, + { + "epoch": 
1.718818540722328, + "grad_norm": 1.7154722678485648, + "learning_rate": 1.064396983664253e-06, + "loss": 0.4768058657646179, + "step": 5878 + }, + { + "epoch": 1.719110981137593, + "grad_norm": 1.6286437020829267, + "learning_rate": 1.0622275192585773e-06, + "loss": 0.5331600904464722, + "step": 5879 + }, + { + "epoch": 1.7194034215528586, + "grad_norm": 1.6603687606186237, + "learning_rate": 1.0600601440028758e-06, + "loss": 0.5495625734329224, + "step": 5880 + }, + { + "epoch": 1.719695861968124, + "grad_norm": 1.6915455937474744, + "learning_rate": 1.0578948584037608e-06, + "loss": 0.4244312345981598, + "step": 5881 + }, + { + "epoch": 1.7199883023833893, + "grad_norm": 1.7562786480710206, + "learning_rate": 1.0557316629673531e-06, + "loss": 0.4618447721004486, + "step": 5882 + }, + { + "epoch": 1.7202807427986548, + "grad_norm": 1.3835850144546908, + "learning_rate": 1.0535705581992873e-06, + "loss": 0.4226785898208618, + "step": 5883 + }, + { + "epoch": 1.7205731832139202, + "grad_norm": 1.8373576265806915, + "learning_rate": 1.0514115446047101e-06, + "loss": 0.5813404321670532, + "step": 5884 + }, + { + "epoch": 1.7208656236291855, + "grad_norm": 1.774672318962678, + "learning_rate": 1.0492546226882738e-06, + "loss": 0.6700260639190674, + "step": 5885 + }, + { + "epoch": 1.721158064044451, + "grad_norm": 1.8100136828076652, + "learning_rate": 1.0470997929541494e-06, + "loss": 0.6024131178855896, + "step": 5886 + }, + { + "epoch": 1.7214505044597164, + "grad_norm": 1.8033126749427817, + "learning_rate": 1.0449470559060125e-06, + "loss": 0.6015123724937439, + "step": 5887 + }, + { + "epoch": 1.7217429448749817, + "grad_norm": 1.714487906410119, + "learning_rate": 1.0427964120470534e-06, + "loss": 0.6631267070770264, + "step": 5888 + }, + { + "epoch": 1.7220353852902472, + "grad_norm": 1.7445362923992234, + "learning_rate": 1.0406478618799731e-06, + "loss": 0.5267488956451416, + "step": 5889 + }, + { + "epoch": 1.7223278257055123, + "grad_norm": 
1.6683876570881706, + "learning_rate": 1.038501405906982e-06, + "loss": 0.5190263986587524, + "step": 5890 + }, + { + "epoch": 1.722620266120778, + "grad_norm": 1.6678272928853268, + "learning_rate": 1.0363570446297999e-06, + "loss": 0.5253189206123352, + "step": 5891 + }, + { + "epoch": 1.7229127065360432, + "grad_norm": 1.6306770585402846, + "learning_rate": 1.0342147785496581e-06, + "loss": 0.5271278619766235, + "step": 5892 + }, + { + "epoch": 1.7232051469513086, + "grad_norm": 1.7373442044536598, + "learning_rate": 1.0320746081672994e-06, + "loss": 0.5284109711647034, + "step": 5893 + }, + { + "epoch": 1.723497587366574, + "grad_norm": 1.764049872395232, + "learning_rate": 1.0299365339829747e-06, + "loss": 0.6119050979614258, + "step": 5894 + }, + { + "epoch": 1.7237900277818394, + "grad_norm": 1.583925980020329, + "learning_rate": 1.0278005564964488e-06, + "loss": 0.42297711968421936, + "step": 5895 + }, + { + "epoch": 1.7240824681971048, + "grad_norm": 1.7105013452989373, + "learning_rate": 1.02566667620699e-06, + "loss": 0.5923792123794556, + "step": 5896 + }, + { + "epoch": 1.7243749086123703, + "grad_norm": 1.6831938137571334, + "learning_rate": 1.023534893613377e-06, + "loss": 0.4999189078807831, + "step": 5897 + }, + { + "epoch": 1.7246673490276356, + "grad_norm": 1.6907699986400666, + "learning_rate": 1.0214052092139082e-06, + "loss": 0.49083560705184937, + "step": 5898 + }, + { + "epoch": 1.724959789442901, + "grad_norm": 3.9391609144586437, + "learning_rate": 1.0192776235063795e-06, + "loss": 0.6001632213592529, + "step": 5899 + }, + { + "epoch": 1.7252522298581665, + "grad_norm": 1.473933103211581, + "learning_rate": 1.0171521369881044e-06, + "loss": 0.4897228479385376, + "step": 5900 + }, + { + "epoch": 1.7255446702734318, + "grad_norm": 1.6762179044603425, + "learning_rate": 1.0150287501558997e-06, + "loss": 0.44784292578697205, + "step": 5901 + }, + { + "epoch": 1.7258371106886972, + "grad_norm": 1.542625612657722, + "learning_rate": 
1.0129074635060943e-06, + "loss": 0.46105432510375977, + "step": 5902 + }, + { + "epoch": 1.7261295511039627, + "grad_norm": 1.9028079699425045, + "learning_rate": 1.0107882775345278e-06, + "loss": 0.5805546045303345, + "step": 5903 + }, + { + "epoch": 1.7264219915192278, + "grad_norm": 1.719859761694945, + "learning_rate": 1.0086711927365488e-06, + "loss": 0.560761570930481, + "step": 5904 + }, + { + "epoch": 1.7267144319344934, + "grad_norm": 1.475103420661766, + "learning_rate": 1.006556209607007e-06, + "loss": 0.533979058265686, + "step": 5905 + }, + { + "epoch": 1.7270068723497587, + "grad_norm": 1.7039894712110264, + "learning_rate": 1.004443328640271e-06, + "loss": 0.5742807984352112, + "step": 5906 + }, + { + "epoch": 1.727299312765024, + "grad_norm": 1.9394101910903232, + "learning_rate": 1.0023325503302129e-06, + "loss": 0.5617523789405823, + "step": 5907 + }, + { + "epoch": 1.7275917531802896, + "grad_norm": 1.620137966655423, + "learning_rate": 1.0002238751702143e-06, + "loss": 0.45596855878829956, + "step": 5908 + }, + { + "epoch": 1.727884193595555, + "grad_norm": 1.523715810181856, + "learning_rate": 9.981173036531655e-07, + "loss": 0.4917908012866974, + "step": 5909 + }, + { + "epoch": 1.7281766340108202, + "grad_norm": 1.8089323806924067, + "learning_rate": 9.960128362714637e-07, + "loss": 0.6204911470413208, + "step": 5910 + }, + { + "epoch": 1.7284690744260858, + "grad_norm": 1.615074466715287, + "learning_rate": 9.93910473517018e-07, + "loss": 0.47288352251052856, + "step": 5911 + }, + { + "epoch": 1.728761514841351, + "grad_norm": 1.9414111913595387, + "learning_rate": 9.918102158812404e-07, + "loss": 0.48668670654296875, + "step": 5912 + }, + { + "epoch": 1.7290539552566164, + "grad_norm": 1.723740686191889, + "learning_rate": 9.89712063855054e-07, + "loss": 0.43311381340026855, + "step": 5913 + }, + { + "epoch": 1.729346395671882, + "grad_norm": 1.748275288399291, + "learning_rate": 9.876160179288886e-07, + "loss": 0.5066087245941162, + 
"step": 5914 + }, + { + "epoch": 1.729638836087147, + "grad_norm": 1.6099318260759374, + "learning_rate": 9.855220785926856e-07, + "loss": 0.6022528409957886, + "step": 5915 + }, + { + "epoch": 1.7299312765024126, + "grad_norm": 1.6809686879748886, + "learning_rate": 9.834302463358858e-07, + "loss": 0.5288707613945007, + "step": 5916 + }, + { + "epoch": 1.7302237169176782, + "grad_norm": 1.7087060764928856, + "learning_rate": 9.813405216474436e-07, + "loss": 0.6150302290916443, + "step": 5917 + }, + { + "epoch": 1.7305161573329433, + "grad_norm": 1.7234099983807605, + "learning_rate": 9.792529050158218e-07, + "loss": 0.5431156158447266, + "step": 5918 + }, + { + "epoch": 1.7308085977482088, + "grad_norm": 1.7871856102017598, + "learning_rate": 9.771673969289851e-07, + "loss": 0.6844080686569214, + "step": 5919 + }, + { + "epoch": 1.7311010381634742, + "grad_norm": 1.9483136158091563, + "learning_rate": 9.750839978744098e-07, + "loss": 0.4778372049331665, + "step": 5920 + }, + { + "epoch": 1.7313934785787395, + "grad_norm": 1.803034120095624, + "learning_rate": 9.73002708339077e-07, + "loss": 0.6913809776306152, + "step": 5921 + }, + { + "epoch": 1.731685918994005, + "grad_norm": 1.5934425203745812, + "learning_rate": 9.709235288094765e-07, + "loss": 0.6289864778518677, + "step": 5922 + }, + { + "epoch": 1.7319783594092704, + "grad_norm": 1.7803434049533318, + "learning_rate": 9.68846459771604e-07, + "loss": 0.4735794961452484, + "step": 5923 + }, + { + "epoch": 1.7322707998245357, + "grad_norm": 1.7329775832839742, + "learning_rate": 9.667715017109614e-07, + "loss": 0.53554767370224, + "step": 5924 + }, + { + "epoch": 1.7325632402398012, + "grad_norm": 1.9726609824515038, + "learning_rate": 9.64698655112557e-07, + "loss": 0.5118460655212402, + "step": 5925 + }, + { + "epoch": 1.7328556806550666, + "grad_norm": 1.819236864509276, + "learning_rate": 9.626279204609079e-07, + "loss": 0.5739814043045044, + "step": 5926 + }, + { + "epoch": 1.733148121070332, + 
"grad_norm": 1.6784156679062403, + "learning_rate": 9.605592982400325e-07, + "loss": 0.5716123580932617, + "step": 5927 + }, + { + "epoch": 1.7334405614855974, + "grad_norm": 1.7916971306174196, + "learning_rate": 9.584927889334605e-07, + "loss": 0.5091898441314697, + "step": 5928 + }, + { + "epoch": 1.7337330019008625, + "grad_norm": 1.6267503374739263, + "learning_rate": 9.564283930242258e-07, + "loss": 0.46946650743484497, + "step": 5929 + }, + { + "epoch": 1.734025442316128, + "grad_norm": 1.6625890698419732, + "learning_rate": 9.543661109948688e-07, + "loss": 0.6238217353820801, + "step": 5930 + }, + { + "epoch": 1.7343178827313934, + "grad_norm": 1.8870256552743607, + "learning_rate": 9.52305943327434e-07, + "loss": 0.7464175224304199, + "step": 5931 + }, + { + "epoch": 1.7346103231466588, + "grad_norm": 2.417177332317345, + "learning_rate": 9.502478905034751e-07, + "loss": 0.6064578294754028, + "step": 5932 + }, + { + "epoch": 1.7349027635619243, + "grad_norm": 1.5711166860907437, + "learning_rate": 9.481919530040484e-07, + "loss": 0.5703303813934326, + "step": 5933 + }, + { + "epoch": 1.7351952039771896, + "grad_norm": 1.773413757718004, + "learning_rate": 9.461381313097162e-07, + "loss": 0.5570278167724609, + "step": 5934 + }, + { + "epoch": 1.735487644392455, + "grad_norm": 1.795987369299435, + "learning_rate": 9.440864259005477e-07, + "loss": 0.54972243309021, + "step": 5935 + }, + { + "epoch": 1.7357800848077205, + "grad_norm": 1.6140356285907533, + "learning_rate": 9.420368372561161e-07, + "loss": 0.5670010447502136, + "step": 5936 + }, + { + "epoch": 1.7360725252229858, + "grad_norm": 1.642113144044588, + "learning_rate": 9.399893658555026e-07, + "loss": 0.5306927561759949, + "step": 5937 + }, + { + "epoch": 1.7363649656382512, + "grad_norm": 1.5565759572639428, + "learning_rate": 9.379440121772876e-07, + "loss": 0.5080308318138123, + "step": 5938 + }, + { + "epoch": 1.7366574060535167, + "grad_norm": 1.5576076668453387, + "learning_rate": 
9.359007766995609e-07, + "loss": 0.5444519519805908, + "step": 5939 + }, + { + "epoch": 1.736949846468782, + "grad_norm": 1.737287044912212, + "learning_rate": 9.338596598999172e-07, + "loss": 0.5353262424468994, + "step": 5940 + }, + { + "epoch": 1.7372422868840474, + "grad_norm": 1.5405646785157867, + "learning_rate": 9.318206622554549e-07, + "loss": 0.4766794443130493, + "step": 5941 + }, + { + "epoch": 1.737534727299313, + "grad_norm": 1.5818937282065444, + "learning_rate": 9.29783784242777e-07, + "loss": 0.4913482666015625, + "step": 5942 + }, + { + "epoch": 1.737827167714578, + "grad_norm": 1.5030657740252151, + "learning_rate": 9.277490263379918e-07, + "loss": 0.47637009620666504, + "step": 5943 + }, + { + "epoch": 1.7381196081298436, + "grad_norm": 1.8131560819786492, + "learning_rate": 9.25716389016712e-07, + "loss": 0.5122126936912537, + "step": 5944 + }, + { + "epoch": 1.738412048545109, + "grad_norm": 1.51801570238093, + "learning_rate": 9.236858727540543e-07, + "loss": 0.5263532400131226, + "step": 5945 + }, + { + "epoch": 1.7387044889603742, + "grad_norm": 1.607505719698849, + "learning_rate": 9.216574780246379e-07, + "loss": 0.5214182734489441, + "step": 5946 + }, + { + "epoch": 1.7389969293756398, + "grad_norm": 1.877073258708154, + "learning_rate": 9.196312053025891e-07, + "loss": 0.5955429077148438, + "step": 5947 + }, + { + "epoch": 1.739289369790905, + "grad_norm": 1.6543213511410424, + "learning_rate": 9.176070550615379e-07, + "loss": 0.4809807538986206, + "step": 5948 + }, + { + "epoch": 1.7395818102061704, + "grad_norm": 1.882804975326707, + "learning_rate": 9.155850277746148e-07, + "loss": 0.4769969582557678, + "step": 5949 + }, + { + "epoch": 1.739874250621436, + "grad_norm": 2.4545379886365954, + "learning_rate": 9.135651239144561e-07, + "loss": 0.48527538776397705, + "step": 5950 + }, + { + "epoch": 1.7401666910367013, + "grad_norm": 1.696389032166004, + "learning_rate": 9.115473439532041e-07, + "loss": 0.6703393459320068, + "step": 5951 
+ }, + { + "epoch": 1.7404591314519666, + "grad_norm": 1.807366721076005, + "learning_rate": 9.095316883625016e-07, + "loss": 0.5742951035499573, + "step": 5952 + }, + { + "epoch": 1.7407515718672322, + "grad_norm": 1.9552666950039521, + "learning_rate": 9.075181576134961e-07, + "loss": 0.6285614967346191, + "step": 5953 + }, + { + "epoch": 1.7410440122824973, + "grad_norm": 1.6961990538831806, + "learning_rate": 9.055067521768379e-07, + "loss": 0.5872488021850586, + "step": 5954 + }, + { + "epoch": 1.7413364526977628, + "grad_norm": 1.6900638369260592, + "learning_rate": 9.034974725226808e-07, + "loss": 0.5483776330947876, + "step": 5955 + }, + { + "epoch": 1.7416288931130284, + "grad_norm": 1.838843211951185, + "learning_rate": 9.014903191206825e-07, + "loss": 0.4913061261177063, + "step": 5956 + }, + { + "epoch": 1.7419213335282935, + "grad_norm": 1.6413412279440867, + "learning_rate": 8.994852924400022e-07, + "loss": 0.5431212186813354, + "step": 5957 + }, + { + "epoch": 1.742213773943559, + "grad_norm": 1.735940615294129, + "learning_rate": 8.974823929493015e-07, + "loss": 0.5391141176223755, + "step": 5958 + }, + { + "epoch": 1.7425062143588244, + "grad_norm": 1.455007956070738, + "learning_rate": 8.954816211167483e-07, + "loss": 0.48980265855789185, + "step": 5959 + }, + { + "epoch": 1.7427986547740897, + "grad_norm": 1.6465722416646151, + "learning_rate": 8.934829774100118e-07, + "loss": 0.6747336387634277, + "step": 5960 + }, + { + "epoch": 1.7430910951893552, + "grad_norm": 2.048914745001018, + "learning_rate": 8.914864622962582e-07, + "loss": 0.4911282956600189, + "step": 5961 + }, + { + "epoch": 1.7433835356046206, + "grad_norm": 1.6999465895023511, + "learning_rate": 8.894920762421644e-07, + "loss": 0.5863965153694153, + "step": 5962 + }, + { + "epoch": 1.7436759760198859, + "grad_norm": 1.6964011957158196, + "learning_rate": 8.87499819713904e-07, + "loss": 0.5413792729377747, + "step": 5963 + }, + { + "epoch": 1.7439684164351514, + "grad_norm": 
1.8650590121272839, + "learning_rate": 8.855096931771568e-07, + "loss": 0.5288723707199097, + "step": 5964 + }, + { + "epoch": 1.7442608568504168, + "grad_norm": 1.72339918808855, + "learning_rate": 8.835216970971006e-07, + "loss": 0.5129783749580383, + "step": 5965 + }, + { + "epoch": 1.744553297265682, + "grad_norm": 1.7489856693904517, + "learning_rate": 8.815358319384193e-07, + "loss": 0.5606918334960938, + "step": 5966 + }, + { + "epoch": 1.7448457376809476, + "grad_norm": 1.9036006380739827, + "learning_rate": 8.79552098165296e-07, + "loss": 0.6277288198471069, + "step": 5967 + }, + { + "epoch": 1.7451381780962127, + "grad_norm": 1.7432749923566282, + "learning_rate": 8.775704962414167e-07, + "loss": 0.5390176773071289, + "step": 5968 + }, + { + "epoch": 1.7454306185114783, + "grad_norm": 1.8645943677337757, + "learning_rate": 8.755910266299684e-07, + "loss": 0.680462121963501, + "step": 5969 + }, + { + "epoch": 1.7457230589267436, + "grad_norm": 1.678166381653785, + "learning_rate": 8.736136897936398e-07, + "loss": 0.5134397149085999, + "step": 5970 + }, + { + "epoch": 1.746015499342009, + "grad_norm": 1.796274905651791, + "learning_rate": 8.716384861946248e-07, + "loss": 0.6280460357666016, + "step": 5971 + }, + { + "epoch": 1.7463079397572745, + "grad_norm": 1.8396010080181593, + "learning_rate": 8.696654162946094e-07, + "loss": 0.5425370931625366, + "step": 5972 + }, + { + "epoch": 1.7466003801725398, + "grad_norm": 1.7657169836698012, + "learning_rate": 8.676944805547882e-07, + "loss": 0.5831055045127869, + "step": 5973 + }, + { + "epoch": 1.7468928205878052, + "grad_norm": 1.3865571188404813, + "learning_rate": 8.657256794358592e-07, + "loss": 0.4615570306777954, + "step": 5974 + }, + { + "epoch": 1.7471852610030707, + "grad_norm": 1.6631260131171646, + "learning_rate": 8.637590133980145e-07, + "loss": 0.5727440118789673, + "step": 5975 + }, + { + "epoch": 1.747477701418336, + "grad_norm": 1.6981377401436724, + "learning_rate": 8.617944829009517e-07, + 
"loss": 0.5652801990509033, + "step": 5976 + }, + { + "epoch": 1.7477701418336014, + "grad_norm": 1.6704888560345945, + "learning_rate": 8.59832088403868e-07, + "loss": 0.42455562949180603, + "step": 5977 + }, + { + "epoch": 1.748062582248867, + "grad_norm": 1.8565352683598422, + "learning_rate": 8.578718303654588e-07, + "loss": 0.526951789855957, + "step": 5978 + }, + { + "epoch": 1.7483550226641322, + "grad_norm": 1.5113931171346078, + "learning_rate": 8.559137092439252e-07, + "loss": 0.45547354221343994, + "step": 5979 + }, + { + "epoch": 1.7486474630793976, + "grad_norm": 1.843493314178274, + "learning_rate": 8.539577254969667e-07, + "loss": 0.5470790863037109, + "step": 5980 + }, + { + "epoch": 1.748939903494663, + "grad_norm": 1.6766357010415907, + "learning_rate": 8.520038795817798e-07, + "loss": 0.5608032941818237, + "step": 5981 + }, + { + "epoch": 1.7492323439099282, + "grad_norm": 1.7251948475523264, + "learning_rate": 8.500521719550648e-07, + "loss": 0.5243809223175049, + "step": 5982 + }, + { + "epoch": 1.7495247843251938, + "grad_norm": 1.7843504248813373, + "learning_rate": 8.481026030730222e-07, + "loss": 0.5040958523750305, + "step": 5983 + }, + { + "epoch": 1.749817224740459, + "grad_norm": 1.9016982717852353, + "learning_rate": 8.461551733913509e-07, + "loss": 0.5026291012763977, + "step": 5984 + }, + { + "epoch": 1.7501096651557244, + "grad_norm": 1.578287817505696, + "learning_rate": 8.442098833652523e-07, + "loss": 0.5273059010505676, + "step": 5985 + }, + { + "epoch": 1.75040210557099, + "grad_norm": 1.7872787423587175, + "learning_rate": 8.42266733449425e-07, + "loss": 0.5811910629272461, + "step": 5986 + }, + { + "epoch": 1.7506945459862553, + "grad_norm": 1.9383664928260165, + "learning_rate": 8.4032572409807e-07, + "loss": 0.6078274250030518, + "step": 5987 + }, + { + "epoch": 1.7509869864015206, + "grad_norm": 1.8956639494069205, + "learning_rate": 8.383868557648833e-07, + "loss": 0.5214031338691711, + "step": 5988 + }, + { + "epoch": 
1.7512794268167862, + "grad_norm": 1.8686899180431094, + "learning_rate": 8.364501289030669e-07, + "loss": 0.5464918613433838, + "step": 5989 + }, + { + "epoch": 1.7515718672320515, + "grad_norm": 1.9577387999849984, + "learning_rate": 8.345155439653175e-07, + "loss": 0.48296278715133667, + "step": 5990 + }, + { + "epoch": 1.7518643076473168, + "grad_norm": 1.6097156536359971, + "learning_rate": 8.325831014038344e-07, + "loss": 0.5441919565200806, + "step": 5991 + }, + { + "epoch": 1.7521567480625824, + "grad_norm": 1.524249865256617, + "learning_rate": 8.306528016703097e-07, + "loss": 0.4623905420303345, + "step": 5992 + }, + { + "epoch": 1.7524491884778475, + "grad_norm": 1.7850630013083288, + "learning_rate": 8.287246452159437e-07, + "loss": 0.5671495795249939, + "step": 5993 + }, + { + "epoch": 1.752741628893113, + "grad_norm": 1.7371249179959158, + "learning_rate": 8.267986324914278e-07, + "loss": 0.5400685667991638, + "step": 5994 + }, + { + "epoch": 1.7530340693083786, + "grad_norm": 1.7239850907759944, + "learning_rate": 8.24874763946959e-07, + "loss": 0.4362148642539978, + "step": 5995 + }, + { + "epoch": 1.7533265097236437, + "grad_norm": 1.7548276097653166, + "learning_rate": 8.229530400322283e-07, + "loss": 0.554877519607544, + "step": 5996 + }, + { + "epoch": 1.7536189501389092, + "grad_norm": 1.6421753593232726, + "learning_rate": 8.210334611964266e-07, + "loss": 0.5239896774291992, + "step": 5997 + }, + { + "epoch": 1.7539113905541746, + "grad_norm": 1.9442998633220852, + "learning_rate": 8.191160278882438e-07, + "loss": 0.4729669988155365, + "step": 5998 + }, + { + "epoch": 1.7542038309694399, + "grad_norm": 1.5789777380903094, + "learning_rate": 8.172007405558702e-07, + "loss": 0.5449322462081909, + "step": 5999 + }, + { + "epoch": 1.7544962713847054, + "grad_norm": 1.6329056939447448, + "learning_rate": 8.1528759964699e-07, + "loss": 0.5422194600105286, + "step": 6000 + }, + { + "epoch": 1.7547887117999708, + "grad_norm": 1.4174806038648198, + 
"learning_rate": 8.1337660560879e-07, + "loss": 0.3854302763938904, + "step": 6001 + }, + { + "epoch": 1.755081152215236, + "grad_norm": 1.7209418471597917, + "learning_rate": 8.114677588879549e-07, + "loss": 0.4678449034690857, + "step": 6002 + }, + { + "epoch": 1.7553735926305016, + "grad_norm": 1.5464176931987315, + "learning_rate": 8.095610599306614e-07, + "loss": 0.5462471842765808, + "step": 6003 + }, + { + "epoch": 1.755666033045767, + "grad_norm": 2.204727836372247, + "learning_rate": 8.076565091825916e-07, + "loss": 0.6314511299133301, + "step": 6004 + }, + { + "epoch": 1.7559584734610323, + "grad_norm": 1.5748030564701405, + "learning_rate": 8.057541070889229e-07, + "loss": 0.6373077630996704, + "step": 6005 + }, + { + "epoch": 1.7562509138762978, + "grad_norm": 1.3531361411828478, + "learning_rate": 8.038538540943297e-07, + "loss": 0.5279273986816406, + "step": 6006 + }, + { + "epoch": 1.756543354291563, + "grad_norm": 1.8888532901747122, + "learning_rate": 8.019557506429843e-07, + "loss": 0.5645443201065063, + "step": 6007 + }, + { + "epoch": 1.7568357947068285, + "grad_norm": 1.776791412383221, + "learning_rate": 8.000597971785573e-07, + "loss": 0.552385151386261, + "step": 6008 + }, + { + "epoch": 1.757128235122094, + "grad_norm": 1.8476397874412334, + "learning_rate": 7.981659941442154e-07, + "loss": 0.5790541172027588, + "step": 6009 + }, + { + "epoch": 1.7574206755373591, + "grad_norm": 1.8909444917759248, + "learning_rate": 7.962743419826247e-07, + "loss": 0.550809383392334, + "step": 6010 + }, + { + "epoch": 1.7577131159526247, + "grad_norm": 1.845124979293289, + "learning_rate": 7.943848411359479e-07, + "loss": 0.4659814238548279, + "step": 6011 + }, + { + "epoch": 1.75800555636789, + "grad_norm": 1.8856668900422473, + "learning_rate": 7.924974920458428e-07, + "loss": 0.5099040269851685, + "step": 6012 + }, + { + "epoch": 1.7582979967831553, + "grad_norm": 1.8389637809818669, + "learning_rate": 7.906122951534678e-07, + "loss": 
0.4819038510322571, + "step": 6013 + }, + { + "epoch": 1.758590437198421, + "grad_norm": 1.6198962208116707, + "learning_rate": 7.887292508994737e-07, + "loss": 0.4033840298652649, + "step": 6014 + }, + { + "epoch": 1.7588828776136862, + "grad_norm": 2.0694906070649397, + "learning_rate": 7.868483597240117e-07, + "loss": 0.6316418647766113, + "step": 6015 + }, + { + "epoch": 1.7591753180289516, + "grad_norm": 1.574018695954754, + "learning_rate": 7.84969622066728e-07, + "loss": 0.5141040682792664, + "step": 6016 + }, + { + "epoch": 1.759467758444217, + "grad_norm": 1.625714616568423, + "learning_rate": 7.830930383667668e-07, + "loss": 0.44808077812194824, + "step": 6017 + }, + { + "epoch": 1.7597601988594824, + "grad_norm": 1.6060850378753004, + "learning_rate": 7.812186090627694e-07, + "loss": 0.5661089420318604, + "step": 6018 + }, + { + "epoch": 1.7600526392747478, + "grad_norm": 1.553528332849771, + "learning_rate": 7.793463345928697e-07, + "loss": 0.487590491771698, + "step": 6019 + }, + { + "epoch": 1.7603450796900133, + "grad_norm": 1.9699234516767667, + "learning_rate": 7.774762153947024e-07, + "loss": 0.5775022506713867, + "step": 6020 + }, + { + "epoch": 1.7606375201052784, + "grad_norm": 1.7091892859281639, + "learning_rate": 7.756082519053965e-07, + "loss": 0.5714563131332397, + "step": 6021 + }, + { + "epoch": 1.760929960520544, + "grad_norm": 1.8764497127243964, + "learning_rate": 7.73742444561576e-07, + "loss": 0.6063593626022339, + "step": 6022 + }, + { + "epoch": 1.7612224009358093, + "grad_norm": 1.7254357706950765, + "learning_rate": 7.718787937993622e-07, + "loss": 0.48034632205963135, + "step": 6023 + }, + { + "epoch": 1.7615148413510746, + "grad_norm": 1.4591503666123415, + "learning_rate": 7.700173000543742e-07, + "loss": 0.6003588438034058, + "step": 6024 + }, + { + "epoch": 1.7618072817663402, + "grad_norm": 1.6378303717233282, + "learning_rate": 7.681579637617209e-07, + "loss": 0.42842140793800354, + "step": 6025 + }, + { + "epoch": 
1.7620997221816055, + "grad_norm": 1.502196803812996, + "learning_rate": 7.663007853560145e-07, + "loss": 0.5235859155654907, + "step": 6026 + }, + { + "epoch": 1.7623921625968708, + "grad_norm": 1.6904284507093605, + "learning_rate": 7.644457652713566e-07, + "loss": 0.47140365839004517, + "step": 6027 + }, + { + "epoch": 1.7626846030121364, + "grad_norm": 1.6014343948293654, + "learning_rate": 7.625929039413483e-07, + "loss": 0.53680419921875, + "step": 6028 + }, + { + "epoch": 1.7629770434274017, + "grad_norm": 1.6173156649426792, + "learning_rate": 7.60742201799084e-07, + "loss": 0.5280998349189758, + "step": 6029 + }, + { + "epoch": 1.763269483842667, + "grad_norm": 1.58299525140219, + "learning_rate": 7.588936592771545e-07, + "loss": 0.49653276801109314, + "step": 6030 + }, + { + "epoch": 1.7635619242579326, + "grad_norm": 1.6031325431493386, + "learning_rate": 7.570472768076464e-07, + "loss": 0.511070966720581, + "step": 6031 + }, + { + "epoch": 1.7638543646731977, + "grad_norm": 1.9173967106238505, + "learning_rate": 7.552030548221379e-07, + "loss": 0.6601030826568604, + "step": 6032 + }, + { + "epoch": 1.7641468050884632, + "grad_norm": 1.7630822043129881, + "learning_rate": 7.533609937517072e-07, + "loss": 0.6216480731964111, + "step": 6033 + }, + { + "epoch": 1.7644392455037288, + "grad_norm": 2.1444282721386765, + "learning_rate": 7.515210940269224e-07, + "loss": 0.7237618565559387, + "step": 6034 + }, + { + "epoch": 1.7647316859189939, + "grad_norm": 1.9895778147794236, + "learning_rate": 7.496833560778527e-07, + "loss": 0.4979498088359833, + "step": 6035 + }, + { + "epoch": 1.7650241263342594, + "grad_norm": 1.5816967377469986, + "learning_rate": 7.478477803340533e-07, + "loss": 0.49408191442489624, + "step": 6036 + }, + { + "epoch": 1.7653165667495248, + "grad_norm": 1.8439349693338256, + "learning_rate": 7.460143672245823e-07, + "loss": 0.524259626865387, + "step": 6037 + }, + { + "epoch": 1.76560900716479, + "grad_norm": 1.8574151410796558, + 
"learning_rate": 7.441831171779878e-07, + "loss": 0.625320315361023, + "step": 6038 + }, + { + "epoch": 1.7659014475800556, + "grad_norm": 1.7217980866482836, + "learning_rate": 7.42354030622312e-07, + "loss": 0.5971028804779053, + "step": 6039 + }, + { + "epoch": 1.766193887995321, + "grad_norm": 1.5069481360511938, + "learning_rate": 7.405271079850951e-07, + "loss": 0.48935002088546753, + "step": 6040 + }, + { + "epoch": 1.7664863284105863, + "grad_norm": 1.7616973297205794, + "learning_rate": 7.387023496933687e-07, + "loss": 0.46346336603164673, + "step": 6041 + }, + { + "epoch": 1.7667787688258518, + "grad_norm": 1.5425066644175864, + "learning_rate": 7.368797561736574e-07, + "loss": 0.5135314464569092, + "step": 6042 + }, + { + "epoch": 1.7670712092411172, + "grad_norm": 1.7938719309176694, + "learning_rate": 7.350593278519824e-07, + "loss": 0.45815128087997437, + "step": 6043 + }, + { + "epoch": 1.7673636496563825, + "grad_norm": 1.8253657375894647, + "learning_rate": 7.332410651538591e-07, + "loss": 0.5663015246391296, + "step": 6044 + }, + { + "epoch": 1.767656090071648, + "grad_norm": 1.6737365706300193, + "learning_rate": 7.314249685042929e-07, + "loss": 0.5323490500450134, + "step": 6045 + }, + { + "epoch": 1.7679485304869131, + "grad_norm": 1.8380863614801877, + "learning_rate": 7.296110383277866e-07, + "loss": 0.5489768981933594, + "step": 6046 + }, + { + "epoch": 1.7682409709021787, + "grad_norm": 1.867533811207324, + "learning_rate": 7.277992750483364e-07, + "loss": 0.5951086282730103, + "step": 6047 + }, + { + "epoch": 1.7685334113174442, + "grad_norm": 1.6688539257267474, + "learning_rate": 7.259896790894271e-07, + "loss": 0.48228102922439575, + "step": 6048 + }, + { + "epoch": 1.7688258517327093, + "grad_norm": 1.7579049817410466, + "learning_rate": 7.241822508740448e-07, + "loss": 0.6318891644477844, + "step": 6049 + }, + { + "epoch": 1.769118292147975, + "grad_norm": 1.967894881109258, + "learning_rate": 7.223769908246636e-07, + "loss": 
0.4966656267642975, + "step": 6050 + }, + { + "epoch": 1.7694107325632402, + "grad_norm": 1.7465352091582635, + "learning_rate": 7.205738993632516e-07, + "loss": 0.5645290613174438, + "step": 6051 + }, + { + "epoch": 1.7697031729785055, + "grad_norm": 1.8324400656837103, + "learning_rate": 7.187729769112717e-07, + "loss": 0.560075044631958, + "step": 6052 + }, + { + "epoch": 1.769995613393771, + "grad_norm": 1.658346896913261, + "learning_rate": 7.169742238896771e-07, + "loss": 0.6375163793563843, + "step": 6053 + }, + { + "epoch": 1.7702880538090364, + "grad_norm": 1.9991114191844357, + "learning_rate": 7.15177640718916e-07, + "loss": 0.5620392560958862, + "step": 6054 + }, + { + "epoch": 1.7705804942243017, + "grad_norm": 1.7885795694198106, + "learning_rate": 7.133832278189301e-07, + "loss": 0.5382653474807739, + "step": 6055 + }, + { + "epoch": 1.7708729346395673, + "grad_norm": 1.954649524899457, + "learning_rate": 7.115909856091497e-07, + "loss": 0.502597451210022, + "step": 6056 + }, + { + "epoch": 1.7711653750548326, + "grad_norm": 1.782753780230982, + "learning_rate": 7.098009145085016e-07, + "loss": 0.5876599550247192, + "step": 6057 + }, + { + "epoch": 1.771457815470098, + "grad_norm": 1.7624219528533958, + "learning_rate": 7.080130149354048e-07, + "loss": 0.5164280533790588, + "step": 6058 + }, + { + "epoch": 1.7717502558853635, + "grad_norm": 1.7004652166347358, + "learning_rate": 7.062272873077691e-07, + "loss": 0.5192137360572815, + "step": 6059 + }, + { + "epoch": 1.7720426963006286, + "grad_norm": 1.6924472823946135, + "learning_rate": 7.044437320429987e-07, + "loss": 0.5298370122909546, + "step": 6060 + }, + { + "epoch": 1.7723351367158942, + "grad_norm": 1.671988873461514, + "learning_rate": 7.026623495579876e-07, + "loss": 0.5099462270736694, + "step": 6061 + }, + { + "epoch": 1.7726275771311595, + "grad_norm": 1.8314661737989666, + "learning_rate": 7.00883140269123e-07, + "loss": 0.6061269640922546, + "step": 6062 + }, + { + "epoch": 
1.7729200175464248, + "grad_norm": 1.9189229950794147, + "learning_rate": 6.991061045922854e-07, + "loss": 0.683641254901886, + "step": 6063 + }, + { + "epoch": 1.7732124579616904, + "grad_norm": 2.089118565246571, + "learning_rate": 6.973312429428458e-07, + "loss": 0.6294830441474915, + "step": 6064 + }, + { + "epoch": 1.7735048983769557, + "grad_norm": 1.6252098698149335, + "learning_rate": 6.95558555735667e-07, + "loss": 0.40493613481521606, + "step": 6065 + }, + { + "epoch": 1.773797338792221, + "grad_norm": 1.7745752298261492, + "learning_rate": 6.93788043385103e-07, + "loss": 0.501255452632904, + "step": 6066 + }, + { + "epoch": 1.7740897792074866, + "grad_norm": 1.7883463098117711, + "learning_rate": 6.920197063050038e-07, + "loss": 0.6004104614257812, + "step": 6067 + }, + { + "epoch": 1.7743822196227519, + "grad_norm": 1.5939834110995985, + "learning_rate": 6.902535449087023e-07, + "loss": 0.48683321475982666, + "step": 6068 + }, + { + "epoch": 1.7746746600380172, + "grad_norm": 1.7279814402431617, + "learning_rate": 6.884895596090302e-07, + "loss": 0.6048111319541931, + "step": 6069 + }, + { + "epoch": 1.7749671004532828, + "grad_norm": 1.8759604993064984, + "learning_rate": 6.867277508183101e-07, + "loss": 0.5532732009887695, + "step": 6070 + }, + { + "epoch": 1.7752595408685479, + "grad_norm": 2.066556008321799, + "learning_rate": 6.849681189483515e-07, + "loss": 0.544552206993103, + "step": 6071 + }, + { + "epoch": 1.7755519812838134, + "grad_norm": 1.9161876673278242, + "learning_rate": 6.832106644104586e-07, + "loss": 0.5114158391952515, + "step": 6072 + }, + { + "epoch": 1.775844421699079, + "grad_norm": 1.6996182780694216, + "learning_rate": 6.814553876154273e-07, + "loss": 0.45777493715286255, + "step": 6073 + }, + { + "epoch": 1.776136862114344, + "grad_norm": 1.6209289540377791, + "learning_rate": 6.797022889735405e-07, + "loss": 0.5449005365371704, + "step": 6074 + }, + { + "epoch": 1.7764293025296096, + "grad_norm": 1.8749070330960134, + 
"learning_rate": 6.779513688945749e-07, + "loss": 0.6308485865592957, + "step": 6075 + }, + { + "epoch": 1.776721742944875, + "grad_norm": 1.951122544814841, + "learning_rate": 6.762026277877986e-07, + "loss": 0.5904842019081116, + "step": 6076 + }, + { + "epoch": 1.7770141833601403, + "grad_norm": 1.8358819377761475, + "learning_rate": 6.744560660619681e-07, + "loss": 0.6681115627288818, + "step": 6077 + }, + { + "epoch": 1.7773066237754058, + "grad_norm": 1.7337774705028348, + "learning_rate": 6.727116841253334e-07, + "loss": 0.5084429979324341, + "step": 6078 + }, + { + "epoch": 1.7775990641906712, + "grad_norm": 1.706737040250044, + "learning_rate": 6.709694823856305e-07, + "loss": 0.5705291032791138, + "step": 6079 + }, + { + "epoch": 1.7778915046059365, + "grad_norm": 1.541912819246542, + "learning_rate": 6.692294612500894e-07, + "loss": 0.6481744050979614, + "step": 6080 + }, + { + "epoch": 1.778183945021202, + "grad_norm": 1.5164317234096627, + "learning_rate": 6.67491621125429e-07, + "loss": 0.5236573815345764, + "step": 6081 + }, + { + "epoch": 1.7784763854364674, + "grad_norm": 1.761941770239031, + "learning_rate": 6.657559624178611e-07, + "loss": 0.5169326663017273, + "step": 6082 + }, + { + "epoch": 1.7787688258517327, + "grad_norm": 1.7653960525219785, + "learning_rate": 6.640224855330824e-07, + "loss": 0.5304254293441772, + "step": 6083 + }, + { + "epoch": 1.7790612662669982, + "grad_norm": 1.7073706399680681, + "learning_rate": 6.622911908762852e-07, + "loss": 0.457882285118103, + "step": 6084 + }, + { + "epoch": 1.7793537066822633, + "grad_norm": 1.4459810475641077, + "learning_rate": 6.605620788521472e-07, + "loss": 0.48427796363830566, + "step": 6085 + }, + { + "epoch": 1.7796461470975289, + "grad_norm": 1.7511368613506917, + "learning_rate": 6.588351498648382e-07, + "loss": 0.598512589931488, + "step": 6086 + }, + { + "epoch": 1.7799385875127944, + "grad_norm": 1.6445184894388314, + "learning_rate": 6.571104043180188e-07, + "loss": 
0.5065094232559204, + "step": 6087 + }, + { + "epoch": 1.7802310279280595, + "grad_norm": 1.7505635404599922, + "learning_rate": 6.553878426148364e-07, + "loss": 0.5493142008781433, + "step": 6088 + }, + { + "epoch": 1.780523468343325, + "grad_norm": 1.5236545905427594, + "learning_rate": 6.5366746515793e-07, + "loss": 0.40520578622817993, + "step": 6089 + }, + { + "epoch": 1.7808159087585904, + "grad_norm": 1.6562045226817075, + "learning_rate": 6.51949272349427e-07, + "loss": 0.5416547656059265, + "step": 6090 + }, + { + "epoch": 1.7811083491738557, + "grad_norm": 1.5389792406208165, + "learning_rate": 6.502332645909438e-07, + "loss": 0.4531989097595215, + "step": 6091 + }, + { + "epoch": 1.7814007895891213, + "grad_norm": 1.9811412419033423, + "learning_rate": 6.485194422835872e-07, + "loss": 0.6385304927825928, + "step": 6092 + }, + { + "epoch": 1.7816932300043866, + "grad_norm": 1.631678357707061, + "learning_rate": 6.468078058279537e-07, + "loss": 0.5503095388412476, + "step": 6093 + }, + { + "epoch": 1.781985670419652, + "grad_norm": 1.810992666384156, + "learning_rate": 6.450983556241264e-07, + "loss": 0.5184366703033447, + "step": 6094 + }, + { + "epoch": 1.7822781108349175, + "grad_norm": 1.8021498649724184, + "learning_rate": 6.433910920716813e-07, + "loss": 0.5211689472198486, + "step": 6095 + }, + { + "epoch": 1.7825705512501828, + "grad_norm": 1.5495698877916986, + "learning_rate": 6.416860155696781e-07, + "loss": 0.7357909679412842, + "step": 6096 + }, + { + "epoch": 1.7828629916654481, + "grad_norm": 1.6814949660424658, + "learning_rate": 6.399831265166689e-07, + "loss": 0.6283953189849854, + "step": 6097 + }, + { + "epoch": 1.7831554320807137, + "grad_norm": 1.7274003515879492, + "learning_rate": 6.382824253106945e-07, + "loss": 0.45040953159332275, + "step": 6098 + }, + { + "epoch": 1.7834478724959788, + "grad_norm": 1.9179221464776945, + "learning_rate": 6.365839123492834e-07, + "loss": 0.5056609511375427, + "step": 6099 + }, + { + "epoch": 
1.7837403129112444, + "grad_norm": 1.4295507016254647, + "learning_rate": 6.348875880294536e-07, + "loss": 0.4940416216850281, + "step": 6100 + }, + { + "epoch": 1.7840327533265097, + "grad_norm": 1.487738102541406, + "learning_rate": 6.33193452747708e-07, + "loss": 0.45796072483062744, + "step": 6101 + }, + { + "epoch": 1.784325193741775, + "grad_norm": 1.5314389713015535, + "learning_rate": 6.315015069000408e-07, + "loss": 0.4828432500362396, + "step": 6102 + }, + { + "epoch": 1.7846176341570406, + "grad_norm": 1.7652995666195541, + "learning_rate": 6.298117508819357e-07, + "loss": 0.5564515590667725, + "step": 6103 + }, + { + "epoch": 1.7849100745723059, + "grad_norm": 1.7672116497467336, + "learning_rate": 6.281241850883624e-07, + "loss": 0.5160977840423584, + "step": 6104 + }, + { + "epoch": 1.7852025149875712, + "grad_norm": 1.6835388368372863, + "learning_rate": 6.264388099137775e-07, + "loss": 0.585543155670166, + "step": 6105 + }, + { + "epoch": 1.7854949554028368, + "grad_norm": 1.9025389414417693, + "learning_rate": 6.247556257521303e-07, + "loss": 0.5377194881439209, + "step": 6106 + }, + { + "epoch": 1.785787395818102, + "grad_norm": 1.6124331818311004, + "learning_rate": 6.230746329968518e-07, + "loss": 0.46788060665130615, + "step": 6107 + }, + { + "epoch": 1.7860798362333674, + "grad_norm": 1.481941465563148, + "learning_rate": 6.213958320408664e-07, + "loss": 0.511722207069397, + "step": 6108 + }, + { + "epoch": 1.786372276648633, + "grad_norm": 1.7380505303184415, + "learning_rate": 6.197192232765814e-07, + "loss": 0.5609079599380493, + "step": 6109 + }, + { + "epoch": 1.786664717063898, + "grad_norm": 1.5715739237199864, + "learning_rate": 6.180448070958955e-07, + "loss": 0.47641855478286743, + "step": 6110 + }, + { + "epoch": 1.7869571574791636, + "grad_norm": 1.4072609352957208, + "learning_rate": 6.163725838901946e-07, + "loss": 0.4209919273853302, + "step": 6111 + }, + { + "epoch": 1.7872495978944292, + "grad_norm": 1.7120783337900378, + 
"learning_rate": 6.147025540503459e-07, + "loss": 0.6012829542160034, + "step": 6112 + }, + { + "epoch": 1.7875420383096943, + "grad_norm": 1.8789998564305304, + "learning_rate": 6.130347179667129e-07, + "loss": 0.6112918853759766, + "step": 6113 + }, + { + "epoch": 1.7878344787249598, + "grad_norm": 1.8641199827985835, + "learning_rate": 6.113690760291402e-07, + "loss": 0.6370030641555786, + "step": 6114 + }, + { + "epoch": 1.7881269191402251, + "grad_norm": 1.837749741108103, + "learning_rate": 6.097056286269631e-07, + "loss": 0.5385129451751709, + "step": 6115 + }, + { + "epoch": 1.7884193595554905, + "grad_norm": 1.7733960362556163, + "learning_rate": 6.080443761490007e-07, + "loss": 0.4707196354866028, + "step": 6116 + }, + { + "epoch": 1.788711799970756, + "grad_norm": 1.8302621423982353, + "learning_rate": 6.063853189835611e-07, + "loss": 0.5361602306365967, + "step": 6117 + }, + { + "epoch": 1.7890042403860213, + "grad_norm": 1.592603561791519, + "learning_rate": 6.047284575184398e-07, + "loss": 0.48841261863708496, + "step": 6118 + }, + { + "epoch": 1.7892966808012867, + "grad_norm": 1.6413123655048356, + "learning_rate": 6.030737921409169e-07, + "loss": 0.47491973638534546, + "step": 6119 + }, + { + "epoch": 1.7895891212165522, + "grad_norm": 1.608045516338794, + "learning_rate": 6.014213232377608e-07, + "loss": 0.4579542875289917, + "step": 6120 + }, + { + "epoch": 1.7898815616318176, + "grad_norm": 1.7739986275669979, + "learning_rate": 5.997710511952259e-07, + "loss": 0.4517485499382019, + "step": 6121 + }, + { + "epoch": 1.7901740020470829, + "grad_norm": 1.61243285020885, + "learning_rate": 5.981229763990559e-07, + "loss": 0.5656695365905762, + "step": 6122 + }, + { + "epoch": 1.7904664424623484, + "grad_norm": 1.8328920976142473, + "learning_rate": 5.964770992344737e-07, + "loss": 0.5000064373016357, + "step": 6123 + }, + { + "epoch": 1.7907588828776135, + "grad_norm": 1.691423776793607, + "learning_rate": 5.948334200861927e-07, + "loss": 
0.4823925495147705, + "step": 6124 + }, + { + "epoch": 1.791051323292879, + "grad_norm": 1.6081373509153076, + "learning_rate": 5.931919393384189e-07, + "loss": 0.45079779624938965, + "step": 6125 + }, + { + "epoch": 1.7913437637081446, + "grad_norm": 1.7368976771393152, + "learning_rate": 5.915526573748331e-07, + "loss": 0.5887237787246704, + "step": 6126 + }, + { + "epoch": 1.7916362041234097, + "grad_norm": 1.5326002891728705, + "learning_rate": 5.8991557457861e-07, + "loss": 0.5625102519989014, + "step": 6127 + }, + { + "epoch": 1.7919286445386753, + "grad_norm": 1.773152580661058, + "learning_rate": 5.882806913324079e-07, + "loss": 0.5290789604187012, + "step": 6128 + }, + { + "epoch": 1.7922210849539406, + "grad_norm": 1.8240731968563617, + "learning_rate": 5.86648008018369e-07, + "loss": 0.47694748640060425, + "step": 6129 + }, + { + "epoch": 1.792513525369206, + "grad_norm": 1.7480468996944738, + "learning_rate": 5.850175250181244e-07, + "loss": 0.6297628879547119, + "step": 6130 + }, + { + "epoch": 1.7928059657844715, + "grad_norm": 1.767468792446569, + "learning_rate": 5.833892427127908e-07, + "loss": 0.5748087167739868, + "step": 6131 + }, + { + "epoch": 1.7930984061997368, + "grad_norm": 2.0367130445902313, + "learning_rate": 5.817631614829666e-07, + "loss": 0.552059531211853, + "step": 6132 + }, + { + "epoch": 1.7933908466150021, + "grad_norm": 1.881082319886368, + "learning_rate": 5.801392817087392e-07, + "loss": 0.5980287790298462, + "step": 6133 + }, + { + "epoch": 1.7936832870302677, + "grad_norm": 1.7948740811393897, + "learning_rate": 5.785176037696815e-07, + "loss": 0.5682743191719055, + "step": 6134 + }, + { + "epoch": 1.793975727445533, + "grad_norm": 1.6227048981437364, + "learning_rate": 5.768981280448494e-07, + "loss": 0.6907520294189453, + "step": 6135 + }, + { + "epoch": 1.7942681678607983, + "grad_norm": 1.82613812962419, + "learning_rate": 5.752808549127875e-07, + "loss": 0.5939712524414062, + "step": 6136 + }, + { + "epoch": 
1.794560608276064, + "grad_norm": 1.961952469296216, + "learning_rate": 5.736657847515215e-07, + "loss": 0.5169910192489624, + "step": 6137 + }, + { + "epoch": 1.794853048691329, + "grad_norm": 1.7101466149490088, + "learning_rate": 5.720529179385659e-07, + "loss": 0.5795155167579651, + "step": 6138 + }, + { + "epoch": 1.7951454891065945, + "grad_norm": 1.6643593680063449, + "learning_rate": 5.704422548509181e-07, + "loss": 0.4296284317970276, + "step": 6139 + }, + { + "epoch": 1.7954379295218599, + "grad_norm": 1.780840768711558, + "learning_rate": 5.688337958650603e-07, + "loss": 0.5175303220748901, + "step": 6140 + }, + { + "epoch": 1.7957303699371252, + "grad_norm": 1.5534990300027502, + "learning_rate": 5.672275413569605e-07, + "loss": 0.49900466203689575, + "step": 6141 + }, + { + "epoch": 1.7960228103523908, + "grad_norm": 1.741229060320259, + "learning_rate": 5.65623491702072e-07, + "loss": 0.5047665238380432, + "step": 6142 + }, + { + "epoch": 1.796315250767656, + "grad_norm": 1.6004175896698871, + "learning_rate": 5.64021647275329e-07, + "loss": 0.5309686660766602, + "step": 6143 + }, + { + "epoch": 1.7966076911829214, + "grad_norm": 1.84753723892279, + "learning_rate": 5.624220084511544e-07, + "loss": 0.7270892858505249, + "step": 6144 + }, + { + "epoch": 1.796900131598187, + "grad_norm": 1.8607152469266723, + "learning_rate": 5.608245756034536e-07, + "loss": 0.515272319316864, + "step": 6145 + }, + { + "epoch": 1.7971925720134523, + "grad_norm": 1.5111910050436628, + "learning_rate": 5.592293491056167e-07, + "loss": 0.4919237196445465, + "step": 6146 + }, + { + "epoch": 1.7974850124287176, + "grad_norm": 1.8345189418412804, + "learning_rate": 5.576363293305187e-07, + "loss": 0.5812259316444397, + "step": 6147 + }, + { + "epoch": 1.7977774528439832, + "grad_norm": 1.7464814721572284, + "learning_rate": 5.560455166505185e-07, + "loss": 0.434345006942749, + "step": 6148 + }, + { + "epoch": 1.7980698932592483, + "grad_norm": 1.6287087584719833, + 
"learning_rate": 5.544569114374588e-07, + "loss": 0.4670771360397339, + "step": 6149 + }, + { + "epoch": 1.7983623336745138, + "grad_norm": 1.5038620849892772, + "learning_rate": 5.528705140626667e-07, + "loss": 0.5867526531219482, + "step": 6150 + }, + { + "epoch": 1.7986547740897794, + "grad_norm": 1.8981858755166237, + "learning_rate": 5.512863248969513e-07, + "loss": 0.5453605651855469, + "step": 6151 + }, + { + "epoch": 1.7989472145050445, + "grad_norm": 1.9030067654858334, + "learning_rate": 5.497043443106087e-07, + "loss": 0.5535463690757751, + "step": 6152 + }, + { + "epoch": 1.79923965492031, + "grad_norm": 1.72031713178446, + "learning_rate": 5.481245726734174e-07, + "loss": 0.6250847578048706, + "step": 6153 + }, + { + "epoch": 1.7995320953355753, + "grad_norm": 1.625961067284692, + "learning_rate": 5.465470103546399e-07, + "loss": 0.45504581928253174, + "step": 6154 + }, + { + "epoch": 1.7998245357508407, + "grad_norm": 2.039802523536217, + "learning_rate": 5.449716577230202e-07, + "loss": 0.6192604303359985, + "step": 6155 + }, + { + "epoch": 1.8001169761661062, + "grad_norm": 1.8695276161806251, + "learning_rate": 5.433985151467869e-07, + "loss": 0.5624358654022217, + "step": 6156 + }, + { + "epoch": 1.8004094165813715, + "grad_norm": 1.7494457460727728, + "learning_rate": 5.418275829936537e-07, + "loss": 0.5759576559066772, + "step": 6157 + }, + { + "epoch": 1.8007018569966369, + "grad_norm": 1.752894288026352, + "learning_rate": 5.402588616308169e-07, + "loss": 0.5710508227348328, + "step": 6158 + }, + { + "epoch": 1.8009942974119024, + "grad_norm": 1.6781697189669698, + "learning_rate": 5.386923514249542e-07, + "loss": 0.6146141290664673, + "step": 6159 + }, + { + "epoch": 1.8012867378271677, + "grad_norm": 1.618055518270054, + "learning_rate": 5.371280527422296e-07, + "loss": 0.425834983587265, + "step": 6160 + }, + { + "epoch": 1.801579178242433, + "grad_norm": 1.8062077594882358, + "learning_rate": 5.35565965948287e-07, + "loss": 
0.4353194236755371, + "step": 6161 + }, + { + "epoch": 1.8018716186576986, + "grad_norm": 2.0598668441022037, + "learning_rate": 5.340060914082546e-07, + "loss": 0.7202355861663818, + "step": 6162 + }, + { + "epoch": 1.8021640590729637, + "grad_norm": 1.552014134498689, + "learning_rate": 5.324484294867449e-07, + "loss": 0.5371845960617065, + "step": 6163 + }, + { + "epoch": 1.8024564994882293, + "grad_norm": 1.7812688374701713, + "learning_rate": 5.308929805478513e-07, + "loss": 0.4995431900024414, + "step": 6164 + }, + { + "epoch": 1.8027489399034948, + "grad_norm": 1.9376433940202618, + "learning_rate": 5.293397449551519e-07, + "loss": 0.6503393650054932, + "step": 6165 + }, + { + "epoch": 1.80304138031876, + "grad_norm": 1.608511841040304, + "learning_rate": 5.277887230717027e-07, + "loss": 0.5083032250404358, + "step": 6166 + }, + { + "epoch": 1.8033338207340255, + "grad_norm": 1.7910725457082355, + "learning_rate": 5.262399152600473e-07, + "loss": 0.6067851781845093, + "step": 6167 + }, + { + "epoch": 1.8036262611492908, + "grad_norm": 1.6601362559713981, + "learning_rate": 5.246933218822104e-07, + "loss": 0.6446479558944702, + "step": 6168 + }, + { + "epoch": 1.8039187015645561, + "grad_norm": 1.9668874595165033, + "learning_rate": 5.231489432996984e-07, + "loss": 0.6940749883651733, + "step": 6169 + }, + { + "epoch": 1.8042111419798217, + "grad_norm": 1.6254914024201104, + "learning_rate": 5.216067798735014e-07, + "loss": 0.558691143989563, + "step": 6170 + }, + { + "epoch": 1.804503582395087, + "grad_norm": 1.706821795047188, + "learning_rate": 5.2006683196409e-07, + "loss": 0.4561213254928589, + "step": 6171 + }, + { + "epoch": 1.8047960228103523, + "grad_norm": 1.5741713506995776, + "learning_rate": 5.185290999314174e-07, + "loss": 0.514278769493103, + "step": 6172 + }, + { + "epoch": 1.805088463225618, + "grad_norm": 1.7438493762338294, + "learning_rate": 5.169935841349194e-07, + "loss": 0.41933614015579224, + "step": 6173 + }, + { + "epoch": 
1.8053809036408832, + "grad_norm": 1.5639626592195386, + "learning_rate": 5.154602849335133e-07, + "loss": 0.5590407848358154, + "step": 6174 + }, + { + "epoch": 1.8056733440561485, + "grad_norm": 1.7923343761763981, + "learning_rate": 5.139292026855991e-07, + "loss": 0.49428898096084595, + "step": 6175 + }, + { + "epoch": 1.805965784471414, + "grad_norm": 1.6980318077322492, + "learning_rate": 5.124003377490582e-07, + "loss": 0.4737596809864044, + "step": 6176 + }, + { + "epoch": 1.8062582248866792, + "grad_norm": 1.6716862203734568, + "learning_rate": 5.108736904812517e-07, + "loss": 0.5017397403717041, + "step": 6177 + }, + { + "epoch": 1.8065506653019447, + "grad_norm": 1.733919571237643, + "learning_rate": 5.09349261239026e-07, + "loss": 0.4509057402610779, + "step": 6178 + }, + { + "epoch": 1.80684310571721, + "grad_norm": 1.9095997808768526, + "learning_rate": 5.078270503787053e-07, + "loss": 0.4440206289291382, + "step": 6179 + }, + { + "epoch": 1.8071355461324754, + "grad_norm": 1.6672235625660048, + "learning_rate": 5.063070582560991e-07, + "loss": 0.4981609582901001, + "step": 6180 + }, + { + "epoch": 1.807427986547741, + "grad_norm": 1.4041701397189061, + "learning_rate": 5.047892852264946e-07, + "loss": 0.4057808518409729, + "step": 6181 + }, + { + "epoch": 1.8077204269630063, + "grad_norm": 1.8238388895662465, + "learning_rate": 5.032737316446634e-07, + "loss": 0.5770435333251953, + "step": 6182 + }, + { + "epoch": 1.8080128673782716, + "grad_norm": 1.5817149529336438, + "learning_rate": 5.017603978648567e-07, + "loss": 0.5431563258171082, + "step": 6183 + }, + { + "epoch": 1.8083053077935372, + "grad_norm": 1.7959973431061746, + "learning_rate": 5.002492842408058e-07, + "loss": 0.469868928194046, + "step": 6184 + }, + { + "epoch": 1.8085977482088025, + "grad_norm": 1.6470575782998251, + "learning_rate": 4.98740391125726e-07, + "loss": 0.4581238925457001, + "step": 6185 + }, + { + "epoch": 1.8088901886240678, + "grad_norm": 1.5613704220145663, + 
"learning_rate": 4.972337188723108e-07, + "loss": 0.43255913257598877, + "step": 6186 + }, + { + "epoch": 1.8091826290393334, + "grad_norm": 1.6405804521880538, + "learning_rate": 4.957292678327374e-07, + "loss": 0.5817975997924805, + "step": 6187 + }, + { + "epoch": 1.8094750694545985, + "grad_norm": 1.701175567145501, + "learning_rate": 4.9422703835866e-07, + "loss": 0.506614089012146, + "step": 6188 + }, + { + "epoch": 1.809767509869864, + "grad_norm": 1.8093255501568073, + "learning_rate": 4.927270308012155e-07, + "loss": 0.5245084762573242, + "step": 6189 + }, + { + "epoch": 1.8100599502851296, + "grad_norm": 1.9638481802757681, + "learning_rate": 4.912292455110235e-07, + "loss": 0.48700785636901855, + "step": 6190 + }, + { + "epoch": 1.8103523907003947, + "grad_norm": 1.7084108143801102, + "learning_rate": 4.897336828381794e-07, + "loss": 0.5512829422950745, + "step": 6191 + }, + { + "epoch": 1.8106448311156602, + "grad_norm": 1.9425355962156208, + "learning_rate": 4.882403431322647e-07, + "loss": 0.444965660572052, + "step": 6192 + }, + { + "epoch": 1.8109372715309255, + "grad_norm": 1.6773870360526466, + "learning_rate": 4.86749226742338e-07, + "loss": 0.49120527505874634, + "step": 6193 + }, + { + "epoch": 1.8112297119461909, + "grad_norm": 1.5444026883137385, + "learning_rate": 4.852603340169371e-07, + "loss": 0.47114405035972595, + "step": 6194 + }, + { + "epoch": 1.8115221523614564, + "grad_norm": 1.3641759741105037, + "learning_rate": 4.837736653040825e-07, + "loss": 0.41404014825820923, + "step": 6195 + }, + { + "epoch": 1.8118145927767217, + "grad_norm": 1.5779692763243462, + "learning_rate": 4.822892209512742e-07, + "loss": 0.5773917436599731, + "step": 6196 + }, + { + "epoch": 1.812107033191987, + "grad_norm": 1.5867022738126413, + "learning_rate": 4.808070013054911e-07, + "loss": 0.5048927068710327, + "step": 6197 + }, + { + "epoch": 1.8123994736072526, + "grad_norm": 1.4880382186782968, + "learning_rate": 4.793270067131961e-07, + "loss": 
0.48112595081329346, + "step": 6198 + }, + { + "epoch": 1.812691914022518, + "grad_norm": 1.5982708355484612, + "learning_rate": 4.778492375203236e-07, + "loss": 0.465067982673645, + "step": 6199 + }, + { + "epoch": 1.8129843544377833, + "grad_norm": 2.10382956966043, + "learning_rate": 4.763736940722985e-07, + "loss": 0.5456488132476807, + "step": 6200 + }, + { + "epoch": 1.8132767948530488, + "grad_norm": 1.7197696401081977, + "learning_rate": 4.74900376714017e-07, + "loss": 0.5078476071357727, + "step": 6201 + }, + { + "epoch": 1.813569235268314, + "grad_norm": 1.8035895737751002, + "learning_rate": 4.7342928578985814e-07, + "loss": 0.5087896585464478, + "step": 6202 + }, + { + "epoch": 1.8138616756835795, + "grad_norm": 1.8289842367399733, + "learning_rate": 4.719604216436824e-07, + "loss": 0.5734537243843079, + "step": 6203 + }, + { + "epoch": 1.814154116098845, + "grad_norm": 1.8255387764821909, + "learning_rate": 4.704937846188262e-07, + "loss": 0.5163359045982361, + "step": 6204 + }, + { + "epoch": 1.8144465565141101, + "grad_norm": 1.7367361746759034, + "learning_rate": 4.6902937505810765e-07, + "loss": 0.5884007811546326, + "step": 6205 + }, + { + "epoch": 1.8147389969293757, + "grad_norm": 1.459881439563451, + "learning_rate": 4.675671933038228e-07, + "loss": 0.454215407371521, + "step": 6206 + }, + { + "epoch": 1.815031437344641, + "grad_norm": 1.4834270754413148, + "learning_rate": 4.661072396977506e-07, + "loss": 0.4380212426185608, + "step": 6207 + }, + { + "epoch": 1.8153238777599063, + "grad_norm": 1.5724796080178702, + "learning_rate": 4.646495145811425e-07, + "loss": 0.6138126850128174, + "step": 6208 + }, + { + "epoch": 1.8156163181751719, + "grad_norm": 1.7578891144089137, + "learning_rate": 4.6319401829473366e-07, + "loss": 0.560515284538269, + "step": 6209 + }, + { + "epoch": 1.8159087585904372, + "grad_norm": 1.6717823771103892, + "learning_rate": 4.6174075117873976e-07, + "loss": 0.4744090735912323, + "step": 6210 + }, + { + "epoch": 
1.8162011990057025, + "grad_norm": 1.566667953265204, + "learning_rate": 4.6028971357285126e-07, + "loss": 0.4508114457130432, + "step": 6211 + }, + { + "epoch": 1.816493639420968, + "grad_norm": 1.6686159118306128, + "learning_rate": 4.5884090581623906e-07, + "loss": 0.5437598824501038, + "step": 6212 + }, + { + "epoch": 1.8167860798362334, + "grad_norm": 1.871048661690424, + "learning_rate": 4.5739432824755456e-07, + "loss": 0.608635425567627, + "step": 6213 + }, + { + "epoch": 1.8170785202514987, + "grad_norm": 1.683927429440131, + "learning_rate": 4.5594998120492505e-07, + "loss": 0.45614784955978394, + "step": 6214 + }, + { + "epoch": 1.8173709606667643, + "grad_norm": 1.8175326303925177, + "learning_rate": 4.5450786502595933e-07, + "loss": 0.46722525358200073, + "step": 6215 + }, + { + "epoch": 1.8176634010820294, + "grad_norm": 1.6729337536988582, + "learning_rate": 4.5306798004774333e-07, + "loss": 0.5424127578735352, + "step": 6216 + }, + { + "epoch": 1.817955841497295, + "grad_norm": 1.8512870023540355, + "learning_rate": 4.5163032660684e-07, + "loss": 0.4360300302505493, + "step": 6217 + }, + { + "epoch": 1.8182482819125603, + "grad_norm": 1.4671759860658016, + "learning_rate": 4.5019490503929395e-07, + "loss": 0.43406206369400024, + "step": 6218 + }, + { + "epoch": 1.8185407223278256, + "grad_norm": 1.5669201854687904, + "learning_rate": 4.4876171568062346e-07, + "loss": 0.5435998439788818, + "step": 6219 + }, + { + "epoch": 1.8188331627430911, + "grad_norm": 1.7571994730111475, + "learning_rate": 4.4733075886583043e-07, + "loss": 0.4555914103984833, + "step": 6220 + }, + { + "epoch": 1.8191256031583565, + "grad_norm": 1.9267993644134682, + "learning_rate": 4.4590203492939076e-07, + "loss": 0.5246081352233887, + "step": 6221 + }, + { + "epoch": 1.8194180435736218, + "grad_norm": 1.4234567063452161, + "learning_rate": 4.4447554420525954e-07, + "loss": 0.5093664526939392, + "step": 6222 + }, + { + "epoch": 1.8197104839888874, + "grad_norm": 
1.9251138549109805, + "learning_rate": 4.430512870268733e-07, + "loss": 0.5759550333023071, + "step": 6223 + }, + { + "epoch": 1.8200029244041527, + "grad_norm": 2.2446814471076184, + "learning_rate": 4.416292637271402e-07, + "loss": 0.5477207899093628, + "step": 6224 + }, + { + "epoch": 1.820295364819418, + "grad_norm": 1.7579783947323675, + "learning_rate": 4.402094746384511e-07, + "loss": 0.5786882638931274, + "step": 6225 + }, + { + "epoch": 1.8205878052346836, + "grad_norm": 1.6652775403735034, + "learning_rate": 4.3879192009267266e-07, + "loss": 0.36909428238868713, + "step": 6226 + }, + { + "epoch": 1.8208802456499487, + "grad_norm": 1.6359565015929571, + "learning_rate": 4.3737660042114993e-07, + "loss": 0.5471982955932617, + "step": 6227 + }, + { + "epoch": 1.8211726860652142, + "grad_norm": 1.633893653092529, + "learning_rate": 4.3596351595470596e-07, + "loss": 0.49737733602523804, + "step": 6228 + }, + { + "epoch": 1.8214651264804798, + "grad_norm": 1.8445639233475513, + "learning_rate": 4.3455266702363997e-07, + "loss": 0.70830237865448, + "step": 6229 + }, + { + "epoch": 1.8217575668957449, + "grad_norm": 1.5312305470870462, + "learning_rate": 4.331440539577281e-07, + "loss": 0.5844424962997437, + "step": 6230 + }, + { + "epoch": 1.8220500073110104, + "grad_norm": 1.5427896071730656, + "learning_rate": 4.317376770862269e-07, + "loss": 0.42457354068756104, + "step": 6231 + }, + { + "epoch": 1.8223424477262757, + "grad_norm": 2.058390634719774, + "learning_rate": 4.3033353673786695e-07, + "loss": 0.5154321193695068, + "step": 6232 + }, + { + "epoch": 1.822634888141541, + "grad_norm": 1.7898699548834731, + "learning_rate": 4.2893163324085886e-07, + "loss": 0.5896856784820557, + "step": 6233 + }, + { + "epoch": 1.8229273285568066, + "grad_norm": 1.8303948048078211, + "learning_rate": 4.2753196692288835e-07, + "loss": 0.5032835006713867, + "step": 6234 + }, + { + "epoch": 1.823219768972072, + "grad_norm": 1.8584560183845538, + "learning_rate": 
4.2613453811111814e-07, + "loss": 0.4691713750362396, + "step": 6235 + }, + { + "epoch": 1.8235122093873373, + "grad_norm": 1.5627513261590378, + "learning_rate": 4.2473934713219033e-07, + "loss": 0.595095694065094, + "step": 6236 + }, + { + "epoch": 1.8238046498026028, + "grad_norm": 1.6531612719483142, + "learning_rate": 4.233463943122218e-07, + "loss": 0.5004895329475403, + "step": 6237 + }, + { + "epoch": 1.8240970902178681, + "grad_norm": 1.7047690953050751, + "learning_rate": 4.2195567997680654e-07, + "loss": 0.4924081563949585, + "step": 6238 + }, + { + "epoch": 1.8243895306331335, + "grad_norm": 1.7572886707576447, + "learning_rate": 4.2056720445101565e-07, + "loss": 0.5350006818771362, + "step": 6239 + }, + { + "epoch": 1.824681971048399, + "grad_norm": 1.9485734179206806, + "learning_rate": 4.191809680593961e-07, + "loss": 0.5404629707336426, + "step": 6240 + }, + { + "epoch": 1.8249744114636641, + "grad_norm": 1.6023324600099473, + "learning_rate": 4.177969711259744e-07, + "loss": 0.727859377861023, + "step": 6241 + }, + { + "epoch": 1.8252668518789297, + "grad_norm": 1.553973004264676, + "learning_rate": 4.164152139742494e-07, + "loss": 0.4805057644844055, + "step": 6242 + }, + { + "epoch": 1.8255592922941952, + "grad_norm": 1.7536116301732134, + "learning_rate": 4.1503569692719847e-07, + "loss": 0.5520761013031006, + "step": 6243 + }, + { + "epoch": 1.8258517327094603, + "grad_norm": 1.8327055737656117, + "learning_rate": 4.1365842030727576e-07, + "loss": 0.6130107641220093, + "step": 6244 + }, + { + "epoch": 1.8261441731247259, + "grad_norm": 1.7887203227793926, + "learning_rate": 4.122833844364116e-07, + "loss": 0.6048229932785034, + "step": 6245 + }, + { + "epoch": 1.8264366135399912, + "grad_norm": 1.717414490213998, + "learning_rate": 4.1091058963601214e-07, + "loss": 0.667324960231781, + "step": 6246 + }, + { + "epoch": 1.8267290539552565, + "grad_norm": 2.083699506724501, + "learning_rate": 4.095400362269597e-07, + "loss": 0.45595815777778625, + 
"step": 6247 + }, + { + "epoch": 1.827021494370522, + "grad_norm": 1.7162831332631867, + "learning_rate": 4.081717245296124e-07, + "loss": 0.49015533924102783, + "step": 6248 + }, + { + "epoch": 1.8273139347857874, + "grad_norm": 2.1906207360630763, + "learning_rate": 4.068056548638055e-07, + "loss": 0.5230038166046143, + "step": 6249 + }, + { + "epoch": 1.8276063752010527, + "grad_norm": 1.6860531929221865, + "learning_rate": 4.054418275488492e-07, + "loss": 0.5025942325592041, + "step": 6250 + }, + { + "epoch": 1.8278988156163183, + "grad_norm": 1.736980191753769, + "learning_rate": 4.0408024290352955e-07, + "loss": 0.5136677026748657, + "step": 6251 + }, + { + "epoch": 1.8281912560315836, + "grad_norm": 1.7988212644666006, + "learning_rate": 4.0272090124611086e-07, + "loss": 0.6209211945533752, + "step": 6252 + }, + { + "epoch": 1.828483696446849, + "grad_norm": 1.9742781188768104, + "learning_rate": 4.0136380289432784e-07, + "loss": 0.5913738012313843, + "step": 6253 + }, + { + "epoch": 1.8287761368621145, + "grad_norm": 1.9710058674803597, + "learning_rate": 4.000089481653946e-07, + "loss": 0.5745095610618591, + "step": 6254 + }, + { + "epoch": 1.8290685772773796, + "grad_norm": 1.4867167586867893, + "learning_rate": 3.9865633737600105e-07, + "loss": 0.4566704034805298, + "step": 6255 + }, + { + "epoch": 1.8293610176926451, + "grad_norm": 1.672257025513455, + "learning_rate": 3.9730597084231105e-07, + "loss": 0.49784860014915466, + "step": 6256 + }, + { + "epoch": 1.8296534581079105, + "grad_norm": 1.7381596787517106, + "learning_rate": 3.9595784887996647e-07, + "loss": 0.4489399790763855, + "step": 6257 + }, + { + "epoch": 1.8299458985231758, + "grad_norm": 1.9703484082158151, + "learning_rate": 3.946119718040797e-07, + "loss": 0.6335956454277039, + "step": 6258 + }, + { + "epoch": 1.8302383389384413, + "grad_norm": 1.4097270774574866, + "learning_rate": 3.932683399292436e-07, + "loss": 0.44865918159484863, + "step": 6259 + }, + { + "epoch": 
1.8305307793537067, + "grad_norm": 1.6485718017332285, + "learning_rate": 3.919269535695225e-07, + "loss": 0.4328421354293823, + "step": 6260 + }, + { + "epoch": 1.830823219768972, + "grad_norm": 1.6528043958881276, + "learning_rate": 3.9058781303845886e-07, + "loss": 0.463814377784729, + "step": 6261 + }, + { + "epoch": 1.8311156601842375, + "grad_norm": 1.9336577936651187, + "learning_rate": 3.892509186490667e-07, + "loss": 0.5857536196708679, + "step": 6262 + }, + { + "epoch": 1.8314081005995029, + "grad_norm": 1.4512027972560333, + "learning_rate": 3.879162707138395e-07, + "loss": 0.4873831272125244, + "step": 6263 + }, + { + "epoch": 1.8317005410147682, + "grad_norm": 1.89367526659171, + "learning_rate": 3.8658386954474104e-07, + "loss": 0.5428040027618408, + "step": 6264 + }, + { + "epoch": 1.8319929814300338, + "grad_norm": 1.759804366679343, + "learning_rate": 3.852537154532121e-07, + "loss": 0.49092623591423035, + "step": 6265 + }, + { + "epoch": 1.8322854218452989, + "grad_norm": 1.7919708064212196, + "learning_rate": 3.839258087501685e-07, + "loss": 0.5515817999839783, + "step": 6266 + }, + { + "epoch": 1.8325778622605644, + "grad_norm": 1.5550731443697672, + "learning_rate": 3.8260014974600077e-07, + "loss": 0.48080340027809143, + "step": 6267 + }, + { + "epoch": 1.83287030267583, + "grad_norm": 2.292962123842254, + "learning_rate": 3.812767387505734e-07, + "loss": 0.6129888296127319, + "step": 6268 + }, + { + "epoch": 1.833162743091095, + "grad_norm": 1.8203026764024284, + "learning_rate": 3.7995557607322543e-07, + "loss": 0.5843402147293091, + "step": 6269 + }, + { + "epoch": 1.8334551835063606, + "grad_norm": 1.9423893526281284, + "learning_rate": 3.7863666202276996e-07, + "loss": 0.5573143362998962, + "step": 6270 + }, + { + "epoch": 1.833747623921626, + "grad_norm": 1.9386384718546945, + "learning_rate": 3.773199969074959e-07, + "loss": 0.552756667137146, + "step": 6271 + }, + { + "epoch": 1.8340400643368913, + "grad_norm": 1.7629811878645265, + 
"learning_rate": 3.7600558103516706e-07, + "loss": 0.5559083223342896, + "step": 6272 + }, + { + "epoch": 1.8343325047521568, + "grad_norm": 1.9388416947858518, + "learning_rate": 3.746934147130177e-07, + "loss": 0.5388067364692688, + "step": 6273 + }, + { + "epoch": 1.8346249451674221, + "grad_norm": 1.694909278172827, + "learning_rate": 3.7338349824776133e-07, + "loss": 0.5816110968589783, + "step": 6274 + }, + { + "epoch": 1.8349173855826875, + "grad_norm": 1.9312358476553817, + "learning_rate": 3.720758319455786e-07, + "loss": 0.5720102787017822, + "step": 6275 + }, + { + "epoch": 1.835209825997953, + "grad_norm": 1.5440220572809102, + "learning_rate": 3.707704161121328e-07, + "loss": 0.46005699038505554, + "step": 6276 + }, + { + "epoch": 1.8355022664132183, + "grad_norm": 2.0613584980065776, + "learning_rate": 3.6946725105255656e-07, + "loss": 0.5602168440818787, + "step": 6277 + }, + { + "epoch": 1.8357947068284837, + "grad_norm": 1.6156922208810771, + "learning_rate": 3.68166337071455e-07, + "loss": 0.5390583276748657, + "step": 6278 + }, + { + "epoch": 1.8360871472437492, + "grad_norm": 1.558407958302267, + "learning_rate": 3.668676744729094e-07, + "loss": 0.48980700969696045, + "step": 6279 + }, + { + "epoch": 1.8363795876590143, + "grad_norm": 1.5853357453165142, + "learning_rate": 3.655712635604747e-07, + "loss": 0.6565061807632446, + "step": 6280 + }, + { + "epoch": 1.8366720280742799, + "grad_norm": 1.5692146512642422, + "learning_rate": 3.642771046371785e-07, + "loss": 0.465609610080719, + "step": 6281 + }, + { + "epoch": 1.8369644684895454, + "grad_norm": 1.7219983092976099, + "learning_rate": 3.6298519800552434e-07, + "loss": 0.5698891282081604, + "step": 6282 + }, + { + "epoch": 1.8372569089048105, + "grad_norm": 2.103680074754177, + "learning_rate": 3.616955439674863e-07, + "loss": 0.5885399580001831, + "step": 6283 + }, + { + "epoch": 1.837549349320076, + "grad_norm": 1.7028861151189467, + "learning_rate": 3.60408142824511e-07, + "loss": 
0.5158063173294067, + "step": 6284 + }, + { + "epoch": 1.8378417897353414, + "grad_norm": 1.6728867893623607, + "learning_rate": 3.5912299487752434e-07, + "loss": 0.49203822016716003, + "step": 6285 + }, + { + "epoch": 1.8381342301506067, + "grad_norm": 1.991753525300203, + "learning_rate": 3.578401004269183e-07, + "loss": 0.5756489038467407, + "step": 6286 + }, + { + "epoch": 1.8384266705658723, + "grad_norm": 1.9424738806131756, + "learning_rate": 3.565594597725652e-07, + "loss": 0.5970584154129028, + "step": 6287 + }, + { + "epoch": 1.8387191109811376, + "grad_norm": 1.4438564684738853, + "learning_rate": 3.552810732138046e-07, + "loss": 0.48702481389045715, + "step": 6288 + }, + { + "epoch": 1.839011551396403, + "grad_norm": 1.6632334435868308, + "learning_rate": 3.540049410494517e-07, + "loss": 0.4818963408470154, + "step": 6289 + }, + { + "epoch": 1.8393039918116685, + "grad_norm": 1.6617150886827665, + "learning_rate": 3.5273106357779585e-07, + "loss": 0.389699786901474, + "step": 6290 + }, + { + "epoch": 1.8395964322269338, + "grad_norm": 1.7654595369504777, + "learning_rate": 3.514594410965977e-07, + "loss": 0.6438174247741699, + "step": 6291 + }, + { + "epoch": 1.8398888726421991, + "grad_norm": 1.9409260673022277, + "learning_rate": 3.501900739030906e-07, + "loss": 0.654021143913269, + "step": 6292 + }, + { + "epoch": 1.8401813130574647, + "grad_norm": 1.921461492738401, + "learning_rate": 3.489229622939827e-07, + "loss": 0.748673677444458, + "step": 6293 + }, + { + "epoch": 1.8404737534727298, + "grad_norm": 1.850157344469969, + "learning_rate": 3.476581065654527e-07, + "loss": 0.47883105278015137, + "step": 6294 + }, + { + "epoch": 1.8407661938879953, + "grad_norm": 1.555147241743972, + "learning_rate": 3.4639550701315303e-07, + "loss": 0.5221554040908813, + "step": 6295 + }, + { + "epoch": 1.8410586343032607, + "grad_norm": 1.7256564846330384, + "learning_rate": 3.451351639322087e-07, + "loss": 0.482231080532074, + "step": 6296 + }, + { + "epoch": 
1.841351074718526, + "grad_norm": 1.797442509245834, + "learning_rate": 3.4387707761721625e-07, + "loss": 0.5407366752624512, + "step": 6297 + }, + { + "epoch": 1.8416435151337915, + "grad_norm": 1.9177358417772523, + "learning_rate": 3.426212483622482e-07, + "loss": 0.626631498336792, + "step": 6298 + }, + { + "epoch": 1.8419359555490569, + "grad_norm": 1.4729327167263073, + "learning_rate": 3.4136767646084424e-07, + "loss": 0.4401513338088989, + "step": 6299 + }, + { + "epoch": 1.8422283959643222, + "grad_norm": 1.756926078765411, + "learning_rate": 3.4011636220602106e-07, + "loss": 0.48130229115486145, + "step": 6300 + }, + { + "epoch": 1.8425208363795877, + "grad_norm": 1.9010914484665373, + "learning_rate": 3.3886730589026475e-07, + "loss": 0.7132935523986816, + "step": 6301 + }, + { + "epoch": 1.842813276794853, + "grad_norm": 1.692313625720156, + "learning_rate": 3.37620507805535e-07, + "loss": 0.6665343642234802, + "step": 6302 + }, + { + "epoch": 1.8431057172101184, + "grad_norm": 1.7909091838212496, + "learning_rate": 3.3637596824326435e-07, + "loss": 0.4313231408596039, + "step": 6303 + }, + { + "epoch": 1.843398157625384, + "grad_norm": 1.6745971926171657, + "learning_rate": 3.3513368749435447e-07, + "loss": 0.6263744235038757, + "step": 6304 + }, + { + "epoch": 1.843690598040649, + "grad_norm": 1.6133043168174617, + "learning_rate": 3.3389366584918313e-07, + "loss": 0.6215947866439819, + "step": 6305 + }, + { + "epoch": 1.8439830384559146, + "grad_norm": 1.6349014502820445, + "learning_rate": 3.3265590359759517e-07, + "loss": 0.45956021547317505, + "step": 6306 + }, + { + "epoch": 1.8442754788711802, + "grad_norm": 1.6194578088821072, + "learning_rate": 3.3142040102891126e-07, + "loss": 0.5363642573356628, + "step": 6307 + }, + { + "epoch": 1.8445679192864453, + "grad_norm": 1.7115305858843777, + "learning_rate": 3.3018715843192273e-07, + "loss": 0.4574592709541321, + "step": 6308 + }, + { + "epoch": 1.8448603597017108, + "grad_norm": 
1.6684239678735615, + "learning_rate": 3.2895617609489337e-07, + "loss": 0.43236005306243896, + "step": 6309 + }, + { + "epoch": 1.8451528001169761, + "grad_norm": 1.574172974777944, + "learning_rate": 3.277274543055564e-07, + "loss": 0.46349820494651794, + "step": 6310 + }, + { + "epoch": 1.8454452405322415, + "grad_norm": 1.9135327602518888, + "learning_rate": 3.265009933511176e-07, + "loss": 0.5233386754989624, + "step": 6311 + }, + { + "epoch": 1.845737680947507, + "grad_norm": 1.5165768096310508, + "learning_rate": 3.252767935182566e-07, + "loss": 0.44902727007865906, + "step": 6312 + }, + { + "epoch": 1.8460301213627723, + "grad_norm": 1.9281348385682333, + "learning_rate": 3.240548550931222e-07, + "loss": 0.709855854511261, + "step": 6313 + }, + { + "epoch": 1.8463225617780377, + "grad_norm": 1.8532989008830933, + "learning_rate": 3.228351783613348e-07, + "loss": 0.5194632411003113, + "step": 6314 + }, + { + "epoch": 1.8466150021933032, + "grad_norm": 1.750242735396334, + "learning_rate": 3.2161776360798535e-07, + "loss": 0.6027804017066956, + "step": 6315 + }, + { + "epoch": 1.8469074426085685, + "grad_norm": 1.591118544218686, + "learning_rate": 3.2040261111763946e-07, + "loss": 0.5047632455825806, + "step": 6316 + }, + { + "epoch": 1.8471998830238339, + "grad_norm": 2.082041129535105, + "learning_rate": 3.1918972117433e-07, + "loss": 0.5763708353042603, + "step": 6317 + }, + { + "epoch": 1.8474923234390994, + "grad_norm": 1.7701935148884373, + "learning_rate": 3.1797909406156234e-07, + "loss": 0.4725028872489929, + "step": 6318 + }, + { + "epoch": 1.8477847638543645, + "grad_norm": 1.5419878667068574, + "learning_rate": 3.167707300623135e-07, + "loss": 0.523047924041748, + "step": 6319 + }, + { + "epoch": 1.84807720426963, + "grad_norm": 1.6321175932285703, + "learning_rate": 3.15564629459032e-07, + "loss": 0.5100070238113403, + "step": 6320 + }, + { + "epoch": 1.8483696446848956, + "grad_norm": 1.7375024362733555, + "learning_rate": 
3.143607925336356e-07, + "loss": 0.6019359827041626, + "step": 6321 + }, + { + "epoch": 1.8486620851001607, + "grad_norm": 1.8195133886893664, + "learning_rate": 3.1315921956751483e-07, + "loss": 0.5514570474624634, + "step": 6322 + }, + { + "epoch": 1.8489545255154263, + "grad_norm": 1.6002643586013279, + "learning_rate": 3.1195991084152944e-07, + "loss": 0.49585646390914917, + "step": 6323 + }, + { + "epoch": 1.8492469659306916, + "grad_norm": 1.724322382501938, + "learning_rate": 3.1076286663601076e-07, + "loss": 0.5738509297370911, + "step": 6324 + }, + { + "epoch": 1.849539406345957, + "grad_norm": 1.8621720995112787, + "learning_rate": 3.095680872307605e-07, + "loss": 0.5149112939834595, + "step": 6325 + }, + { + "epoch": 1.8498318467612225, + "grad_norm": 1.6738148879498993, + "learning_rate": 3.0837557290505083e-07, + "loss": 0.45808184146881104, + "step": 6326 + }, + { + "epoch": 1.8501242871764878, + "grad_norm": 1.6155317269058609, + "learning_rate": 3.0718532393762435e-07, + "loss": 0.5173396468162537, + "step": 6327 + }, + { + "epoch": 1.8504167275917531, + "grad_norm": 1.6905273546590853, + "learning_rate": 3.059973406066963e-07, + "loss": 0.6229383945465088, + "step": 6328 + }, + { + "epoch": 1.8507091680070187, + "grad_norm": 1.6794531990129002, + "learning_rate": 3.0481162318994894e-07, + "loss": 0.45520371198654175, + "step": 6329 + }, + { + "epoch": 1.851001608422284, + "grad_norm": 1.5024073523898138, + "learning_rate": 3.036281719645373e-07, + "loss": 0.43216121196746826, + "step": 6330 + }, + { + "epoch": 1.8512940488375493, + "grad_norm": 1.9238309164883824, + "learning_rate": 3.0244698720708456e-07, + "loss": 0.5440583825111389, + "step": 6331 + }, + { + "epoch": 1.8515864892528149, + "grad_norm": 1.8189444343843324, + "learning_rate": 3.0126806919368756e-07, + "loss": 0.5474626421928406, + "step": 6332 + }, + { + "epoch": 1.85187892966808, + "grad_norm": 1.7800420936387606, + "learning_rate": 3.000914181999093e-07, + "loss": 
0.5122883915901184, + "step": 6333 + }, + { + "epoch": 1.8521713700833455, + "grad_norm": 1.776220435476035, + "learning_rate": 2.989170345007852e-07, + "loss": 0.48304370045661926, + "step": 6334 + }, + { + "epoch": 1.8524638104986109, + "grad_norm": 1.6949801188317577, + "learning_rate": 2.977449183708214e-07, + "loss": 0.566180408000946, + "step": 6335 + }, + { + "epoch": 1.8527562509138762, + "grad_norm": 1.7482351137010406, + "learning_rate": 2.96575070083992e-07, + "loss": 0.5218988656997681, + "step": 6336 + }, + { + "epoch": 1.8530486913291417, + "grad_norm": 1.8289145949576808, + "learning_rate": 2.954074899137427e-07, + "loss": 0.49669283628463745, + "step": 6337 + }, + { + "epoch": 1.853341131744407, + "grad_norm": 1.6012219042297557, + "learning_rate": 2.942421781329874e-07, + "loss": 0.5505487322807312, + "step": 6338 + }, + { + "epoch": 1.8536335721596724, + "grad_norm": 1.6156483149639533, + "learning_rate": 2.930791350141116e-07, + "loss": 0.5386735200881958, + "step": 6339 + }, + { + "epoch": 1.853926012574938, + "grad_norm": 2.0764057670166776, + "learning_rate": 2.919183608289689e-07, + "loss": 0.5266523957252502, + "step": 6340 + }, + { + "epoch": 1.8542184529902033, + "grad_norm": 1.573480922837112, + "learning_rate": 2.907598558488822e-07, + "loss": 0.5335103273391724, + "step": 6341 + }, + { + "epoch": 1.8545108934054686, + "grad_norm": 1.8447961626822076, + "learning_rate": 2.896036203446473e-07, + "loss": 0.6155405044555664, + "step": 6342 + }, + { + "epoch": 1.8548033338207341, + "grad_norm": 1.5602039082453873, + "learning_rate": 2.884496545865245e-07, + "loss": 0.5258159041404724, + "step": 6343 + }, + { + "epoch": 1.8550957742359993, + "grad_norm": 1.7894466773590292, + "learning_rate": 2.8729795884424927e-07, + "loss": 0.5428795218467712, + "step": 6344 + }, + { + "epoch": 1.8553882146512648, + "grad_norm": 1.4344098630811726, + "learning_rate": 2.8614853338702066e-07, + "loss": 0.4876418709754944, + "step": 6345 + }, + { + "epoch": 
1.8556806550665303, + "grad_norm": 1.606511441088432, + "learning_rate": 2.850013784835115e-07, + "loss": 0.49640393257141113, + "step": 6346 + }, + { + "epoch": 1.8559730954817955, + "grad_norm": 1.8316843043903746, + "learning_rate": 2.838564944018618e-07, + "loss": 0.5726122260093689, + "step": 6347 + }, + { + "epoch": 1.856265535897061, + "grad_norm": 1.653087716973347, + "learning_rate": 2.827138814096819e-07, + "loss": 0.5106557011604309, + "step": 6348 + }, + { + "epoch": 1.8565579763123263, + "grad_norm": 1.5025453294784719, + "learning_rate": 2.8157353977405044e-07, + "loss": 0.45941129326820374, + "step": 6349 + }, + { + "epoch": 1.8568504167275917, + "grad_norm": 1.781767756464568, + "learning_rate": 2.8043546976151414e-07, + "loss": 0.488609254360199, + "step": 6350 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 1.764244860072195, + "learning_rate": 2.7929967163809135e-07, + "loss": 0.639745831489563, + "step": 6351 + }, + { + "epoch": 1.8574352975581225, + "grad_norm": 1.498822179909691, + "learning_rate": 2.7816614566926747e-07, + "loss": 0.45327228307724, + "step": 6352 + }, + { + "epoch": 1.8577277379733879, + "grad_norm": 1.625074504661963, + "learning_rate": 2.7703489211999725e-07, + "loss": 0.5606091022491455, + "step": 6353 + }, + { + "epoch": 1.8580201783886534, + "grad_norm": 1.7312129624633084, + "learning_rate": 2.759059112547047e-07, + "loss": 0.5078528523445129, + "step": 6354 + }, + { + "epoch": 1.8583126188039187, + "grad_norm": 1.6687258508972733, + "learning_rate": 2.74779203337282e-07, + "loss": 0.5558253526687622, + "step": 6355 + }, + { + "epoch": 1.858605059219184, + "grad_norm": 1.7014892476807573, + "learning_rate": 2.7365476863108974e-07, + "loss": 0.3962102234363556, + "step": 6356 + }, + { + "epoch": 1.8588974996344496, + "grad_norm": 1.6957699860554467, + "learning_rate": 2.725326073989587e-07, + "loss": 0.4737718105316162, + "step": 6357 + }, + { + "epoch": 1.8591899400497147, + "grad_norm": 3.313281560384309, + 
"learning_rate": 2.7141271990318576e-07, + "loss": 0.5389090180397034, + "step": 6358 + }, + { + "epoch": 1.8594823804649803, + "grad_norm": 1.7840378938084138, + "learning_rate": 2.7029510640554033e-07, + "loss": 0.5311479568481445, + "step": 6359 + }, + { + "epoch": 1.8597748208802458, + "grad_norm": 1.631290291956445, + "learning_rate": 2.691797671672558e-07, + "loss": 0.4753482937812805, + "step": 6360 + }, + { + "epoch": 1.860067261295511, + "grad_norm": 1.581254208029566, + "learning_rate": 2.6806670244903577e-07, + "loss": 0.5192427635192871, + "step": 6361 + }, + { + "epoch": 1.8603597017107765, + "grad_norm": 1.9540580966263197, + "learning_rate": 2.6695591251105214e-07, + "loss": 0.5910875797271729, + "step": 6362 + }, + { + "epoch": 1.8606521421260418, + "grad_norm": 1.7486575397054567, + "learning_rate": 2.658473976129472e-07, + "loss": 0.5465212464332581, + "step": 6363 + }, + { + "epoch": 1.8609445825413071, + "grad_norm": 1.7446293681201037, + "learning_rate": 2.647411580138282e-07, + "loss": 0.43188267946243286, + "step": 6364 + }, + { + "epoch": 1.8612370229565727, + "grad_norm": 2.144472636918694, + "learning_rate": 2.636371939722715e-07, + "loss": 0.5723724365234375, + "step": 6365 + }, + { + "epoch": 1.861529463371838, + "grad_norm": 1.6310859619397844, + "learning_rate": 2.62535505746323e-07, + "loss": 0.47383856773376465, + "step": 6366 + }, + { + "epoch": 1.8618219037871033, + "grad_norm": 1.764378835172625, + "learning_rate": 2.6143609359349566e-07, + "loss": 0.502855658531189, + "step": 6367 + }, + { + "epoch": 1.8621143442023689, + "grad_norm": 2.265501418087609, + "learning_rate": 2.6033895777077043e-07, + "loss": 0.5934205055236816, + "step": 6368 + }, + { + "epoch": 1.8624067846176342, + "grad_norm": 1.469455820490925, + "learning_rate": 2.5924409853459455e-07, + "loss": 0.4157971143722534, + "step": 6369 + }, + { + "epoch": 1.8626992250328995, + "grad_norm": 1.8051847044948597, + "learning_rate": 2.5815151614088764e-07, + "loss": 
0.5944307446479797, + "step": 6370 + }, + { + "epoch": 1.862991665448165, + "grad_norm": 2.0081645135491812, + "learning_rate": 2.57061210845031e-07, + "loss": 0.5603153705596924, + "step": 6371 + }, + { + "epoch": 1.8632841058634302, + "grad_norm": 1.752999497142634, + "learning_rate": 2.559731829018786e-07, + "loss": 0.49231380224227905, + "step": 6372 + }, + { + "epoch": 1.8635765462786957, + "grad_norm": 1.666251917997058, + "learning_rate": 2.548874325657502e-07, + "loss": 0.46984565258026123, + "step": 6373 + }, + { + "epoch": 1.863868986693961, + "grad_norm": 1.7373025752546019, + "learning_rate": 2.5380396009043297e-07, + "loss": 0.5088338255882263, + "step": 6374 + }, + { + "epoch": 1.8641614271092264, + "grad_norm": 1.7554684094014161, + "learning_rate": 2.52722765729182e-07, + "loss": 0.4760589599609375, + "step": 6375 + }, + { + "epoch": 1.864453867524492, + "grad_norm": 1.6521606786384044, + "learning_rate": 2.5164384973471954e-07, + "loss": 0.44232040643692017, + "step": 6376 + }, + { + "epoch": 1.8647463079397573, + "grad_norm": 1.736903879415624, + "learning_rate": 2.505672123592373e-07, + "loss": 0.46714338660240173, + "step": 6377 + }, + { + "epoch": 1.8650387483550226, + "grad_norm": 1.9333860177281759, + "learning_rate": 2.494928538543917e-07, + "loss": 0.5527149438858032, + "step": 6378 + }, + { + "epoch": 1.8653311887702881, + "grad_norm": 1.690422887605866, + "learning_rate": 2.484207744713074e-07, + "loss": 0.5006313323974609, + "step": 6379 + }, + { + "epoch": 1.8656236291855535, + "grad_norm": 1.5247883016042734, + "learning_rate": 2.473509744605751e-07, + "loss": 0.5007860660552979, + "step": 6380 + }, + { + "epoch": 1.8659160696008188, + "grad_norm": 1.683063597354387, + "learning_rate": 2.4628345407225804e-07, + "loss": 0.4354132413864136, + "step": 6381 + }, + { + "epoch": 1.8662085100160843, + "grad_norm": 1.718309113338333, + "learning_rate": 2.452182135558789e-07, + "loss": 0.5199555158615112, + "step": 6382 + }, + { + "epoch": 
1.8665009504313494, + "grad_norm": 1.6260046663066803, + "learning_rate": 2.441552531604319e-07, + "loss": 0.5117326974868774, + "step": 6383 + }, + { + "epoch": 1.866793390846615, + "grad_norm": 1.803024051218915, + "learning_rate": 2.43094573134377e-07, + "loss": 0.5169814825057983, + "step": 6384 + }, + { + "epoch": 1.8670858312618805, + "grad_norm": 1.7012998015666523, + "learning_rate": 2.420361737256438e-07, + "loss": 0.563339352607727, + "step": 6385 + }, + { + "epoch": 1.8673782716771457, + "grad_norm": 2.1248949598274325, + "learning_rate": 2.409800551816255e-07, + "loss": 0.710465133190155, + "step": 6386 + }, + { + "epoch": 1.8676707120924112, + "grad_norm": 1.6580658731053397, + "learning_rate": 2.3992621774918343e-07, + "loss": 0.6894562244415283, + "step": 6387 + }, + { + "epoch": 1.8679631525076765, + "grad_norm": 1.7380197058585787, + "learning_rate": 2.388746616746462e-07, + "loss": 0.5105183124542236, + "step": 6388 + }, + { + "epoch": 1.8682555929229419, + "grad_norm": 2.0034985048956684, + "learning_rate": 2.3782538720380722e-07, + "loss": 0.4602908492088318, + "step": 6389 + }, + { + "epoch": 1.8685480333382074, + "grad_norm": 1.7787197864367217, + "learning_rate": 2.3677839458192908e-07, + "loss": 0.5395161509513855, + "step": 6390 + }, + { + "epoch": 1.8688404737534727, + "grad_norm": 1.6121023481071262, + "learning_rate": 2.3573368405374054e-07, + "loss": 0.5842725038528442, + "step": 6391 + }, + { + "epoch": 1.869132914168738, + "grad_norm": 1.6354709739233064, + "learning_rate": 2.346912558634362e-07, + "loss": 0.5837947130203247, + "step": 6392 + }, + { + "epoch": 1.8694253545840036, + "grad_norm": 1.8136211176417363, + "learning_rate": 2.3365111025467568e-07, + "loss": 0.5255596041679382, + "step": 6393 + }, + { + "epoch": 1.869717794999269, + "grad_norm": 1.5586602271443384, + "learning_rate": 2.326132474705889e-07, + "loss": 0.5614485144615173, + "step": 6394 + }, + { + "epoch": 1.8700102354145343, + "grad_norm": 1.5895893761997042, + 
"learning_rate": 2.3157766775376733e-07, + "loss": 0.5510128736495972, + "step": 6395 + }, + { + "epoch": 1.8703026758297998, + "grad_norm": 2.295988070565878, + "learning_rate": 2.3054437134627406e-07, + "loss": 0.690884530544281, + "step": 6396 + }, + { + "epoch": 1.870595116245065, + "grad_norm": 1.94960784120805, + "learning_rate": 2.2951335848963364e-07, + "loss": 0.637476921081543, + "step": 6397 + }, + { + "epoch": 1.8708875566603305, + "grad_norm": 1.6526446878259382, + "learning_rate": 2.2848462942484108e-07, + "loss": 0.5254319906234741, + "step": 6398 + }, + { + "epoch": 1.871179997075596, + "grad_norm": 1.7552717813182315, + "learning_rate": 2.27458184392354e-07, + "loss": 0.5038233995437622, + "step": 6399 + }, + { + "epoch": 1.8714724374908611, + "grad_norm": 1.4123258498894362, + "learning_rate": 2.2643402363209832e-07, + "loss": 0.43701431155204773, + "step": 6400 + }, + { + "epoch": 1.8717648779061267, + "grad_norm": 1.8138198755485717, + "learning_rate": 2.2541214738346583e-07, + "loss": 0.5490877628326416, + "step": 6401 + }, + { + "epoch": 1.872057318321392, + "grad_norm": 1.5452561215431913, + "learning_rate": 2.2439255588531327e-07, + "loss": 0.48393410444259644, + "step": 6402 + }, + { + "epoch": 1.8723497587366573, + "grad_norm": 1.6213926610567049, + "learning_rate": 2.2337524937596444e-07, + "loss": 0.5439243912696838, + "step": 6403 + }, + { + "epoch": 1.8726421991519229, + "grad_norm": 1.6026974016529494, + "learning_rate": 2.22360228093208e-07, + "loss": 0.5272157192230225, + "step": 6404 + }, + { + "epoch": 1.8729346395671882, + "grad_norm": 1.6750451870732375, + "learning_rate": 2.2134749227429864e-07, + "loss": 0.6323473453521729, + "step": 6405 + }, + { + "epoch": 1.8732270799824535, + "grad_norm": 1.6749139186520705, + "learning_rate": 2.2033704215595808e-07, + "loss": 0.4568995237350464, + "step": 6406 + }, + { + "epoch": 1.873519520397719, + "grad_norm": 1.8331627672377568, + "learning_rate": 2.1932887797437296e-07, + "loss": 
0.5817153453826904, + "step": 6407 + }, + { + "epoch": 1.8738119608129844, + "grad_norm": 1.4674902238035163, + "learning_rate": 2.183229999651948e-07, + "loss": 0.5104260444641113, + "step": 6408 + }, + { + "epoch": 1.8741044012282497, + "grad_norm": 1.7946613600749395, + "learning_rate": 2.1731940836354105e-07, + "loss": 0.44944921135902405, + "step": 6409 + }, + { + "epoch": 1.8743968416435153, + "grad_norm": 1.794977484250215, + "learning_rate": 2.163181034039974e-07, + "loss": 0.6935169696807861, + "step": 6410 + }, + { + "epoch": 1.8746892820587804, + "grad_norm": 1.7330999339843873, + "learning_rate": 2.1531908532060998e-07, + "loss": 0.55609130859375, + "step": 6411 + }, + { + "epoch": 1.874981722474046, + "grad_norm": 1.6428359107019144, + "learning_rate": 2.143223543468953e-07, + "loss": 0.5402215719223022, + "step": 6412 + }, + { + "epoch": 1.8752741628893113, + "grad_norm": 1.8163043216263146, + "learning_rate": 2.1332791071583258e-07, + "loss": 0.5669365525245667, + "step": 6413 + }, + { + "epoch": 1.8755666033045766, + "grad_norm": 2.2122008806914044, + "learning_rate": 2.123357546598659e-07, + "loss": 0.46257615089416504, + "step": 6414 + }, + { + "epoch": 1.8758590437198421, + "grad_norm": 1.6308794717153283, + "learning_rate": 2.1134588641090858e-07, + "loss": 0.4596136212348938, + "step": 6415 + }, + { + "epoch": 1.8761514841351075, + "grad_norm": 1.6758615624094995, + "learning_rate": 2.1035830620033227e-07, + "loss": 0.5086819529533386, + "step": 6416 + }, + { + "epoch": 1.8764439245503728, + "grad_norm": 1.8974547658257448, + "learning_rate": 2.0937301425898115e-07, + "loss": 0.6008501052856445, + "step": 6417 + }, + { + "epoch": 1.8767363649656383, + "grad_norm": 1.8448672190670345, + "learning_rate": 2.0839001081715882e-07, + "loss": 0.5943784713745117, + "step": 6418 + }, + { + "epoch": 1.8770288053809037, + "grad_norm": 1.3203141385144623, + "learning_rate": 2.0740929610463813e-07, + "loss": 0.5006660223007202, + "step": 6419 + }, + { + 
"epoch": 1.877321245796169, + "grad_norm": 1.7508035137785818, + "learning_rate": 2.0643087035065458e-07, + "loss": 0.5434073805809021, + "step": 6420 + }, + { + "epoch": 1.8776136862114345, + "grad_norm": 1.8446497118213794, + "learning_rate": 2.0545473378390858e-07, + "loss": 0.6426963210105896, + "step": 6421 + }, + { + "epoch": 1.8779061266266996, + "grad_norm": 1.7388169538440008, + "learning_rate": 2.044808866325676e-07, + "loss": 0.5190218687057495, + "step": 6422 + }, + { + "epoch": 1.8781985670419652, + "grad_norm": 1.5291942184143035, + "learning_rate": 2.035093291242607e-07, + "loss": 0.40918534994125366, + "step": 6423 + }, + { + "epoch": 1.8784910074572307, + "grad_norm": 1.719713887519883, + "learning_rate": 2.0254006148608507e-07, + "loss": 0.5403652191162109, + "step": 6424 + }, + { + "epoch": 1.8787834478724958, + "grad_norm": 1.3839892041506006, + "learning_rate": 2.0157308394460062e-07, + "loss": 0.49781516194343567, + "step": 6425 + }, + { + "epoch": 1.8790758882877614, + "grad_norm": 1.8332751958303748, + "learning_rate": 2.006083967258321e-07, + "loss": 0.5841303467750549, + "step": 6426 + }, + { + "epoch": 1.8793683287030267, + "grad_norm": 1.679945923485487, + "learning_rate": 1.9964600005527024e-07, + "loss": 0.5054808855056763, + "step": 6427 + }, + { + "epoch": 1.879660769118292, + "grad_norm": 1.7695393284467882, + "learning_rate": 1.9868589415786843e-07, + "loss": 0.4801362454891205, + "step": 6428 + }, + { + "epoch": 1.8799532095335576, + "grad_norm": 1.8547174560912147, + "learning_rate": 1.9772807925804494e-07, + "loss": 0.4709380269050598, + "step": 6429 + }, + { + "epoch": 1.880245649948823, + "grad_norm": 1.8447220446699908, + "learning_rate": 1.9677255557968511e-07, + "loss": 0.665968120098114, + "step": 6430 + }, + { + "epoch": 1.8805380903640883, + "grad_norm": 1.7494009698963573, + "learning_rate": 1.9581932334613585e-07, + "loss": 0.515839159488678, + "step": 6431 + }, + { + "epoch": 1.8808305307793538, + "grad_norm": 
1.6699738562759978, + "learning_rate": 1.948683827802089e-07, + "loss": 0.5399242043495178, + "step": 6432 + }, + { + "epoch": 1.8811229711946191, + "grad_norm": 1.7478095955612059, + "learning_rate": 1.9391973410418097e-07, + "loss": 0.6167087554931641, + "step": 6433 + }, + { + "epoch": 1.8814154116098845, + "grad_norm": 1.826500337038364, + "learning_rate": 1.9297337753979462e-07, + "loss": 0.6139745116233826, + "step": 6434 + }, + { + "epoch": 1.88170785202515, + "grad_norm": 2.0873679343118257, + "learning_rate": 1.9202931330825292e-07, + "loss": 0.7103149890899658, + "step": 6435 + }, + { + "epoch": 1.8820002924404151, + "grad_norm": 1.6777685812633742, + "learning_rate": 1.9108754163022602e-07, + "loss": 0.5958741903305054, + "step": 6436 + }, + { + "epoch": 1.8822927328556807, + "grad_norm": 1.2489160599157765, + "learning_rate": 1.9014806272584673e-07, + "loss": 0.32660478353500366, + "step": 6437 + }, + { + "epoch": 1.8825851732709462, + "grad_norm": 1.822465954469875, + "learning_rate": 1.8921087681471272e-07, + "loss": 0.49485981464385986, + "step": 6438 + }, + { + "epoch": 1.8828776136862113, + "grad_norm": 1.5404253681507418, + "learning_rate": 1.8827598411588544e-07, + "loss": 0.5106277465820312, + "step": 6439 + }, + { + "epoch": 1.8831700541014769, + "grad_norm": 1.5696470040532076, + "learning_rate": 1.8734338484789115e-07, + "loss": 0.50006502866745, + "step": 6440 + }, + { + "epoch": 1.8834624945167422, + "grad_norm": 1.5827360977472946, + "learning_rate": 1.8641307922871887e-07, + "loss": 0.47097745537757874, + "step": 6441 + }, + { + "epoch": 1.8837549349320075, + "grad_norm": 1.718260594389779, + "learning_rate": 1.854850674758213e-07, + "loss": 0.5874402523040771, + "step": 6442 + }, + { + "epoch": 1.884047375347273, + "grad_norm": 1.7055917291229012, + "learning_rate": 1.8455934980611602e-07, + "loss": 0.45705318450927734, + "step": 6443 + }, + { + "epoch": 1.8843398157625384, + "grad_norm": 1.8262667617041222, + "learning_rate": 
1.8363592643598328e-07, + "loss": 0.4949952960014343, + "step": 6444 + }, + { + "epoch": 1.8846322561778037, + "grad_norm": 2.0005095204142056, + "learning_rate": 1.827147975812693e-07, + "loss": 0.5311721563339233, + "step": 6445 + }, + { + "epoch": 1.8849246965930693, + "grad_norm": 1.8075375628836245, + "learning_rate": 1.817959634572819e-07, + "loss": 0.5652828216552734, + "step": 6446 + }, + { + "epoch": 1.8852171370083346, + "grad_norm": 1.7007026167846622, + "learning_rate": 1.8087942427879146e-07, + "loss": 0.4856044054031372, + "step": 6447 + }, + { + "epoch": 1.8855095774236, + "grad_norm": 1.6920105837383546, + "learning_rate": 1.799651802600344e-07, + "loss": 0.55420982837677, + "step": 6448 + }, + { + "epoch": 1.8858020178388655, + "grad_norm": 1.8804834035548856, + "learning_rate": 1.7905323161470867e-07, + "loss": 0.5869326591491699, + "step": 6449 + }, + { + "epoch": 1.8860944582541306, + "grad_norm": 1.761061751635786, + "learning_rate": 1.781435785559793e-07, + "loss": 0.4505504369735718, + "step": 6450 + }, + { + "epoch": 1.8863868986693961, + "grad_norm": 1.7194415376329713, + "learning_rate": 1.7723622129646955e-07, + "loss": 0.5460773706436157, + "step": 6451 + }, + { + "epoch": 1.8866793390846615, + "grad_norm": 1.7253684204963688, + "learning_rate": 1.7633116004826978e-07, + "loss": 0.6214778423309326, + "step": 6452 + }, + { + "epoch": 1.8869717794999268, + "grad_norm": 1.786722853658628, + "learning_rate": 1.7542839502293297e-07, + "loss": 0.4900703430175781, + "step": 6453 + }, + { + "epoch": 1.8872642199151923, + "grad_norm": 1.8351888114829378, + "learning_rate": 1.7452792643147364e-07, + "loss": 0.5177547931671143, + "step": 6454 + }, + { + "epoch": 1.8875566603304577, + "grad_norm": 1.6033594290974305, + "learning_rate": 1.7362975448437236e-07, + "loss": 0.3914458453655243, + "step": 6455 + }, + { + "epoch": 1.887849100745723, + "grad_norm": 1.7306995937297311, + "learning_rate": 1.7273387939157116e-07, + "loss": 0.5222523212432861, + 
"step": 6456 + }, + { + "epoch": 1.8881415411609885, + "grad_norm": 1.8351026582741266, + "learning_rate": 1.7184030136247477e-07, + "loss": 0.5097587704658508, + "step": 6457 + }, + { + "epoch": 1.8884339815762539, + "grad_norm": 1.711376264331189, + "learning_rate": 1.7094902060595053e-07, + "loss": 0.517410397529602, + "step": 6458 + }, + { + "epoch": 1.8887264219915192, + "grad_norm": 1.5054067124169248, + "learning_rate": 1.7006003733033182e-07, + "loss": 0.4951689839363098, + "step": 6459 + }, + { + "epoch": 1.8890188624067847, + "grad_norm": 1.8698243351971042, + "learning_rate": 1.6917335174341242e-07, + "loss": 0.5530004501342773, + "step": 6460 + }, + { + "epoch": 1.8893113028220498, + "grad_norm": 1.3793759581483827, + "learning_rate": 1.6828896405244988e-07, + "loss": 0.5231990814208984, + "step": 6461 + }, + { + "epoch": 1.8896037432373154, + "grad_norm": 1.7109665283076239, + "learning_rate": 1.6740687446416326e-07, + "loss": 0.5142268538475037, + "step": 6462 + }, + { + "epoch": 1.889896183652581, + "grad_norm": 1.5939124952252972, + "learning_rate": 1.6652708318473765e-07, + "loss": 0.4803999364376068, + "step": 6463 + }, + { + "epoch": 1.890188624067846, + "grad_norm": 1.8261203070041963, + "learning_rate": 1.6564959041981743e-07, + "loss": 0.38822099566459656, + "step": 6464 + }, + { + "epoch": 1.8904810644831116, + "grad_norm": 1.7158195687276572, + "learning_rate": 1.6477439637451186e-07, + "loss": 0.4778556823730469, + "step": 6465 + }, + { + "epoch": 1.890773504898377, + "grad_norm": 1.548976438279917, + "learning_rate": 1.6390150125339178e-07, + "loss": 0.5083664059638977, + "step": 6466 + }, + { + "epoch": 1.8910659453136422, + "grad_norm": 2.298817115631298, + "learning_rate": 1.6303090526049058e-07, + "loss": 0.6592142581939697, + "step": 6467 + }, + { + "epoch": 1.8913583857289078, + "grad_norm": 1.7188849828284447, + "learning_rate": 1.6216260859930776e-07, + "loss": 0.6350588798522949, + "step": 6468 + }, + { + "epoch": 
1.8916508261441731, + "grad_norm": 1.900981319900476, + "learning_rate": 1.6129661147279763e-07, + "loss": 0.5542852282524109, + "step": 6469 + }, + { + "epoch": 1.8919432665594385, + "grad_norm": 1.7094379727839777, + "learning_rate": 1.6043291408338602e-07, + "loss": 0.572988748550415, + "step": 6470 + }, + { + "epoch": 1.892235706974704, + "grad_norm": 1.578693569659532, + "learning_rate": 1.5957151663295367e-07, + "loss": 0.4801466763019562, + "step": 6471 + }, + { + "epoch": 1.8925281473899693, + "grad_norm": 2.0149025268161207, + "learning_rate": 1.5871241932284953e-07, + "loss": 0.6286160349845886, + "step": 6472 + }, + { + "epoch": 1.8928205878052347, + "grad_norm": 1.8739502258074872, + "learning_rate": 1.5785562235388074e-07, + "loss": 0.5731645822525024, + "step": 6473 + }, + { + "epoch": 1.8931130282205002, + "grad_norm": 2.02559646967304, + "learning_rate": 1.5700112592631933e-07, + "loss": 0.47890836000442505, + "step": 6474 + }, + { + "epoch": 1.8934054686357653, + "grad_norm": 1.8833158182705436, + "learning_rate": 1.5614893023989886e-07, + "loss": 0.4379703998565674, + "step": 6475 + }, + { + "epoch": 1.8936979090510309, + "grad_norm": 1.886508266764503, + "learning_rate": 1.5529903549381331e-07, + "loss": 0.5629044771194458, + "step": 6476 + }, + { + "epoch": 1.8939903494662964, + "grad_norm": 1.6388873220258502, + "learning_rate": 1.5445144188672268e-07, + "loss": 0.4995439052581787, + "step": 6477 + }, + { + "epoch": 1.8942827898815615, + "grad_norm": 1.54762620576383, + "learning_rate": 1.5360614961674403e-07, + "loss": 0.5350549221038818, + "step": 6478 + }, + { + "epoch": 1.894575230296827, + "grad_norm": 1.636976407400752, + "learning_rate": 1.5276315888146266e-07, + "loss": 0.5245925188064575, + "step": 6479 + }, + { + "epoch": 1.8948676707120924, + "grad_norm": 1.870112790684546, + "learning_rate": 1.519224698779198e-07, + "loss": 0.5159675478935242, + "step": 6480 + }, + { + "epoch": 1.8951601111273577, + "grad_norm": 1.6327790205426773, 
+ "learning_rate": 1.5108408280262276e-07, + "loss": 0.5046014189720154, + "step": 6481 + }, + { + "epoch": 1.8954525515426233, + "grad_norm": 1.5658787677393426, + "learning_rate": 1.502479978515381e-07, + "loss": 0.35977911949157715, + "step": 6482 + }, + { + "epoch": 1.8957449919578886, + "grad_norm": 1.6374646749200208, + "learning_rate": 1.4941421522009725e-07, + "loss": 0.4689600467681885, + "step": 6483 + }, + { + "epoch": 1.896037432373154, + "grad_norm": 1.713919299692529, + "learning_rate": 1.485827351031899e-07, + "loss": 0.5729683637619019, + "step": 6484 + }, + { + "epoch": 1.8963298727884195, + "grad_norm": 2.081397285004385, + "learning_rate": 1.4775355769517163e-07, + "loss": 0.5929673314094543, + "step": 6485 + }, + { + "epoch": 1.8966223132036848, + "grad_norm": 1.6003411415494537, + "learning_rate": 1.4692668318985636e-07, + "loss": 0.43075594305992126, + "step": 6486 + }, + { + "epoch": 1.8969147536189501, + "grad_norm": 1.7646064022155787, + "learning_rate": 1.461021117805217e-07, + "loss": 0.5247992277145386, + "step": 6487 + }, + { + "epoch": 1.8972071940342157, + "grad_norm": 1.7947400732319756, + "learning_rate": 1.4527984365990455e-07, + "loss": 0.4930630326271057, + "step": 6488 + }, + { + "epoch": 1.8974996344494808, + "grad_norm": 2.490399223660391, + "learning_rate": 1.4445987902020676e-07, + "loss": 0.7183758616447449, + "step": 6489 + }, + { + "epoch": 1.8977920748647463, + "grad_norm": 1.6603594705802933, + "learning_rate": 1.4364221805309052e-07, + "loss": 0.4766094982624054, + "step": 6490 + }, + { + "epoch": 1.8980845152800117, + "grad_norm": 1.7401365125544646, + "learning_rate": 1.4282686094967747e-07, + "loss": 0.43594151735305786, + "step": 6491 + }, + { + "epoch": 1.898376955695277, + "grad_norm": 1.4953976915814553, + "learning_rate": 1.4201380790055397e-07, + "loss": 0.49320366978645325, + "step": 6492 + }, + { + "epoch": 1.8986693961105425, + "grad_norm": 1.7835092237734465, + "learning_rate": 1.4120305909576359e-07, + 
"loss": 0.600296139717102, + "step": 6493 + }, + { + "epoch": 1.8989618365258079, + "grad_norm": 1.8325915671317163, + "learning_rate": 1.4039461472481696e-07, + "loss": 0.6692827939987183, + "step": 6494 + }, + { + "epoch": 1.8992542769410732, + "grad_norm": 1.5707947665490356, + "learning_rate": 1.395884749766807e-07, + "loss": 0.49206262826919556, + "step": 6495 + }, + { + "epoch": 1.8995467173563387, + "grad_norm": 1.5801197568349268, + "learning_rate": 1.3878464003978741e-07, + "loss": 0.4987361431121826, + "step": 6496 + }, + { + "epoch": 1.899839157771604, + "grad_norm": 1.5345056226134064, + "learning_rate": 1.3798311010202681e-07, + "loss": 0.5020350217819214, + "step": 6497 + }, + { + "epoch": 1.9001315981868694, + "grad_norm": 1.804856300616187, + "learning_rate": 1.3718388535075123e-07, + "loss": 0.5906451344490051, + "step": 6498 + }, + { + "epoch": 1.900424038602135, + "grad_norm": 1.7402170644717794, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.5089905858039856, + "step": 6499 + }, + { + "epoch": 1.9007164790174, + "grad_norm": 1.7322257732687294, + "learning_rate": 1.3559235215437672e-07, + "loss": 0.4633820056915283, + "step": 6500 + }, + { + "epoch": 1.9010089194326656, + "grad_norm": 1.760704522549711, + "learning_rate": 1.34800044081288e-07, + "loss": 0.4214053750038147, + "step": 6501 + }, + { + "epoch": 1.9013013598479311, + "grad_norm": 1.8502899980656935, + "learning_rate": 1.3401004193870694e-07, + "loss": 0.6652689576148987, + "step": 6502 + }, + { + "epoch": 1.9015938002631962, + "grad_norm": 1.9902189127655232, + "learning_rate": 1.3322234591129247e-07, + "loss": 0.610877275466919, + "step": 6503 + }, + { + "epoch": 1.9018862406784618, + "grad_norm": 1.8174576160077263, + "learning_rate": 1.324369561831651e-07, + "loss": 0.5051777958869934, + "step": 6504 + }, + { + "epoch": 1.9021786810937271, + "grad_norm": 1.679424427708786, + "learning_rate": 1.3165387293790133e-07, + "loss": 0.5004675984382629, + "step": 6505 + }, + { + 
"epoch": 1.9024711215089924, + "grad_norm": 1.7771913721647656, + "learning_rate": 1.3087309635854583e-07, + "loss": 0.5778615474700928, + "step": 6506 + }, + { + "epoch": 1.902763561924258, + "grad_norm": 2.0292378822767807, + "learning_rate": 1.300946266275982e-07, + "loss": 0.5282145738601685, + "step": 6507 + }, + { + "epoch": 1.9030560023395233, + "grad_norm": 1.7978860546574749, + "learning_rate": 1.2931846392702174e-07, + "loss": 0.5965359807014465, + "step": 6508 + }, + { + "epoch": 1.9033484427547886, + "grad_norm": 1.6652110616876246, + "learning_rate": 1.2854460843823912e-07, + "loss": 0.5891281366348267, + "step": 6509 + }, + { + "epoch": 1.9036408831700542, + "grad_norm": 1.6248785946895787, + "learning_rate": 1.2777306034213677e-07, + "loss": 0.516204297542572, + "step": 6510 + }, + { + "epoch": 1.9039333235853195, + "grad_norm": 1.6814946652270708, + "learning_rate": 1.2700381981905486e-07, + "loss": 0.5148355960845947, + "step": 6511 + }, + { + "epoch": 1.9042257640005849, + "grad_norm": 1.8013354973349966, + "learning_rate": 1.2623688704880287e-07, + "loss": 0.5599791407585144, + "step": 6512 + }, + { + "epoch": 1.9045182044158504, + "grad_norm": 1.4831223871376102, + "learning_rate": 1.2547226221064412e-07, + "loss": 0.44349417090415955, + "step": 6513 + }, + { + "epoch": 1.9048106448311155, + "grad_norm": 1.8442502212828862, + "learning_rate": 1.2470994548330672e-07, + "loss": 0.5919830799102783, + "step": 6514 + }, + { + "epoch": 1.905103085246381, + "grad_norm": 1.5907952124169482, + "learning_rate": 1.2394993704497592e-07, + "loss": 0.4615499675273895, + "step": 6515 + }, + { + "epoch": 1.9053955256616466, + "grad_norm": 2.080938429630683, + "learning_rate": 1.2319223707330074e-07, + "loss": 0.5217719674110413, + "step": 6516 + }, + { + "epoch": 1.9056879660769117, + "grad_norm": 1.7218384779241716, + "learning_rate": 1.2243684574538838e-07, + "loss": 0.510722279548645, + "step": 6517 + }, + { + "epoch": 1.9059804064921773, + "grad_norm": 
1.7577870608967676, + "learning_rate": 1.2168376323780652e-07, + "loss": 0.6744403839111328, + "step": 6518 + }, + { + "epoch": 1.9062728469074426, + "grad_norm": 1.852387461765699, + "learning_rate": 1.209329897265832e-07, + "loss": 0.4991394281387329, + "step": 6519 + }, + { + "epoch": 1.906565287322708, + "grad_norm": 1.5123359691224252, + "learning_rate": 1.2018452538720805e-07, + "loss": 0.43237754702568054, + "step": 6520 + }, + { + "epoch": 1.9068577277379735, + "grad_norm": 1.929873331270624, + "learning_rate": 1.1943837039463112e-07, + "loss": 0.6042662262916565, + "step": 6521 + }, + { + "epoch": 1.9071501681532388, + "grad_norm": 1.5924831654811167, + "learning_rate": 1.186945249232585e-07, + "loss": 0.4275910556316376, + "step": 6522 + }, + { + "epoch": 1.9074426085685041, + "grad_norm": 1.6404715584331906, + "learning_rate": 1.1795298914696219e-07, + "loss": 0.4368266463279724, + "step": 6523 + }, + { + "epoch": 1.9077350489837697, + "grad_norm": 1.7092717646033881, + "learning_rate": 1.172137632390713e-07, + "loss": 0.49492496252059937, + "step": 6524 + }, + { + "epoch": 1.908027489399035, + "grad_norm": 1.587478317321713, + "learning_rate": 1.164768473723743e-07, + "loss": 0.4296407103538513, + "step": 6525 + }, + { + "epoch": 1.9083199298143003, + "grad_norm": 1.746911434628144, + "learning_rate": 1.1574224171912118e-07, + "loss": 0.4609370231628418, + "step": 6526 + }, + { + "epoch": 1.9086123702295659, + "grad_norm": 1.687321204236502, + "learning_rate": 1.1500994645102237e-07, + "loss": 0.5201660394668579, + "step": 6527 + }, + { + "epoch": 1.908904810644831, + "grad_norm": 1.686028014701993, + "learning_rate": 1.1427996173924649e-07, + "loss": 0.49946731328964233, + "step": 6528 + }, + { + "epoch": 1.9091972510600965, + "grad_norm": 1.700984250030961, + "learning_rate": 1.1355228775442262e-07, + "loss": 0.5479187369346619, + "step": 6529 + }, + { + "epoch": 1.9094896914753618, + "grad_norm": 1.6485232123504545, + "learning_rate": 
1.1282692466664247e-07, + "loss": 0.5227243900299072, + "step": 6530 + }, + { + "epoch": 1.9097821318906272, + "grad_norm": 1.896983089459967, + "learning_rate": 1.1210387264545264e-07, + "loss": 0.42863208055496216, + "step": 6531 + }, + { + "epoch": 1.9100745723058927, + "grad_norm": 1.552171259240321, + "learning_rate": 1.113831318598635e-07, + "loss": 0.37858498096466064, + "step": 6532 + }, + { + "epoch": 1.910367012721158, + "grad_norm": 1.852509398879224, + "learning_rate": 1.1066470247834471e-07, + "loss": 0.6447315216064453, + "step": 6533 + }, + { + "epoch": 1.9106594531364234, + "grad_norm": 1.4833597844037574, + "learning_rate": 1.0994858466882197e-07, + "loss": 0.4159877300262451, + "step": 6534 + }, + { + "epoch": 1.910951893551689, + "grad_norm": 1.7056274655886765, + "learning_rate": 1.0923477859868581e-07, + "loss": 0.5042530298233032, + "step": 6535 + }, + { + "epoch": 1.9112443339669543, + "grad_norm": 1.6939120860687955, + "learning_rate": 1.0852328443478278e-07, + "loss": 0.35955798625946045, + "step": 6536 + }, + { + "epoch": 1.9115367743822196, + "grad_norm": 1.6272843503399623, + "learning_rate": 1.0781410234342093e-07, + "loss": 0.561823308467865, + "step": 6537 + }, + { + "epoch": 1.9118292147974851, + "grad_norm": 1.6724333597123697, + "learning_rate": 1.0710723249036659e-07, + "loss": 0.44518136978149414, + "step": 6538 + }, + { + "epoch": 1.9121216552127502, + "grad_norm": 2.0012454930429397, + "learning_rate": 1.0640267504084756e-07, + "loss": 0.5657057166099548, + "step": 6539 + }, + { + "epoch": 1.9124140956280158, + "grad_norm": 1.5762808769057957, + "learning_rate": 1.0570043015954989e-07, + "loss": 0.5659947395324707, + "step": 6540 + }, + { + "epoch": 1.9127065360432813, + "grad_norm": 1.5811137035723222, + "learning_rate": 1.0500049801061784e-07, + "loss": 0.45648419857025146, + "step": 6541 + }, + { + "epoch": 1.9129989764585464, + "grad_norm": 1.8646406465839787, + "learning_rate": 1.0430287875765611e-07, + "loss": 
0.4978141784667969, + "step": 6542 + }, + { + "epoch": 1.913291416873812, + "grad_norm": 1.54273033799953, + "learning_rate": 1.0360757256372977e-07, + "loss": 0.5397627949714661, + "step": 6543 + }, + { + "epoch": 1.9135838572890773, + "grad_norm": 1.8918413526412523, + "learning_rate": 1.029145795913633e-07, + "loss": 0.6359304189682007, + "step": 6544 + }, + { + "epoch": 1.9138762977043426, + "grad_norm": 1.70706044627556, + "learning_rate": 1.0222390000253824e-07, + "loss": 0.5023899078369141, + "step": 6545 + }, + { + "epoch": 1.9141687381196082, + "grad_norm": 1.8668808073409142, + "learning_rate": 1.0153553395869654e-07, + "loss": 0.5231877565383911, + "step": 6546 + }, + { + "epoch": 1.9144611785348735, + "grad_norm": 1.7146199886416342, + "learning_rate": 1.008494816207406e-07, + "loss": 0.5925711393356323, + "step": 6547 + }, + { + "epoch": 1.9147536189501388, + "grad_norm": 1.5881527564838034, + "learning_rate": 1.0016574314902993e-07, + "loss": 0.42732810974121094, + "step": 6548 + }, + { + "epoch": 1.9150460593654044, + "grad_norm": 1.8539790257850415, + "learning_rate": 9.948431870338559e-08, + "loss": 0.5011821985244751, + "step": 6549 + }, + { + "epoch": 1.9153384997806697, + "grad_norm": 1.7063021653673758, + "learning_rate": 9.88052084430846e-08, + "loss": 0.5112487077713013, + "step": 6550 + }, + { + "epoch": 1.915630940195935, + "grad_norm": 1.8003514575818433, + "learning_rate": 9.812841252686667e-08, + "loss": 0.4751431345939636, + "step": 6551 + }, + { + "epoch": 1.9159233806112006, + "grad_norm": 1.9933791417538373, + "learning_rate": 9.745393111292745e-08, + "loss": 0.5343109369277954, + "step": 6552 + }, + { + "epoch": 1.9162158210264657, + "grad_norm": 1.4980785147509508, + "learning_rate": 9.678176435892417e-08, + "loss": 0.4602724015712738, + "step": 6553 + }, + { + "epoch": 1.9165082614417313, + "grad_norm": 1.5436966250785777, + "learning_rate": 9.611191242197005e-08, + "loss": 0.4756245017051697, + "step": 6554 + }, + { + "epoch": 
1.9168007018569968, + "grad_norm": 1.6531719135209273, + "learning_rate": 9.544437545864093e-08, + "loss": 0.5291459560394287, + "step": 6555 + }, + { + "epoch": 1.917093142272262, + "grad_norm": 2.0976196168420946, + "learning_rate": 9.47791536249676e-08, + "loss": 0.5357412099838257, + "step": 6556 + }, + { + "epoch": 1.9173855826875275, + "grad_norm": 2.238353466121697, + "learning_rate": 9.411624707644229e-08, + "loss": 0.6298913955688477, + "step": 6557 + }, + { + "epoch": 1.9176780231027928, + "grad_norm": 1.4485326554294644, + "learning_rate": 9.345565596801553e-08, + "loss": 0.5150517225265503, + "step": 6558 + }, + { + "epoch": 1.917970463518058, + "grad_norm": 1.8563821954536717, + "learning_rate": 9.279738045409603e-08, + "loss": 0.6264858245849609, + "step": 6559 + }, + { + "epoch": 1.9182629039333237, + "grad_norm": 1.46383829182073, + "learning_rate": 9.214142068855292e-08, + "loss": 0.33123475313186646, + "step": 6560 + }, + { + "epoch": 1.918555344348589, + "grad_norm": 1.6384165039446617, + "learning_rate": 9.148777682471133e-08, + "loss": 0.5540212392807007, + "step": 6561 + }, + { + "epoch": 1.9188477847638543, + "grad_norm": 1.8427168178125763, + "learning_rate": 9.083644901535793e-08, + "loss": 0.5633922219276428, + "step": 6562 + }, + { + "epoch": 1.9191402251791199, + "grad_norm": 1.7743383669625796, + "learning_rate": 9.018743741273428e-08, + "loss": 0.58629310131073, + "step": 6563 + }, + { + "epoch": 1.9194326655943852, + "grad_norm": 1.8674136448530827, + "learning_rate": 8.95407421685457e-08, + "loss": 0.5985243320465088, + "step": 6564 + }, + { + "epoch": 1.9197251060096505, + "grad_norm": 1.6803719834498339, + "learning_rate": 8.889636343395235e-08, + "loss": 0.5344138741493225, + "step": 6565 + }, + { + "epoch": 1.920017546424916, + "grad_norm": 1.783895238536977, + "learning_rate": 8.825430135957381e-08, + "loss": 0.6139744520187378, + "step": 6566 + }, + { + "epoch": 1.9203099868401812, + "grad_norm": 1.4220884637268112, + 
"learning_rate": 8.761455609548663e-08, + "loss": 0.46376854181289673, + "step": 6567 + }, + { + "epoch": 1.9206024272554467, + "grad_norm": 1.7412635159811354, + "learning_rate": 8.697712779122902e-08, + "loss": 0.5053622722625732, + "step": 6568 + }, + { + "epoch": 1.920894867670712, + "grad_norm": 1.52795636278423, + "learning_rate": 8.634201659579622e-08, + "loss": 0.4363771080970764, + "step": 6569 + }, + { + "epoch": 1.9211873080859774, + "grad_norm": 1.6799265353987254, + "learning_rate": 8.570922265764059e-08, + "loss": 0.4167904853820801, + "step": 6570 + }, + { + "epoch": 1.921479748501243, + "grad_norm": 1.7506509667217935, + "learning_rate": 8.507874612467382e-08, + "loss": 0.525320291519165, + "step": 6571 + }, + { + "epoch": 1.9217721889165083, + "grad_norm": 1.5127507314447914, + "learning_rate": 8.445058714426691e-08, + "loss": 0.4087376594543457, + "step": 6572 + }, + { + "epoch": 1.9220646293317736, + "grad_norm": 1.975359435328043, + "learning_rate": 8.382474586324796e-08, + "loss": 0.471457839012146, + "step": 6573 + }, + { + "epoch": 1.9223570697470391, + "grad_norm": 1.5584377744842253, + "learning_rate": 8.32012224279033e-08, + "loss": 0.6125116348266602, + "step": 6574 + }, + { + "epoch": 1.9226495101623045, + "grad_norm": 1.8527915049964467, + "learning_rate": 8.258001698397744e-08, + "loss": 0.3800301253795624, + "step": 6575 + }, + { + "epoch": 1.9229419505775698, + "grad_norm": 1.7927235022665284, + "learning_rate": 8.196112967667313e-08, + "loss": 0.561034083366394, + "step": 6576 + }, + { + "epoch": 1.9232343909928353, + "grad_norm": 1.8012018638552385, + "learning_rate": 8.134456065065354e-08, + "loss": 0.5768460631370544, + "step": 6577 + }, + { + "epoch": 1.9235268314081004, + "grad_norm": 1.809882879975094, + "learning_rate": 8.073031005003562e-08, + "loss": 0.47440657019615173, + "step": 6578 + }, + { + "epoch": 1.923819271823366, + "grad_norm": 1.4902012429082565, + "learning_rate": 8.011837801839672e-08, + "loss": 
0.5315208435058594, + "step": 6579 + }, + { + "epoch": 1.9241117122386315, + "grad_norm": 1.7054296975282524, + "learning_rate": 7.950876469877467e-08, + "loss": 0.4587036371231079, + "step": 6580 + }, + { + "epoch": 1.9244041526538966, + "grad_norm": 1.6717861291166198, + "learning_rate": 7.890147023366101e-08, + "loss": 0.5356466770172119, + "step": 6581 + }, + { + "epoch": 1.9246965930691622, + "grad_norm": 1.8066170712430372, + "learning_rate": 7.829649476500667e-08, + "loss": 0.48034095764160156, + "step": 6582 + }, + { + "epoch": 1.9249890334844275, + "grad_norm": 1.9403707417182101, + "learning_rate": 7.769383843422185e-08, + "loss": 0.502929151058197, + "step": 6583 + }, + { + "epoch": 1.9252814738996928, + "grad_norm": 1.5994546211401888, + "learning_rate": 7.709350138217386e-08, + "loss": 0.44771361351013184, + "step": 6584 + }, + { + "epoch": 1.9255739143149584, + "grad_norm": 1.7058923530240673, + "learning_rate": 7.649548374918824e-08, + "loss": 0.462479829788208, + "step": 6585 + }, + { + "epoch": 1.9258663547302237, + "grad_norm": 1.7481939511400157, + "learning_rate": 7.589978567504763e-08, + "loss": 0.4758496880531311, + "step": 6586 + }, + { + "epoch": 1.926158795145489, + "grad_norm": 1.8447645858435646, + "learning_rate": 7.530640729899174e-08, + "loss": 0.521172285079956, + "step": 6587 + }, + { + "epoch": 1.9264512355607546, + "grad_norm": 1.685029384432281, + "learning_rate": 7.471534875971964e-08, + "loss": 0.5274392366409302, + "step": 6588 + }, + { + "epoch": 1.92674367597602, + "grad_norm": 1.5547682278755586, + "learning_rate": 7.412661019538858e-08, + "loss": 0.4350961446762085, + "step": 6589 + }, + { + "epoch": 1.9270361163912852, + "grad_norm": 1.5773569785123847, + "learning_rate": 7.354019174361183e-08, + "loss": 0.6298524737358093, + "step": 6590 + }, + { + "epoch": 1.9273285568065508, + "grad_norm": 1.7494178023153484, + "learning_rate": 7.295609354146194e-08, + "loss": 0.5451292395591736, + "step": 6591 + }, + { + "epoch": 
1.927620997221816, + "grad_norm": 1.8824055292173802, + "learning_rate": 7.23743157254675e-08, + "loss": 0.5371264219284058, + "step": 6592 + }, + { + "epoch": 1.9279134376370815, + "grad_norm": 1.714393478017535, + "learning_rate": 7.179485843161526e-08, + "loss": 0.5805129408836365, + "step": 6593 + }, + { + "epoch": 1.928205878052347, + "grad_norm": 1.9692321834579947, + "learning_rate": 7.121772179535135e-08, + "loss": 0.5542718172073364, + "step": 6594 + }, + { + "epoch": 1.928498318467612, + "grad_norm": 1.7503350699121312, + "learning_rate": 7.064290595157675e-08, + "loss": 0.5668192505836487, + "step": 6595 + }, + { + "epoch": 1.9287907588828777, + "grad_norm": 1.6293975396756264, + "learning_rate": 7.007041103465062e-08, + "loss": 0.5107895731925964, + "step": 6596 + }, + { + "epoch": 1.929083199298143, + "grad_norm": 1.847055531354174, + "learning_rate": 6.950023717839261e-08, + "loss": 0.47974276542663574, + "step": 6597 + }, + { + "epoch": 1.9293756397134083, + "grad_norm": 1.5624753949857668, + "learning_rate": 6.893238451607387e-08, + "loss": 0.5641148090362549, + "step": 6598 + }, + { + "epoch": 1.9296680801286739, + "grad_norm": 1.7181332365296518, + "learning_rate": 6.836685318042935e-08, + "loss": 0.5940253734588623, + "step": 6599 + }, + { + "epoch": 1.9299605205439392, + "grad_norm": 1.6880020580834156, + "learning_rate": 6.780364330364775e-08, + "loss": 0.46844422817230225, + "step": 6600 + }, + { + "epoch": 1.9302529609592045, + "grad_norm": 1.6235992853167036, + "learning_rate": 6.724275501737487e-08, + "loss": 0.3933336138725281, + "step": 6601 + }, + { + "epoch": 1.93054540137447, + "grad_norm": 1.4538666395679365, + "learning_rate": 6.668418845271695e-08, + "loss": 0.4786602258682251, + "step": 6602 + }, + { + "epoch": 1.9308378417897354, + "grad_norm": 1.798637107768398, + "learning_rate": 6.612794374023402e-08, + "loss": 0.49695518612861633, + "step": 6603 + }, + { + "epoch": 1.9311302822050007, + "grad_norm": 1.5049309556488495, + 
"learning_rate": 6.557402100994426e-08, + "loss": 0.4798729121685028, + "step": 6604 + }, + { + "epoch": 1.9314227226202663, + "grad_norm": 1.7300127457609986, + "learning_rate": 6.502242039132634e-08, + "loss": 0.4187319278717041, + "step": 6605 + }, + { + "epoch": 1.9317151630355314, + "grad_norm": 2.050722935709042, + "learning_rate": 6.447314201331156e-08, + "loss": 0.4945526719093323, + "step": 6606 + }, + { + "epoch": 1.932007603450797, + "grad_norm": 1.8976456851513979, + "learning_rate": 6.392618600429057e-08, + "loss": 0.5721586346626282, + "step": 6607 + }, + { + "epoch": 1.9323000438660622, + "grad_norm": 1.6286185694607815, + "learning_rate": 6.338155249211109e-08, + "loss": 0.45542022585868835, + "step": 6608 + }, + { + "epoch": 1.9325924842813276, + "grad_norm": 1.7597762099762242, + "learning_rate": 6.283924160407796e-08, + "loss": 0.5627170205116272, + "step": 6609 + }, + { + "epoch": 1.9328849246965931, + "grad_norm": 1.6951677907486626, + "learning_rate": 6.22992534669542e-08, + "loss": 0.5369620323181152, + "step": 6610 + }, + { + "epoch": 1.9331773651118584, + "grad_norm": 1.619968087818578, + "learning_rate": 6.176158820695665e-08, + "loss": 0.5268368124961853, + "step": 6611 + }, + { + "epoch": 1.9334698055271238, + "grad_norm": 1.6828649754520415, + "learning_rate": 6.122624594976257e-08, + "loss": 0.5734575986862183, + "step": 6612 + }, + { + "epoch": 1.9337622459423893, + "grad_norm": 1.86766787540182, + "learning_rate": 6.069322682050516e-08, + "loss": 0.5066978931427002, + "step": 6613 + }, + { + "epoch": 1.9340546863576547, + "grad_norm": 1.68962846891993, + "learning_rate": 6.016253094377366e-08, + "loss": 0.5462731719017029, + "step": 6614 + }, + { + "epoch": 1.93434712677292, + "grad_norm": 1.8689912619353801, + "learning_rate": 5.963415844361553e-08, + "loss": 0.5407041311264038, + "step": 6615 + }, + { + "epoch": 1.9346395671881855, + "grad_norm": 1.792133188360025, + "learning_rate": 5.910810944353418e-08, + "loss": 
0.48977869749069214, + "step": 6616 + }, + { + "epoch": 1.9349320076034506, + "grad_norm": 1.8900630995604775, + "learning_rate": 5.858438406649125e-08, + "loss": 0.5320937037467957, + "step": 6617 + }, + { + "epoch": 1.9352244480187162, + "grad_norm": 1.6602834270947344, + "learning_rate": 5.806298243490327e-08, + "loss": 0.5860059261322021, + "step": 6618 + }, + { + "epoch": 1.9355168884339817, + "grad_norm": 1.7299178033338176, + "learning_rate": 5.7543904670644965e-08, + "loss": 0.49517208337783813, + "step": 6619 + }, + { + "epoch": 1.9358093288492468, + "grad_norm": 1.4975030277698207, + "learning_rate": 5.7027150895049286e-08, + "loss": 0.5060882568359375, + "step": 6620 + }, + { + "epoch": 1.9361017692645124, + "grad_norm": 1.7387399518104565, + "learning_rate": 5.651272122890184e-08, + "loss": 0.5887798070907593, + "step": 6621 + }, + { + "epoch": 1.9363942096797777, + "grad_norm": 2.006477050241073, + "learning_rate": 5.600061579244753e-08, + "loss": 0.6567577123641968, + "step": 6622 + }, + { + "epoch": 1.936686650095043, + "grad_norm": 1.7419376875296542, + "learning_rate": 5.549083470538952e-08, + "loss": 0.5672584176063538, + "step": 6623 + }, + { + "epoch": 1.9369790905103086, + "grad_norm": 1.6312975104255192, + "learning_rate": 5.4983378086885806e-08, + "loss": 0.5166369676589966, + "step": 6624 + }, + { + "epoch": 1.937271530925574, + "grad_norm": 1.7351407182284893, + "learning_rate": 5.447824605555041e-08, + "loss": 0.5157661437988281, + "step": 6625 + }, + { + "epoch": 1.9375639713408392, + "grad_norm": 1.5452343867654343, + "learning_rate": 5.397543872945443e-08, + "loss": 0.5001711845397949, + "step": 6626 + }, + { + "epoch": 1.9378564117561048, + "grad_norm": 1.5666441918912, + "learning_rate": 5.34749562261272e-08, + "loss": 0.48944878578186035, + "step": 6627 + }, + { + "epoch": 1.9381488521713701, + "grad_norm": 1.8943450842549039, + "learning_rate": 5.297679866255401e-08, + "loss": 0.5400780439376831, + "step": 6628 + }, + { + "epoch": 
1.9384412925866354, + "grad_norm": 1.6944930575034618, + "learning_rate": 5.248096615517395e-08, + "loss": 0.544346809387207, + "step": 6629 + }, + { + "epoch": 1.938733733001901, + "grad_norm": 1.8360261063384646, + "learning_rate": 5.1987458819886535e-08, + "loss": 0.5283153653144836, + "step": 6630 + }, + { + "epoch": 1.939026173417166, + "grad_norm": 1.8162414803988312, + "learning_rate": 5.149627677204616e-08, + "loss": 0.555808424949646, + "step": 6631 + }, + { + "epoch": 1.9393186138324316, + "grad_norm": 1.7068645601820531, + "learning_rate": 5.10074201264632e-08, + "loss": 0.5230466723442078, + "step": 6632 + }, + { + "epoch": 1.9396110542476972, + "grad_norm": 1.592321180041504, + "learning_rate": 5.052088899740515e-08, + "loss": 0.4810416102409363, + "step": 6633 + }, + { + "epoch": 1.9399034946629623, + "grad_norm": 1.2489690563293379, + "learning_rate": 5.0036683498594365e-08, + "loss": 0.35233962535858154, + "step": 6634 + }, + { + "epoch": 1.9401959350782279, + "grad_norm": 1.5949248677680616, + "learning_rate": 4.955480374321253e-08, + "loss": 0.5250035524368286, + "step": 6635 + }, + { + "epoch": 1.9404883754934932, + "grad_norm": 1.5547636594172098, + "learning_rate": 4.907524984389622e-08, + "loss": 0.5896221399307251, + "step": 6636 + }, + { + "epoch": 1.9407808159087585, + "grad_norm": 1.5725705573586048, + "learning_rate": 4.859802191273688e-08, + "loss": 0.5410518050193787, + "step": 6637 + }, + { + "epoch": 1.941073256324024, + "grad_norm": 1.5273512663488045, + "learning_rate": 4.812312006128528e-08, + "loss": 0.5044152736663818, + "step": 6638 + }, + { + "epoch": 1.9413656967392894, + "grad_norm": 1.6537481992077037, + "learning_rate": 4.765054440054484e-08, + "loss": 0.5388177633285522, + "step": 6639 + }, + { + "epoch": 1.9416581371545547, + "grad_norm": 2.0702365693466485, + "learning_rate": 4.718029504097943e-08, + "loss": 0.5074491500854492, + "step": 6640 + }, + { + "epoch": 1.9419505775698203, + "grad_norm": 1.6224415285858116, + 
"learning_rate": 4.671237209250557e-08, + "loss": 0.47772669792175293, + "step": 6641 + }, + { + "epoch": 1.9422430179850856, + "grad_norm": 1.6570845374645817, + "learning_rate": 4.624677566449798e-08, + "loss": 0.4682825207710266, + "step": 6642 + }, + { + "epoch": 1.942535458400351, + "grad_norm": 1.5100328644654928, + "learning_rate": 4.578350586578628e-08, + "loss": 0.48880642652511597, + "step": 6643 + }, + { + "epoch": 1.9428278988156165, + "grad_norm": 1.6890744037677652, + "learning_rate": 4.532256280465719e-08, + "loss": 0.4590389132499695, + "step": 6644 + }, + { + "epoch": 1.9431203392308816, + "grad_norm": 1.903981857624826, + "learning_rate": 4.48639465888534e-08, + "loss": 0.5893105268478394, + "step": 6645 + }, + { + "epoch": 1.9434127796461471, + "grad_norm": 1.7274912065627603, + "learning_rate": 4.4407657325574725e-08, + "loss": 0.561900794506073, + "step": 6646 + }, + { + "epoch": 1.9437052200614124, + "grad_norm": 1.662019693277273, + "learning_rate": 4.395369512147474e-08, + "loss": 0.4140210747718811, + "step": 6647 + }, + { + "epoch": 1.9439976604766778, + "grad_norm": 1.7955978434650512, + "learning_rate": 4.350206008266522e-08, + "loss": 0.6220303773880005, + "step": 6648 + }, + { + "epoch": 1.9442901008919433, + "grad_norm": 1.771531678180808, + "learning_rate": 4.3052752314712844e-08, + "loss": 0.4903472065925598, + "step": 6649 + }, + { + "epoch": 1.9445825413072086, + "grad_norm": 1.889992657698585, + "learning_rate": 4.260577192263915e-08, + "loss": 0.4519340991973877, + "step": 6650 + }, + { + "epoch": 1.944874981722474, + "grad_norm": 1.7435292517018475, + "learning_rate": 4.216111901092501e-08, + "loss": 0.49067920446395874, + "step": 6651 + }, + { + "epoch": 1.9451674221377395, + "grad_norm": 1.8654652047797853, + "learning_rate": 4.1718793683505066e-08, + "loss": 0.5935854911804199, + "step": 6652 + }, + { + "epoch": 1.9454598625530048, + "grad_norm": 1.7744411864937968, + "learning_rate": 4.127879604376883e-08, + "loss": 
0.5209576487541199, + "step": 6653 + }, + { + "epoch": 1.9457523029682702, + "grad_norm": 1.50564473891113, + "learning_rate": 4.084112619456515e-08, + "loss": 0.4454221725463867, + "step": 6654 + }, + { + "epoch": 1.9460447433835357, + "grad_norm": 1.8157940398905494, + "learning_rate": 4.0405784238194415e-08, + "loss": 0.5129591226577759, + "step": 6655 + }, + { + "epoch": 1.9463371837988008, + "grad_norm": 1.63185696744402, + "learning_rate": 3.997277027641744e-08, + "loss": 0.48704665899276733, + "step": 6656 + }, + { + "epoch": 1.9466296242140664, + "grad_norm": 1.8037751571098388, + "learning_rate": 3.95420844104466e-08, + "loss": 0.4510651230812073, + "step": 6657 + }, + { + "epoch": 1.946922064629332, + "grad_norm": 1.7817975919339482, + "learning_rate": 3.911372674095249e-08, + "loss": 0.5116807222366333, + "step": 6658 + }, + { + "epoch": 1.947214505044597, + "grad_norm": 1.7985765763419883, + "learning_rate": 3.868769736806277e-08, + "loss": 0.592056393623352, + "step": 6659 + }, + { + "epoch": 1.9475069454598626, + "grad_norm": 1.7881377609654638, + "learning_rate": 3.8263996391357805e-08, + "loss": 0.579146146774292, + "step": 6660 + }, + { + "epoch": 1.947799385875128, + "grad_norm": 1.6202416659647267, + "learning_rate": 3.784262390987503e-08, + "loss": 0.5253209471702576, + "step": 6661 + }, + { + "epoch": 1.9480918262903932, + "grad_norm": 2.008309380522338, + "learning_rate": 3.742358002210789e-08, + "loss": 0.5614888072013855, + "step": 6662 + }, + { + "epoch": 1.9483842667056588, + "grad_norm": 1.6491223001780133, + "learning_rate": 3.7006864826005796e-08, + "loss": 0.5630952715873718, + "step": 6663 + }, + { + "epoch": 1.9486767071209241, + "grad_norm": 2.1390311477096944, + "learning_rate": 3.659247841897306e-08, + "loss": 0.5990846157073975, + "step": 6664 + }, + { + "epoch": 1.9489691475361894, + "grad_norm": 1.6162006621933969, + "learning_rate": 3.6180420897868886e-08, + "loss": 0.5290813446044922, + "step": 6665 + }, + { + "epoch": 
1.949261587951455, + "grad_norm": 2.6144126732722803, + "learning_rate": 3.577069235901176e-08, + "loss": 0.6710211038589478, + "step": 6666 + }, + { + "epoch": 1.9495540283667203, + "grad_norm": 1.71689411729531, + "learning_rate": 3.536329289817064e-08, + "loss": 0.4802299737930298, + "step": 6667 + }, + { + "epoch": 1.9498464687819856, + "grad_norm": 1.6268319596207468, + "learning_rate": 3.495822261057491e-08, + "loss": 0.5432649850845337, + "step": 6668 + }, + { + "epoch": 1.9501389091972512, + "grad_norm": 1.9426982793491434, + "learning_rate": 3.4555481590905495e-08, + "loss": 0.5824951529502869, + "step": 6669 + }, + { + "epoch": 1.9504313496125163, + "grad_norm": 1.5773733844612365, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.48428961634635925, + "step": 6670 + }, + { + "epoch": 1.9507237900277818, + "grad_norm": 1.7258198741312958, + "learning_rate": 3.375698773135705e-08, + "loss": 0.5684780478477478, + "step": 6671 + }, + { + "epoch": 1.9510162304430474, + "grad_norm": 1.7742355369350526, + "learning_rate": 3.336123507811983e-08, + "loss": 0.5658689737319946, + "step": 6672 + }, + { + "epoch": 1.9513086708583125, + "grad_norm": 1.7743474017748566, + "learning_rate": 3.2967812066097006e-08, + "loss": 0.6265745162963867, + "step": 6673 + }, + { + "epoch": 1.951601111273578, + "grad_norm": 1.768397532537575, + "learning_rate": 3.257671878724722e-08, + "loss": 0.5732975006103516, + "step": 6674 + }, + { + "epoch": 1.9518935516888434, + "grad_norm": 2.3801499199920273, + "learning_rate": 3.218795533298624e-08, + "loss": 0.46968942880630493, + "step": 6675 + }, + { + "epoch": 1.9521859921041087, + "grad_norm": 1.9250466851177817, + "learning_rate": 3.180152179418472e-08, + "loss": 0.5651586055755615, + "step": 6676 + }, + { + "epoch": 1.9524784325193743, + "grad_norm": 1.4699414350235678, + "learning_rate": 3.141741826117151e-08, + "loss": 0.46789437532424927, + "step": 6677 + }, + { + "epoch": 1.9527708729346396, + "grad_norm": 1.6701838665271502, 
+ "learning_rate": 3.1035644823725896e-08, + "loss": 0.5332610011100769, + "step": 6678 + }, + { + "epoch": 1.953063313349905, + "grad_norm": 1.825129394239336, + "learning_rate": 3.06562015710854e-08, + "loss": 0.49613600969314575, + "step": 6679 + }, + { + "epoch": 1.9533557537651705, + "grad_norm": 2.1340240197713265, + "learning_rate": 3.027908859194351e-08, + "loss": 0.5498408079147339, + "step": 6680 + }, + { + "epoch": 1.9536481941804358, + "grad_norm": 1.8887907896186948, + "learning_rate": 2.99043059744486e-08, + "loss": 0.6802657842636108, + "step": 6681 + }, + { + "epoch": 1.953940634595701, + "grad_norm": 1.8609256911752867, + "learning_rate": 2.9531853806201716e-08, + "loss": 0.5149989724159241, + "step": 6682 + }, + { + "epoch": 1.9542330750109667, + "grad_norm": 1.7262483706342455, + "learning_rate": 2.9161732174263212e-08, + "loss": 0.5249730944633484, + "step": 6683 + }, + { + "epoch": 1.9545255154262318, + "grad_norm": 1.7003943133697261, + "learning_rate": 2.8793941165147222e-08, + "loss": 0.5711483359336853, + "step": 6684 + }, + { + "epoch": 1.9548179558414973, + "grad_norm": 1.7303037823896377, + "learning_rate": 2.842848086482053e-08, + "loss": 0.4591020345687866, + "step": 6685 + }, + { + "epoch": 1.9551103962567626, + "grad_norm": 1.887004603599524, + "learning_rate": 2.8065351358708136e-08, + "loss": 0.575869083404541, + "step": 6686 + }, + { + "epoch": 1.955402836672028, + "grad_norm": 1.7563501117497715, + "learning_rate": 2.7704552731688816e-08, + "loss": 0.5664101839065552, + "step": 6687 + }, + { + "epoch": 1.9556952770872935, + "grad_norm": 1.5280681451949298, + "learning_rate": 2.7346085068098437e-08, + "loss": 0.5739811062812805, + "step": 6688 + }, + { + "epoch": 1.9559877175025588, + "grad_norm": 1.64304520297204, + "learning_rate": 2.6989948451726643e-08, + "loss": 0.4707348942756653, + "step": 6689 + }, + { + "epoch": 1.9562801579178242, + "grad_norm": 1.4347028954089904, + "learning_rate": 2.6636142965816848e-08, + "loss": 
0.38842523097991943, + "step": 6690 + }, + { + "epoch": 1.9565725983330897, + "grad_norm": 1.9429266961932796, + "learning_rate": 2.628466869306956e-08, + "loss": 0.4295673668384552, + "step": 6691 + }, + { + "epoch": 1.956865038748355, + "grad_norm": 1.9886421076178336, + "learning_rate": 2.5935525715640176e-08, + "loss": 0.5358999967575073, + "step": 6692 + }, + { + "epoch": 1.9571574791636204, + "grad_norm": 1.8207487442928234, + "learning_rate": 2.5588714115137857e-08, + "loss": 0.49730730056762695, + "step": 6693 + }, + { + "epoch": 1.957449919578886, + "grad_norm": 1.8975782350563493, + "learning_rate": 2.5244233972627762e-08, + "loss": 0.5368232131004333, + "step": 6694 + }, + { + "epoch": 1.957742359994151, + "grad_norm": 1.6616905607648789, + "learning_rate": 2.4902085368632144e-08, + "loss": 0.48084500432014465, + "step": 6695 + }, + { + "epoch": 1.9580348004094166, + "grad_norm": 1.6503756551181779, + "learning_rate": 2.45622683831237e-08, + "loss": 0.5197296142578125, + "step": 6696 + }, + { + "epoch": 1.9583272408246821, + "grad_norm": 1.7005704554604877, + "learning_rate": 2.4224783095532224e-08, + "loss": 0.4807678163051605, + "step": 6697 + }, + { + "epoch": 1.9586196812399472, + "grad_norm": 1.5200854711140026, + "learning_rate": 2.388962958474461e-08, + "loss": 0.5117641687393188, + "step": 6698 + }, + { + "epoch": 1.9589121216552128, + "grad_norm": 1.5153035364420055, + "learning_rate": 2.355680792910153e-08, + "loss": 0.5318149328231812, + "step": 6699 + }, + { + "epoch": 1.959204562070478, + "grad_norm": 1.642749755305391, + "learning_rate": 2.3226318206395206e-08, + "loss": 0.5590193271636963, + "step": 6700 + } + ], + "logging_steps": 1, + "max_steps": 6840, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + 
"attributes": {} + } + }, + "total_flos": 2047433765437440.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-6700/training_args.bin b/checkpoint-6700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..81f2336f2b4301fde755bb2ff1a553c0af833dc6 --- /dev/null +++ b/checkpoint-6700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18a5144102f6d607705c76873c9b6b52fea03ff40dc71ea0f2bf5e2547fe44f +size 6968 diff --git a/checkpoint-6700/zero_to_fp32.py b/checkpoint-6700/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-6700/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-6800/README.md b/checkpoint-6800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-6800/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-6800/adapter_config.json b/checkpoint-6800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2358f759370f40d042b47e8407cdc2843daac45e --- /dev/null +++ b/checkpoint-6800/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.6.mlp.up_proj", + "layers.24.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.24.mlp.down_proj", + "layers.12.mlp.up_proj", + "layers.20.mlp.gate_proj", + "layers.23.mlp.up_proj", + "layers.19.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.11.mlp.gate_proj", + "layers.8.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.7.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.13.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.9.mlp.gate_proj", + "layers.0.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.0.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.10.mlp.gate_proj", + "layers.10.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.25.mlp.up_proj", + "layers.12.mlp.gate_proj", + "layers.20.mlp.down_proj", + "layers.0.mlp.down_proj", + "layers.5.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.19.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.21.mlp.up_proj", + "layers.2.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.21.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "q_proj", + "layers.20.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.21.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.18.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.27.mlp.up_proj", + "v_proj", + "layers.7.mlp.gate_proj", + "layers.10.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.17.mlp.up_proj", + "layers.5.mlp.gate_proj", + "layers.3.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.26.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.17.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.13.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.15.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.16.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.4.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.15.mlp.gate_proj", + 
"layers.27.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.25.mlp.gate_proj", + "layers.26.mlp.gate_proj", + "o_proj", + "layers.6.mlp.gate_proj", + "layers.11.mlp.up_proj", + "layers.9.mlp.up_proj", + "layers.16.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.8.mlp.down_proj", + "layers.1.mlp.up_proj", + "k_proj", + "layers.16.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.9.mlp.down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-6800/adapter_model.safetensors b/checkpoint-6800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d01e1c4168b0c78242448127dc5220d263262b2 --- /dev/null +++ b/checkpoint-6800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:595500f71cd67a331d75700197eefa431c841d197b6795c6e4a4769a6cf779ea +size 323020440 diff --git a/checkpoint-6800/chat_template.jinja b/checkpoint-6800/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-6800/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% 
set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-6800/global_step6800/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-6800/global_step6800/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fc75769f2e4e3bfdb6c5e698d2dec90cdfd7cb4 --- /dev/null +++ b/checkpoint-6800/global_step6800/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589ba9c8e28f04a5add0d2f6217fa75ce16833a92cb267d78ec408c07467e948 +size 1937772272 diff --git a/checkpoint-6800/global_step6800/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-6800/global_step6800/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f90938f38d2ebba97113ffe96baa4bd087b9dea --- /dev/null +++ b/checkpoint-6800/global_step6800/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05fa8c685d4d54e1117b6badf035235de305f2cfa8c5d6942b9356d1230ad5d1 +size 460630 diff --git a/checkpoint-6800/latest b/checkpoint-6800/latest new file mode 100644 index 0000000000000000000000000000000000000000..530c07d406a58648c3a5587101f57ae028af65a4 --- /dev/null +++ b/checkpoint-6800/latest @@ -0,0 +1 @@ +global_step6800 \ No newline at end of file diff --git a/checkpoint-6800/processor_config.json b/checkpoint-6800/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-6800/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-6800/rng_state.pth b/checkpoint-6800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea0029b3c99efc3d75dccf4b8c1d340f4b467d1b --- /dev/null +++ b/checkpoint-6800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e4eb20b78d3061a2823f5291a7b814b2816e403c35b9c7c32def3ea8cf85d92 +size 14244 diff --git a/checkpoint-6800/scheduler.pt b/checkpoint-6800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..deaab3bc1292a0422a86af5a14fd812285ea22df --- /dev/null +++ b/checkpoint-6800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:121c7cc958cd98472310e1b98cf321e4c68112f4f504965dccf5d51fbbbd35b0 +size 
1000 diff --git a/checkpoint-6800/tokenizer.json b/checkpoint-6800/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-6800/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-6800/tokenizer_config.json b/checkpoint-6800/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-6800/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-6800/trainer_state.json b/checkpoint-6800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6214a39ad0a2b034d0c019373e92b5d1b9e02d9d --- /dev/null +++ b/checkpoint-6800/trainer_state.json @@ -0,0 +1,47634 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9884486035970173, + "eval_steps": 500, + "global_step": 6800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00029244041526538966, + "grad_norm": 
1.376689135449382, + "learning_rate": 0.0, + "loss": 1.2599382400512695, + "step": 1 + }, + { + "epoch": 0.0005848808305307793, + "grad_norm": 1.234681838317607, + "learning_rate": 5.847953216374269e-08, + "loss": 0.9314937591552734, + "step": 2 + }, + { + "epoch": 0.000877321245796169, + "grad_norm": 1.3874138849382744, + "learning_rate": 1.1695906432748539e-07, + "loss": 1.1433629989624023, + "step": 3 + }, + { + "epoch": 0.0011697616610615586, + "grad_norm": 1.4618979511530414, + "learning_rate": 1.7543859649122808e-07, + "loss": 1.2224640846252441, + "step": 4 + }, + { + "epoch": 0.0014622020763269484, + "grad_norm": 1.236340065064986, + "learning_rate": 2.3391812865497077e-07, + "loss": 1.0468370914459229, + "step": 5 + }, + { + "epoch": 0.001754642491592338, + "grad_norm": 1.358651453520776, + "learning_rate": 2.9239766081871344e-07, + "loss": 1.1314436197280884, + "step": 6 + }, + { + "epoch": 0.0020470829068577278, + "grad_norm": 1.3850033876300505, + "learning_rate": 3.5087719298245616e-07, + "loss": 0.9903597831726074, + "step": 7 + }, + { + "epoch": 0.0023395233221231173, + "grad_norm": 1.326993456005612, + "learning_rate": 4.093567251461988e-07, + "loss": 1.1988611221313477, + "step": 8 + }, + { + "epoch": 0.002631963737388507, + "grad_norm": 1.3313234883955534, + "learning_rate": 4.6783625730994155e-07, + "loss": 1.1209533214569092, + "step": 9 + }, + { + "epoch": 0.0029244041526538967, + "grad_norm": 1.3471142230235869, + "learning_rate": 5.263157894736843e-07, + "loss": 1.1582586765289307, + "step": 10 + }, + { + "epoch": 0.0032168445679192866, + "grad_norm": 1.3073172655293792, + "learning_rate": 5.847953216374269e-07, + "loss": 1.2469007968902588, + "step": 11 + }, + { + "epoch": 0.003509284983184676, + "grad_norm": 1.500493931988472, + "learning_rate": 6.432748538011696e-07, + "loss": 1.115494728088379, + "step": 12 + }, + { + "epoch": 0.0038017253984500656, + "grad_norm": 1.4157975190751417, + "learning_rate": 7.017543859649123e-07, + "loss": 
1.1927871704101562, + "step": 13 + }, + { + "epoch": 0.0040941658137154556, + "grad_norm": 1.4273551735693608, + "learning_rate": 7.60233918128655e-07, + "loss": 1.1014869213104248, + "step": 14 + }, + { + "epoch": 0.004386606228980845, + "grad_norm": 1.214320734942881, + "learning_rate": 8.187134502923977e-07, + "loss": 1.1055865287780762, + "step": 15 + }, + { + "epoch": 0.0046790466442462346, + "grad_norm": 1.2962699407775686, + "learning_rate": 8.771929824561404e-07, + "loss": 1.1071349382400513, + "step": 16 + }, + { + "epoch": 0.004971487059511625, + "grad_norm": 1.2885224717964352, + "learning_rate": 9.356725146198831e-07, + "loss": 1.1737473011016846, + "step": 17 + }, + { + "epoch": 0.005263927474777014, + "grad_norm": 1.407390623938155, + "learning_rate": 9.941520467836258e-07, + "loss": 1.283717155456543, + "step": 18 + }, + { + "epoch": 0.005556367890042404, + "grad_norm": 1.4470139877184414, + "learning_rate": 1.0526315789473685e-06, + "loss": 1.2509160041809082, + "step": 19 + }, + { + "epoch": 0.005848808305307793, + "grad_norm": 1.3242663031296102, + "learning_rate": 1.111111111111111e-06, + "loss": 0.9722317457199097, + "step": 20 + }, + { + "epoch": 0.006141248720573183, + "grad_norm": 1.7221218211796423, + "learning_rate": 1.1695906432748538e-06, + "loss": 1.1927049160003662, + "step": 21 + }, + { + "epoch": 0.006433689135838573, + "grad_norm": 1.4346324267765085, + "learning_rate": 1.2280701754385965e-06, + "loss": 1.2133033275604248, + "step": 22 + }, + { + "epoch": 0.006726129551103963, + "grad_norm": 1.449278395489955, + "learning_rate": 1.2865497076023392e-06, + "loss": 1.2373273372650146, + "step": 23 + }, + { + "epoch": 0.007018569966369352, + "grad_norm": 1.6650860096596214, + "learning_rate": 1.345029239766082e-06, + "loss": 0.9476668834686279, + "step": 24 + }, + { + "epoch": 0.007311010381634742, + "grad_norm": 1.2748998150534738, + "learning_rate": 1.4035087719298246e-06, + "loss": 1.1171324253082275, + "step": 25 + }, + { + "epoch": 
0.007603450796900131, + "grad_norm": 1.4396688825039674, + "learning_rate": 1.4619883040935674e-06, + "loss": 1.1276075839996338, + "step": 26 + }, + { + "epoch": 0.007895891212165522, + "grad_norm": 1.4009443443291978, + "learning_rate": 1.52046783625731e-06, + "loss": 1.190751314163208, + "step": 27 + }, + { + "epoch": 0.008188331627430911, + "grad_norm": 1.3912141798418658, + "learning_rate": 1.5789473684210526e-06, + "loss": 1.2171813249588013, + "step": 28 + }, + { + "epoch": 0.0084807720426963, + "grad_norm": 1.3073224250652524, + "learning_rate": 1.6374269005847953e-06, + "loss": 0.8595987558364868, + "step": 29 + }, + { + "epoch": 0.00877321245796169, + "grad_norm": 1.2671914308960317, + "learning_rate": 1.695906432748538e-06, + "loss": 1.0270106792449951, + "step": 30 + }, + { + "epoch": 0.00906565287322708, + "grad_norm": 1.5005896829818803, + "learning_rate": 1.7543859649122807e-06, + "loss": 1.068537712097168, + "step": 31 + }, + { + "epoch": 0.009358093288492469, + "grad_norm": 1.2766478202995049, + "learning_rate": 1.8128654970760235e-06, + "loss": 1.1307867765426636, + "step": 32 + }, + { + "epoch": 0.009650533703757859, + "grad_norm": 1.5582616996952416, + "learning_rate": 1.8713450292397662e-06, + "loss": 1.0837950706481934, + "step": 33 + }, + { + "epoch": 0.00994297411902325, + "grad_norm": 1.4304945053464713, + "learning_rate": 1.929824561403509e-06, + "loss": 1.1506178379058838, + "step": 34 + }, + { + "epoch": 0.01023541453428864, + "grad_norm": 1.4722243618391941, + "learning_rate": 1.9883040935672516e-06, + "loss": 0.9450151324272156, + "step": 35 + }, + { + "epoch": 0.010527854949554029, + "grad_norm": 1.4847744449229108, + "learning_rate": 2.0467836257309943e-06, + "loss": 1.2040901184082031, + "step": 36 + }, + { + "epoch": 0.010820295364819418, + "grad_norm": 1.4600954284408973, + "learning_rate": 2.105263157894737e-06, + "loss": 1.2316429615020752, + "step": 37 + }, + { + "epoch": 0.011112735780084808, + "grad_norm": 1.479845514016971, 
+ "learning_rate": 2.1637426900584798e-06, + "loss": 1.2119100093841553, + "step": 38 + }, + { + "epoch": 0.011405176195350197, + "grad_norm": 1.353351745720387, + "learning_rate": 2.222222222222222e-06, + "loss": 1.276926875114441, + "step": 39 + }, + { + "epoch": 0.011697616610615587, + "grad_norm": 1.256680621146734, + "learning_rate": 2.280701754385965e-06, + "loss": 0.9357824921607971, + "step": 40 + }, + { + "epoch": 0.011990057025880976, + "grad_norm": 1.3348703609284243, + "learning_rate": 2.3391812865497075e-06, + "loss": 1.1861131191253662, + "step": 41 + }, + { + "epoch": 0.012282497441146366, + "grad_norm": 1.3287978940598948, + "learning_rate": 2.3976608187134502e-06, + "loss": 1.1745539903640747, + "step": 42 + }, + { + "epoch": 0.012574937856411755, + "grad_norm": 1.1561631937443322, + "learning_rate": 2.456140350877193e-06, + "loss": 1.0291770696640015, + "step": 43 + }, + { + "epoch": 0.012867378271677147, + "grad_norm": 1.2176771446345134, + "learning_rate": 2.5146198830409357e-06, + "loss": 1.2361294031143188, + "step": 44 + }, + { + "epoch": 0.013159818686942536, + "grad_norm": 1.3295063710563702, + "learning_rate": 2.5730994152046784e-06, + "loss": 1.1909143924713135, + "step": 45 + }, + { + "epoch": 0.013452259102207926, + "grad_norm": 1.2650643173778968, + "learning_rate": 2.631578947368421e-06, + "loss": 1.1998133659362793, + "step": 46 + }, + { + "epoch": 0.013744699517473315, + "grad_norm": 1.1278701463292995, + "learning_rate": 2.690058479532164e-06, + "loss": 1.0011268854141235, + "step": 47 + }, + { + "epoch": 0.014037139932738705, + "grad_norm": 1.4726969666937608, + "learning_rate": 2.7485380116959066e-06, + "loss": 1.0552136898040771, + "step": 48 + }, + { + "epoch": 0.014329580348004094, + "grad_norm": 1.0797124442917296, + "learning_rate": 2.8070175438596493e-06, + "loss": 0.9727921485900879, + "step": 49 + }, + { + "epoch": 0.014622020763269484, + "grad_norm": 1.1798592697113668, + "learning_rate": 2.865497076023392e-06, + "loss": 
0.9361351728439331, + "step": 50 + }, + { + "epoch": 0.014914461178534873, + "grad_norm": 1.1254749584923542, + "learning_rate": 2.9239766081871347e-06, + "loss": 1.140329360961914, + "step": 51 + }, + { + "epoch": 0.015206901593800263, + "grad_norm": 1.1050662639156084, + "learning_rate": 2.9824561403508774e-06, + "loss": 0.991325855255127, + "step": 52 + }, + { + "epoch": 0.015499342009065652, + "grad_norm": 1.364923415701691, + "learning_rate": 3.04093567251462e-06, + "loss": 1.3082914352416992, + "step": 53 + }, + { + "epoch": 0.015791782424331043, + "grad_norm": 1.1357483626397489, + "learning_rate": 3.0994152046783624e-06, + "loss": 0.9767723083496094, + "step": 54 + }, + { + "epoch": 0.016084222839596433, + "grad_norm": 1.1338887919712684, + "learning_rate": 3.157894736842105e-06, + "loss": 1.193568229675293, + "step": 55 + }, + { + "epoch": 0.016376663254861822, + "grad_norm": 1.176328275981774, + "learning_rate": 3.216374269005848e-06, + "loss": 0.9767440557479858, + "step": 56 + }, + { + "epoch": 0.016669103670127212, + "grad_norm": 1.0263265896491178, + "learning_rate": 3.2748538011695906e-06, + "loss": 0.8888605833053589, + "step": 57 + }, + { + "epoch": 0.0169615440853926, + "grad_norm": 1.0668435517314094, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.087357997894287, + "step": 58 + }, + { + "epoch": 0.01725398450065799, + "grad_norm": 1.1952584851106463, + "learning_rate": 3.391812865497076e-06, + "loss": 1.0217459201812744, + "step": 59 + }, + { + "epoch": 0.01754642491592338, + "grad_norm": 1.1279843674972485, + "learning_rate": 3.4502923976608188e-06, + "loss": 1.0783777236938477, + "step": 60 + }, + { + "epoch": 0.01783886533118877, + "grad_norm": 0.9080265579264722, + "learning_rate": 3.5087719298245615e-06, + "loss": 0.85099196434021, + "step": 61 + }, + { + "epoch": 0.01813130574645416, + "grad_norm": 1.0228765689803359, + "learning_rate": 3.567251461988304e-06, + "loss": 0.9322569966316223, + "step": 62 + }, + { + "epoch": 
0.01842374616171955, + "grad_norm": 0.991842254830473, + "learning_rate": 3.625730994152047e-06, + "loss": 0.8749685287475586, + "step": 63 + }, + { + "epoch": 0.018716186576984938, + "grad_norm": 0.9789077968505817, + "learning_rate": 3.6842105263157896e-06, + "loss": 0.857900857925415, + "step": 64 + }, + { + "epoch": 0.019008626992250328, + "grad_norm": 0.8092242526335478, + "learning_rate": 3.7426900584795324e-06, + "loss": 0.8891770243644714, + "step": 65 + }, + { + "epoch": 0.019301067407515717, + "grad_norm": 1.0526332302181824, + "learning_rate": 3.801169590643275e-06, + "loss": 1.0730159282684326, + "step": 66 + }, + { + "epoch": 0.019593507822781107, + "grad_norm": 1.124329301516788, + "learning_rate": 3.859649122807018e-06, + "loss": 1.108138084411621, + "step": 67 + }, + { + "epoch": 0.0198859482380465, + "grad_norm": 1.3581659451048562, + "learning_rate": 3.9181286549707605e-06, + "loss": 1.2126305103302002, + "step": 68 + }, + { + "epoch": 0.02017838865331189, + "grad_norm": 1.1108109420327934, + "learning_rate": 3.976608187134503e-06, + "loss": 0.9527193307876587, + "step": 69 + }, + { + "epoch": 0.02047082906857728, + "grad_norm": 0.9965971604796123, + "learning_rate": 4.035087719298246e-06, + "loss": 1.0454832315444946, + "step": 70 + }, + { + "epoch": 0.020763269483842668, + "grad_norm": 0.821178202034714, + "learning_rate": 4.093567251461989e-06, + "loss": 0.7075237035751343, + "step": 71 + }, + { + "epoch": 0.021055709899108058, + "grad_norm": 1.2413273222740282, + "learning_rate": 4.152046783625731e-06, + "loss": 1.0972111225128174, + "step": 72 + }, + { + "epoch": 0.021348150314373447, + "grad_norm": 0.9838475362870381, + "learning_rate": 4.210526315789474e-06, + "loss": 1.0400984287261963, + "step": 73 + }, + { + "epoch": 0.021640590729638837, + "grad_norm": 0.8577987626348056, + "learning_rate": 4.269005847953217e-06, + "loss": 0.7712557315826416, + "step": 74 + }, + { + "epoch": 0.021933031144904226, + "grad_norm": 1.0937426764383058, + 
"learning_rate": 4.3274853801169596e-06, + "loss": 1.1733636856079102, + "step": 75 + }, + { + "epoch": 0.022225471560169616, + "grad_norm": 0.9896291906902066, + "learning_rate": 4.385964912280702e-06, + "loss": 0.8653621673583984, + "step": 76 + }, + { + "epoch": 0.022517911975435005, + "grad_norm": 0.9059062097735997, + "learning_rate": 4.444444444444444e-06, + "loss": 0.8797299861907959, + "step": 77 + }, + { + "epoch": 0.022810352390700395, + "grad_norm": 1.0128235878781693, + "learning_rate": 4.502923976608187e-06, + "loss": 0.8357750177383423, + "step": 78 + }, + { + "epoch": 0.023102792805965784, + "grad_norm": 1.241636412088512, + "learning_rate": 4.56140350877193e-06, + "loss": 1.1249456405639648, + "step": 79 + }, + { + "epoch": 0.023395233221231174, + "grad_norm": 1.2743547410748093, + "learning_rate": 4.619883040935672e-06, + "loss": 0.9920758008956909, + "step": 80 + }, + { + "epoch": 0.023687673636496563, + "grad_norm": 1.0290847197991744, + "learning_rate": 4.678362573099415e-06, + "loss": 0.8115094900131226, + "step": 81 + }, + { + "epoch": 0.023980114051761953, + "grad_norm": 0.9339898981913745, + "learning_rate": 4.736842105263158e-06, + "loss": 1.060575246810913, + "step": 82 + }, + { + "epoch": 0.024272554467027342, + "grad_norm": 1.1898301512766587, + "learning_rate": 4.7953216374269005e-06, + "loss": 1.028218150138855, + "step": 83 + }, + { + "epoch": 0.02456499488229273, + "grad_norm": 0.9840324243241313, + "learning_rate": 4.853801169590643e-06, + "loss": 1.090872049331665, + "step": 84 + }, + { + "epoch": 0.02485743529755812, + "grad_norm": 1.110956193223445, + "learning_rate": 4.912280701754386e-06, + "loss": 1.0069574117660522, + "step": 85 + }, + { + "epoch": 0.02514987571282351, + "grad_norm": 1.0134868000559825, + "learning_rate": 4.970760233918129e-06, + "loss": 0.9391698837280273, + "step": 86 + }, + { + "epoch": 0.025442316128088904, + "grad_norm": 1.0912235029106665, + "learning_rate": 5.029239766081871e-06, + "loss": 
0.881995677947998, + "step": 87 + }, + { + "epoch": 0.025734756543354293, + "grad_norm": 1.0399116507679627, + "learning_rate": 5.087719298245615e-06, + "loss": 0.87871253490448, + "step": 88 + }, + { + "epoch": 0.026027196958619683, + "grad_norm": 1.0265015868708693, + "learning_rate": 5.146198830409357e-06, + "loss": 1.005904197692871, + "step": 89 + }, + { + "epoch": 0.026319637373885072, + "grad_norm": 1.0161210383553128, + "learning_rate": 5.2046783625731e-06, + "loss": 0.8624223470687866, + "step": 90 + }, + { + "epoch": 0.02661207778915046, + "grad_norm": 1.0154040401745301, + "learning_rate": 5.263157894736842e-06, + "loss": 0.9976427555084229, + "step": 91 + }, + { + "epoch": 0.02690451820441585, + "grad_norm": 1.157266795240935, + "learning_rate": 5.321637426900586e-06, + "loss": 0.7743148803710938, + "step": 92 + }, + { + "epoch": 0.02719695861968124, + "grad_norm": 1.0027983307117943, + "learning_rate": 5.380116959064328e-06, + "loss": 0.8541792631149292, + "step": 93 + }, + { + "epoch": 0.02748939903494663, + "grad_norm": 1.0195872536359372, + "learning_rate": 5.438596491228071e-06, + "loss": 0.9141846895217896, + "step": 94 + }, + { + "epoch": 0.02778183945021202, + "grad_norm": 0.9964676811589505, + "learning_rate": 5.497076023391813e-06, + "loss": 0.9762974977493286, + "step": 95 + }, + { + "epoch": 0.02807427986547741, + "grad_norm": 1.086834377136063, + "learning_rate": 5.555555555555557e-06, + "loss": 0.8039775490760803, + "step": 96 + }, + { + "epoch": 0.0283667202807428, + "grad_norm": 1.0288673358640383, + "learning_rate": 5.6140350877192985e-06, + "loss": 0.9464477300643921, + "step": 97 + }, + { + "epoch": 0.028659160696008188, + "grad_norm": 0.9989091266376411, + "learning_rate": 5.672514619883041e-06, + "loss": 0.8264896869659424, + "step": 98 + }, + { + "epoch": 0.028951601111273578, + "grad_norm": 1.239452647422259, + "learning_rate": 5.730994152046784e-06, + "loss": 0.8347363471984863, + "step": 99 + }, + { + "epoch": 
0.029244041526538967, + "grad_norm": 1.1482101557047766, + "learning_rate": 5.789473684210527e-06, + "loss": 0.7974327802658081, + "step": 100 + }, + { + "epoch": 0.029536481941804357, + "grad_norm": 1.040746567320999, + "learning_rate": 5.847953216374269e-06, + "loss": 0.7953752875328064, + "step": 101 + }, + { + "epoch": 0.029828922357069746, + "grad_norm": 1.0186289029859024, + "learning_rate": 5.906432748538012e-06, + "loss": 0.8652607798576355, + "step": 102 + }, + { + "epoch": 0.030121362772335136, + "grad_norm": 1.0719829766550855, + "learning_rate": 5.964912280701755e-06, + "loss": 0.973792552947998, + "step": 103 + }, + { + "epoch": 0.030413803187600525, + "grad_norm": 0.9226382056883017, + "learning_rate": 6.023391812865498e-06, + "loss": 0.8093612194061279, + "step": 104 + }, + { + "epoch": 0.030706243602865915, + "grad_norm": 0.9154711374479992, + "learning_rate": 6.08187134502924e-06, + "loss": 0.8463394045829773, + "step": 105 + }, + { + "epoch": 0.030998684018131304, + "grad_norm": 1.2769916053670627, + "learning_rate": 6.140350877192983e-06, + "loss": 0.7898350358009338, + "step": 106 + }, + { + "epoch": 0.0312911244333967, + "grad_norm": 1.298220618549192, + "learning_rate": 6.198830409356725e-06, + "loss": 0.9750698804855347, + "step": 107 + }, + { + "epoch": 0.031583564848662087, + "grad_norm": 1.000315516155276, + "learning_rate": 6.2573099415204685e-06, + "loss": 0.8137387633323669, + "step": 108 + }, + { + "epoch": 0.031876005263927476, + "grad_norm": 1.082436003075408, + "learning_rate": 6.31578947368421e-06, + "loss": 1.0641593933105469, + "step": 109 + }, + { + "epoch": 0.032168445679192866, + "grad_norm": 1.0363310086535433, + "learning_rate": 6.374269005847954e-06, + "loss": 0.9647193551063538, + "step": 110 + }, + { + "epoch": 0.032460886094458255, + "grad_norm": 1.1062097211432278, + "learning_rate": 6.432748538011696e-06, + "loss": 0.9693200588226318, + "step": 111 + }, + { + "epoch": 0.032753326509723645, + "grad_norm": 
1.145031857661525, + "learning_rate": 6.491228070175439e-06, + "loss": 0.9600590467453003, + "step": 112 + }, + { + "epoch": 0.033045766924989034, + "grad_norm": 1.0203404188427831, + "learning_rate": 6.549707602339181e-06, + "loss": 0.8908880949020386, + "step": 113 + }, + { + "epoch": 0.033338207340254424, + "grad_norm": 1.2162435709165451, + "learning_rate": 6.608187134502925e-06, + "loss": 0.9803124666213989, + "step": 114 + }, + { + "epoch": 0.03363064775551981, + "grad_norm": 1.1738875143751093, + "learning_rate": 6.666666666666667e-06, + "loss": 0.8288271427154541, + "step": 115 + }, + { + "epoch": 0.0339230881707852, + "grad_norm": 0.9490473067752526, + "learning_rate": 6.72514619883041e-06, + "loss": 0.7203798890113831, + "step": 116 + }, + { + "epoch": 0.03421552858605059, + "grad_norm": 1.0046253156347025, + "learning_rate": 6.783625730994152e-06, + "loss": 0.7670629024505615, + "step": 117 + }, + { + "epoch": 0.03450796900131598, + "grad_norm": 1.0563125407630551, + "learning_rate": 6.842105263157896e-06, + "loss": 0.8487929105758667, + "step": 118 + }, + { + "epoch": 0.03480040941658137, + "grad_norm": 1.1292147521599132, + "learning_rate": 6.9005847953216375e-06, + "loss": 0.8332704305648804, + "step": 119 + }, + { + "epoch": 0.03509284983184676, + "grad_norm": 1.2138847310663696, + "learning_rate": 6.959064327485381e-06, + "loss": 0.9984017610549927, + "step": 120 + }, + { + "epoch": 0.03538529024711215, + "grad_norm": 1.126543099330432, + "learning_rate": 7.017543859649123e-06, + "loss": 0.788459062576294, + "step": 121 + }, + { + "epoch": 0.03567773066237754, + "grad_norm": 1.5166585395762038, + "learning_rate": 7.0760233918128665e-06, + "loss": 1.0288443565368652, + "step": 122 + }, + { + "epoch": 0.03597017107764293, + "grad_norm": 1.0086777607738802, + "learning_rate": 7.134502923976608e-06, + "loss": 0.7939552664756775, + "step": 123 + }, + { + "epoch": 0.03626261149290832, + "grad_norm": 1.0254521267017753, + "learning_rate": 
7.192982456140352e-06, + "loss": 0.8816506862640381, + "step": 124 + }, + { + "epoch": 0.03655505190817371, + "grad_norm": 1.0223917066157164, + "learning_rate": 7.251461988304094e-06, + "loss": 0.8864353895187378, + "step": 125 + }, + { + "epoch": 0.0368474923234391, + "grad_norm": 1.2363556273996017, + "learning_rate": 7.309941520467837e-06, + "loss": 0.9817954897880554, + "step": 126 + }, + { + "epoch": 0.03713993273870449, + "grad_norm": 1.0757650534793346, + "learning_rate": 7.368421052631579e-06, + "loss": 0.8423842787742615, + "step": 127 + }, + { + "epoch": 0.037432373153969876, + "grad_norm": 1.1636915661730252, + "learning_rate": 7.426900584795322e-06, + "loss": 0.8375135660171509, + "step": 128 + }, + { + "epoch": 0.037724813569235266, + "grad_norm": 1.2215328884976426, + "learning_rate": 7.485380116959065e-06, + "loss": 0.9105685949325562, + "step": 129 + }, + { + "epoch": 0.038017253984500655, + "grad_norm": 1.1346801425180852, + "learning_rate": 7.5438596491228074e-06, + "loss": 0.8784557580947876, + "step": 130 + }, + { + "epoch": 0.038309694399766045, + "grad_norm": 1.0071578019284073, + "learning_rate": 7.60233918128655e-06, + "loss": 0.7557879686355591, + "step": 131 + }, + { + "epoch": 0.038602134815031434, + "grad_norm": 1.228942961434803, + "learning_rate": 7.660818713450294e-06, + "loss": 0.8966819047927856, + "step": 132 + }, + { + "epoch": 0.038894575230296824, + "grad_norm": 1.0961114842309465, + "learning_rate": 7.719298245614036e-06, + "loss": 0.7642185091972351, + "step": 133 + }, + { + "epoch": 0.03918701564556221, + "grad_norm": 1.062961529950125, + "learning_rate": 7.77777777777778e-06, + "loss": 0.8313230276107788, + "step": 134 + }, + { + "epoch": 0.0394794560608276, + "grad_norm": 1.3350623914867434, + "learning_rate": 7.836257309941521e-06, + "loss": 0.8388677835464478, + "step": 135 + }, + { + "epoch": 0.039771896476093, + "grad_norm": 1.2027686314521255, + "learning_rate": 7.894736842105265e-06, + "loss": 0.9065952301025391, + 
"step": 136 + }, + { + "epoch": 0.04006433689135839, + "grad_norm": 1.123144368922916, + "learning_rate": 7.953216374269006e-06, + "loss": 0.8153767585754395, + "step": 137 + }, + { + "epoch": 0.04035677730662378, + "grad_norm": 1.163761684167935, + "learning_rate": 8.01169590643275e-06, + "loss": 0.8976421356201172, + "step": 138 + }, + { + "epoch": 0.04064921772188917, + "grad_norm": 1.1354333989669174, + "learning_rate": 8.070175438596492e-06, + "loss": 0.7360264658927917, + "step": 139 + }, + { + "epoch": 0.04094165813715456, + "grad_norm": 1.1009203930924998, + "learning_rate": 8.128654970760235e-06, + "loss": 0.8442148566246033, + "step": 140 + }, + { + "epoch": 0.04123409855241995, + "grad_norm": 1.0872796831159965, + "learning_rate": 8.187134502923977e-06, + "loss": 0.6541435718536377, + "step": 141 + }, + { + "epoch": 0.041526538967685336, + "grad_norm": 1.2792221696979318, + "learning_rate": 8.24561403508772e-06, + "loss": 0.7492353916168213, + "step": 142 + }, + { + "epoch": 0.041818979382950726, + "grad_norm": 1.0406728730985955, + "learning_rate": 8.304093567251463e-06, + "loss": 0.6681893467903137, + "step": 143 + }, + { + "epoch": 0.042111419798216115, + "grad_norm": 1.2507905783247102, + "learning_rate": 8.362573099415205e-06, + "loss": 0.8384866714477539, + "step": 144 + }, + { + "epoch": 0.042403860213481505, + "grad_norm": 1.125680624680095, + "learning_rate": 8.421052631578948e-06, + "loss": 0.8338214159011841, + "step": 145 + }, + { + "epoch": 0.042696300628746894, + "grad_norm": 1.3441065562284606, + "learning_rate": 8.47953216374269e-06, + "loss": 0.8549021482467651, + "step": 146 + }, + { + "epoch": 0.042988741044012284, + "grad_norm": 1.0226139512096055, + "learning_rate": 8.538011695906434e-06, + "loss": 0.8324464559555054, + "step": 147 + }, + { + "epoch": 0.04328118145927767, + "grad_norm": 1.3742681865566602, + "learning_rate": 8.596491228070176e-06, + "loss": 0.9247474670410156, + "step": 148 + }, + { + "epoch": 0.04357362187454306, + 
"grad_norm": 1.3295257009133983, + "learning_rate": 8.654970760233919e-06, + "loss": 0.8488880395889282, + "step": 149 + }, + { + "epoch": 0.04386606228980845, + "grad_norm": 1.244174459745273, + "learning_rate": 8.713450292397661e-06, + "loss": 0.7844473123550415, + "step": 150 + }, + { + "epoch": 0.04415850270507384, + "grad_norm": 1.3605735346558072, + "learning_rate": 8.771929824561405e-06, + "loss": 1.0540976524353027, + "step": 151 + }, + { + "epoch": 0.04445094312033923, + "grad_norm": 1.096092225329518, + "learning_rate": 8.830409356725146e-06, + "loss": 0.7919446229934692, + "step": 152 + }, + { + "epoch": 0.04474338353560462, + "grad_norm": 1.1577837223865697, + "learning_rate": 8.888888888888888e-06, + "loss": 0.818670928478241, + "step": 153 + }, + { + "epoch": 0.04503582395087001, + "grad_norm": 1.4320201209257988, + "learning_rate": 8.947368421052632e-06, + "loss": 0.8491114377975464, + "step": 154 + }, + { + "epoch": 0.0453282643661354, + "grad_norm": 1.8326606844764444, + "learning_rate": 9.005847953216374e-06, + "loss": 0.660563588142395, + "step": 155 + }, + { + "epoch": 0.04562070478140079, + "grad_norm": 1.1838649114458772, + "learning_rate": 9.064327485380117e-06, + "loss": 0.8559159636497498, + "step": 156 + }, + { + "epoch": 0.04591314519666618, + "grad_norm": 1.0968958293675206, + "learning_rate": 9.12280701754386e-06, + "loss": 0.8478386402130127, + "step": 157 + }, + { + "epoch": 0.04620558561193157, + "grad_norm": 1.1272218094040445, + "learning_rate": 9.181286549707603e-06, + "loss": 0.758915901184082, + "step": 158 + }, + { + "epoch": 0.04649802602719696, + "grad_norm": 1.3159367769875163, + "learning_rate": 9.239766081871345e-06, + "loss": 0.773307204246521, + "step": 159 + }, + { + "epoch": 0.04679046644246235, + "grad_norm": 1.29739510285095, + "learning_rate": 9.298245614035088e-06, + "loss": 0.8948490023612976, + "step": 160 + }, + { + "epoch": 0.04708290685772774, + "grad_norm": 1.2170406448830853, + "learning_rate": 
9.35672514619883e-06, + "loss": 0.83086097240448, + "step": 161 + }, + { + "epoch": 0.047375347272993126, + "grad_norm": 1.474814122834776, + "learning_rate": 9.415204678362574e-06, + "loss": 0.7683168649673462, + "step": 162 + }, + { + "epoch": 0.047667787688258516, + "grad_norm": 1.2546637555360107, + "learning_rate": 9.473684210526315e-06, + "loss": 0.9267748594284058, + "step": 163 + }, + { + "epoch": 0.047960228103523905, + "grad_norm": 1.1945733924353639, + "learning_rate": 9.532163742690059e-06, + "loss": 0.9243365526199341, + "step": 164 + }, + { + "epoch": 0.048252668518789295, + "grad_norm": 1.1508961292698372, + "learning_rate": 9.590643274853801e-06, + "loss": 0.7841176986694336, + "step": 165 + }, + { + "epoch": 0.048545108934054684, + "grad_norm": 1.1853174404309834, + "learning_rate": 9.649122807017545e-06, + "loss": 0.8318643569946289, + "step": 166 + }, + { + "epoch": 0.048837549349320074, + "grad_norm": 1.3089312801161905, + "learning_rate": 9.707602339181286e-06, + "loss": 0.866286039352417, + "step": 167 + }, + { + "epoch": 0.04912998976458546, + "grad_norm": 1.32215003396801, + "learning_rate": 9.76608187134503e-06, + "loss": 0.8232241868972778, + "step": 168 + }, + { + "epoch": 0.04942243017985085, + "grad_norm": 1.4759162272800292, + "learning_rate": 9.824561403508772e-06, + "loss": 0.874968945980072, + "step": 169 + }, + { + "epoch": 0.04971487059511624, + "grad_norm": 1.3247540509223557, + "learning_rate": 9.883040935672515e-06, + "loss": 0.9048999547958374, + "step": 170 + }, + { + "epoch": 0.05000731101038163, + "grad_norm": 1.4647995646715117, + "learning_rate": 9.941520467836257e-06, + "loss": 0.9220215082168579, + "step": 171 + }, + { + "epoch": 0.05029975142564702, + "grad_norm": 1.3290504006044366, + "learning_rate": 1e-05, + "loss": 0.8326996564865112, + "step": 172 + }, + { + "epoch": 0.05059219184091241, + "grad_norm": 1.0687285940591045, + "learning_rate": 1.0058479532163743e-05, + "loss": 0.8023662567138672, + "step": 173 + }, + 
{ + "epoch": 0.05088463225617781, + "grad_norm": 1.4370267362244613, + "learning_rate": 1.0116959064327488e-05, + "loss": 0.9172271490097046, + "step": 174 + }, + { + "epoch": 0.0511770726714432, + "grad_norm": 1.2538172153184461, + "learning_rate": 1.017543859649123e-05, + "loss": 0.8016377687454224, + "step": 175 + }, + { + "epoch": 0.051469513086708586, + "grad_norm": 1.1436252675754246, + "learning_rate": 1.0233918128654972e-05, + "loss": 0.7656369805335999, + "step": 176 + }, + { + "epoch": 0.051761953501973976, + "grad_norm": 1.1951944941269466, + "learning_rate": 1.0292397660818714e-05, + "loss": 0.7769640684127808, + "step": 177 + }, + { + "epoch": 0.052054393917239365, + "grad_norm": 1.3791114600068226, + "learning_rate": 1.0350877192982459e-05, + "loss": 0.9830589294433594, + "step": 178 + }, + { + "epoch": 0.052346834332504755, + "grad_norm": 1.1501081025808126, + "learning_rate": 1.04093567251462e-05, + "loss": 0.8002523183822632, + "step": 179 + }, + { + "epoch": 0.052639274747770144, + "grad_norm": 1.3726838653365003, + "learning_rate": 1.0467836257309943e-05, + "loss": 0.879243016242981, + "step": 180 + }, + { + "epoch": 0.052931715163035534, + "grad_norm": 1.2863425151805854, + "learning_rate": 1.0526315789473684e-05, + "loss": 0.7266525030136108, + "step": 181 + }, + { + "epoch": 0.05322415557830092, + "grad_norm": 1.350994010752117, + "learning_rate": 1.0584795321637428e-05, + "loss": 0.784702479839325, + "step": 182 + }, + { + "epoch": 0.05351659599356631, + "grad_norm": 1.415897619399055, + "learning_rate": 1.0643274853801172e-05, + "loss": 0.8419734239578247, + "step": 183 + }, + { + "epoch": 0.0538090364088317, + "grad_norm": 1.201782404599289, + "learning_rate": 1.0701754385964913e-05, + "loss": 0.8462855815887451, + "step": 184 + }, + { + "epoch": 0.05410147682409709, + "grad_norm": 1.361501494219251, + "learning_rate": 1.0760233918128655e-05, + "loss": 0.8888737559318542, + "step": 185 + }, + { + "epoch": 0.05439391723936248, + "grad_norm": 
1.3305576553150047, + "learning_rate": 1.0818713450292399e-05, + "loss": 0.8063781261444092, + "step": 186 + }, + { + "epoch": 0.05468635765462787, + "grad_norm": 1.2109684966022718, + "learning_rate": 1.0877192982456142e-05, + "loss": 0.7981499433517456, + "step": 187 + }, + { + "epoch": 0.05497879806989326, + "grad_norm": 1.5415785509759563, + "learning_rate": 1.0935672514619884e-05, + "loss": 0.8474490642547607, + "step": 188 + }, + { + "epoch": 0.05527123848515865, + "grad_norm": 1.300197838887535, + "learning_rate": 1.0994152046783626e-05, + "loss": 0.818732500076294, + "step": 189 + }, + { + "epoch": 0.05556367890042404, + "grad_norm": 1.3192619521811115, + "learning_rate": 1.105263157894737e-05, + "loss": 0.7660291194915771, + "step": 190 + }, + { + "epoch": 0.05585611931568943, + "grad_norm": 1.2626389127660034, + "learning_rate": 1.1111111111111113e-05, + "loss": 0.8240147233009338, + "step": 191 + }, + { + "epoch": 0.05614855973095482, + "grad_norm": 1.340830231936402, + "learning_rate": 1.1169590643274855e-05, + "loss": 0.9377203583717346, + "step": 192 + }, + { + "epoch": 0.05644100014622021, + "grad_norm": 1.416661564809907, + "learning_rate": 1.1228070175438597e-05, + "loss": 0.8662704229354858, + "step": 193 + }, + { + "epoch": 0.0567334405614856, + "grad_norm": 1.3274611257173192, + "learning_rate": 1.128654970760234e-05, + "loss": 0.717308759689331, + "step": 194 + }, + { + "epoch": 0.05702588097675099, + "grad_norm": 1.1942152308113003, + "learning_rate": 1.1345029239766083e-05, + "loss": 0.8538037538528442, + "step": 195 + }, + { + "epoch": 0.057318321392016376, + "grad_norm": 1.4411136610170212, + "learning_rate": 1.1403508771929826e-05, + "loss": 0.9016960859298706, + "step": 196 + }, + { + "epoch": 0.057610761807281766, + "grad_norm": 1.4664426354083508, + "learning_rate": 1.1461988304093568e-05, + "loss": 0.9313502311706543, + "step": 197 + }, + { + "epoch": 0.057903202222547155, + "grad_norm": 1.2885330427126278, + "learning_rate": 
1.1520467836257312e-05, + "loss": 0.7330124974250793, + "step": 198 + }, + { + "epoch": 0.058195642637812545, + "grad_norm": 1.272277327326545, + "learning_rate": 1.1578947368421053e-05, + "loss": 0.8904056549072266, + "step": 199 + }, + { + "epoch": 0.058488083053077934, + "grad_norm": 1.4761275028472136, + "learning_rate": 1.1637426900584797e-05, + "loss": 0.7816377878189087, + "step": 200 + }, + { + "epoch": 0.058780523468343324, + "grad_norm": 1.3244130760300052, + "learning_rate": 1.1695906432748539e-05, + "loss": 0.7109910249710083, + "step": 201 + }, + { + "epoch": 0.05907296388360871, + "grad_norm": 1.499082853070359, + "learning_rate": 1.1754385964912282e-05, + "loss": 0.7657924890518188, + "step": 202 + }, + { + "epoch": 0.0593654042988741, + "grad_norm": 1.5632309821036996, + "learning_rate": 1.1812865497076024e-05, + "loss": 0.8521978259086609, + "step": 203 + }, + { + "epoch": 0.05965784471413949, + "grad_norm": 1.3625729366507646, + "learning_rate": 1.1871345029239766e-05, + "loss": 0.7558364868164062, + "step": 204 + }, + { + "epoch": 0.05995028512940488, + "grad_norm": 1.3362044158661328, + "learning_rate": 1.192982456140351e-05, + "loss": 0.8488497734069824, + "step": 205 + }, + { + "epoch": 0.06024272554467027, + "grad_norm": 1.5823695803446844, + "learning_rate": 1.1988304093567253e-05, + "loss": 0.7905591726303101, + "step": 206 + }, + { + "epoch": 0.06053516595993566, + "grad_norm": 1.324069880941127, + "learning_rate": 1.2046783625730995e-05, + "loss": 0.747936487197876, + "step": 207 + }, + { + "epoch": 0.06082760637520105, + "grad_norm": 1.3370127883002023, + "learning_rate": 1.2105263157894737e-05, + "loss": 0.8653486967086792, + "step": 208 + }, + { + "epoch": 0.06112004679046644, + "grad_norm": 1.295171295812896, + "learning_rate": 1.216374269005848e-05, + "loss": 0.8662437200546265, + "step": 209 + }, + { + "epoch": 0.06141248720573183, + "grad_norm": 1.6369328366726996, + "learning_rate": 1.2222222222222224e-05, + "loss": 
0.9567133188247681, + "step": 210 + }, + { + "epoch": 0.06170492762099722, + "grad_norm": 1.4011109813275144, + "learning_rate": 1.2280701754385966e-05, + "loss": 0.8994660377502441, + "step": 211 + }, + { + "epoch": 0.06199736803626261, + "grad_norm": 1.2989562892904951, + "learning_rate": 1.2339181286549708e-05, + "loss": 0.7889316082000732, + "step": 212 + }, + { + "epoch": 0.062289808451528005, + "grad_norm": 1.2266327731037636, + "learning_rate": 1.239766081871345e-05, + "loss": 0.883985161781311, + "step": 213 + }, + { + "epoch": 0.0625822488667934, + "grad_norm": 1.2190679056716556, + "learning_rate": 1.2456140350877195e-05, + "loss": 0.7780495882034302, + "step": 214 + }, + { + "epoch": 0.06287468928205878, + "grad_norm": 1.3596314866008754, + "learning_rate": 1.2514619883040937e-05, + "loss": 0.6514906883239746, + "step": 215 + }, + { + "epoch": 0.06316712969732417, + "grad_norm": 1.3008367711622892, + "learning_rate": 1.2573099415204679e-05, + "loss": 0.750559389591217, + "step": 216 + }, + { + "epoch": 0.06345957011258956, + "grad_norm": 1.4761536100726258, + "learning_rate": 1.263157894736842e-05, + "loss": 0.8330573439598083, + "step": 217 + }, + { + "epoch": 0.06375201052785495, + "grad_norm": 1.4144186396910836, + "learning_rate": 1.2690058479532166e-05, + "loss": 0.8075361847877502, + "step": 218 + }, + { + "epoch": 0.06404445094312033, + "grad_norm": 1.2867265784947997, + "learning_rate": 1.2748538011695908e-05, + "loss": 0.7636772394180298, + "step": 219 + }, + { + "epoch": 0.06433689135838573, + "grad_norm": 1.1905704140813884, + "learning_rate": 1.280701754385965e-05, + "loss": 0.8241903185844421, + "step": 220 + }, + { + "epoch": 0.06462933177365111, + "grad_norm": 1.261461662230418, + "learning_rate": 1.2865497076023392e-05, + "loss": 0.6582514047622681, + "step": 221 + }, + { + "epoch": 0.06492177218891651, + "grad_norm": 1.461492259499335, + "learning_rate": 1.2923976608187137e-05, + "loss": 0.6363992691040039, + "step": 222 + }, + { + 
"epoch": 0.06521421260418189, + "grad_norm": 1.5776709499534403, + "learning_rate": 1.2982456140350879e-05, + "loss": 0.8093860149383545, + "step": 223 + }, + { + "epoch": 0.06550665301944729, + "grad_norm": 1.5281675606912017, + "learning_rate": 1.304093567251462e-05, + "loss": 0.7719511985778809, + "step": 224 + }, + { + "epoch": 0.06579909343471267, + "grad_norm": 1.4484434101459598, + "learning_rate": 1.3099415204678362e-05, + "loss": 0.8314809799194336, + "step": 225 + }, + { + "epoch": 0.06609153384997807, + "grad_norm": 1.3751378156667435, + "learning_rate": 1.3157894736842108e-05, + "loss": 0.8752902746200562, + "step": 226 + }, + { + "epoch": 0.06638397426524345, + "grad_norm": 1.4660956062146326, + "learning_rate": 1.321637426900585e-05, + "loss": 0.7564839124679565, + "step": 227 + }, + { + "epoch": 0.06667641468050885, + "grad_norm": 1.6744274403459947, + "learning_rate": 1.3274853801169591e-05, + "loss": 0.7377971410751343, + "step": 228 + }, + { + "epoch": 0.06696885509577423, + "grad_norm": 1.3046915227989528, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.7298087477684021, + "step": 229 + }, + { + "epoch": 0.06726129551103963, + "grad_norm": 1.4026797729918719, + "learning_rate": 1.3391812865497079e-05, + "loss": 0.7291176915168762, + "step": 230 + }, + { + "epoch": 0.06755373592630501, + "grad_norm": 1.3421785664914363, + "learning_rate": 1.345029239766082e-05, + "loss": 0.8226944208145142, + "step": 231 + }, + { + "epoch": 0.0678461763415704, + "grad_norm": 1.4277073905518047, + "learning_rate": 1.3508771929824562e-05, + "loss": 0.7185185551643372, + "step": 232 + }, + { + "epoch": 0.0681386167568358, + "grad_norm": 1.2950151686673683, + "learning_rate": 1.3567251461988304e-05, + "loss": 0.7028212547302246, + "step": 233 + }, + { + "epoch": 0.06843105717210118, + "grad_norm": 1.6157016450339874, + "learning_rate": 1.362573099415205e-05, + "loss": 0.8809897899627686, + "step": 234 + }, + { + "epoch": 0.06872349758736658, + "grad_norm": 
1.388536739112073, + "learning_rate": 1.3684210526315791e-05, + "loss": 0.7779085040092468, + "step": 235 + }, + { + "epoch": 0.06901593800263196, + "grad_norm": 1.5070530641919806, + "learning_rate": 1.3742690058479533e-05, + "loss": 0.731019139289856, + "step": 236 + }, + { + "epoch": 0.06930837841789736, + "grad_norm": 1.4005389899518954, + "learning_rate": 1.3801169590643275e-05, + "loss": 0.7495850920677185, + "step": 237 + }, + { + "epoch": 0.06960081883316274, + "grad_norm": 1.2241508662035476, + "learning_rate": 1.385964912280702e-05, + "loss": 0.7018189430236816, + "step": 238 + }, + { + "epoch": 0.06989325924842814, + "grad_norm": 1.2596692368793962, + "learning_rate": 1.3918128654970762e-05, + "loss": 0.7072417736053467, + "step": 239 + }, + { + "epoch": 0.07018569966369352, + "grad_norm": 1.3606864903220994, + "learning_rate": 1.3976608187134504e-05, + "loss": 0.8125720620155334, + "step": 240 + }, + { + "epoch": 0.07047814007895892, + "grad_norm": 1.442924901417446, + "learning_rate": 1.4035087719298246e-05, + "loss": 0.6101655960083008, + "step": 241 + }, + { + "epoch": 0.0707705804942243, + "grad_norm": 1.3725413795436465, + "learning_rate": 1.409356725146199e-05, + "loss": 0.9005568623542786, + "step": 242 + }, + { + "epoch": 0.0710630209094897, + "grad_norm": 1.4215646059439664, + "learning_rate": 1.4152046783625733e-05, + "loss": 0.7678338289260864, + "step": 243 + }, + { + "epoch": 0.07135546132475508, + "grad_norm": 1.4745728838056915, + "learning_rate": 1.4210526315789475e-05, + "loss": 0.7563410997390747, + "step": 244 + }, + { + "epoch": 0.07164790174002048, + "grad_norm": 1.3043448641122064, + "learning_rate": 1.4269005847953217e-05, + "loss": 0.7497583627700806, + "step": 245 + }, + { + "epoch": 0.07194034215528586, + "grad_norm": 1.8237088246729396, + "learning_rate": 1.432748538011696e-05, + "loss": 0.8913442492485046, + "step": 246 + }, + { + "epoch": 0.07223278257055125, + "grad_norm": 1.446976759622428, + "learning_rate": 
1.4385964912280704e-05, + "loss": 0.7714704871177673, + "step": 247 + }, + { + "epoch": 0.07252522298581664, + "grad_norm": 1.4721214924941617, + "learning_rate": 1.4444444444444446e-05, + "loss": 0.6752789616584778, + "step": 248 + }, + { + "epoch": 0.07281766340108203, + "grad_norm": 1.4015875441769006, + "learning_rate": 1.4502923976608188e-05, + "loss": 0.6092795133590698, + "step": 249 + }, + { + "epoch": 0.07311010381634742, + "grad_norm": 1.4602535650914903, + "learning_rate": 1.4561403508771931e-05, + "loss": 0.9300343990325928, + "step": 250 + }, + { + "epoch": 0.07340254423161281, + "grad_norm": 1.3884630911660603, + "learning_rate": 1.4619883040935675e-05, + "loss": 0.8005613088607788, + "step": 251 + }, + { + "epoch": 0.0736949846468782, + "grad_norm": 1.2918508056771596, + "learning_rate": 1.4678362573099417e-05, + "loss": 0.7188931703567505, + "step": 252 + }, + { + "epoch": 0.07398742506214359, + "grad_norm": 1.3258314938186555, + "learning_rate": 1.4736842105263159e-05, + "loss": 0.6967242956161499, + "step": 253 + }, + { + "epoch": 0.07427986547740897, + "grad_norm": 1.300875000270566, + "learning_rate": 1.4795321637426902e-05, + "loss": 0.6921653747558594, + "step": 254 + }, + { + "epoch": 0.07457230589267437, + "grad_norm": 1.4258732788152875, + "learning_rate": 1.4853801169590644e-05, + "loss": 0.8498743772506714, + "step": 255 + }, + { + "epoch": 0.07486474630793975, + "grad_norm": 1.4311730434285577, + "learning_rate": 1.4912280701754388e-05, + "loss": 0.6420027017593384, + "step": 256 + }, + { + "epoch": 0.07515718672320515, + "grad_norm": 1.3747073212413874, + "learning_rate": 1.497076023391813e-05, + "loss": 0.7101434469223022, + "step": 257 + }, + { + "epoch": 0.07544962713847053, + "grad_norm": 1.562801712624193, + "learning_rate": 1.5029239766081873e-05, + "loss": 0.740740180015564, + "step": 258 + }, + { + "epoch": 0.07574206755373593, + "grad_norm": 1.726645998674187, + "learning_rate": 1.5087719298245615e-05, + "loss": 
0.891905665397644, + "step": 259 + }, + { + "epoch": 0.07603450796900131, + "grad_norm": 1.5486677390214905, + "learning_rate": 1.5146198830409358e-05, + "loss": 0.867740273475647, + "step": 260 + }, + { + "epoch": 0.07632694838426671, + "grad_norm": 1.5072500165891534, + "learning_rate": 1.52046783625731e-05, + "loss": 0.7895220518112183, + "step": 261 + }, + { + "epoch": 0.07661938879953209, + "grad_norm": 1.5579945503860015, + "learning_rate": 1.5263157894736846e-05, + "loss": 0.7987008094787598, + "step": 262 + }, + { + "epoch": 0.07691182921479749, + "grad_norm": 1.4014455476427317, + "learning_rate": 1.5321637426900587e-05, + "loss": 0.7780282497406006, + "step": 263 + }, + { + "epoch": 0.07720426963006287, + "grad_norm": 1.2290290646079385, + "learning_rate": 1.538011695906433e-05, + "loss": 0.6265891194343567, + "step": 264 + }, + { + "epoch": 0.07749671004532827, + "grad_norm": 1.4917276843875658, + "learning_rate": 1.543859649122807e-05, + "loss": 0.6559646129608154, + "step": 265 + }, + { + "epoch": 0.07778915046059365, + "grad_norm": 1.4406503206723986, + "learning_rate": 1.5497076023391816e-05, + "loss": 0.8362047672271729, + "step": 266 + }, + { + "epoch": 0.07808159087585904, + "grad_norm": 1.481487764499426, + "learning_rate": 1.555555555555556e-05, + "loss": 0.707663357257843, + "step": 267 + }, + { + "epoch": 0.07837403129112443, + "grad_norm": 1.398507930714671, + "learning_rate": 1.56140350877193e-05, + "loss": 0.67903071641922, + "step": 268 + }, + { + "epoch": 0.07866647170638982, + "grad_norm": 1.3187056037490035, + "learning_rate": 1.5672514619883042e-05, + "loss": 0.7634894251823425, + "step": 269 + }, + { + "epoch": 0.0789589121216552, + "grad_norm": 1.3791372975152867, + "learning_rate": 1.5730994152046787e-05, + "loss": 0.6395117044448853, + "step": 270 + }, + { + "epoch": 0.0792513525369206, + "grad_norm": 1.4273746235266698, + "learning_rate": 1.578947368421053e-05, + "loss": 0.6948165893554688, + "step": 271 + }, + { + "epoch": 
0.079543792952186, + "grad_norm": 1.342718294320327, + "learning_rate": 1.584795321637427e-05, + "loss": 0.9288383722305298, + "step": 272 + }, + { + "epoch": 0.07983623336745138, + "grad_norm": 1.4727633207578312, + "learning_rate": 1.5906432748538013e-05, + "loss": 0.9291346073150635, + "step": 273 + }, + { + "epoch": 0.08012867378271678, + "grad_norm": 1.3613936763496384, + "learning_rate": 1.5964912280701755e-05, + "loss": 0.7399512529373169, + "step": 274 + }, + { + "epoch": 0.08042111419798216, + "grad_norm": 1.5856072060707183, + "learning_rate": 1.60233918128655e-05, + "loss": 0.6890764236450195, + "step": 275 + }, + { + "epoch": 0.08071355461324756, + "grad_norm": 1.1844012071470522, + "learning_rate": 1.6081871345029242e-05, + "loss": 0.6520324349403381, + "step": 276 + }, + { + "epoch": 0.08100599502851294, + "grad_norm": 1.4161353486782806, + "learning_rate": 1.6140350877192984e-05, + "loss": 0.6726658344268799, + "step": 277 + }, + { + "epoch": 0.08129843544377834, + "grad_norm": 1.5076627116667636, + "learning_rate": 1.6198830409356726e-05, + "loss": 0.7453294992446899, + "step": 278 + }, + { + "epoch": 0.08159087585904372, + "grad_norm": 1.6796077609043067, + "learning_rate": 1.625730994152047e-05, + "loss": 0.755578875541687, + "step": 279 + }, + { + "epoch": 0.08188331627430911, + "grad_norm": 1.576837195920435, + "learning_rate": 1.6315789473684213e-05, + "loss": 0.713086724281311, + "step": 280 + }, + { + "epoch": 0.0821757566895745, + "grad_norm": 1.5223162841340931, + "learning_rate": 1.6374269005847955e-05, + "loss": 0.8714310526847839, + "step": 281 + }, + { + "epoch": 0.0824681971048399, + "grad_norm": 1.4999918578300349, + "learning_rate": 1.6432748538011697e-05, + "loss": 0.6827348470687866, + "step": 282 + }, + { + "epoch": 0.08276063752010528, + "grad_norm": 1.5263417760460645, + "learning_rate": 1.649122807017544e-05, + "loss": 0.8613482713699341, + "step": 283 + }, + { + "epoch": 0.08305307793537067, + "grad_norm": 1.3847261162959308, 
+ "learning_rate": 1.6549707602339184e-05, + "loss": 0.7442763447761536, + "step": 284 + }, + { + "epoch": 0.08334551835063606, + "grad_norm": 1.3784508201309091, + "learning_rate": 1.6608187134502926e-05, + "loss": 0.7505494356155396, + "step": 285 + }, + { + "epoch": 0.08363795876590145, + "grad_norm": 1.3042392110114591, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.7720779776573181, + "step": 286 + }, + { + "epoch": 0.08393039918116683, + "grad_norm": 1.5516828033558783, + "learning_rate": 1.672514619883041e-05, + "loss": 0.7746216654777527, + "step": 287 + }, + { + "epoch": 0.08422283959643223, + "grad_norm": 1.4429865955911445, + "learning_rate": 1.6783625730994155e-05, + "loss": 0.8471436500549316, + "step": 288 + }, + { + "epoch": 0.08451528001169761, + "grad_norm": 1.4116704654777366, + "learning_rate": 1.6842105263157896e-05, + "loss": 0.7117248773574829, + "step": 289 + }, + { + "epoch": 0.08480772042696301, + "grad_norm": 1.4428575448924124, + "learning_rate": 1.690058479532164e-05, + "loss": 0.758680522441864, + "step": 290 + }, + { + "epoch": 0.08510016084222839, + "grad_norm": 1.4632326474117294, + "learning_rate": 1.695906432748538e-05, + "loss": 0.9083560705184937, + "step": 291 + }, + { + "epoch": 0.08539260125749379, + "grad_norm": 1.3444847997489586, + "learning_rate": 1.7017543859649125e-05, + "loss": 0.7457551956176758, + "step": 292 + }, + { + "epoch": 0.08568504167275917, + "grad_norm": 1.423532632485526, + "learning_rate": 1.7076023391812867e-05, + "loss": 0.7463638782501221, + "step": 293 + }, + { + "epoch": 0.08597748208802457, + "grad_norm": 1.4584931442713187, + "learning_rate": 1.713450292397661e-05, + "loss": 0.6983559131622314, + "step": 294 + }, + { + "epoch": 0.08626992250328995, + "grad_norm": 1.3612667828489424, + "learning_rate": 1.719298245614035e-05, + "loss": 0.8043842911720276, + "step": 295 + }, + { + "epoch": 0.08656236291855535, + "grad_norm": 1.5042924331122234, + "learning_rate": 1.7251461988304093e-05, + 
"loss": 0.7150747776031494, + "step": 296 + }, + { + "epoch": 0.08685480333382073, + "grad_norm": 2.0308017082996326, + "learning_rate": 1.7309941520467838e-05, + "loss": 0.7805558443069458, + "step": 297 + }, + { + "epoch": 0.08714724374908613, + "grad_norm": 1.4326584270734728, + "learning_rate": 1.736842105263158e-05, + "loss": 0.7158486843109131, + "step": 298 + }, + { + "epoch": 0.08743968416435151, + "grad_norm": 1.2329719748746066, + "learning_rate": 1.7426900584795322e-05, + "loss": 0.6496458053588867, + "step": 299 + }, + { + "epoch": 0.0877321245796169, + "grad_norm": 1.3255444740397837, + "learning_rate": 1.7485380116959064e-05, + "loss": 0.7488506436347961, + "step": 300 + }, + { + "epoch": 0.08802456499488229, + "grad_norm": 1.5658056782887144, + "learning_rate": 1.754385964912281e-05, + "loss": 0.8370999097824097, + "step": 301 + }, + { + "epoch": 0.08831700541014768, + "grad_norm": 1.3342670844496862, + "learning_rate": 1.760233918128655e-05, + "loss": 0.6624353528022766, + "step": 302 + }, + { + "epoch": 0.08860944582541307, + "grad_norm": 1.4627534576360353, + "learning_rate": 1.7660818713450293e-05, + "loss": 0.6861047148704529, + "step": 303 + }, + { + "epoch": 0.08890188624067846, + "grad_norm": 1.6532053166188327, + "learning_rate": 1.7719298245614035e-05, + "loss": 0.746711015701294, + "step": 304 + }, + { + "epoch": 0.08919432665594385, + "grad_norm": 1.554160121250669, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.7794955968856812, + "step": 305 + }, + { + "epoch": 0.08948676707120924, + "grad_norm": 1.7649976265227958, + "learning_rate": 1.7836257309941522e-05, + "loss": 0.7202489972114563, + "step": 306 + }, + { + "epoch": 0.08977920748647462, + "grad_norm": 1.6262384567896693, + "learning_rate": 1.7894736842105264e-05, + "loss": 0.7252119183540344, + "step": 307 + }, + { + "epoch": 0.09007164790174002, + "grad_norm": 1.5452508352574224, + "learning_rate": 1.7953216374269006e-05, + "loss": 0.9168737530708313, + "step": 308 + }, + 
{ + "epoch": 0.0903640883170054, + "grad_norm": 1.487069935429652, + "learning_rate": 1.8011695906432747e-05, + "loss": 0.7647944688796997, + "step": 309 + }, + { + "epoch": 0.0906565287322708, + "grad_norm": 1.7447386842901849, + "learning_rate": 1.8070175438596493e-05, + "loss": 0.7836136817932129, + "step": 310 + }, + { + "epoch": 0.0909489691475362, + "grad_norm": 1.2604562921756688, + "learning_rate": 1.8128654970760235e-05, + "loss": 0.6495587825775146, + "step": 311 + }, + { + "epoch": 0.09124140956280158, + "grad_norm": 1.5613577023920442, + "learning_rate": 1.8187134502923976e-05, + "loss": 0.7266290187835693, + "step": 312 + }, + { + "epoch": 0.09153384997806698, + "grad_norm": 1.9984801625992445, + "learning_rate": 1.824561403508772e-05, + "loss": 0.8417587876319885, + "step": 313 + }, + { + "epoch": 0.09182629039333236, + "grad_norm": 1.5767499272635297, + "learning_rate": 1.8304093567251464e-05, + "loss": 0.8431564569473267, + "step": 314 + }, + { + "epoch": 0.09211873080859775, + "grad_norm": 1.4390326104450535, + "learning_rate": 1.8362573099415205e-05, + "loss": 0.7724050283432007, + "step": 315 + }, + { + "epoch": 0.09241117122386314, + "grad_norm": 1.4145032164176374, + "learning_rate": 1.8421052631578947e-05, + "loss": 0.6687352657318115, + "step": 316 + }, + { + "epoch": 0.09270361163912853, + "grad_norm": 1.3696816256616517, + "learning_rate": 1.847953216374269e-05, + "loss": 0.7465454339981079, + "step": 317 + }, + { + "epoch": 0.09299605205439392, + "grad_norm": 1.507661205433782, + "learning_rate": 1.8538011695906434e-05, + "loss": 0.6944088935852051, + "step": 318 + }, + { + "epoch": 0.09328849246965931, + "grad_norm": 1.2922205760098913, + "learning_rate": 1.8596491228070176e-05, + "loss": 0.6692598462104797, + "step": 319 + }, + { + "epoch": 0.0935809328849247, + "grad_norm": 1.4345621362788812, + "learning_rate": 1.8654970760233918e-05, + "loss": 0.7287981510162354, + "step": 320 + }, + { + "epoch": 0.09387337330019009, + "grad_norm": 
1.426362426046858, + "learning_rate": 1.871345029239766e-05, + "loss": 0.704437255859375, + "step": 321 + }, + { + "epoch": 0.09416581371545547, + "grad_norm": 1.2757141813139592, + "learning_rate": 1.8771929824561405e-05, + "loss": 0.6425009965896606, + "step": 322 + }, + { + "epoch": 0.09445825413072087, + "grad_norm": 1.4929466314279891, + "learning_rate": 1.8830409356725147e-05, + "loss": 0.765799880027771, + "step": 323 + }, + { + "epoch": 0.09475069454598625, + "grad_norm": 1.482293870539422, + "learning_rate": 1.888888888888889e-05, + "loss": 0.9151520133018494, + "step": 324 + }, + { + "epoch": 0.09504313496125165, + "grad_norm": 1.5087468194478204, + "learning_rate": 1.894736842105263e-05, + "loss": 0.8753486275672913, + "step": 325 + }, + { + "epoch": 0.09533557537651703, + "grad_norm": 1.649363404228967, + "learning_rate": 1.9005847953216376e-05, + "loss": 0.7652826309204102, + "step": 326 + }, + { + "epoch": 0.09562801579178243, + "grad_norm": 1.405975419146797, + "learning_rate": 1.9064327485380118e-05, + "loss": 0.7309015393257141, + "step": 327 + }, + { + "epoch": 0.09592045620704781, + "grad_norm": 1.6766609888433524, + "learning_rate": 1.912280701754386e-05, + "loss": 0.7656553983688354, + "step": 328 + }, + { + "epoch": 0.09621289662231321, + "grad_norm": 1.4942542074310006, + "learning_rate": 1.9181286549707602e-05, + "loss": 0.7400631904602051, + "step": 329 + }, + { + "epoch": 0.09650533703757859, + "grad_norm": 1.4740815055784118, + "learning_rate": 1.9239766081871347e-05, + "loss": 0.6812465190887451, + "step": 330 + }, + { + "epoch": 0.09679777745284399, + "grad_norm": 1.4394939888427052, + "learning_rate": 1.929824561403509e-05, + "loss": 0.6820628046989441, + "step": 331 + }, + { + "epoch": 0.09709021786810937, + "grad_norm": 1.9824484648298863, + "learning_rate": 1.935672514619883e-05, + "loss": 0.7437758445739746, + "step": 332 + }, + { + "epoch": 0.09738265828337477, + "grad_norm": 1.4755288186056683, + "learning_rate": 
1.9415204678362573e-05, + "loss": 0.8011504411697388, + "step": 333 + }, + { + "epoch": 0.09767509869864015, + "grad_norm": 1.3829561395962537, + "learning_rate": 1.9473684210526318e-05, + "loss": 0.7437810301780701, + "step": 334 + }, + { + "epoch": 0.09796753911390554, + "grad_norm": 1.328838303483977, + "learning_rate": 1.953216374269006e-05, + "loss": 0.7419568300247192, + "step": 335 + }, + { + "epoch": 0.09825997952917093, + "grad_norm": 1.4291436246188844, + "learning_rate": 1.9590643274853802e-05, + "loss": 0.7805042266845703, + "step": 336 + }, + { + "epoch": 0.09855241994443632, + "grad_norm": 1.3104711543583085, + "learning_rate": 1.9649122807017544e-05, + "loss": 0.6952530145645142, + "step": 337 + }, + { + "epoch": 0.0988448603597017, + "grad_norm": 1.313224719465845, + "learning_rate": 1.970760233918129e-05, + "loss": 0.7669289112091064, + "step": 338 + }, + { + "epoch": 0.0991373007749671, + "grad_norm": 1.4101609769639065, + "learning_rate": 1.976608187134503e-05, + "loss": 0.8033919930458069, + "step": 339 + }, + { + "epoch": 0.09942974119023248, + "grad_norm": 1.2883543538345825, + "learning_rate": 1.9824561403508773e-05, + "loss": 0.6523177623748779, + "step": 340 + }, + { + "epoch": 0.09972218160549788, + "grad_norm": 1.3960808628411998, + "learning_rate": 1.9883040935672515e-05, + "loss": 0.7221896648406982, + "step": 341 + }, + { + "epoch": 0.10001462202076326, + "grad_norm": 1.2255647850534943, + "learning_rate": 1.994152046783626e-05, + "loss": 0.6054700016975403, + "step": 342 + }, + { + "epoch": 0.10030706243602866, + "grad_norm": 1.6303566611100393, + "learning_rate": 2e-05, + "loss": 0.8368290662765503, + "step": 343 + }, + { + "epoch": 0.10059950285129404, + "grad_norm": 1.4276425594743465, + "learning_rate": 1.99999988312804e-05, + "loss": 0.9075677990913391, + "step": 344 + }, + { + "epoch": 0.10089194326655944, + "grad_norm": 1.4517524210925274, + "learning_rate": 1.999999532512188e-05, + "loss": 0.7202495336532593, + "step": 345 + 
}, + { + "epoch": 0.10118438368182482, + "grad_norm": 1.5340311782896001, + "learning_rate": 1.9999989481525245e-05, + "loss": 0.7373536229133606, + "step": 346 + }, + { + "epoch": 0.10147682409709022, + "grad_norm": 1.3128585037330316, + "learning_rate": 1.9999981300491873e-05, + "loss": 0.7292035222053528, + "step": 347 + }, + { + "epoch": 0.10176926451235561, + "grad_norm": 1.2681362139682877, + "learning_rate": 1.9999970782023673e-05, + "loss": 0.8970675468444824, + "step": 348 + }, + { + "epoch": 0.102061704927621, + "grad_norm": 1.384714606589521, + "learning_rate": 1.9999957926123104e-05, + "loss": 0.7909846305847168, + "step": 349 + }, + { + "epoch": 0.1023541453428864, + "grad_norm": 1.3537270396362884, + "learning_rate": 1.999994273279317e-05, + "loss": 0.7784097790718079, + "step": 350 + }, + { + "epoch": 0.10264658575815178, + "grad_norm": 1.4008631296209513, + "learning_rate": 1.9999925202037422e-05, + "loss": 0.7129874229431152, + "step": 351 + }, + { + "epoch": 0.10293902617341717, + "grad_norm": 1.3322666039831734, + "learning_rate": 1.999990533385996e-05, + "loss": 0.7185519337654114, + "step": 352 + }, + { + "epoch": 0.10323146658868255, + "grad_norm": 1.379111892126872, + "learning_rate": 1.9999883128265428e-05, + "loss": 0.812228798866272, + "step": 353 + }, + { + "epoch": 0.10352390700394795, + "grad_norm": 1.2831139743741589, + "learning_rate": 1.999985858525901e-05, + "loss": 0.7187886238098145, + "step": 354 + }, + { + "epoch": 0.10381634741921333, + "grad_norm": 1.133776070922858, + "learning_rate": 1.9999831704846452e-05, + "loss": 0.6618789434432983, + "step": 355 + }, + { + "epoch": 0.10410878783447873, + "grad_norm": 1.5601168208020613, + "learning_rate": 1.999980248703403e-05, + "loss": 0.9226458072662354, + "step": 356 + }, + { + "epoch": 0.10440122824974411, + "grad_norm": 1.3702611517072447, + "learning_rate": 1.9999770931828578e-05, + "loss": 0.7326352596282959, + "step": 357 + }, + { + "epoch": 0.10469366866500951, + "grad_norm": 
1.4755549813416367, + "learning_rate": 1.9999737039237472e-05, + "loss": 0.719240128993988, + "step": 358 + }, + { + "epoch": 0.10498610908027489, + "grad_norm": 1.2914576093532248, + "learning_rate": 1.999970080926863e-05, + "loss": 0.7380290031433105, + "step": 359 + }, + { + "epoch": 0.10527854949554029, + "grad_norm": 1.6255135036531254, + "learning_rate": 1.9999662241930523e-05, + "loss": 0.736219048500061, + "step": 360 + }, + { + "epoch": 0.10557098991080567, + "grad_norm": 1.381933387611508, + "learning_rate": 1.999962133723217e-05, + "loss": 0.8160735368728638, + "step": 361 + }, + { + "epoch": 0.10586343032607107, + "grad_norm": 1.4607575491849774, + "learning_rate": 1.9999578095183126e-05, + "loss": 0.6679781675338745, + "step": 362 + }, + { + "epoch": 0.10615587074133645, + "grad_norm": 1.551414308388604, + "learning_rate": 1.9999532515793498e-05, + "loss": 0.7670542001724243, + "step": 363 + }, + { + "epoch": 0.10644831115660185, + "grad_norm": 1.2802491712211252, + "learning_rate": 1.9999484599073945e-05, + "loss": 0.6395057439804077, + "step": 364 + }, + { + "epoch": 0.10674075157186723, + "grad_norm": 1.571289013739176, + "learning_rate": 1.9999434345035666e-05, + "loss": 0.7226368188858032, + "step": 365 + }, + { + "epoch": 0.10703319198713263, + "grad_norm": 1.4755023089198305, + "learning_rate": 1.9999381753690403e-05, + "loss": 0.6236128211021423, + "step": 366 + }, + { + "epoch": 0.10732563240239801, + "grad_norm": 1.2507526885979663, + "learning_rate": 1.9999326825050455e-05, + "loss": 0.5937299132347107, + "step": 367 + }, + { + "epoch": 0.1076180728176634, + "grad_norm": 1.294239826855842, + "learning_rate": 1.999926955912866e-05, + "loss": 0.6014857292175293, + "step": 368 + }, + { + "epoch": 0.10791051323292879, + "grad_norm": 1.1031323946933334, + "learning_rate": 1.9999209955938394e-05, + "loss": 0.5898704528808594, + "step": 369 + }, + { + "epoch": 0.10820295364819418, + "grad_norm": 1.475520460275832, + "learning_rate": 
1.9999148015493602e-05, + "loss": 0.6879048943519592, + "step": 370 + }, + { + "epoch": 0.10849539406345957, + "grad_norm": 1.5235484717330832, + "learning_rate": 1.999908373780876e-05, + "loss": 0.781298041343689, + "step": 371 + }, + { + "epoch": 0.10878783447872496, + "grad_norm": 1.2913472995661532, + "learning_rate": 1.9999017122898886e-05, + "loss": 0.6997531652450562, + "step": 372 + }, + { + "epoch": 0.10908027489399034, + "grad_norm": 1.2104967688689228, + "learning_rate": 1.9998948170779556e-05, + "loss": 0.6979694366455078, + "step": 373 + }, + { + "epoch": 0.10937271530925574, + "grad_norm": 1.6154905149339498, + "learning_rate": 1.999887688146689e-05, + "loss": 0.8069214820861816, + "step": 374 + }, + { + "epoch": 0.10966515572452112, + "grad_norm": 1.4534879205249425, + "learning_rate": 1.9998803254977538e-05, + "loss": 0.875137448310852, + "step": 375 + }, + { + "epoch": 0.10995759613978652, + "grad_norm": 1.4252221781216903, + "learning_rate": 1.9998727291328725e-05, + "loss": 0.8267173767089844, + "step": 376 + }, + { + "epoch": 0.1102500365550519, + "grad_norm": 1.3704709368430794, + "learning_rate": 1.99986489905382e-05, + "loss": 0.7589337825775146, + "step": 377 + }, + { + "epoch": 0.1105424769703173, + "grad_norm": 1.7248131297126135, + "learning_rate": 1.999856835262427e-05, + "loss": 0.7479992508888245, + "step": 378 + }, + { + "epoch": 0.11083491738558268, + "grad_norm": 1.2827951417341936, + "learning_rate": 1.999848537760577e-05, + "loss": 0.7315084934234619, + "step": 379 + }, + { + "epoch": 0.11112735780084808, + "grad_norm": 1.2954297558049002, + "learning_rate": 1.9998400065502113e-05, + "loss": 0.6256793737411499, + "step": 380 + }, + { + "epoch": 0.11141979821611346, + "grad_norm": 1.3569633064170001, + "learning_rate": 1.999831241633323e-05, + "loss": 0.7521710395812988, + "step": 381 + }, + { + "epoch": 0.11171223863137886, + "grad_norm": 1.0851029845548303, + "learning_rate": 1.999822243011961e-05, + "loss": 0.6824651956558228, + 
"step": 382 + }, + { + "epoch": 0.11200467904664424, + "grad_norm": 1.4206429861314096, + "learning_rate": 1.9998130106882286e-05, + "loss": 0.7254977226257324, + "step": 383 + }, + { + "epoch": 0.11229711946190964, + "grad_norm": 1.4795080730717471, + "learning_rate": 1.999803544664284e-05, + "loss": 0.8263741731643677, + "step": 384 + }, + { + "epoch": 0.11258955987717502, + "grad_norm": 1.3096519492267191, + "learning_rate": 1.9997938449423397e-05, + "loss": 0.6829507350921631, + "step": 385 + }, + { + "epoch": 0.11288200029244042, + "grad_norm": 1.2970935037264724, + "learning_rate": 1.9997839115246632e-05, + "loss": 0.7452428340911865, + "step": 386 + }, + { + "epoch": 0.11317444070770581, + "grad_norm": 1.322513824449788, + "learning_rate": 1.999773744413576e-05, + "loss": 0.7900702953338623, + "step": 387 + }, + { + "epoch": 0.1134668811229712, + "grad_norm": 1.288312120065537, + "learning_rate": 1.9997633436114547e-05, + "loss": 0.6215303540229797, + "step": 388 + }, + { + "epoch": 0.11375932153823659, + "grad_norm": 1.3132613017546322, + "learning_rate": 1.999752709120731e-05, + "loss": 0.798041820526123, + "step": 389 + }, + { + "epoch": 0.11405176195350197, + "grad_norm": 1.1590478323977431, + "learning_rate": 1.9997418409438893e-05, + "loss": 0.6033064126968384, + "step": 390 + }, + { + "epoch": 0.11434420236876737, + "grad_norm": 1.0686988063553795, + "learning_rate": 1.9997307390834712e-05, + "loss": 0.6358453631401062, + "step": 391 + }, + { + "epoch": 0.11463664278403275, + "grad_norm": 1.2775095189945147, + "learning_rate": 1.999719403542071e-05, + "loss": 0.6544308662414551, + "step": 392 + }, + { + "epoch": 0.11492908319929815, + "grad_norm": 1.3305771925144483, + "learning_rate": 1.9997078343223393e-05, + "loss": 0.73077392578125, + "step": 393 + }, + { + "epoch": 0.11522152361456353, + "grad_norm": 1.1914838503287841, + "learning_rate": 1.9996960314269792e-05, + "loss": 0.5874192118644714, + "step": 394 + }, + { + "epoch": 0.11551396402982893, 
+ "grad_norm": 1.420658082184349, + "learning_rate": 1.9996839948587503e-05, + "loss": 0.8242438435554504, + "step": 395 + }, + { + "epoch": 0.11580640444509431, + "grad_norm": 1.705790457884444, + "learning_rate": 1.9996717246204655e-05, + "loss": 0.9496668577194214, + "step": 396 + }, + { + "epoch": 0.1160988448603597, + "grad_norm": 1.2258839048083405, + "learning_rate": 1.9996592207149933e-05, + "loss": 0.6940287351608276, + "step": 397 + }, + { + "epoch": 0.11639128527562509, + "grad_norm": 1.4226760671412086, + "learning_rate": 1.999646483145256e-05, + "loss": 0.7403827905654907, + "step": 398 + }, + { + "epoch": 0.11668372569089049, + "grad_norm": 1.441557495225195, + "learning_rate": 1.9996335119142315e-05, + "loss": 0.7493172287940979, + "step": 399 + }, + { + "epoch": 0.11697616610615587, + "grad_norm": 1.1233068749163333, + "learning_rate": 1.9996203070249516e-05, + "loss": 0.6048015356063843, + "step": 400 + }, + { + "epoch": 0.11726860652142126, + "grad_norm": 1.218449987518831, + "learning_rate": 1.9996068684805025e-05, + "loss": 0.7220426797866821, + "step": 401 + }, + { + "epoch": 0.11756104693668665, + "grad_norm": 1.4820269559236292, + "learning_rate": 1.9995931962840255e-05, + "loss": 0.7294620275497437, + "step": 402 + }, + { + "epoch": 0.11785348735195204, + "grad_norm": 1.2693334480850886, + "learning_rate": 1.999579290438717e-05, + "loss": 0.7075647115707397, + "step": 403 + }, + { + "epoch": 0.11814592776721743, + "grad_norm": 1.4353448940274405, + "learning_rate": 1.9995651509478264e-05, + "loss": 0.7396657466888428, + "step": 404 + }, + { + "epoch": 0.11843836818248282, + "grad_norm": 1.5214596029668779, + "learning_rate": 1.999550777814659e-05, + "loss": 0.8240506649017334, + "step": 405 + }, + { + "epoch": 0.1187308085977482, + "grad_norm": 1.3463253886040645, + "learning_rate": 1.9995361710425752e-05, + "loss": 0.7518147826194763, + "step": 406 + }, + { + "epoch": 0.1190232490130136, + "grad_norm": 1.3938258800517485, + "learning_rate": 
1.9995213306349886e-05, + "loss": 0.6998933553695679, + "step": 407 + }, + { + "epoch": 0.11931568942827898, + "grad_norm": 2.8811625928277134, + "learning_rate": 1.999506256595368e-05, + "loss": 0.659205973148346, + "step": 408 + }, + { + "epoch": 0.11960812984354438, + "grad_norm": 1.6815673603725616, + "learning_rate": 1.9994909489272372e-05, + "loss": 0.7826964259147644, + "step": 409 + }, + { + "epoch": 0.11990057025880976, + "grad_norm": 1.4225942370637599, + "learning_rate": 1.999475407634174e-05, + "loss": 0.770768404006958, + "step": 410 + }, + { + "epoch": 0.12019301067407516, + "grad_norm": 1.4031411556955713, + "learning_rate": 1.9994596327198113e-05, + "loss": 0.7390692234039307, + "step": 411 + }, + { + "epoch": 0.12048545108934054, + "grad_norm": 1.238945633280151, + "learning_rate": 1.999443624187836e-05, + "loss": 0.7092628479003906, + "step": 412 + }, + { + "epoch": 0.12077789150460594, + "grad_norm": 1.2795019723948553, + "learning_rate": 1.9994273820419903e-05, + "loss": 0.5252765417098999, + "step": 413 + }, + { + "epoch": 0.12107033191987132, + "grad_norm": 1.389583747663469, + "learning_rate": 1.9994109062860707e-05, + "loss": 0.8131704330444336, + "step": 414 + }, + { + "epoch": 0.12136277233513672, + "grad_norm": 1.490804798338551, + "learning_rate": 1.9993941969239284e-05, + "loss": 0.8257562518119812, + "step": 415 + }, + { + "epoch": 0.1216552127504021, + "grad_norm": 1.5541597255876767, + "learning_rate": 1.999377253959469e-05, + "loss": 0.7163048982620239, + "step": 416 + }, + { + "epoch": 0.1219476531656675, + "grad_norm": 1.590877283394053, + "learning_rate": 1.9993600773966528e-05, + "loss": 0.7216504812240601, + "step": 417 + }, + { + "epoch": 0.12224009358093288, + "grad_norm": 1.6748981575800963, + "learning_rate": 1.9993426672394945e-05, + "loss": 0.7831340432167053, + "step": 418 + }, + { + "epoch": 0.12253253399619828, + "grad_norm": 1.3976993960000088, + "learning_rate": 1.9993250234920638e-05, + "loss": 0.7675709128379822, + 
"step": 419 + }, + { + "epoch": 0.12282497441146366, + "grad_norm": 1.454911379398845, + "learning_rate": 1.999307146158485e-05, + "loss": 0.8085238337516785, + "step": 420 + }, + { + "epoch": 0.12311741482672905, + "grad_norm": 1.2979608734451222, + "learning_rate": 1.9992890352429368e-05, + "loss": 0.735150933265686, + "step": 421 + }, + { + "epoch": 0.12340985524199444, + "grad_norm": 1.2046206432187132, + "learning_rate": 1.9992706907496523e-05, + "loss": 0.612186074256897, + "step": 422 + }, + { + "epoch": 0.12370229565725983, + "grad_norm": 1.364838486847665, + "learning_rate": 1.9992521126829194e-05, + "loss": 0.6636590957641602, + "step": 423 + }, + { + "epoch": 0.12399473607252522, + "grad_norm": 1.4068215451581474, + "learning_rate": 1.9992333010470806e-05, + "loss": 0.6814526319503784, + "step": 424 + }, + { + "epoch": 0.12428717648779061, + "grad_norm": 1.3620595505436823, + "learning_rate": 1.9992142558465335e-05, + "loss": 0.6940894722938538, + "step": 425 + }, + { + "epoch": 0.12457961690305601, + "grad_norm": 1.3427645949787534, + "learning_rate": 1.9991949770857294e-05, + "loss": 0.7485121488571167, + "step": 426 + }, + { + "epoch": 0.12487205731832139, + "grad_norm": 1.266832638558228, + "learning_rate": 1.9991754647691744e-05, + "loss": 0.5315885543823242, + "step": 427 + }, + { + "epoch": 0.1251644977335868, + "grad_norm": 1.2511757429133081, + "learning_rate": 1.9991557189014297e-05, + "loss": 0.7416529655456543, + "step": 428 + }, + { + "epoch": 0.12545693814885217, + "grad_norm": 1.4031357379707678, + "learning_rate": 1.9991357394871106e-05, + "loss": 0.7937026023864746, + "step": 429 + }, + { + "epoch": 0.12574937856411755, + "grad_norm": 1.3448962462478107, + "learning_rate": 1.9991155265308872e-05, + "loss": 0.7009662389755249, + "step": 430 + }, + { + "epoch": 0.12604181897938296, + "grad_norm": 1.3042132277590721, + "learning_rate": 1.999095080037484e-05, + "loss": 0.6577681303024292, + "step": 431 + }, + { + "epoch": 
0.12633425939464835, + "grad_norm": 1.4036627734956777, + "learning_rate": 1.9990744000116808e-05, + "loss": 0.7372399568557739, + "step": 432 + }, + { + "epoch": 0.12662669980991373, + "grad_norm": 1.3819832545517663, + "learning_rate": 1.999053486458311e-05, + "loss": 0.5959814190864563, + "step": 433 + }, + { + "epoch": 0.1269191402251791, + "grad_norm": 1.424207998116027, + "learning_rate": 1.999032339382263e-05, + "loss": 0.6684107780456543, + "step": 434 + }, + { + "epoch": 0.12721158064044452, + "grad_norm": 1.7048493578408517, + "learning_rate": 1.99901095878848e-05, + "loss": 0.8837687373161316, + "step": 435 + }, + { + "epoch": 0.1275040210557099, + "grad_norm": 3.7468635382669717, + "learning_rate": 1.9989893446819594e-05, + "loss": 0.7128579616546631, + "step": 436 + }, + { + "epoch": 0.1277964614709753, + "grad_norm": 1.2617709714670788, + "learning_rate": 1.9989674970677533e-05, + "loss": 0.6634687185287476, + "step": 437 + }, + { + "epoch": 0.12808890188624067, + "grad_norm": 1.626814629507008, + "learning_rate": 1.998945415950969e-05, + "loss": 0.7866299152374268, + "step": 438 + }, + { + "epoch": 0.12838134230150608, + "grad_norm": 1.6912246432889755, + "learning_rate": 1.998923101336767e-05, + "loss": 0.8104820251464844, + "step": 439 + }, + { + "epoch": 0.12867378271677146, + "grad_norm": 1.3163679319076276, + "learning_rate": 1.9989005532303637e-05, + "loss": 0.6643097400665283, + "step": 440 + }, + { + "epoch": 0.12896622313203684, + "grad_norm": 1.304280975921877, + "learning_rate": 1.9988777716370293e-05, + "loss": 0.7663843631744385, + "step": 441 + }, + { + "epoch": 0.12925866354730223, + "grad_norm": 1.4275530439491644, + "learning_rate": 1.9988547565620896e-05, + "loss": 0.8831629753112793, + "step": 442 + }, + { + "epoch": 0.12955110396256764, + "grad_norm": 1.2581390355141424, + "learning_rate": 1.9988315080109233e-05, + "loss": 0.6889798045158386, + "step": 443 + }, + { + "epoch": 0.12984354437783302, + "grad_norm": 1.2589816711321935, 
+ "learning_rate": 1.9988080259889652e-05, + "loss": 0.8173589706420898, + "step": 444 + }, + { + "epoch": 0.1301359847930984, + "grad_norm": 1.437216407920067, + "learning_rate": 1.998784310501704e-05, + "loss": 0.7444369196891785, + "step": 445 + }, + { + "epoch": 0.13042842520836379, + "grad_norm": 1.2527388287385341, + "learning_rate": 1.998760361554682e-05, + "loss": 0.6728573441505432, + "step": 446 + }, + { + "epoch": 0.1307208656236292, + "grad_norm": 1.4620149588082576, + "learning_rate": 1.998736179153499e-05, + "loss": 0.6398168802261353, + "step": 447 + }, + { + "epoch": 0.13101330603889458, + "grad_norm": 1.3925962417611275, + "learning_rate": 1.9987117633038063e-05, + "loss": 0.7367146015167236, + "step": 448 + }, + { + "epoch": 0.13130574645415996, + "grad_norm": 1.3497781950543108, + "learning_rate": 1.998687114011311e-05, + "loss": 0.7072159051895142, + "step": 449 + }, + { + "epoch": 0.13159818686942534, + "grad_norm": 1.402234544131691, + "learning_rate": 1.998662231281775e-05, + "loss": 0.7899993062019348, + "step": 450 + }, + { + "epoch": 0.13189062728469075, + "grad_norm": 1.4376114251018388, + "learning_rate": 1.9986371151210146e-05, + "loss": 0.7668592929840088, + "step": 451 + }, + { + "epoch": 0.13218306769995614, + "grad_norm": 1.3943197925338484, + "learning_rate": 1.9986117655349003e-05, + "loss": 0.7222825288772583, + "step": 452 + }, + { + "epoch": 0.13247550811522152, + "grad_norm": 1.2939952744587226, + "learning_rate": 1.9985861825293577e-05, + "loss": 0.7301540374755859, + "step": 453 + }, + { + "epoch": 0.1327679485304869, + "grad_norm": 1.174339392511722, + "learning_rate": 1.998560366110366e-05, + "loss": 0.6517907381057739, + "step": 454 + }, + { + "epoch": 0.1330603889457523, + "grad_norm": 1.5763167634786863, + "learning_rate": 1.99853431628396e-05, + "loss": 0.6889342069625854, + "step": 455 + }, + { + "epoch": 0.1333528293610177, + "grad_norm": 1.525770213874127, + "learning_rate": 1.9985080330562293e-05, + "loss": 
0.6804303526878357, + "step": 456 + }, + { + "epoch": 0.13364526977628308, + "grad_norm": 1.3944930335298842, + "learning_rate": 1.9984815164333163e-05, + "loss": 0.7699184417724609, + "step": 457 + }, + { + "epoch": 0.13393771019154846, + "grad_norm": 1.4886205672815649, + "learning_rate": 1.99845476642142e-05, + "loss": 0.7470533847808838, + "step": 458 + }, + { + "epoch": 0.13423015060681387, + "grad_norm": 1.251305257809984, + "learning_rate": 1.9984277830267927e-05, + "loss": 0.6689419746398926, + "step": 459 + }, + { + "epoch": 0.13452259102207925, + "grad_norm": 1.5088252817247363, + "learning_rate": 1.998400566255742e-05, + "loss": 0.6395387649536133, + "step": 460 + }, + { + "epoch": 0.13481503143734463, + "grad_norm": 1.3414013526988133, + "learning_rate": 1.9983731161146288e-05, + "loss": 0.7785208225250244, + "step": 461 + }, + { + "epoch": 0.13510747185261002, + "grad_norm": 1.2995640327613904, + "learning_rate": 1.9983454326098703e-05, + "loss": 0.6864018440246582, + "step": 462 + }, + { + "epoch": 0.13539991226787543, + "grad_norm": 1.424075352019454, + "learning_rate": 1.9983175157479366e-05, + "loss": 0.7201317548751831, + "step": 463 + }, + { + "epoch": 0.1356923526831408, + "grad_norm": 1.4977322356937255, + "learning_rate": 1.9982893655353534e-05, + "loss": 0.7128555774688721, + "step": 464 + }, + { + "epoch": 0.1359847930984062, + "grad_norm": 1.2421635772982216, + "learning_rate": 1.998260981978701e-05, + "loss": 0.7252457141876221, + "step": 465 + }, + { + "epoch": 0.1362772335136716, + "grad_norm": 1.472555101507684, + "learning_rate": 1.9982323650846137e-05, + "loss": 0.7453348636627197, + "step": 466 + }, + { + "epoch": 0.13656967392893699, + "grad_norm": 1.153602031844393, + "learning_rate": 1.9982035148597804e-05, + "loss": 0.6643078923225403, + "step": 467 + }, + { + "epoch": 0.13686211434420237, + "grad_norm": 1.280273878296217, + "learning_rate": 1.9981744313109445e-05, + "loss": 0.7249360084533691, + "step": 468 + }, + { + "epoch": 
0.13715455475946775, + "grad_norm": 1.2363385614561972, + "learning_rate": 1.9981451144449042e-05, + "loss": 0.8179303407669067, + "step": 469 + }, + { + "epoch": 0.13744699517473316, + "grad_norm": 1.1335812448130365, + "learning_rate": 1.9981155642685125e-05, + "loss": 0.6763637661933899, + "step": 470 + }, + { + "epoch": 0.13773943558999854, + "grad_norm": 1.4603088026603306, + "learning_rate": 1.998085780788676e-05, + "loss": 0.6684300303459167, + "step": 471 + }, + { + "epoch": 0.13803187600526393, + "grad_norm": 1.2670786265894947, + "learning_rate": 1.9980557640123566e-05, + "loss": 0.7251675128936768, + "step": 472 + }, + { + "epoch": 0.1383243164205293, + "grad_norm": 1.5269819113708596, + "learning_rate": 1.998025513946571e-05, + "loss": 0.7146456241607666, + "step": 473 + }, + { + "epoch": 0.13861675683579472, + "grad_norm": 1.2263952606430522, + "learning_rate": 1.9979950305983895e-05, + "loss": 0.7067978382110596, + "step": 474 + }, + { + "epoch": 0.1389091972510601, + "grad_norm": 1.2396761565289731, + "learning_rate": 1.9979643139749373e-05, + "loss": 0.7017637491226196, + "step": 475 + }, + { + "epoch": 0.13920163766632548, + "grad_norm": 1.397663972134979, + "learning_rate": 1.9979333640833947e-05, + "loss": 0.7511367201805115, + "step": 476 + }, + { + "epoch": 0.13949407808159087, + "grad_norm": 1.5675722536579784, + "learning_rate": 1.997902180930996e-05, + "loss": 0.8129127025604248, + "step": 477 + }, + { + "epoch": 0.13978651849685628, + "grad_norm": 1.3801608404871573, + "learning_rate": 1.9978707645250293e-05, + "loss": 0.7760868072509766, + "step": 478 + }, + { + "epoch": 0.14007895891212166, + "grad_norm": 1.2722362515735255, + "learning_rate": 1.9978391148728388e-05, + "loss": 0.5190733671188354, + "step": 479 + }, + { + "epoch": 0.14037139932738704, + "grad_norm": 1.4267690174722667, + "learning_rate": 1.9978072319818222e-05, + "loss": 0.759798526763916, + "step": 480 + }, + { + "epoch": 0.14066383974265242, + "grad_norm": 
1.3594087764036291, + "learning_rate": 1.997775115859432e-05, + "loss": 0.5750235319137573, + "step": 481 + }, + { + "epoch": 0.14095628015791783, + "grad_norm": 1.5288357817907694, + "learning_rate": 1.9977427665131748e-05, + "loss": 0.6837687492370605, + "step": 482 + }, + { + "epoch": 0.14124872057318322, + "grad_norm": 1.4085455647433316, + "learning_rate": 1.9977101839506123e-05, + "loss": 0.8774302005767822, + "step": 483 + }, + { + "epoch": 0.1415411609884486, + "grad_norm": 1.3951237263634118, + "learning_rate": 1.9976773681793605e-05, + "loss": 0.6447024345397949, + "step": 484 + }, + { + "epoch": 0.14183360140371398, + "grad_norm": 1.3077152366881364, + "learning_rate": 1.99764431920709e-05, + "loss": 0.6212965250015259, + "step": 485 + }, + { + "epoch": 0.1421260418189794, + "grad_norm": 1.7246179492768339, + "learning_rate": 1.9976110370415257e-05, + "loss": 0.7606823444366455, + "step": 486 + }, + { + "epoch": 0.14241848223424478, + "grad_norm": 1.6009360634049956, + "learning_rate": 1.9975775216904468e-05, + "loss": 0.792106032371521, + "step": 487 + }, + { + "epoch": 0.14271092264951016, + "grad_norm": 1.526072177508378, + "learning_rate": 1.997543773161688e-05, + "loss": 0.828373372554779, + "step": 488 + }, + { + "epoch": 0.14300336306477554, + "grad_norm": 1.2193329399673667, + "learning_rate": 1.997509791463137e-05, + "loss": 0.7148743867874146, + "step": 489 + }, + { + "epoch": 0.14329580348004095, + "grad_norm": 1.617921839516307, + "learning_rate": 1.9974755766027372e-05, + "loss": 0.6566554307937622, + "step": 490 + }, + { + "epoch": 0.14358824389530633, + "grad_norm": 1.2041404679997165, + "learning_rate": 1.9974411285884865e-05, + "loss": 0.7833706140518188, + "step": 491 + }, + { + "epoch": 0.14388068431057172, + "grad_norm": 1.3715764541616051, + "learning_rate": 1.997406447428436e-05, + "loss": 0.7661226987838745, + "step": 492 + }, + { + "epoch": 0.1441731247258371, + "grad_norm": 1.2510873907811162, + "learning_rate": 
1.9973715331306935e-05, + "loss": 0.5403884649276733, + "step": 493 + }, + { + "epoch": 0.1444655651411025, + "grad_norm": 1.417853529635827, + "learning_rate": 1.9973363857034183e-05, + "loss": 0.7744722366333008, + "step": 494 + }, + { + "epoch": 0.1447580055563679, + "grad_norm": 1.7245567814035911, + "learning_rate": 1.9973010051548274e-05, + "loss": 0.9036808013916016, + "step": 495 + }, + { + "epoch": 0.14505044597163327, + "grad_norm": 1.2752769917707012, + "learning_rate": 1.9972653914931902e-05, + "loss": 0.6952388286590576, + "step": 496 + }, + { + "epoch": 0.14534288638689866, + "grad_norm": 1.5454177465030166, + "learning_rate": 1.9972295447268312e-05, + "loss": 0.7818677425384521, + "step": 497 + }, + { + "epoch": 0.14563532680216407, + "grad_norm": 1.2104336195623258, + "learning_rate": 1.9971934648641294e-05, + "loss": 0.8197327256202698, + "step": 498 + }, + { + "epoch": 0.14592776721742945, + "grad_norm": 1.1376920899270277, + "learning_rate": 1.997157151913518e-05, + "loss": 0.5898807644844055, + "step": 499 + }, + { + "epoch": 0.14622020763269483, + "grad_norm": 1.6480348319290024, + "learning_rate": 1.9971206058834857e-05, + "loss": 0.7980005741119385, + "step": 500 + }, + { + "epoch": 0.14651264804796021, + "grad_norm": 1.2480430258500308, + "learning_rate": 1.997083826782574e-05, + "loss": 0.7161837816238403, + "step": 501 + }, + { + "epoch": 0.14680508846322562, + "grad_norm": 1.436852590534495, + "learning_rate": 1.99704681461938e-05, + "loss": 0.7657293081283569, + "step": 502 + }, + { + "epoch": 0.147097528878491, + "grad_norm": 1.256627894457605, + "learning_rate": 1.9970095694025553e-05, + "loss": 0.6638028621673584, + "step": 503 + }, + { + "epoch": 0.1473899692937564, + "grad_norm": 1.344090583049545, + "learning_rate": 1.996972091140806e-05, + "loss": 0.8759262561798096, + "step": 504 + }, + { + "epoch": 0.1476824097090218, + "grad_norm": 1.1099770302505587, + "learning_rate": 1.9969343798428916e-05, + "loss": 0.6686065196990967, + 
"step": 505 + }, + { + "epoch": 0.14797485012428718, + "grad_norm": 1.5672815870081807, + "learning_rate": 1.9968964355176276e-05, + "loss": 0.7900313138961792, + "step": 506 + }, + { + "epoch": 0.14826729053955257, + "grad_norm": 1.3116088800480374, + "learning_rate": 1.996858258173883e-05, + "loss": 0.699286937713623, + "step": 507 + }, + { + "epoch": 0.14855973095481795, + "grad_norm": 1.149004701425465, + "learning_rate": 1.9968198478205817e-05, + "loss": 0.6613560914993286, + "step": 508 + }, + { + "epoch": 0.14885217137008336, + "grad_norm": 1.471579106109443, + "learning_rate": 1.9967812044667014e-05, + "loss": 0.8586459755897522, + "step": 509 + }, + { + "epoch": 0.14914461178534874, + "grad_norm": 1.5307049334622256, + "learning_rate": 1.9967423281212754e-05, + "loss": 0.6620850563049316, + "step": 510 + }, + { + "epoch": 0.14943705220061412, + "grad_norm": 1.6192191406380994, + "learning_rate": 1.9967032187933905e-05, + "loss": 0.7991048097610474, + "step": 511 + }, + { + "epoch": 0.1497294926158795, + "grad_norm": 1.2792732447271702, + "learning_rate": 1.9966638764921882e-05, + "loss": 0.7301167845726013, + "step": 512 + }, + { + "epoch": 0.15002193303114492, + "grad_norm": 1.244527824938295, + "learning_rate": 1.9966243012268645e-05, + "loss": 0.6470698118209839, + "step": 513 + }, + { + "epoch": 0.1503143734464103, + "grad_norm": 1.3436689137677134, + "learning_rate": 1.99658449300667e-05, + "loss": 0.5766996145248413, + "step": 514 + }, + { + "epoch": 0.15060681386167568, + "grad_norm": 1.2104018154852028, + "learning_rate": 1.9965444518409098e-05, + "loss": 0.6365845203399658, + "step": 515 + }, + { + "epoch": 0.15089925427694106, + "grad_norm": 1.6995742833660814, + "learning_rate": 1.9965041777389426e-05, + "loss": 0.6945745944976807, + "step": 516 + }, + { + "epoch": 0.15119169469220647, + "grad_norm": 1.6841525179657149, + "learning_rate": 1.996463670710183e-05, + "loss": 0.802032470703125, + "step": 517 + }, + { + "epoch": 0.15148413510747186, + 
"grad_norm": 1.4666130226044234, + "learning_rate": 1.996422930764099e-05, + "loss": 0.7429964542388916, + "step": 518 + }, + { + "epoch": 0.15177657552273724, + "grad_norm": 1.5508181233008433, + "learning_rate": 1.9963819579102134e-05, + "loss": 0.6462180614471436, + "step": 519 + }, + { + "epoch": 0.15206901593800262, + "grad_norm": 1.3226128228565077, + "learning_rate": 1.996340752158103e-05, + "loss": 0.888412594795227, + "step": 520 + }, + { + "epoch": 0.15236145635326803, + "grad_norm": 1.386680099002057, + "learning_rate": 1.9962993135173996e-05, + "loss": 0.6734700798988342, + "step": 521 + }, + { + "epoch": 0.15265389676853341, + "grad_norm": 1.385050142293082, + "learning_rate": 1.9962576419977894e-05, + "loss": 0.6951336860656738, + "step": 522 + }, + { + "epoch": 0.1529463371837988, + "grad_norm": 1.26022036147928, + "learning_rate": 1.9962157376090126e-05, + "loss": 0.7130852341651917, + "step": 523 + }, + { + "epoch": 0.15323877759906418, + "grad_norm": 1.4353500802059385, + "learning_rate": 1.9961736003608646e-05, + "loss": 0.8322055339813232, + "step": 524 + }, + { + "epoch": 0.1535312180143296, + "grad_norm": 1.2563635075596429, + "learning_rate": 1.996131230263194e-05, + "loss": 0.7031791806221008, + "step": 525 + }, + { + "epoch": 0.15382365842959497, + "grad_norm": 1.3606474846075662, + "learning_rate": 1.9960886273259052e-05, + "loss": 0.8268769979476929, + "step": 526 + }, + { + "epoch": 0.15411609884486036, + "grad_norm": 1.048782156231717, + "learning_rate": 1.9960457915589557e-05, + "loss": 0.6843237280845642, + "step": 527 + }, + { + "epoch": 0.15440853926012574, + "grad_norm": 1.29845256190474, + "learning_rate": 1.9960027229723585e-05, + "loss": 0.8267906904220581, + "step": 528 + }, + { + "epoch": 0.15470097967539115, + "grad_norm": 1.502232175088585, + "learning_rate": 1.9959594215761807e-05, + "loss": 0.8259629011154175, + "step": 529 + }, + { + "epoch": 0.15499342009065653, + "grad_norm": 1.3618507954167858, + "learning_rate": 
1.9959158873805435e-05, + "loss": 0.654765248298645, + "step": 530 + }, + { + "epoch": 0.1552858605059219, + "grad_norm": 1.3762650099604372, + "learning_rate": 1.9958721203956233e-05, + "loss": 0.7841149568557739, + "step": 531 + }, + { + "epoch": 0.1555783009211873, + "grad_norm": 1.131527995151024, + "learning_rate": 1.9958281206316497e-05, + "loss": 0.7364583015441895, + "step": 532 + }, + { + "epoch": 0.1558707413364527, + "grad_norm": 1.2428392866727909, + "learning_rate": 1.9957838880989076e-05, + "loss": 0.7985796928405762, + "step": 533 + }, + { + "epoch": 0.1561631817517181, + "grad_norm": 1.7674168807742325, + "learning_rate": 1.9957394228077363e-05, + "loss": 0.8432350754737854, + "step": 534 + }, + { + "epoch": 0.15645562216698347, + "grad_norm": 1.409652061557183, + "learning_rate": 1.995694724768529e-05, + "loss": 0.713615894317627, + "step": 535 + }, + { + "epoch": 0.15674806258224885, + "grad_norm": 1.3406073565001748, + "learning_rate": 1.9956497939917336e-05, + "loss": 0.6472936868667603, + "step": 536 + }, + { + "epoch": 0.15704050299751426, + "grad_norm": 1.4828550722777096, + "learning_rate": 1.9956046304878528e-05, + "loss": 0.7963594198226929, + "step": 537 + }, + { + "epoch": 0.15733294341277965, + "grad_norm": 1.3875627998599316, + "learning_rate": 1.9955592342674427e-05, + "loss": 0.8043302893638611, + "step": 538 + }, + { + "epoch": 0.15762538382804503, + "grad_norm": 1.3187786308741334, + "learning_rate": 1.995513605341115e-05, + "loss": 0.6277294754981995, + "step": 539 + }, + { + "epoch": 0.1579178242433104, + "grad_norm": 1.2577326193858611, + "learning_rate": 1.9954677437195345e-05, + "loss": 0.569086492061615, + "step": 540 + }, + { + "epoch": 0.15821026465857582, + "grad_norm": 1.4002846512494251, + "learning_rate": 1.9954216494134217e-05, + "loss": 0.7694308757781982, + "step": 541 + }, + { + "epoch": 0.1585027050738412, + "grad_norm": 1.2602961243105442, + "learning_rate": 1.9953753224335504e-05, + "loss": 0.7782721519470215, + 
"step": 542 + }, + { + "epoch": 0.1587951454891066, + "grad_norm": 1.4115021596903525, + "learning_rate": 1.9953287627907498e-05, + "loss": 0.6231539249420166, + "step": 543 + }, + { + "epoch": 0.159087585904372, + "grad_norm": 1.6469299670076099, + "learning_rate": 1.9952819704959022e-05, + "loss": 0.6431725025177002, + "step": 544 + }, + { + "epoch": 0.15938002631963738, + "grad_norm": 1.3041234892791729, + "learning_rate": 1.9952349455599455e-05, + "loss": 0.7062366008758545, + "step": 545 + }, + { + "epoch": 0.15967246673490276, + "grad_norm": 1.3521393089140767, + "learning_rate": 1.9951876879938716e-05, + "loss": 0.5376520156860352, + "step": 546 + }, + { + "epoch": 0.15996490715016815, + "grad_norm": 1.385911158215194, + "learning_rate": 1.9951401978087267e-05, + "loss": 0.7693386077880859, + "step": 547 + }, + { + "epoch": 0.16025734756543356, + "grad_norm": 1.7168051396485104, + "learning_rate": 1.9950924750156107e-05, + "loss": 0.6735765337944031, + "step": 548 + }, + { + "epoch": 0.16054978798069894, + "grad_norm": 1.1186135901816567, + "learning_rate": 1.995044519625679e-05, + "loss": 0.5333552360534668, + "step": 549 + }, + { + "epoch": 0.16084222839596432, + "grad_norm": 1.2149038323564916, + "learning_rate": 1.994996331650141e-05, + "loss": 0.6694493293762207, + "step": 550 + }, + { + "epoch": 0.1611346688112297, + "grad_norm": 1.3895443962170193, + "learning_rate": 1.9949479111002596e-05, + "loss": 0.6056857109069824, + "step": 551 + }, + { + "epoch": 0.1614271092264951, + "grad_norm": 1.4990214566868623, + "learning_rate": 1.9948992579873538e-05, + "loss": 0.7174896001815796, + "step": 552 + }, + { + "epoch": 0.1617195496417605, + "grad_norm": 1.4417886999069138, + "learning_rate": 1.9948503723227954e-05, + "loss": 0.9150595664978027, + "step": 553 + }, + { + "epoch": 0.16201199005702588, + "grad_norm": 1.475120009674046, + "learning_rate": 1.9948012541180116e-05, + "loss": 0.7418098449707031, + "step": 554 + }, + { + "epoch": 0.16230443047229126, 
+ "grad_norm": 1.3802668140870205, + "learning_rate": 1.9947519033844828e-05, + "loss": 0.6937648057937622, + "step": 555 + }, + { + "epoch": 0.16259687088755667, + "grad_norm": 1.198267913228467, + "learning_rate": 1.9947023201337448e-05, + "loss": 0.628747820854187, + "step": 556 + }, + { + "epoch": 0.16288931130282205, + "grad_norm": 1.3166666693196283, + "learning_rate": 1.9946525043773875e-05, + "loss": 0.6252326965332031, + "step": 557 + }, + { + "epoch": 0.16318175171808744, + "grad_norm": 1.4085830915284543, + "learning_rate": 1.9946024561270547e-05, + "loss": 0.6243278980255127, + "step": 558 + }, + { + "epoch": 0.16347419213335282, + "grad_norm": 1.515326552036181, + "learning_rate": 1.994552175394445e-05, + "loss": 0.7613602876663208, + "step": 559 + }, + { + "epoch": 0.16376663254861823, + "grad_norm": 1.4167210376939137, + "learning_rate": 1.9945016621913115e-05, + "loss": 0.7680152654647827, + "step": 560 + }, + { + "epoch": 0.1640590729638836, + "grad_norm": 1.4413485992010024, + "learning_rate": 1.9944509165294614e-05, + "loss": 0.6926383972167969, + "step": 561 + }, + { + "epoch": 0.164351513379149, + "grad_norm": 1.3901402403092062, + "learning_rate": 1.9943999384207556e-05, + "loss": 0.6822172403335571, + "step": 562 + }, + { + "epoch": 0.16464395379441438, + "grad_norm": 1.1253426305557543, + "learning_rate": 1.99434872787711e-05, + "loss": 0.6533722281455994, + "step": 563 + }, + { + "epoch": 0.1649363942096798, + "grad_norm": 1.3434183662540475, + "learning_rate": 1.9942972849104955e-05, + "loss": 0.6754113435745239, + "step": 564 + }, + { + "epoch": 0.16522883462494517, + "grad_norm": 1.3906070154993262, + "learning_rate": 1.9942456095329357e-05, + "loss": 0.5585163235664368, + "step": 565 + }, + { + "epoch": 0.16552127504021055, + "grad_norm": 1.18702583603665, + "learning_rate": 1.99419370175651e-05, + "loss": 0.6268453598022461, + "step": 566 + }, + { + "epoch": 0.16581371545547594, + "grad_norm": 1.3564219134919553, + "learning_rate": 
1.994141561593351e-05, + "loss": 0.6508245468139648, + "step": 567 + }, + { + "epoch": 0.16610615587074135, + "grad_norm": 1.353057425024783, + "learning_rate": 1.9940891890556468e-05, + "loss": 0.7337379455566406, + "step": 568 + }, + { + "epoch": 0.16639859628600673, + "grad_norm": 1.3764723902611744, + "learning_rate": 1.9940365841556385e-05, + "loss": 0.7888853549957275, + "step": 569 + }, + { + "epoch": 0.1666910367012721, + "grad_norm": 1.5384301744775797, + "learning_rate": 1.993983746905623e-05, + "loss": 0.777199923992157, + "step": 570 + }, + { + "epoch": 0.1669834771165375, + "grad_norm": 1.5194907821323576, + "learning_rate": 1.9939306773179498e-05, + "loss": 0.761531412601471, + "step": 571 + }, + { + "epoch": 0.1672759175318029, + "grad_norm": 1.5063040441270878, + "learning_rate": 1.993877375405024e-05, + "loss": 0.7060664296150208, + "step": 572 + }, + { + "epoch": 0.16756835794706829, + "grad_norm": 1.472994627130685, + "learning_rate": 1.9938238411793045e-05, + "loss": 0.6797431707382202, + "step": 573 + }, + { + "epoch": 0.16786079836233367, + "grad_norm": 1.3131930617818641, + "learning_rate": 1.9937700746533048e-05, + "loss": 0.7202910780906677, + "step": 574 + }, + { + "epoch": 0.16815323877759905, + "grad_norm": 1.198711592546953, + "learning_rate": 1.9937160758395923e-05, + "loss": 0.7241546511650085, + "step": 575 + }, + { + "epoch": 0.16844567919286446, + "grad_norm": 1.3694786109804489, + "learning_rate": 1.993661844750789e-05, + "loss": 0.7055338621139526, + "step": 576 + }, + { + "epoch": 0.16873811960812984, + "grad_norm": 1.4237978283864139, + "learning_rate": 1.993607381399571e-05, + "loss": 0.6973986625671387, + "step": 577 + }, + { + "epoch": 0.16903056002339523, + "grad_norm": 1.1715457050926792, + "learning_rate": 1.993552685798669e-05, + "loss": 0.693436861038208, + "step": 578 + }, + { + "epoch": 0.1693230004386606, + "grad_norm": 1.5585764488361307, + "learning_rate": 1.9934977579608676e-05, + "loss": 0.6687765121459961, + 
"step": 579 + }, + { + "epoch": 0.16961544085392602, + "grad_norm": 1.3798925262407884, + "learning_rate": 1.9934425978990057e-05, + "loss": 0.7776578068733215, + "step": 580 + }, + { + "epoch": 0.1699078812691914, + "grad_norm": 1.3168335454892666, + "learning_rate": 1.9933872056259768e-05, + "loss": 0.6914045810699463, + "step": 581 + }, + { + "epoch": 0.17020032168445678, + "grad_norm": 1.4649859185166105, + "learning_rate": 1.9933315811547283e-05, + "loss": 0.8005306720733643, + "step": 582 + }, + { + "epoch": 0.1704927620997222, + "grad_norm": 1.3952257625848015, + "learning_rate": 1.9932757244982625e-05, + "loss": 0.6936507225036621, + "step": 583 + }, + { + "epoch": 0.17078520251498758, + "grad_norm": 1.157795409448355, + "learning_rate": 1.9932196356696353e-05, + "loss": 0.6915504932403564, + "step": 584 + }, + { + "epoch": 0.17107764293025296, + "grad_norm": 1.4153568154846778, + "learning_rate": 1.9931633146819573e-05, + "loss": 0.7583723664283752, + "step": 585 + }, + { + "epoch": 0.17137008334551834, + "grad_norm": 1.2959976429359619, + "learning_rate": 1.9931067615483927e-05, + "loss": 0.7097266912460327, + "step": 586 + }, + { + "epoch": 0.17166252376078375, + "grad_norm": 1.5238633829769868, + "learning_rate": 1.9930499762821608e-05, + "loss": 0.7586667537689209, + "step": 587 + }, + { + "epoch": 0.17195496417604914, + "grad_norm": 1.3505202775838374, + "learning_rate": 1.9929929588965352e-05, + "loss": 0.7043411731719971, + "step": 588 + }, + { + "epoch": 0.17224740459131452, + "grad_norm": 1.3150009626714483, + "learning_rate": 1.9929357094048425e-05, + "loss": 0.8502261638641357, + "step": 589 + }, + { + "epoch": 0.1725398450065799, + "grad_norm": 1.3901300269374877, + "learning_rate": 1.992878227820465e-05, + "loss": 0.7196993827819824, + "step": 590 + }, + { + "epoch": 0.1728322854218453, + "grad_norm": 1.5475395216492736, + "learning_rate": 1.9928205141568388e-05, + "loss": 0.6783720850944519, + "step": 591 + }, + { + "epoch": 
0.1731247258371107, + "grad_norm": 1.1911883688546063, + "learning_rate": 1.9927625684274534e-05, + "loss": 0.7128307819366455, + "step": 592 + }, + { + "epoch": 0.17341716625237608, + "grad_norm": 1.226507853409212, + "learning_rate": 1.9927043906458538e-05, + "loss": 0.7289423942565918, + "step": 593 + }, + { + "epoch": 0.17370960666764146, + "grad_norm": 1.298942183876381, + "learning_rate": 1.992645980825639e-05, + "loss": 0.6306120157241821, + "step": 594 + }, + { + "epoch": 0.17400204708290687, + "grad_norm": 1.2456494719411173, + "learning_rate": 1.9925873389804614e-05, + "loss": 0.7910655736923218, + "step": 595 + }, + { + "epoch": 0.17429448749817225, + "grad_norm": 1.267940212117298, + "learning_rate": 1.9925284651240282e-05, + "loss": 0.6075282096862793, + "step": 596 + }, + { + "epoch": 0.17458692791343763, + "grad_norm": 1.251937615037275, + "learning_rate": 1.992469359270101e-05, + "loss": 0.6270443201065063, + "step": 597 + }, + { + "epoch": 0.17487936832870302, + "grad_norm": 1.3200413033724028, + "learning_rate": 1.9924100214324955e-05, + "loss": 0.6487830877304077, + "step": 598 + }, + { + "epoch": 0.17517180874396843, + "grad_norm": 1.45237431858529, + "learning_rate": 1.9923504516250814e-05, + "loss": 0.5986843705177307, + "step": 599 + }, + { + "epoch": 0.1754642491592338, + "grad_norm": 1.2191897136056242, + "learning_rate": 1.992290649861783e-05, + "loss": 0.7734183073043823, + "step": 600 + }, + { + "epoch": 0.1757566895744992, + "grad_norm": 1.167414919229407, + "learning_rate": 1.9922306161565782e-05, + "loss": 0.5784964561462402, + "step": 601 + }, + { + "epoch": 0.17604912998976457, + "grad_norm": 1.501564665297397, + "learning_rate": 1.9921703505234995e-05, + "loss": 0.8034321069717407, + "step": 602 + }, + { + "epoch": 0.17634157040502998, + "grad_norm": 1.314622713247698, + "learning_rate": 1.992109852976634e-05, + "loss": 0.8153722882270813, + "step": 603 + }, + { + "epoch": 0.17663401082029537, + "grad_norm": 1.877065501880657, + 
"learning_rate": 1.992049123530123e-05, + "loss": 0.7293002605438232, + "step": 604 + }, + { + "epoch": 0.17692645123556075, + "grad_norm": 1.514670729590329, + "learning_rate": 1.9919881621981606e-05, + "loss": 0.7108439207077026, + "step": 605 + }, + { + "epoch": 0.17721889165082613, + "grad_norm": 1.4748189889445555, + "learning_rate": 1.9919269689949968e-05, + "loss": 0.7581946849822998, + "step": 606 + }, + { + "epoch": 0.17751133206609154, + "grad_norm": 1.2337358872247315, + "learning_rate": 1.991865543934935e-05, + "loss": 0.6821258068084717, + "step": 607 + }, + { + "epoch": 0.17780377248135693, + "grad_norm": 1.2791852908008183, + "learning_rate": 1.991803887032333e-05, + "loss": 0.7116109728813171, + "step": 608 + }, + { + "epoch": 0.1780962128966223, + "grad_norm": 1.2208883706731903, + "learning_rate": 1.9917419983016025e-05, + "loss": 0.6680186986923218, + "step": 609 + }, + { + "epoch": 0.1783886533118877, + "grad_norm": 1.3494621179320938, + "learning_rate": 1.99167987775721e-05, + "loss": 0.6763704419136047, + "step": 610 + }, + { + "epoch": 0.1786810937271531, + "grad_norm": 1.4133729383070797, + "learning_rate": 1.9916175254136755e-05, + "loss": 0.756158709526062, + "step": 611 + }, + { + "epoch": 0.17897353414241848, + "grad_norm": 1.4652489049885558, + "learning_rate": 1.9915549412855734e-05, + "loss": 0.600861132144928, + "step": 612 + }, + { + "epoch": 0.17926597455768387, + "grad_norm": 1.4731466609399737, + "learning_rate": 1.991492125387533e-05, + "loss": 0.6927047967910767, + "step": 613 + }, + { + "epoch": 0.17955841497294925, + "grad_norm": 1.6937006516406405, + "learning_rate": 1.9914290777342362e-05, + "loss": 0.6908516883850098, + "step": 614 + }, + { + "epoch": 0.17985085538821466, + "grad_norm": 1.4155029526585772, + "learning_rate": 1.9913657983404206e-05, + "loss": 0.7968926429748535, + "step": 615 + }, + { + "epoch": 0.18014329580348004, + "grad_norm": 1.1016955037712495, + "learning_rate": 1.9913022872208773e-05, + "loss": 
0.6035164594650269, + "step": 616 + }, + { + "epoch": 0.18043573621874542, + "grad_norm": 1.4061380717551752, + "learning_rate": 1.9912385443904518e-05, + "loss": 0.6733090877532959, + "step": 617 + }, + { + "epoch": 0.1807281766340108, + "grad_norm": 2.2181842231696645, + "learning_rate": 1.9911745698640426e-05, + "loss": 0.6968391537666321, + "step": 618 + }, + { + "epoch": 0.18102061704927622, + "grad_norm": 1.2136657361400474, + "learning_rate": 1.991110363656605e-05, + "loss": 0.7126309871673584, + "step": 619 + }, + { + "epoch": 0.1813130574645416, + "grad_norm": 1.5461052617008268, + "learning_rate": 1.9910459257831455e-05, + "loss": 0.8604997396469116, + "step": 620 + }, + { + "epoch": 0.18160549787980698, + "grad_norm": 1.4378853015325992, + "learning_rate": 1.9909812562587266e-05, + "loss": 0.674797534942627, + "step": 621 + }, + { + "epoch": 0.1818979382950724, + "grad_norm": 1.4538548213207452, + "learning_rate": 1.9909163550984644e-05, + "loss": 0.7439107894897461, + "step": 622 + }, + { + "epoch": 0.18219037871033777, + "grad_norm": 1.4410118469577065, + "learning_rate": 1.9908512223175293e-05, + "loss": 0.7137601971626282, + "step": 623 + }, + { + "epoch": 0.18248281912560316, + "grad_norm": 1.286772355171783, + "learning_rate": 1.9907858579311448e-05, + "loss": 0.6395502090454102, + "step": 624 + }, + { + "epoch": 0.18277525954086854, + "grad_norm": 1.7411485569290241, + "learning_rate": 1.9907202619545905e-05, + "loss": 0.6747852563858032, + "step": 625 + }, + { + "epoch": 0.18306769995613395, + "grad_norm": 1.3891342500470065, + "learning_rate": 1.9906544344031986e-05, + "loss": 0.6995632648468018, + "step": 626 + }, + { + "epoch": 0.18336014037139933, + "grad_norm": 1.3916150531596103, + "learning_rate": 1.9905883752923557e-05, + "loss": 0.7006711363792419, + "step": 627 + }, + { + "epoch": 0.18365258078666472, + "grad_norm": 1.189158109720048, + "learning_rate": 1.990522084637503e-05, + "loss": 0.660778820514679, + "step": 628 + }, + { + 
"epoch": 0.1839450212019301, + "grad_norm": 1.258003733155152, + "learning_rate": 1.9904555624541362e-05, + "loss": 0.5826665163040161, + "step": 629 + }, + { + "epoch": 0.1842374616171955, + "grad_norm": 1.5565251427155322, + "learning_rate": 1.990388808757803e-05, + "loss": 0.8064266443252563, + "step": 630 + }, + { + "epoch": 0.1845299020324609, + "grad_norm": 1.3066621609893527, + "learning_rate": 1.9903218235641078e-05, + "loss": 0.6856451034545898, + "step": 631 + }, + { + "epoch": 0.18482234244772627, + "grad_norm": 1.325447510265949, + "learning_rate": 1.9902546068887076e-05, + "loss": 0.6423801183700562, + "step": 632 + }, + { + "epoch": 0.18511478286299166, + "grad_norm": 1.252931011950935, + "learning_rate": 1.9901871587473135e-05, + "loss": 0.6903005242347717, + "step": 633 + }, + { + "epoch": 0.18540722327825707, + "grad_norm": 1.2981623515351661, + "learning_rate": 1.9901194791556916e-05, + "loss": 0.636742115020752, + "step": 634 + }, + { + "epoch": 0.18569966369352245, + "grad_norm": 1.154196245030106, + "learning_rate": 1.9900515681296614e-05, + "loss": 0.6541105508804321, + "step": 635 + }, + { + "epoch": 0.18599210410878783, + "grad_norm": 1.2463484642096474, + "learning_rate": 1.9899834256850973e-05, + "loss": 0.7026485204696655, + "step": 636 + }, + { + "epoch": 0.1862845445240532, + "grad_norm": 1.2626549460002545, + "learning_rate": 1.989915051837926e-05, + "loss": 0.6232702732086182, + "step": 637 + }, + { + "epoch": 0.18657698493931862, + "grad_norm": 1.222405284140282, + "learning_rate": 1.9898464466041306e-05, + "loss": 0.5971217155456543, + "step": 638 + }, + { + "epoch": 0.186869425354584, + "grad_norm": 1.228365693552395, + "learning_rate": 1.9897776099997463e-05, + "loss": 0.7942230701446533, + "step": 639 + }, + { + "epoch": 0.1871618657698494, + "grad_norm": 1.4547764939553913, + "learning_rate": 1.9897085420408637e-05, + "loss": 0.6578072309494019, + "step": 640 + }, + { + "epoch": 0.18745430618511477, + "grad_norm": 
1.3118111344764942, + "learning_rate": 1.989639242743627e-05, + "loss": 0.6928422451019287, + "step": 641 + }, + { + "epoch": 0.18774674660038018, + "grad_norm": 1.4232777703090678, + "learning_rate": 1.9895697121242346e-05, + "loss": 0.7656213641166687, + "step": 642 + }, + { + "epoch": 0.18803918701564556, + "grad_norm": 1.3841907158773847, + "learning_rate": 1.9894999501989383e-05, + "loss": 0.6540038585662842, + "step": 643 + }, + { + "epoch": 0.18833162743091095, + "grad_norm": 1.5637672668766274, + "learning_rate": 1.989429956984045e-05, + "loss": 0.707741379737854, + "step": 644 + }, + { + "epoch": 0.18862406784617633, + "grad_norm": 1.2389494128425964, + "learning_rate": 1.9893597324959156e-05, + "loss": 0.6191326379776001, + "step": 645 + }, + { + "epoch": 0.18891650826144174, + "grad_norm": 1.2174290538744046, + "learning_rate": 1.9892892767509634e-05, + "loss": 0.616736114025116, + "step": 646 + }, + { + "epoch": 0.18920894867670712, + "grad_norm": 1.4366227278982104, + "learning_rate": 1.989218589765658e-05, + "loss": 0.803301215171814, + "step": 647 + }, + { + "epoch": 0.1895013890919725, + "grad_norm": 1.2775653707157333, + "learning_rate": 1.989147671556522e-05, + "loss": 0.6528021097183228, + "step": 648 + }, + { + "epoch": 0.1897938295072379, + "grad_norm": 1.5463247112798635, + "learning_rate": 1.9890765221401314e-05, + "loss": 0.6966919898986816, + "step": 649 + }, + { + "epoch": 0.1900862699225033, + "grad_norm": 1.2768484224289256, + "learning_rate": 1.9890051415331178e-05, + "loss": 0.7223595380783081, + "step": 650 + }, + { + "epoch": 0.19037871033776868, + "grad_norm": 1.404271714764208, + "learning_rate": 1.9889335297521656e-05, + "loss": 0.6727452278137207, + "step": 651 + }, + { + "epoch": 0.19067115075303406, + "grad_norm": 1.5662163632688932, + "learning_rate": 1.988861686814014e-05, + "loss": 0.7008258104324341, + "step": 652 + }, + { + "epoch": 0.19096359116829945, + "grad_norm": 1.3756400508505757, + "learning_rate": 
1.988789612735455e-05, + "loss": 0.7624703049659729, + "step": 653 + }, + { + "epoch": 0.19125603158356486, + "grad_norm": 1.4133612106119275, + "learning_rate": 1.988717307533336e-05, + "loss": 0.6813088655471802, + "step": 654 + }, + { + "epoch": 0.19154847199883024, + "grad_norm": 1.1919173127519105, + "learning_rate": 1.988644771224558e-05, + "loss": 0.5401284694671631, + "step": 655 + }, + { + "epoch": 0.19184091241409562, + "grad_norm": 1.4613018451006843, + "learning_rate": 1.9885720038260756e-05, + "loss": 0.6805379986763, + "step": 656 + }, + { + "epoch": 0.19213335282936103, + "grad_norm": 1.5412845974712732, + "learning_rate": 1.9884990053548982e-05, + "loss": 0.6449974775314331, + "step": 657 + }, + { + "epoch": 0.19242579324462641, + "grad_norm": 1.3481077932409014, + "learning_rate": 1.988425775828088e-05, + "loss": 0.6940032839775085, + "step": 658 + }, + { + "epoch": 0.1927182336598918, + "grad_norm": 1.3088210596354761, + "learning_rate": 1.9883523152627626e-05, + "loss": 0.7089565396308899, + "step": 659 + }, + { + "epoch": 0.19301067407515718, + "grad_norm": 1.3865316758332553, + "learning_rate": 1.9882786236760932e-05, + "loss": 0.7508438229560852, + "step": 660 + }, + { + "epoch": 0.1933031144904226, + "grad_norm": 1.6156320166139564, + "learning_rate": 1.988204701085304e-05, + "loss": 0.6828616261482239, + "step": 661 + }, + { + "epoch": 0.19359555490568797, + "grad_norm": 1.2372815991073003, + "learning_rate": 1.9881305475076744e-05, + "loss": 0.6652963161468506, + "step": 662 + }, + { + "epoch": 0.19388799532095335, + "grad_norm": 1.2410743539313074, + "learning_rate": 1.988056162960537e-05, + "loss": 0.6859447360038757, + "step": 663 + }, + { + "epoch": 0.19418043573621874, + "grad_norm": 1.4440746421071415, + "learning_rate": 1.9879815474612794e-05, + "loss": 0.693805992603302, + "step": 664 + }, + { + "epoch": 0.19447287615148415, + "grad_norm": 1.359257774367856, + "learning_rate": 1.987906701027342e-05, + "loss": 0.7028747200965881, + 
"step": 665 + }, + { + "epoch": 0.19476531656674953, + "grad_norm": 1.2833261279779522, + "learning_rate": 1.9878316236762195e-05, + "loss": 0.7492112517356873, + "step": 666 + }, + { + "epoch": 0.1950577569820149, + "grad_norm": 1.2065346249489062, + "learning_rate": 1.9877563154254613e-05, + "loss": 0.5394963026046753, + "step": 667 + }, + { + "epoch": 0.1953501973972803, + "grad_norm": 1.1848542596539768, + "learning_rate": 1.98768077629267e-05, + "loss": 0.5185493230819702, + "step": 668 + }, + { + "epoch": 0.1956426378125457, + "grad_norm": 1.2600065416138704, + "learning_rate": 1.9876050062955027e-05, + "loss": 0.7279829382896423, + "step": 669 + }, + { + "epoch": 0.1959350782278111, + "grad_norm": 1.3533145550923509, + "learning_rate": 1.9875290054516692e-05, + "loss": 0.7437206506729126, + "step": 670 + }, + { + "epoch": 0.19622751864307647, + "grad_norm": 1.6022192807514979, + "learning_rate": 1.9874527737789358e-05, + "loss": 0.7294617891311646, + "step": 671 + }, + { + "epoch": 0.19651995905834185, + "grad_norm": 1.3433918645025815, + "learning_rate": 1.9873763112951198e-05, + "loss": 0.7710307240486145, + "step": 672 + }, + { + "epoch": 0.19681239947360726, + "grad_norm": 1.3797998364213817, + "learning_rate": 1.9872996180180947e-05, + "loss": 0.690025806427002, + "step": 673 + }, + { + "epoch": 0.19710483988887265, + "grad_norm": 1.2826936342217614, + "learning_rate": 1.9872226939657867e-05, + "loss": 0.6690589189529419, + "step": 674 + }, + { + "epoch": 0.19739728030413803, + "grad_norm": 1.31971712284742, + "learning_rate": 1.9871455391561764e-05, + "loss": 0.7587239742279053, + "step": 675 + }, + { + "epoch": 0.1976897207194034, + "grad_norm": 1.2583882254944232, + "learning_rate": 1.987068153607298e-05, + "loss": 0.8048006296157837, + "step": 676 + }, + { + "epoch": 0.19798216113466882, + "grad_norm": 1.4904938665104162, + "learning_rate": 1.9869905373372402e-05, + "loss": 0.721023678779602, + "step": 677 + }, + { + "epoch": 0.1982746015499342, + 
"grad_norm": 1.2975987405043754, + "learning_rate": 1.9869126903641457e-05, + "loss": 0.646798849105835, + "step": 678 + }, + { + "epoch": 0.1985670419651996, + "grad_norm": 1.2591898865565592, + "learning_rate": 1.9868346127062098e-05, + "loss": 0.597393274307251, + "step": 679 + }, + { + "epoch": 0.19885948238046497, + "grad_norm": 1.2773189541737207, + "learning_rate": 1.9867563043816836e-05, + "loss": 0.8619129657745361, + "step": 680 + }, + { + "epoch": 0.19915192279573038, + "grad_norm": 1.2343587826225086, + "learning_rate": 1.986677765408871e-05, + "loss": 0.5391764640808105, + "step": 681 + }, + { + "epoch": 0.19944436321099576, + "grad_norm": 1.360221019641669, + "learning_rate": 1.9865989958061297e-05, + "loss": 0.8185729384422302, + "step": 682 + }, + { + "epoch": 0.19973680362626114, + "grad_norm": 1.3798220626145994, + "learning_rate": 1.9865199955918712e-05, + "loss": 0.6629397869110107, + "step": 683 + }, + { + "epoch": 0.20002924404152653, + "grad_norm": 1.2700323386046573, + "learning_rate": 1.9864407647845626e-05, + "loss": 0.6752325296401978, + "step": 684 + }, + { + "epoch": 0.20032168445679194, + "grad_norm": 1.4583632577866723, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.8509782552719116, + "step": 685 + }, + { + "epoch": 0.20061412487205732, + "grad_norm": 1.2832087066986109, + "learning_rate": 1.986281611464925e-05, + "loss": 0.5573478937149048, + "step": 686 + }, + { + "epoch": 0.2009065652873227, + "grad_norm": 1.4672386586086157, + "learning_rate": 1.9862016889897976e-05, + "loss": 0.8152032494544983, + "step": 687 + }, + { + "epoch": 0.20119900570258809, + "grad_norm": 1.2878245307564982, + "learning_rate": 1.9861215359960217e-05, + "loss": 0.6346902847290039, + "step": 688 + }, + { + "epoch": 0.2014914461178535, + "grad_norm": 1.3877152633732261, + "learning_rate": 1.986041152502332e-05, + "loss": 0.6608721017837524, + "step": 689 + }, + { + "epoch": 0.20178388653311888, + "grad_norm": 1.5061562575575014, + "learning_rate": 
1.9859605385275188e-05, + "loss": 0.7753713130950928, + "step": 690 + }, + { + "epoch": 0.20207632694838426, + "grad_norm": 1.2917361787707549, + "learning_rate": 1.9858796940904238e-05, + "loss": 0.6747434139251709, + "step": 691 + }, + { + "epoch": 0.20236876736364964, + "grad_norm": 1.4853341728710303, + "learning_rate": 1.9857986192099446e-05, + "loss": 0.7263737320899963, + "step": 692 + }, + { + "epoch": 0.20266120777891505, + "grad_norm": 1.2072706917482865, + "learning_rate": 1.9857173139050324e-05, + "loss": 0.7910827994346619, + "step": 693 + }, + { + "epoch": 0.20295364819418044, + "grad_norm": 1.479189890111576, + "learning_rate": 1.9856357781946913e-05, + "loss": 0.7245683670043945, + "step": 694 + }, + { + "epoch": 0.20324608860944582, + "grad_norm": 1.146324196354459, + "learning_rate": 1.9855540120979794e-05, + "loss": 0.7440140247344971, + "step": 695 + }, + { + "epoch": 0.20353852902471123, + "grad_norm": 1.823699641073059, + "learning_rate": 1.9854720156340096e-05, + "loss": 0.7485358715057373, + "step": 696 + }, + { + "epoch": 0.2038309694399766, + "grad_norm": 1.3927934028554216, + "learning_rate": 1.985389788821948e-05, + "loss": 0.7658560872077942, + "step": 697 + }, + { + "epoch": 0.204123409855242, + "grad_norm": 1.5269096149843602, + "learning_rate": 1.9853073316810144e-05, + "loss": 0.7366135120391846, + "step": 698 + }, + { + "epoch": 0.20441585027050738, + "grad_norm": 1.2008198015347107, + "learning_rate": 1.985224644230483e-05, + "loss": 0.622355580329895, + "step": 699 + }, + { + "epoch": 0.2047082906857728, + "grad_norm": 1.1924050316279482, + "learning_rate": 1.985141726489681e-05, + "loss": 0.6123125553131104, + "step": 700 + }, + { + "epoch": 0.20500073110103817, + "grad_norm": 1.3537888634275872, + "learning_rate": 1.9850585784779907e-05, + "loss": 0.6768301725387573, + "step": 701 + }, + { + "epoch": 0.20529317151630355, + "grad_norm": 1.2390814549745153, + "learning_rate": 1.9849752002148465e-05, + "loss": 0.6562466621398926, 
+ "step": 702 + }, + { + "epoch": 0.20558561193156893, + "grad_norm": 1.5562868949340583, + "learning_rate": 1.984891591719738e-05, + "loss": 0.7818280458450317, + "step": 703 + }, + { + "epoch": 0.20587805234683434, + "grad_norm": 1.3407102317592055, + "learning_rate": 1.9848077530122083e-05, + "loss": 0.7144001722335815, + "step": 704 + }, + { + "epoch": 0.20617049276209973, + "grad_norm": 1.1671039191657233, + "learning_rate": 1.9847236841118537e-05, + "loss": 0.700564980506897, + "step": 705 + }, + { + "epoch": 0.2064629331773651, + "grad_norm": 1.3051666135645792, + "learning_rate": 1.984639385038326e-05, + "loss": 0.5933517217636108, + "step": 706 + }, + { + "epoch": 0.2067553735926305, + "grad_norm": 1.2749925819283578, + "learning_rate": 1.9845548558113278e-05, + "loss": 0.6174886226654053, + "step": 707 + }, + { + "epoch": 0.2070478140078959, + "grad_norm": 1.3159599421199524, + "learning_rate": 1.9844700964506188e-05, + "loss": 0.7241572141647339, + "step": 708 + }, + { + "epoch": 0.20734025442316129, + "grad_norm": 1.227834334214839, + "learning_rate": 1.9843851069760103e-05, + "loss": 0.6620675325393677, + "step": 709 + }, + { + "epoch": 0.20763269483842667, + "grad_norm": 1.3263327729601424, + "learning_rate": 1.9842998874073682e-05, + "loss": 0.6115273237228394, + "step": 710 + }, + { + "epoch": 0.20792513525369205, + "grad_norm": 1.2961824988419117, + "learning_rate": 1.984214437764612e-05, + "loss": 0.6871848106384277, + "step": 711 + }, + { + "epoch": 0.20821757566895746, + "grad_norm": 1.3134080639211354, + "learning_rate": 1.9841287580677152e-05, + "loss": 0.6887271404266357, + "step": 712 + }, + { + "epoch": 0.20851001608422284, + "grad_norm": 1.4994035488495783, + "learning_rate": 1.9840428483367046e-05, + "loss": 0.8519056439399719, + "step": 713 + }, + { + "epoch": 0.20880245649948823, + "grad_norm": 1.1754556134484295, + "learning_rate": 1.9839567085916617e-05, + "loss": 0.8168978691101074, + "step": 714 + }, + { + "epoch": 
0.2090948969147536, + "grad_norm": 1.3651960767502735, + "learning_rate": 1.98387033885272e-05, + "loss": 0.6565415859222412, + "step": 715 + }, + { + "epoch": 0.20938733733001902, + "grad_norm": 1.3008644261492222, + "learning_rate": 1.9837837391400697e-05, + "loss": 0.7305471897125244, + "step": 716 + }, + { + "epoch": 0.2096797777452844, + "grad_norm": 1.4799180289336367, + "learning_rate": 1.9836969094739512e-05, + "loss": 0.7676819562911987, + "step": 717 + }, + { + "epoch": 0.20997221816054978, + "grad_norm": 1.8463650009400876, + "learning_rate": 1.983609849874661e-05, + "loss": 0.6519052982330322, + "step": 718 + }, + { + "epoch": 0.21026465857581517, + "grad_norm": 1.2876599445155823, + "learning_rate": 1.9835225603625488e-05, + "loss": 0.6298089623451233, + "step": 719 + }, + { + "epoch": 0.21055709899108058, + "grad_norm": 1.3906710149258825, + "learning_rate": 1.9834350409580184e-05, + "loss": 0.6384454369544983, + "step": 720 + }, + { + "epoch": 0.21084953940634596, + "grad_norm": 1.1568343654967514, + "learning_rate": 1.9833472916815264e-05, + "loss": 0.6335986852645874, + "step": 721 + }, + { + "epoch": 0.21114197982161134, + "grad_norm": 1.3831022749264381, + "learning_rate": 1.983259312553584e-05, + "loss": 0.6587867736816406, + "step": 722 + }, + { + "epoch": 0.21143442023687672, + "grad_norm": 1.4202837808347009, + "learning_rate": 1.9831711035947552e-05, + "loss": 0.6884294748306274, + "step": 723 + }, + { + "epoch": 0.21172686065214213, + "grad_norm": 1.3257507653834097, + "learning_rate": 1.983082664825659e-05, + "loss": 0.7094298601150513, + "step": 724 + }, + { + "epoch": 0.21201930106740752, + "grad_norm": 1.2528953355997736, + "learning_rate": 1.982993996266967e-05, + "loss": 0.736876368522644, + "step": 725 + }, + { + "epoch": 0.2123117414826729, + "grad_norm": 1.3690939580337487, + "learning_rate": 1.9829050979394052e-05, + "loss": 0.7802199125289917, + "step": 726 + }, + { + "epoch": 0.21260418189793828, + "grad_norm": 
1.1986325257536081, + "learning_rate": 1.9828159698637527e-05, + "loss": 0.602590799331665, + "step": 727 + }, + { + "epoch": 0.2128966223132037, + "grad_norm": 1.2705657575851783, + "learning_rate": 1.982726612060843e-05, + "loss": 0.6855295896530151, + "step": 728 + }, + { + "epoch": 0.21318906272846908, + "grad_norm": 1.3075577627317818, + "learning_rate": 1.982637024551563e-05, + "loss": 0.7174949645996094, + "step": 729 + }, + { + "epoch": 0.21348150314373446, + "grad_norm": 1.404568014095412, + "learning_rate": 1.9825472073568527e-05, + "loss": 0.7002695798873901, + "step": 730 + }, + { + "epoch": 0.21377394355899984, + "grad_norm": 1.3606210741478622, + "learning_rate": 1.982457160497707e-05, + "loss": 0.7256268262863159, + "step": 731 + }, + { + "epoch": 0.21406638397426525, + "grad_norm": 1.6598974008247112, + "learning_rate": 1.9823668839951732e-05, + "loss": 0.8223557472229004, + "step": 732 + }, + { + "epoch": 0.21435882438953063, + "grad_norm": 1.361285088499868, + "learning_rate": 1.982276377870353e-05, + "loss": 0.760543942451477, + "step": 733 + }, + { + "epoch": 0.21465126480479602, + "grad_norm": 1.1189262427603888, + "learning_rate": 1.982185642144402e-05, + "loss": 0.5587141513824463, + "step": 734 + }, + { + "epoch": 0.21494370522006143, + "grad_norm": 1.5077440828298982, + "learning_rate": 1.9820946768385295e-05, + "loss": 0.5775829553604126, + "step": 735 + }, + { + "epoch": 0.2152361456353268, + "grad_norm": 1.2761529870001347, + "learning_rate": 1.982003481973997e-05, + "loss": 0.6654443144798279, + "step": 736 + }, + { + "epoch": 0.2155285860505922, + "grad_norm": 1.5826837327135188, + "learning_rate": 1.9819120575721212e-05, + "loss": 0.7963466048240662, + "step": 737 + }, + { + "epoch": 0.21582102646585757, + "grad_norm": 1.3788031698645051, + "learning_rate": 1.981820403654272e-05, + "loss": 0.6748678684234619, + "step": 738 + }, + { + "epoch": 0.21611346688112298, + "grad_norm": 1.4155297807006182, + "learning_rate": 
1.9817285202418733e-05, + "loss": 0.7041783928871155, + "step": 739 + }, + { + "epoch": 0.21640590729638837, + "grad_norm": 1.5390789301713295, + "learning_rate": 1.981636407356402e-05, + "loss": 0.8008041381835938, + "step": 740 + }, + { + "epoch": 0.21669834771165375, + "grad_norm": 1.4349473190399622, + "learning_rate": 1.9815440650193887e-05, + "loss": 0.6873682141304016, + "step": 741 + }, + { + "epoch": 0.21699078812691913, + "grad_norm": 1.4041288075629241, + "learning_rate": 1.981451493252418e-05, + "loss": 0.6316831111907959, + "step": 742 + }, + { + "epoch": 0.21728322854218454, + "grad_norm": 1.3377112960270812, + "learning_rate": 1.9813586920771283e-05, + "loss": 0.6481543779373169, + "step": 743 + }, + { + "epoch": 0.21757566895744992, + "grad_norm": 1.2613104485847573, + "learning_rate": 1.9812656615152112e-05, + "loss": 0.6642731428146362, + "step": 744 + }, + { + "epoch": 0.2178681093727153, + "grad_norm": 1.4870873028073741, + "learning_rate": 1.9811724015884115e-05, + "loss": 0.6769483089447021, + "step": 745 + }, + { + "epoch": 0.2181605497879807, + "grad_norm": 1.4050593471281791, + "learning_rate": 1.981078912318529e-05, + "loss": 0.6397525072097778, + "step": 746 + }, + { + "epoch": 0.2184529902032461, + "grad_norm": 1.170420294448055, + "learning_rate": 1.9809851937274154e-05, + "loss": 0.4963756203651428, + "step": 747 + }, + { + "epoch": 0.21874543061851148, + "grad_norm": 1.6049508757911466, + "learning_rate": 1.9808912458369774e-05, + "loss": 0.7352936267852783, + "step": 748 + }, + { + "epoch": 0.21903787103377687, + "grad_norm": 1.3947943752325116, + "learning_rate": 1.980797068669175e-05, + "loss": 0.7177609205245972, + "step": 749 + }, + { + "epoch": 0.21933031144904225, + "grad_norm": 1.2819324457206713, + "learning_rate": 1.980702662246021e-05, + "loss": 0.76703941822052, + "step": 750 + }, + { + "epoch": 0.21962275186430766, + "grad_norm": 1.4885423867402507, + "learning_rate": 1.980608026589582e-05, + "loss": 0.8591324090957642, + 
"step": 751 + }, + { + "epoch": 0.21991519227957304, + "grad_norm": 1.1920075550965599, + "learning_rate": 1.9805131617219792e-05, + "loss": 0.6216185092926025, + "step": 752 + }, + { + "epoch": 0.22020763269483842, + "grad_norm": 1.359972752643247, + "learning_rate": 1.9804180676653867e-05, + "loss": 0.6067323684692383, + "step": 753 + }, + { + "epoch": 0.2205000731101038, + "grad_norm": 1.329886038437426, + "learning_rate": 1.9803227444420316e-05, + "loss": 0.5832521319389343, + "step": 754 + }, + { + "epoch": 0.22079251352536922, + "grad_norm": 1.3701144460168073, + "learning_rate": 1.9802271920741957e-05, + "loss": 0.6181083917617798, + "step": 755 + }, + { + "epoch": 0.2210849539406346, + "grad_norm": 1.6323941211416428, + "learning_rate": 1.9801314105842135e-05, + "loss": 0.614393949508667, + "step": 756 + }, + { + "epoch": 0.22137739435589998, + "grad_norm": 1.4783150089736257, + "learning_rate": 1.980035399994473e-05, + "loss": 0.7598476409912109, + "step": 757 + }, + { + "epoch": 0.22166983477116536, + "grad_norm": 1.3445249209174277, + "learning_rate": 1.979939160327417e-05, + "loss": 0.7185830473899841, + "step": 758 + }, + { + "epoch": 0.22196227518643077, + "grad_norm": 1.2604381133839313, + "learning_rate": 1.9798426916055403e-05, + "loss": 0.6672362089157104, + "step": 759 + }, + { + "epoch": 0.22225471560169616, + "grad_norm": 1.323605486489286, + "learning_rate": 1.9797459938513918e-05, + "loss": 0.60948646068573, + "step": 760 + }, + { + "epoch": 0.22254715601696154, + "grad_norm": 1.376081699980774, + "learning_rate": 1.979649067087574e-05, + "loss": 0.6073893308639526, + "step": 761 + }, + { + "epoch": 0.22283959643222692, + "grad_norm": 2.11374968768554, + "learning_rate": 1.9795519113367434e-05, + "loss": 0.7521525025367737, + "step": 762 + }, + { + "epoch": 0.22313203684749233, + "grad_norm": 1.3631196959673009, + "learning_rate": 1.979454526621609e-05, + "loss": 0.7281486988067627, + "step": 763 + }, + { + "epoch": 0.22342447726275771, + 
"grad_norm": 1.3466801989985047, + "learning_rate": 1.9793569129649345e-05, + "loss": 0.5628652572631836, + "step": 764 + }, + { + "epoch": 0.2237169176780231, + "grad_norm": 1.7030188389110175, + "learning_rate": 1.9792590703895364e-05, + "loss": 0.9115084409713745, + "step": 765 + }, + { + "epoch": 0.22400935809328848, + "grad_norm": 1.1906430527809846, + "learning_rate": 1.9791609989182843e-05, + "loss": 0.5793902277946472, + "step": 766 + }, + { + "epoch": 0.2243017985085539, + "grad_norm": 1.319680929079464, + "learning_rate": 1.979062698574102e-05, + "loss": 0.5811150074005127, + "step": 767 + }, + { + "epoch": 0.22459423892381927, + "grad_norm": 1.8337754364313175, + "learning_rate": 1.978964169379967e-05, + "loss": 0.7450643181800842, + "step": 768 + }, + { + "epoch": 0.22488667933908466, + "grad_norm": 1.2696945630714354, + "learning_rate": 1.9788654113589093e-05, + "loss": 0.6617515087127686, + "step": 769 + }, + { + "epoch": 0.22517911975435004, + "grad_norm": 1.1685310150494228, + "learning_rate": 1.9787664245340137e-05, + "loss": 0.6240406036376953, + "step": 770 + }, + { + "epoch": 0.22547156016961545, + "grad_norm": 1.450209328719988, + "learning_rate": 1.978667208928417e-05, + "loss": 0.694688081741333, + "step": 771 + }, + { + "epoch": 0.22576400058488083, + "grad_norm": 1.274649499261431, + "learning_rate": 1.9785677645653107e-05, + "loss": 0.6855190396308899, + "step": 772 + }, + { + "epoch": 0.2260564410001462, + "grad_norm": 1.5531275718881066, + "learning_rate": 1.978468091467939e-05, + "loss": 0.8132567405700684, + "step": 773 + }, + { + "epoch": 0.22634888141541162, + "grad_norm": 1.2819374084058084, + "learning_rate": 1.9783681896596006e-05, + "loss": 0.7011039853096008, + "step": 774 + }, + { + "epoch": 0.226641321830677, + "grad_norm": 1.2317633693628418, + "learning_rate": 1.9782680591636462e-05, + "loss": 0.5754199028015137, + "step": 775 + }, + { + "epoch": 0.2269337622459424, + "grad_norm": 1.3342396229289735, + "learning_rate": 
1.9781677000034807e-05, + "loss": 0.7518784403800964, + "step": 776 + }, + { + "epoch": 0.22722620266120777, + "grad_norm": 1.4619385156109748, + "learning_rate": 1.978067112202563e-05, + "loss": 0.6802738904953003, + "step": 777 + }, + { + "epoch": 0.22751864307647318, + "grad_norm": 1.2836639966818497, + "learning_rate": 1.9779662957844046e-05, + "loss": 0.7667055726051331, + "step": 778 + }, + { + "epoch": 0.22781108349173856, + "grad_norm": 1.3402387686228199, + "learning_rate": 1.9778652507725704e-05, + "loss": 0.7590975165367126, + "step": 779 + }, + { + "epoch": 0.22810352390700395, + "grad_norm": 1.5322182562597366, + "learning_rate": 1.9777639771906795e-05, + "loss": 0.8009685277938843, + "step": 780 + }, + { + "epoch": 0.22839596432226933, + "grad_norm": 1.2184372022517955, + "learning_rate": 1.977662475062404e-05, + "loss": 0.6094385981559753, + "step": 781 + }, + { + "epoch": 0.22868840473753474, + "grad_norm": 1.2258891813878965, + "learning_rate": 1.977560744411469e-05, + "loss": 0.5919946432113647, + "step": 782 + }, + { + "epoch": 0.22898084515280012, + "grad_norm": 1.3994922066796667, + "learning_rate": 1.9774587852616537e-05, + "loss": 0.7616838216781616, + "step": 783 + }, + { + "epoch": 0.2292732855680655, + "grad_norm": 1.0864449553171927, + "learning_rate": 1.9773565976367903e-05, + "loss": 0.5107603073120117, + "step": 784 + }, + { + "epoch": 0.2295657259833309, + "grad_norm": 1.3785741559157736, + "learning_rate": 1.9772541815607645e-05, + "loss": 0.6819792985916138, + "step": 785 + }, + { + "epoch": 0.2298581663985963, + "grad_norm": 1.3095462010721952, + "learning_rate": 1.977151537057516e-05, + "loss": 0.748264729976654, + "step": 786 + }, + { + "epoch": 0.23015060681386168, + "grad_norm": 1.511078591377817, + "learning_rate": 1.977048664151037e-05, + "loss": 0.7341534495353699, + "step": 787 + }, + { + "epoch": 0.23044304722912706, + "grad_norm": 1.3481462417331131, + "learning_rate": 1.976945562865373e-05, + "loss": 0.569247841835022, + 
"step": 788 + }, + { + "epoch": 0.23073548764439245, + "grad_norm": 1.4792545387125078, + "learning_rate": 1.9768422332246233e-05, + "loss": 0.7003188133239746, + "step": 789 + }, + { + "epoch": 0.23102792805965786, + "grad_norm": 1.222254549739519, + "learning_rate": 1.9767386752529415e-05, + "loss": 0.6484041810035706, + "step": 790 + }, + { + "epoch": 0.23132036847492324, + "grad_norm": 1.2921197831934208, + "learning_rate": 1.9766348889745324e-05, + "loss": 0.6635721921920776, + "step": 791 + }, + { + "epoch": 0.23161280889018862, + "grad_norm": 1.3606759597173597, + "learning_rate": 1.9765308744136568e-05, + "loss": 0.5855914354324341, + "step": 792 + }, + { + "epoch": 0.231905249305454, + "grad_norm": 1.3590534475124305, + "learning_rate": 1.976426631594626e-05, + "loss": 0.7606059312820435, + "step": 793 + }, + { + "epoch": 0.2321976897207194, + "grad_norm": 1.399907486961256, + "learning_rate": 1.976322160541807e-05, + "loss": 0.7080718278884888, + "step": 794 + }, + { + "epoch": 0.2324901301359848, + "grad_norm": 1.6372996876909576, + "learning_rate": 1.9762174612796195e-05, + "loss": 0.8838162422180176, + "step": 795 + }, + { + "epoch": 0.23278257055125018, + "grad_norm": 1.1906217629409164, + "learning_rate": 1.9761125338325357e-05, + "loss": 0.5776950120925903, + "step": 796 + }, + { + "epoch": 0.23307501096651556, + "grad_norm": 1.4075761903811832, + "learning_rate": 1.9760073782250817e-05, + "loss": 0.7455854415893555, + "step": 797 + }, + { + "epoch": 0.23336745138178097, + "grad_norm": 1.4778525028622385, + "learning_rate": 1.9759019944818375e-05, + "loss": 0.7160001993179321, + "step": 798 + }, + { + "epoch": 0.23365989179704635, + "grad_norm": 1.2680712563874137, + "learning_rate": 1.9757963826274357e-05, + "loss": 0.6282311081886292, + "step": 799 + }, + { + "epoch": 0.23395233221231174, + "grad_norm": 1.3617325718771658, + "learning_rate": 1.9756905426865626e-05, + "loss": 0.6479916572570801, + "step": 800 + }, + { + "epoch": 
0.23424477262757712, + "grad_norm": 1.2789508587545713, + "learning_rate": 1.9755844746839573e-05, + "loss": 0.6519639492034912, + "step": 801 + }, + { + "epoch": 0.23453721304284253, + "grad_norm": 1.4374021901805083, + "learning_rate": 1.9754781786444122e-05, + "loss": 0.5591464638710022, + "step": 802 + }, + { + "epoch": 0.2348296534581079, + "grad_norm": 1.6094479116430809, + "learning_rate": 1.9753716545927745e-05, + "loss": 0.6378511190414429, + "step": 803 + }, + { + "epoch": 0.2351220938733733, + "grad_norm": 1.593476138868701, + "learning_rate": 1.9752649025539424e-05, + "loss": 0.7932485342025757, + "step": 804 + }, + { + "epoch": 0.23541453428863868, + "grad_norm": 1.327032855057245, + "learning_rate": 1.9751579225528694e-05, + "loss": 0.7344592809677124, + "step": 805 + }, + { + "epoch": 0.2357069747039041, + "grad_norm": 1.5060138108990804, + "learning_rate": 1.975050714614561e-05, + "loss": 0.7879096269607544, + "step": 806 + }, + { + "epoch": 0.23599941511916947, + "grad_norm": 1.31391427286964, + "learning_rate": 1.9749432787640764e-05, + "loss": 0.6428436040878296, + "step": 807 + }, + { + "epoch": 0.23629185553443485, + "grad_norm": 1.1924129057081494, + "learning_rate": 1.9748356150265283e-05, + "loss": 0.7018194198608398, + "step": 808 + }, + { + "epoch": 0.23658429594970024, + "grad_norm": 1.3487665777693398, + "learning_rate": 1.974727723427082e-05, + "loss": 0.7696131467819214, + "step": 809 + }, + { + "epoch": 0.23687673636496565, + "grad_norm": 1.2806200429683234, + "learning_rate": 1.974619603990957e-05, + "loss": 0.6429424285888672, + "step": 810 + }, + { + "epoch": 0.23716917678023103, + "grad_norm": 1.4197164517856635, + "learning_rate": 1.9745112567434254e-05, + "loss": 0.7205626964569092, + "step": 811 + }, + { + "epoch": 0.2374616171954964, + "grad_norm": 1.206628595880062, + "learning_rate": 1.9744026817098122e-05, + "loss": 0.7018989324569702, + "step": 812 + }, + { + "epoch": 0.23775405761076182, + "grad_norm": 1.4562632106002198, 
+ "learning_rate": 1.974293878915497e-05, + "loss": 0.6861958503723145, + "step": 813 + }, + { + "epoch": 0.2380464980260272, + "grad_norm": 1.8277672251442496, + "learning_rate": 1.9741848483859117e-05, + "loss": 0.687503457069397, + "step": 814 + }, + { + "epoch": 0.23833893844129259, + "grad_norm": 1.6702364448324796, + "learning_rate": 1.9740755901465408e-05, + "loss": 0.7808526754379272, + "step": 815 + }, + { + "epoch": 0.23863137885655797, + "grad_norm": 1.4777579354772585, + "learning_rate": 1.973966104222923e-05, + "loss": 0.7387286424636841, + "step": 816 + }, + { + "epoch": 0.23892381927182338, + "grad_norm": 1.2761337726208828, + "learning_rate": 1.9738563906406508e-05, + "loss": 0.6262110471725464, + "step": 817 + }, + { + "epoch": 0.23921625968708876, + "grad_norm": 1.2308979686961945, + "learning_rate": 1.973746449425368e-05, + "loss": 0.6618830561637878, + "step": 818 + }, + { + "epoch": 0.23950870010235414, + "grad_norm": 1.3525742869997646, + "learning_rate": 1.9736362806027732e-05, + "loss": 0.5866184234619141, + "step": 819 + }, + { + "epoch": 0.23980114051761953, + "grad_norm": 1.1916120410649227, + "learning_rate": 1.9735258841986175e-05, + "loss": 0.6413314342498779, + "step": 820 + }, + { + "epoch": 0.24009358093288494, + "grad_norm": 1.3855684564301443, + "learning_rate": 1.9734152602387054e-05, + "loss": 0.6125906109809875, + "step": 821 + }, + { + "epoch": 0.24038602134815032, + "grad_norm": 1.3708182915073268, + "learning_rate": 1.973304408748895e-05, + "loss": 0.6128122806549072, + "step": 822 + }, + { + "epoch": 0.2406784617634157, + "grad_norm": 1.4552398411515748, + "learning_rate": 1.973193329755097e-05, + "loss": 0.7763051986694336, + "step": 823 + }, + { + "epoch": 0.24097090217868108, + "grad_norm": 1.406068384249821, + "learning_rate": 1.9730820232832747e-05, + "loss": 0.7187550067901611, + "step": 824 + }, + { + "epoch": 0.2412633425939465, + "grad_norm": 1.4089612736012989, + "learning_rate": 1.972970489359446e-05, + "loss": 
0.6564748287200928, + "step": 825 + }, + { + "epoch": 0.24155578300921188, + "grad_norm": 1.2962838731212396, + "learning_rate": 1.9728587280096815e-05, + "loss": 0.6573271751403809, + "step": 826 + }, + { + "epoch": 0.24184822342447726, + "grad_norm": 1.606482466732529, + "learning_rate": 1.9727467392601042e-05, + "loss": 0.8032153844833374, + "step": 827 + }, + { + "epoch": 0.24214066383974264, + "grad_norm": 1.344534982986645, + "learning_rate": 1.972634523136891e-05, + "loss": 0.6781449913978577, + "step": 828 + }, + { + "epoch": 0.24243310425500805, + "grad_norm": 1.3970734980370678, + "learning_rate": 1.972522079666272e-05, + "loss": 0.580757737159729, + "step": 829 + }, + { + "epoch": 0.24272554467027344, + "grad_norm": 1.4569992070347761, + "learning_rate": 1.97240940887453e-05, + "loss": 0.626894474029541, + "step": 830 + }, + { + "epoch": 0.24301798508553882, + "grad_norm": 1.4885978649776115, + "learning_rate": 1.9722965107880005e-05, + "loss": 0.8188163042068481, + "step": 831 + }, + { + "epoch": 0.2433104255008042, + "grad_norm": 1.4514623765445114, + "learning_rate": 1.9721833854330734e-05, + "loss": 0.6943579912185669, + "step": 832 + }, + { + "epoch": 0.2436028659160696, + "grad_norm": 1.3452906489662066, + "learning_rate": 1.972070032836191e-05, + "loss": 0.6177504658699036, + "step": 833 + }, + { + "epoch": 0.243895306331335, + "grad_norm": 1.3249219466208975, + "learning_rate": 1.971956453023849e-05, + "loss": 0.683998703956604, + "step": 834 + }, + { + "epoch": 0.24418774674660038, + "grad_norm": 1.3523687150823345, + "learning_rate": 1.9718426460225952e-05, + "loss": 0.77602219581604, + "step": 835 + }, + { + "epoch": 0.24448018716186576, + "grad_norm": 1.0190390519787025, + "learning_rate": 1.971728611859032e-05, + "loss": 0.4930742383003235, + "step": 836 + }, + { + "epoch": 0.24477262757713117, + "grad_norm": 1.057766741950331, + "learning_rate": 1.971614350559814e-05, + "loss": 0.634628415107727, + "step": 837 + }, + { + "epoch": 
0.24506506799239655, + "grad_norm": 1.4273024070967653, + "learning_rate": 1.971499862151649e-05, + "loss": 0.6439167857170105, + "step": 838 + }, + { + "epoch": 0.24535750840766193, + "grad_norm": 1.1385728991135244, + "learning_rate": 1.9713851466612982e-05, + "loss": 0.701258659362793, + "step": 839 + }, + { + "epoch": 0.24564994882292732, + "grad_norm": 1.4590112387376561, + "learning_rate": 1.9712702041155753e-05, + "loss": 0.6488544344902039, + "step": 840 + }, + { + "epoch": 0.24594238923819273, + "grad_norm": 1.3405708553224296, + "learning_rate": 1.9711550345413476e-05, + "loss": 0.6962910890579224, + "step": 841 + }, + { + "epoch": 0.2462348296534581, + "grad_norm": 1.1939053963741824, + "learning_rate": 1.9710396379655355e-05, + "loss": 0.6617723703384399, + "step": 842 + }, + { + "epoch": 0.2465272700687235, + "grad_norm": 1.2279058278823862, + "learning_rate": 1.970924014415112e-05, + "loss": 0.7152801752090454, + "step": 843 + }, + { + "epoch": 0.24681971048398887, + "grad_norm": 1.2796222731345095, + "learning_rate": 1.9708081639171035e-05, + "loss": 0.6712393760681152, + "step": 844 + }, + { + "epoch": 0.24711215089925428, + "grad_norm": 1.3941735155074029, + "learning_rate": 1.970692086498589e-05, + "loss": 0.8413758277893066, + "step": 845 + }, + { + "epoch": 0.24740459131451967, + "grad_norm": 1.423836225011119, + "learning_rate": 1.9705757821867015e-05, + "loss": 0.6460679769515991, + "step": 846 + }, + { + "epoch": 0.24769703172978505, + "grad_norm": 1.3704721229511874, + "learning_rate": 1.970459251008626e-05, + "loss": 0.759244441986084, + "step": 847 + }, + { + "epoch": 0.24798947214505043, + "grad_norm": 1.2356631241001201, + "learning_rate": 1.970342492991601e-05, + "loss": 0.8148110508918762, + "step": 848 + }, + { + "epoch": 0.24828191256031584, + "grad_norm": 1.2587770996787473, + "learning_rate": 1.970225508162918e-05, + "loss": 0.6620084047317505, + "step": 849 + }, + { + "epoch": 0.24857435297558123, + "grad_norm": 1.451838551232366, 
+ "learning_rate": 1.9701082965499217e-05, + "loss": 0.7090305089950562, + "step": 850 + }, + { + "epoch": 0.2488667933908466, + "grad_norm": 1.2074340737341804, + "learning_rate": 1.9699908581800094e-05, + "loss": 0.6846730709075928, + "step": 851 + }, + { + "epoch": 0.24915923380611202, + "grad_norm": 1.0752757256209107, + "learning_rate": 1.9698731930806315e-05, + "loss": 0.5183212757110596, + "step": 852 + }, + { + "epoch": 0.2494516742213774, + "grad_norm": 1.4176078828661092, + "learning_rate": 1.9697553012792915e-05, + "loss": 0.6913097500801086, + "step": 853 + }, + { + "epoch": 0.24974411463664278, + "grad_norm": 1.4996885245263052, + "learning_rate": 1.9696371828035466e-05, + "loss": 0.7896280884742737, + "step": 854 + }, + { + "epoch": 0.2500365550519082, + "grad_norm": 1.4718644942105623, + "learning_rate": 1.9695188376810055e-05, + "loss": 0.947577714920044, + "step": 855 + }, + { + "epoch": 0.2503289954671736, + "grad_norm": 1.3825164821538705, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.7772419452667236, + "step": 856 + }, + { + "epoch": 0.25062143588243896, + "grad_norm": 1.3624521016930335, + "learning_rate": 1.9692814676062376e-05, + "loss": 0.6255912780761719, + "step": 857 + }, + { + "epoch": 0.25091387629770434, + "grad_norm": 1.3319834146029552, + "learning_rate": 1.969162442709495e-05, + "loss": 0.6572105884552002, + "step": 858 + }, + { + "epoch": 0.2512063167129697, + "grad_norm": 1.3718275193420901, + "learning_rate": 1.969043191276924e-05, + "loss": 0.6387436389923096, + "step": 859 + }, + { + "epoch": 0.2514987571282351, + "grad_norm": 1.1976239787141296, + "learning_rate": 1.968923713336399e-05, + "loss": 0.9180483222007751, + "step": 860 + }, + { + "epoch": 0.2517911975435005, + "grad_norm": 1.211847411431562, + "learning_rate": 1.9688040089158473e-05, + "loss": 0.6830536127090454, + "step": 861 + }, + { + "epoch": 0.2520836379587659, + "grad_norm": 1.6904119232689327, + "learning_rate": 1.9686840780432487e-05, + "loss": 
0.9061588644981384, + "step": 862 + }, + { + "epoch": 0.2523760783740313, + "grad_norm": 1.157670921080695, + "learning_rate": 1.9685639207466365e-05, + "loss": 0.558010458946228, + "step": 863 + }, + { + "epoch": 0.2526685187892967, + "grad_norm": 1.1825470022948923, + "learning_rate": 1.968443537054097e-05, + "loss": 0.6788249611854553, + "step": 864 + }, + { + "epoch": 0.2529609592045621, + "grad_norm": 1.2105730438992965, + "learning_rate": 1.968322926993769e-05, + "loss": 0.576469898223877, + "step": 865 + }, + { + "epoch": 0.25325339961982746, + "grad_norm": 1.2982512656817862, + "learning_rate": 1.9682020905938438e-05, + "loss": 0.6994123458862305, + "step": 866 + }, + { + "epoch": 0.25354584003509284, + "grad_norm": 1.206872992638966, + "learning_rate": 1.9680810278825672e-05, + "loss": 0.6929521560668945, + "step": 867 + }, + { + "epoch": 0.2538382804503582, + "grad_norm": 1.273656030058159, + "learning_rate": 1.9679597388882363e-05, + "loss": 0.7596743106842041, + "step": 868 + }, + { + "epoch": 0.2541307208656236, + "grad_norm": 1.4805809886864818, + "learning_rate": 1.9678382236392013e-05, + "loss": 0.7925904989242554, + "step": 869 + }, + { + "epoch": 0.25442316128088904, + "grad_norm": 1.3335550122348163, + "learning_rate": 1.9677164821638666e-05, + "loss": 0.722467839717865, + "step": 870 + }, + { + "epoch": 0.2547156016961544, + "grad_norm": 1.3131624182400288, + "learning_rate": 1.9675945144906882e-05, + "loss": 0.7165451049804688, + "step": 871 + }, + { + "epoch": 0.2550080421114198, + "grad_norm": 1.1797512350865442, + "learning_rate": 1.9674723206481746e-05, + "loss": 0.5897061824798584, + "step": 872 + }, + { + "epoch": 0.2553004825266852, + "grad_norm": 1.2365962649439657, + "learning_rate": 1.9673499006648885e-05, + "loss": 0.6634531021118164, + "step": 873 + }, + { + "epoch": 0.2555929229419506, + "grad_norm": 1.3214235822507945, + "learning_rate": 1.9672272545694445e-05, + "loss": 0.7237584590911865, + "step": 874 + }, + { + "epoch": 
0.25588536335721596, + "grad_norm": 1.4848759223566366, + "learning_rate": 1.967104382390511e-05, + "loss": 0.6382388472557068, + "step": 875 + }, + { + "epoch": 0.25617780377248134, + "grad_norm": 1.31447030866248, + "learning_rate": 1.966981284156808e-05, + "loss": 0.6788768768310547, + "step": 876 + }, + { + "epoch": 0.2564702441877467, + "grad_norm": 1.3072783419197107, + "learning_rate": 1.966857959897109e-05, + "loss": 0.6347095966339111, + "step": 877 + }, + { + "epoch": 0.25676268460301216, + "grad_norm": 1.4344629064681063, + "learning_rate": 1.9667344096402406e-05, + "loss": 0.8896903991699219, + "step": 878 + }, + { + "epoch": 0.25705512501827754, + "grad_norm": 1.3876445939749689, + "learning_rate": 1.966610633415082e-05, + "loss": 0.71473228931427, + "step": 879 + }, + { + "epoch": 0.2573475654335429, + "grad_norm": 1.3302375445053003, + "learning_rate": 1.9664866312505646e-05, + "loss": 0.7311601638793945, + "step": 880 + }, + { + "epoch": 0.2576400058488083, + "grad_norm": 1.2472942559074918, + "learning_rate": 1.9663624031756737e-05, + "loss": 0.6186199188232422, + "step": 881 + }, + { + "epoch": 0.2579324462640737, + "grad_norm": 1.4896774549089442, + "learning_rate": 1.9662379492194467e-05, + "loss": 0.8059204816818237, + "step": 882 + }, + { + "epoch": 0.25822488667933907, + "grad_norm": 1.4468929069066396, + "learning_rate": 1.9661132694109736e-05, + "loss": 0.6065236330032349, + "step": 883 + }, + { + "epoch": 0.25851732709460445, + "grad_norm": 1.182060018600662, + "learning_rate": 1.965988363779398e-05, + "loss": 0.6491106152534485, + "step": 884 + }, + { + "epoch": 0.25880976750986984, + "grad_norm": 1.197300798410388, + "learning_rate": 1.9658632323539158e-05, + "loss": 0.526267945766449, + "step": 885 + }, + { + "epoch": 0.2591022079251353, + "grad_norm": 1.5008074138248908, + "learning_rate": 1.9657378751637755e-05, + "loss": 0.812760591506958, + "step": 886 + }, + { + "epoch": 0.25939464834040066, + "grad_norm": 1.807239371921464, + 
"learning_rate": 1.9656122922382786e-05, + "loss": 0.7957908511161804, + "step": 887 + }, + { + "epoch": 0.25968708875566604, + "grad_norm": 1.3552357306732934, + "learning_rate": 1.9654864836067796e-05, + "loss": 0.7426323890686035, + "step": 888 + }, + { + "epoch": 0.2599795291709314, + "grad_norm": 1.3206271267013228, + "learning_rate": 1.9653604492986852e-05, + "loss": 0.602961540222168, + "step": 889 + }, + { + "epoch": 0.2602719695861968, + "grad_norm": 1.7789592821205134, + "learning_rate": 1.965234189343455e-05, + "loss": 0.8706510066986084, + "step": 890 + }, + { + "epoch": 0.2605644100014622, + "grad_norm": 1.3042391493572836, + "learning_rate": 1.965107703770602e-05, + "loss": 0.6245810985565186, + "step": 891 + }, + { + "epoch": 0.26085685041672757, + "grad_norm": 1.3389608750174764, + "learning_rate": 1.964980992609691e-05, + "loss": 0.7455421686172485, + "step": 892 + }, + { + "epoch": 0.261149290831993, + "grad_norm": 1.3769047718413097, + "learning_rate": 1.9648540558903404e-05, + "loss": 0.6917043328285217, + "step": 893 + }, + { + "epoch": 0.2614417312472584, + "grad_norm": 1.5543661242785587, + "learning_rate": 1.9647268936422204e-05, + "loss": 0.6488040685653687, + "step": 894 + }, + { + "epoch": 0.2617341716625238, + "grad_norm": 1.4168880936407573, + "learning_rate": 1.964599505895055e-05, + "loss": 0.7416148781776428, + "step": 895 + }, + { + "epoch": 0.26202661207778916, + "grad_norm": 1.2398123962846468, + "learning_rate": 1.9644718926786196e-05, + "loss": 0.7012773156166077, + "step": 896 + }, + { + "epoch": 0.26231905249305454, + "grad_norm": 1.4024640685787384, + "learning_rate": 1.9643440540227438e-05, + "loss": 0.8644432425498962, + "step": 897 + }, + { + "epoch": 0.2626114929083199, + "grad_norm": 1.2155057674795815, + "learning_rate": 1.9642159899573084e-05, + "loss": 0.614842414855957, + "step": 898 + }, + { + "epoch": 0.2629039333235853, + "grad_norm": 1.406064497865486, + "learning_rate": 1.964087700512248e-05, + "loss": 
0.7794508337974548, + "step": 899 + }, + { + "epoch": 0.2631963737388507, + "grad_norm": 1.3041032890013364, + "learning_rate": 1.9639591857175492e-05, + "loss": 0.49217259883880615, + "step": 900 + }, + { + "epoch": 0.2634888141541161, + "grad_norm": 2.5300379427879656, + "learning_rate": 1.9638304456032516e-05, + "loss": 0.6319605708122253, + "step": 901 + }, + { + "epoch": 0.2637812545693815, + "grad_norm": 1.2937854520821135, + "learning_rate": 1.9637014801994478e-05, + "loss": 0.6066744327545166, + "step": 902 + }, + { + "epoch": 0.2640736949846469, + "grad_norm": 1.3364560601793205, + "learning_rate": 1.9635722895362824e-05, + "loss": 0.7529127597808838, + "step": 903 + }, + { + "epoch": 0.26436613539991227, + "grad_norm": 1.1766314649269587, + "learning_rate": 1.9634428736439524e-05, + "loss": 0.6026389598846436, + "step": 904 + }, + { + "epoch": 0.26465857581517765, + "grad_norm": 1.1341480559887087, + "learning_rate": 1.9633132325527092e-05, + "loss": 0.6227229237556458, + "step": 905 + }, + { + "epoch": 0.26495101623044304, + "grad_norm": 1.0934147682033295, + "learning_rate": 1.9631833662928548e-05, + "loss": 0.5959285497665405, + "step": 906 + }, + { + "epoch": 0.2652434566457084, + "grad_norm": 1.5332323248713289, + "learning_rate": 1.9630532748947445e-05, + "loss": 0.8104684352874756, + "step": 907 + }, + { + "epoch": 0.2655358970609738, + "grad_norm": 1.4286964634802555, + "learning_rate": 1.962922958388787e-05, + "loss": 0.6722325682640076, + "step": 908 + }, + { + "epoch": 0.26582833747623924, + "grad_norm": 1.3146328085881052, + "learning_rate": 1.962792416805442e-05, + "loss": 0.5996029376983643, + "step": 909 + }, + { + "epoch": 0.2661207778915046, + "grad_norm": 1.2576705371159294, + "learning_rate": 1.962661650175224e-05, + "loss": 0.7214776873588562, + "step": 910 + }, + { + "epoch": 0.26641321830677, + "grad_norm": 1.3644451050997106, + "learning_rate": 1.9625306585286986e-05, + "loss": 0.6833420991897583, + "step": 911 + }, + { + "epoch": 
0.2667056587220354, + "grad_norm": 1.3539788924921423, + "learning_rate": 1.9623994418964834e-05, + "loss": 0.5571368336677551, + "step": 912 + }, + { + "epoch": 0.26699809913730077, + "grad_norm": 1.3710487138213245, + "learning_rate": 1.9622680003092503e-05, + "loss": 0.6748533248901367, + "step": 913 + }, + { + "epoch": 0.26729053955256615, + "grad_norm": 1.3715994474814863, + "learning_rate": 1.9621363337977232e-05, + "loss": 0.6681679487228394, + "step": 914 + }, + { + "epoch": 0.26758297996783154, + "grad_norm": 1.482670676536411, + "learning_rate": 1.9620044423926775e-05, + "loss": 0.6839786767959595, + "step": 915 + }, + { + "epoch": 0.2678754203830969, + "grad_norm": 1.4250296018843953, + "learning_rate": 1.961872326124943e-05, + "loss": 0.7481753826141357, + "step": 916 + }, + { + "epoch": 0.26816786079836236, + "grad_norm": 1.2167024955211783, + "learning_rate": 1.9617399850254e-05, + "loss": 0.6044093370437622, + "step": 917 + }, + { + "epoch": 0.26846030121362774, + "grad_norm": 1.284073365031053, + "learning_rate": 1.9616074191249833e-05, + "loss": 0.6399786472320557, + "step": 918 + }, + { + "epoch": 0.2687527416288931, + "grad_norm": 1.4810486497659208, + "learning_rate": 1.961474628454679e-05, + "loss": 0.6769053339958191, + "step": 919 + }, + { + "epoch": 0.2690451820441585, + "grad_norm": 1.3650368498715015, + "learning_rate": 1.961341613045526e-05, + "loss": 0.7508189678192139, + "step": 920 + }, + { + "epoch": 0.2693376224594239, + "grad_norm": 1.3260194970823536, + "learning_rate": 1.9612083729286164e-05, + "loss": 0.728675365447998, + "step": 921 + }, + { + "epoch": 0.26963006287468927, + "grad_norm": 1.241243201070507, + "learning_rate": 1.9610749081350934e-05, + "loss": 0.6886277794837952, + "step": 922 + }, + { + "epoch": 0.26992250328995465, + "grad_norm": 1.272552251820391, + "learning_rate": 1.9609412186961542e-05, + "loss": 0.6756877899169922, + "step": 923 + }, + { + "epoch": 0.27021494370522003, + "grad_norm": 1.3464083414999921, + 
"learning_rate": 1.960807304643048e-05, + "loss": 0.6761744022369385, + "step": 924 + }, + { + "epoch": 0.2705073841204855, + "grad_norm": 1.3141872927798783, + "learning_rate": 1.9606731660070758e-05, + "loss": 0.6475736498832703, + "step": 925 + }, + { + "epoch": 0.27079982453575085, + "grad_norm": 1.2576667239396297, + "learning_rate": 1.9605388028195922e-05, + "loss": 0.6169984936714172, + "step": 926 + }, + { + "epoch": 0.27109226495101624, + "grad_norm": 1.36667119537221, + "learning_rate": 1.9604042151120035e-05, + "loss": 0.6411685943603516, + "step": 927 + }, + { + "epoch": 0.2713847053662816, + "grad_norm": 1.203794827188605, + "learning_rate": 1.960269402915769e-05, + "loss": 0.6802625060081482, + "step": 928 + }, + { + "epoch": 0.271677145781547, + "grad_norm": 1.1204382547238934, + "learning_rate": 1.9601343662624e-05, + "loss": 0.6321320533752441, + "step": 929 + }, + { + "epoch": 0.2719695861968124, + "grad_norm": 1.1836254946940896, + "learning_rate": 1.959999105183461e-05, + "loss": 0.6242578029632568, + "step": 930 + }, + { + "epoch": 0.27226202661207777, + "grad_norm": 1.3574626937776866, + "learning_rate": 1.9598636197105672e-05, + "loss": 0.8106271624565125, + "step": 931 + }, + { + "epoch": 0.2725544670273432, + "grad_norm": 1.3336233570386715, + "learning_rate": 1.9597279098753893e-05, + "loss": 0.6810879707336426, + "step": 932 + }, + { + "epoch": 0.2728469074426086, + "grad_norm": 1.4182604377271, + "learning_rate": 1.959591975709647e-05, + "loss": 0.6121781468391418, + "step": 933 + }, + { + "epoch": 0.27313934785787397, + "grad_norm": 1.3855646528211634, + "learning_rate": 1.9594558172451153e-05, + "loss": 0.7347930669784546, + "step": 934 + }, + { + "epoch": 0.27343178827313935, + "grad_norm": 1.7726573891466724, + "learning_rate": 1.9593194345136196e-05, + "loss": 0.8280940651893616, + "step": 935 + }, + { + "epoch": 0.27372422868840474, + "grad_norm": 1.7069126445705718, + "learning_rate": 1.959182827547039e-05, + "loss": 
0.8171218633651733, + "step": 936 + }, + { + "epoch": 0.2740166691036701, + "grad_norm": 1.5519639216005559, + "learning_rate": 1.9590459963773043e-05, + "loss": 0.7350337505340576, + "step": 937 + }, + { + "epoch": 0.2743091095189355, + "grad_norm": 1.2380635233009907, + "learning_rate": 1.9589089410363992e-05, + "loss": 0.5648026466369629, + "step": 938 + }, + { + "epoch": 0.2746015499342009, + "grad_norm": 1.2184482229154892, + "learning_rate": 1.9587716615563592e-05, + "loss": 0.630626916885376, + "step": 939 + }, + { + "epoch": 0.2748939903494663, + "grad_norm": 1.247434869071023, + "learning_rate": 1.9586341579692728e-05, + "loss": 0.658649206161499, + "step": 940 + }, + { + "epoch": 0.2751864307647317, + "grad_norm": 1.3583264773002954, + "learning_rate": 1.9584964303072804e-05, + "loss": 0.6938339471817017, + "step": 941 + }, + { + "epoch": 0.2754788711799971, + "grad_norm": 1.2844871691004516, + "learning_rate": 1.9583584786025755e-05, + "loss": 0.7124238014221191, + "step": 942 + }, + { + "epoch": 0.27577131159526247, + "grad_norm": 1.295461976555009, + "learning_rate": 1.9582203028874027e-05, + "loss": 0.5879669189453125, + "step": 943 + }, + { + "epoch": 0.27606375201052785, + "grad_norm": 1.3092326597229536, + "learning_rate": 1.9580819031940605e-05, + "loss": 0.6169895529747009, + "step": 944 + }, + { + "epoch": 0.27635619242579323, + "grad_norm": 1.3408083006486937, + "learning_rate": 1.9579432795548986e-05, + "loss": 0.6367429494857788, + "step": 945 + }, + { + "epoch": 0.2766486328410586, + "grad_norm": 1.294470969807804, + "learning_rate": 1.9578044320023195e-05, + "loss": 0.6198331117630005, + "step": 946 + }, + { + "epoch": 0.276941073256324, + "grad_norm": 1.2934388501492589, + "learning_rate": 1.9576653605687782e-05, + "loss": 0.6731230616569519, + "step": 947 + }, + { + "epoch": 0.27723351367158944, + "grad_norm": 1.3743119206413423, + "learning_rate": 1.957526065286781e-05, + "loss": 0.7185516953468323, + "step": 948 + }, + { + "epoch": 
0.2775259540868548, + "grad_norm": 1.5124791251983178, + "learning_rate": 1.9573865461888882e-05, + "loss": 0.7362357378005981, + "step": 949 + }, + { + "epoch": 0.2778183945021202, + "grad_norm": 1.481999625276378, + "learning_rate": 1.9572468033077113e-05, + "loss": 0.7051525712013245, + "step": 950 + }, + { + "epoch": 0.2781108349173856, + "grad_norm": 1.3167000079730038, + "learning_rate": 1.9571068366759143e-05, + "loss": 0.6267420053482056, + "step": 951 + }, + { + "epoch": 0.27840327533265097, + "grad_norm": 1.4667668035632615, + "learning_rate": 1.9569666463262136e-05, + "loss": 0.649080753326416, + "step": 952 + }, + { + "epoch": 0.27869571574791635, + "grad_norm": 1.1940294879505342, + "learning_rate": 1.9568262322913777e-05, + "loss": 0.5700061321258545, + "step": 953 + }, + { + "epoch": 0.27898815616318173, + "grad_norm": 1.21562106075719, + "learning_rate": 1.9566855946042274e-05, + "loss": 0.6121870875358582, + "step": 954 + }, + { + "epoch": 0.2792805965784471, + "grad_norm": 1.3828404656512372, + "learning_rate": 1.9565447332976362e-05, + "loss": 0.8294541239738464, + "step": 955 + }, + { + "epoch": 0.27957303699371255, + "grad_norm": 1.2953263908127255, + "learning_rate": 1.9564036484045295e-05, + "loss": 0.6979323625564575, + "step": 956 + }, + { + "epoch": 0.27986547740897794, + "grad_norm": 1.4787353970640398, + "learning_rate": 1.9562623399578853e-05, + "loss": 0.6847009658813477, + "step": 957 + }, + { + "epoch": 0.2801579178242433, + "grad_norm": 1.174633661295302, + "learning_rate": 1.956120807990733e-05, + "loss": 0.6821733713150024, + "step": 958 + }, + { + "epoch": 0.2804503582395087, + "grad_norm": 1.2766608312969014, + "learning_rate": 1.955979052536155e-05, + "loss": 0.6943963766098022, + "step": 959 + }, + { + "epoch": 0.2807427986547741, + "grad_norm": 1.6283703947702834, + "learning_rate": 1.955837073627286e-05, + "loss": 0.5841893553733826, + "step": 960 + }, + { + "epoch": 0.28103523907003947, + "grad_norm": 1.4526296199919857, + 
"learning_rate": 1.955694871297313e-05, + "loss": 0.7196778059005737, + "step": 961 + }, + { + "epoch": 0.28132767948530485, + "grad_norm": 1.3568922084457422, + "learning_rate": 1.9555524455794743e-05, + "loss": 0.697501540184021, + "step": 962 + }, + { + "epoch": 0.28162011990057023, + "grad_norm": 1.3269336256780513, + "learning_rate": 1.9554097965070612e-05, + "loss": 0.7265810966491699, + "step": 963 + }, + { + "epoch": 0.28191256031583567, + "grad_norm": 1.1794879937673313, + "learning_rate": 1.955266924113417e-05, + "loss": 0.5766021013259888, + "step": 964 + }, + { + "epoch": 0.28220500073110105, + "grad_norm": 1.1486001787824904, + "learning_rate": 1.955123828431938e-05, + "loss": 0.6885402202606201, + "step": 965 + }, + { + "epoch": 0.28249744114636643, + "grad_norm": 1.4093622546586522, + "learning_rate": 1.954980509496071e-05, + "loss": 0.719329297542572, + "step": 966 + }, + { + "epoch": 0.2827898815616318, + "grad_norm": 1.1657877260705576, + "learning_rate": 1.954836967339316e-05, + "loss": 0.5621368885040283, + "step": 967 + }, + { + "epoch": 0.2830823219768972, + "grad_norm": 1.4684107409650433, + "learning_rate": 1.954693201995226e-05, + "loss": 0.6323715448379517, + "step": 968 + }, + { + "epoch": 0.2833747623921626, + "grad_norm": 1.1727530946898588, + "learning_rate": 1.954549213497404e-05, + "loss": 0.6265028119087219, + "step": 969 + }, + { + "epoch": 0.28366720280742797, + "grad_norm": 1.2740242277637046, + "learning_rate": 1.9544050018795076e-05, + "loss": 0.6234713792800903, + "step": 970 + }, + { + "epoch": 0.2839596432226934, + "grad_norm": 1.2342517719802, + "learning_rate": 1.9542605671752447e-05, + "loss": 0.6505804657936096, + "step": 971 + }, + { + "epoch": 0.2842520836379588, + "grad_norm": 1.408353713096739, + "learning_rate": 1.954115909418376e-05, + "loss": 0.7756558656692505, + "step": 972 + }, + { + "epoch": 0.28454452405322417, + "grad_norm": 1.4275947350210108, + "learning_rate": 1.953971028642715e-05, + "loss": 
0.767257034778595, + "step": 973 + }, + { + "epoch": 0.28483696446848955, + "grad_norm": 1.5164327383088176, + "learning_rate": 1.9538259248821265e-05, + "loss": 0.6702018976211548, + "step": 974 + }, + { + "epoch": 0.28512940488375493, + "grad_norm": 1.5385088670888984, + "learning_rate": 1.953680598170527e-05, + "loss": 0.7072827816009521, + "step": 975 + }, + { + "epoch": 0.2854218452990203, + "grad_norm": 1.4449259987675327, + "learning_rate": 1.953535048541886e-05, + "loss": 0.6343571543693542, + "step": 976 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 1.2668558478543779, + "learning_rate": 1.953389276030225e-05, + "loss": 0.6361520290374756, + "step": 977 + }, + { + "epoch": 0.2860067261295511, + "grad_norm": 1.144363699587152, + "learning_rate": 1.9532432806696178e-05, + "loss": 0.6757364273071289, + "step": 978 + }, + { + "epoch": 0.2862991665448165, + "grad_norm": 1.2373799950730142, + "learning_rate": 1.9530970624941896e-05, + "loss": 0.6311759948730469, + "step": 979 + }, + { + "epoch": 0.2865916069600819, + "grad_norm": 1.3327233434420644, + "learning_rate": 1.9529506215381176e-05, + "loss": 0.6207036972045898, + "step": 980 + }, + { + "epoch": 0.2868840473753473, + "grad_norm": 1.182706201187961, + "learning_rate": 1.952803957835632e-05, + "loss": 0.5154495239257812, + "step": 981 + }, + { + "epoch": 0.28717648779061267, + "grad_norm": 1.4885508278374788, + "learning_rate": 1.9526570714210146e-05, + "loss": 0.797666072845459, + "step": 982 + }, + { + "epoch": 0.28746892820587805, + "grad_norm": 1.5013519512468485, + "learning_rate": 1.9525099623285983e-05, + "loss": 0.659400224685669, + "step": 983 + }, + { + "epoch": 0.28776136862114343, + "grad_norm": 1.565667149921291, + "learning_rate": 1.9523626305927706e-05, + "loss": 0.7638698816299438, + "step": 984 + }, + { + "epoch": 0.2880538090364088, + "grad_norm": 1.282540952352899, + "learning_rate": 1.952215076247968e-05, + "loss": 0.6656497120857239, + "step": 985 + }, + { + "epoch": 
0.2883462494516742, + "grad_norm": 1.6004320535828411, + "learning_rate": 1.9520672993286807e-05, + "loss": 0.7701614499092102, + "step": 986 + }, + { + "epoch": 0.28863868986693964, + "grad_norm": 1.4907110687279852, + "learning_rate": 1.951919299869451e-05, + "loss": 0.6710221767425537, + "step": 987 + }, + { + "epoch": 0.288931130282205, + "grad_norm": 1.3912460639172692, + "learning_rate": 1.951771077904873e-05, + "loss": 0.6307191848754883, + "step": 988 + }, + { + "epoch": 0.2892235706974704, + "grad_norm": 1.5585350101159294, + "learning_rate": 1.951622633469592e-05, + "loss": 0.8226636648178101, + "step": 989 + }, + { + "epoch": 0.2895160111127358, + "grad_norm": 1.3925257650330547, + "learning_rate": 1.9514739665983065e-05, + "loss": 0.6286089420318604, + "step": 990 + }, + { + "epoch": 0.28980845152800117, + "grad_norm": 1.3766260212895336, + "learning_rate": 1.9513250773257667e-05, + "loss": 0.8167316317558289, + "step": 991 + }, + { + "epoch": 0.29010089194326655, + "grad_norm": 1.3082034964893225, + "learning_rate": 1.9511759656867738e-05, + "loss": 0.6840806603431702, + "step": 992 + }, + { + "epoch": 0.29039333235853193, + "grad_norm": 4.707433700267527, + "learning_rate": 1.9510266317161823e-05, + "loss": 0.5731699466705322, + "step": 993 + }, + { + "epoch": 0.2906857727737973, + "grad_norm": 1.179743170686313, + "learning_rate": 1.950877075448898e-05, + "loss": 0.696578860282898, + "step": 994 + }, + { + "epoch": 0.29097821318906275, + "grad_norm": 1.28092562469002, + "learning_rate": 1.9507272969198787e-05, + "loss": 0.7194398641586304, + "step": 995 + }, + { + "epoch": 0.29127065360432813, + "grad_norm": 1.7406610068492592, + "learning_rate": 1.9505772961641342e-05, + "loss": 0.7041016817092896, + "step": 996 + }, + { + "epoch": 0.2915630940195935, + "grad_norm": 1.2586308004321554, + "learning_rate": 1.9504270732167267e-05, + "loss": 0.7073841691017151, + "step": 997 + }, + { + "epoch": 0.2918555344348589, + "grad_norm": 1.204085782896564, + 
"learning_rate": 1.9502766281127693e-05, + "loss": 0.5097789764404297, + "step": 998 + }, + { + "epoch": 0.2921479748501243, + "grad_norm": 1.1340482101200409, + "learning_rate": 1.9501259608874276e-05, + "loss": 0.6522337198257446, + "step": 999 + }, + { + "epoch": 0.29244041526538966, + "grad_norm": 1.2639457143948831, + "learning_rate": 1.9499750715759197e-05, + "loss": 0.8276036381721497, + "step": 1000 + }, + { + "epoch": 0.29273285568065505, + "grad_norm": 1.3336888124261281, + "learning_rate": 1.9498239602135145e-05, + "loss": 0.7701225876808167, + "step": 1001 + }, + { + "epoch": 0.29302529609592043, + "grad_norm": 1.4216994028606598, + "learning_rate": 1.949672626835534e-05, + "loss": 0.6112316846847534, + "step": 1002 + }, + { + "epoch": 0.29331773651118587, + "grad_norm": 1.5055133598944146, + "learning_rate": 1.9495210714773506e-05, + "loss": 0.7196093201637268, + "step": 1003 + }, + { + "epoch": 0.29361017692645125, + "grad_norm": 1.3102459642638802, + "learning_rate": 1.9493692941743903e-05, + "loss": 0.708210825920105, + "step": 1004 + }, + { + "epoch": 0.29390261734171663, + "grad_norm": 1.1576562552023075, + "learning_rate": 1.9492172949621298e-05, + "loss": 0.6156430244445801, + "step": 1005 + }, + { + "epoch": 0.294195057756982, + "grad_norm": 1.3177580817558727, + "learning_rate": 1.9490650738760977e-05, + "loss": 0.6125216484069824, + "step": 1006 + }, + { + "epoch": 0.2944874981722474, + "grad_norm": 1.5792615772910776, + "learning_rate": 1.9489126309518752e-05, + "loss": 0.5691695213317871, + "step": 1007 + }, + { + "epoch": 0.2947799385875128, + "grad_norm": 1.2458453862912673, + "learning_rate": 1.9487599662250945e-05, + "loss": 0.6733062267303467, + "step": 1008 + }, + { + "epoch": 0.29507237900277816, + "grad_norm": 1.3579820847813902, + "learning_rate": 1.94860707973144e-05, + "loss": 0.6069025993347168, + "step": 1009 + }, + { + "epoch": 0.2953648194180436, + "grad_norm": 1.3771790647505693, + "learning_rate": 1.9484539715066488e-05, + 
"loss": 0.6191028356552124, + "step": 1010 + }, + { + "epoch": 0.295657259833309, + "grad_norm": 1.3927395620788336, + "learning_rate": 1.9483006415865082e-05, + "loss": 0.7423045635223389, + "step": 1011 + }, + { + "epoch": 0.29594970024857437, + "grad_norm": 1.584259935283413, + "learning_rate": 1.9481470900068585e-05, + "loss": 0.854878306388855, + "step": 1012 + }, + { + "epoch": 0.29624214066383975, + "grad_norm": 1.3274147652805814, + "learning_rate": 1.9479933168035914e-05, + "loss": 0.6950500011444092, + "step": 1013 + }, + { + "epoch": 0.29653458107910513, + "grad_norm": 1.2664754529699496, + "learning_rate": 1.9478393220126503e-05, + "loss": 0.6944484710693359, + "step": 1014 + }, + { + "epoch": 0.2968270214943705, + "grad_norm": 1.3385070796010239, + "learning_rate": 1.9476851056700303e-05, + "loss": 0.7120212316513062, + "step": 1015 + }, + { + "epoch": 0.2971194619096359, + "grad_norm": 1.2818173555684258, + "learning_rate": 1.9475306678117792e-05, + "loss": 0.6271052956581116, + "step": 1016 + }, + { + "epoch": 0.2974119023249013, + "grad_norm": 1.386949235285712, + "learning_rate": 1.9473760084739958e-05, + "loss": 0.6398453712463379, + "step": 1017 + }, + { + "epoch": 0.2977043427401667, + "grad_norm": 1.440440679973054, + "learning_rate": 1.94722112769283e-05, + "loss": 0.5563585758209229, + "step": 1018 + }, + { + "epoch": 0.2979967831554321, + "grad_norm": 1.2637928746894573, + "learning_rate": 1.947066025504485e-05, + "loss": 0.7895959615707397, + "step": 1019 + }, + { + "epoch": 0.2982892235706975, + "grad_norm": 1.2684661754258477, + "learning_rate": 1.9469107019452148e-05, + "loss": 0.6304349303245544, + "step": 1020 + }, + { + "epoch": 0.29858166398596286, + "grad_norm": 1.4493096125993807, + "learning_rate": 1.9467551570513257e-05, + "loss": 0.6915549039840698, + "step": 1021 + }, + { + "epoch": 0.29887410440122825, + "grad_norm": 1.2593652754748748, + "learning_rate": 1.9465993908591748e-05, + "loss": 0.6257511377334595, + "step": 1022 + 
}, + { + "epoch": 0.29916654481649363, + "grad_norm": 1.4075585450481771, + "learning_rate": 1.9464434034051716e-05, + "loss": 0.6409085988998413, + "step": 1023 + }, + { + "epoch": 0.299458985231759, + "grad_norm": 1.358442522813864, + "learning_rate": 1.9462871947257772e-05, + "loss": 0.7281351089477539, + "step": 1024 + }, + { + "epoch": 0.2997514256470244, + "grad_norm": 1.441690145181621, + "learning_rate": 1.9461307648575047e-05, + "loss": 0.8016781806945801, + "step": 1025 + }, + { + "epoch": 0.30004386606228983, + "grad_norm": 1.2844064559637345, + "learning_rate": 1.9459741138369186e-05, + "loss": 0.5883209705352783, + "step": 1026 + }, + { + "epoch": 0.3003363064775552, + "grad_norm": 1.674320224055934, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.6414197683334351, + "step": 1027 + }, + { + "epoch": 0.3006287468928206, + "grad_norm": 1.465437904752509, + "learning_rate": 1.9456601484853218e-05, + "loss": 0.7076515555381775, + "step": 1028 + }, + { + "epoch": 0.300921187308086, + "grad_norm": 1.4091861442316225, + "learning_rate": 1.9455028342276984e-05, + "loss": 0.8102637529373169, + "step": 1029 + }, + { + "epoch": 0.30121362772335136, + "grad_norm": 1.3935099692215975, + "learning_rate": 1.9453452989645362e-05, + "loss": 0.6954574584960938, + "step": 1030 + }, + { + "epoch": 0.30150606813861675, + "grad_norm": 1.1912974865854908, + "learning_rate": 1.9451875427326585e-05, + "loss": 0.6647125482559204, + "step": 1031 + }, + { + "epoch": 0.3017985085538821, + "grad_norm": 1.2637381593470247, + "learning_rate": 1.9450295655689392e-05, + "loss": 0.5501933097839355, + "step": 1032 + }, + { + "epoch": 0.3020909489691475, + "grad_norm": 1.1642394496276798, + "learning_rate": 1.944871367510305e-05, + "loss": 0.6561415195465088, + "step": 1033 + }, + { + "epoch": 0.30238338938441295, + "grad_norm": 1.2818557575199787, + "learning_rate": 1.9447129485937335e-05, + "loss": 0.6768229007720947, + "step": 1034 + }, + { + "epoch": 0.30267582979967833, + 
"grad_norm": 1.229414584528048, + "learning_rate": 1.9445543088562543e-05, + "loss": 0.5693868398666382, + "step": 1035 + }, + { + "epoch": 0.3029682702149437, + "grad_norm": 1.197937800783061, + "learning_rate": 1.9443954483349485e-05, + "loss": 0.6165708303451538, + "step": 1036 + }, + { + "epoch": 0.3032607106302091, + "grad_norm": 1.0808504567320436, + "learning_rate": 1.944236367066948e-05, + "loss": 0.6116082668304443, + "step": 1037 + }, + { + "epoch": 0.3035531510454745, + "grad_norm": 1.2481100676234638, + "learning_rate": 1.9440770650894384e-05, + "loss": 0.7027714848518372, + "step": 1038 + }, + { + "epoch": 0.30384559146073986, + "grad_norm": 1.3613368127158991, + "learning_rate": 1.943917542439655e-05, + "loss": 0.7339189052581787, + "step": 1039 + }, + { + "epoch": 0.30413803187600524, + "grad_norm": 1.322856585416547, + "learning_rate": 1.943757799154885e-05, + "loss": 0.7975895404815674, + "step": 1040 + }, + { + "epoch": 0.3044304722912706, + "grad_norm": 1.2603507441667385, + "learning_rate": 1.9435978352724673e-05, + "loss": 0.6421841382980347, + "step": 1041 + }, + { + "epoch": 0.30472291270653606, + "grad_norm": 1.3017046883641064, + "learning_rate": 1.943437650829793e-05, + "loss": 0.6731791496276855, + "step": 1042 + }, + { + "epoch": 0.30501535312180145, + "grad_norm": 1.224211690521448, + "learning_rate": 1.943277245864304e-05, + "loss": 0.7008551359176636, + "step": 1043 + }, + { + "epoch": 0.30530779353706683, + "grad_norm": 1.2549197569852149, + "learning_rate": 1.943116620413494e-05, + "loss": 0.6777141094207764, + "step": 1044 + }, + { + "epoch": 0.3056002339523322, + "grad_norm": 1.258074600817151, + "learning_rate": 1.9429557745149084e-05, + "loss": 0.7649033069610596, + "step": 1045 + }, + { + "epoch": 0.3058926743675976, + "grad_norm": 1.2626508350830759, + "learning_rate": 1.9427947082061432e-05, + "loss": 0.6460477709770203, + "step": 1046 + }, + { + "epoch": 0.306185114782863, + "grad_norm": 1.3748035809258794, + 
"learning_rate": 1.942633421524848e-05, + "loss": 0.5939697623252869, + "step": 1047 + }, + { + "epoch": 0.30647755519812836, + "grad_norm": 1.3696807292374817, + "learning_rate": 1.9424719145087216e-05, + "loss": 0.606407880783081, + "step": 1048 + }, + { + "epoch": 0.3067699956133938, + "grad_norm": 1.2114201905625201, + "learning_rate": 1.9423101871955153e-05, + "loss": 0.5515298843383789, + "step": 1049 + }, + { + "epoch": 0.3070624360286592, + "grad_norm": 1.4449996700249255, + "learning_rate": 1.942148239623032e-05, + "loss": 0.7397217154502869, + "step": 1050 + }, + { + "epoch": 0.30735487644392456, + "grad_norm": 1.708533630902304, + "learning_rate": 1.9419860718291265e-05, + "loss": 0.6397782564163208, + "step": 1051 + }, + { + "epoch": 0.30764731685918995, + "grad_norm": 1.1946031757535738, + "learning_rate": 1.9418236838517036e-05, + "loss": 0.589732825756073, + "step": 1052 + }, + { + "epoch": 0.30793975727445533, + "grad_norm": 1.4196894685331136, + "learning_rate": 1.941661075728721e-05, + "loss": 0.7968351244926453, + "step": 1053 + }, + { + "epoch": 0.3082321976897207, + "grad_norm": 1.35500416476017, + "learning_rate": 1.9414982474981877e-05, + "loss": 0.5740514397621155, + "step": 1054 + }, + { + "epoch": 0.3085246381049861, + "grad_norm": 1.314001411398827, + "learning_rate": 1.9413351991981632e-05, + "loss": 0.656599760055542, + "step": 1055 + }, + { + "epoch": 0.3088170785202515, + "grad_norm": 1.2592244001939052, + "learning_rate": 1.9411719308667593e-05, + "loss": 0.5638262033462524, + "step": 1056 + }, + { + "epoch": 0.3091095189355169, + "grad_norm": 1.3510783569743914, + "learning_rate": 1.9410084425421392e-05, + "loss": 0.6391294002532959, + "step": 1057 + }, + { + "epoch": 0.3094019593507823, + "grad_norm": 1.300451628146748, + "learning_rate": 1.9408447342625167e-05, + "loss": 0.7109906077384949, + "step": 1058 + }, + { + "epoch": 0.3096943997660477, + "grad_norm": 1.35271058872007, + "learning_rate": 1.9406808060661583e-05, + "loss": 
0.6922626495361328, + "step": 1059 + }, + { + "epoch": 0.30998684018131306, + "grad_norm": 1.3729160813047252, + "learning_rate": 1.9405166579913808e-05, + "loss": 0.6708151698112488, + "step": 1060 + }, + { + "epoch": 0.31027928059657844, + "grad_norm": 1.3049592711968918, + "learning_rate": 1.940352290076553e-05, + "loss": 0.6259905099868774, + "step": 1061 + }, + { + "epoch": 0.3105717210118438, + "grad_norm": 1.3047971530530311, + "learning_rate": 1.940187702360095e-05, + "loss": 0.6590703725814819, + "step": 1062 + }, + { + "epoch": 0.3108641614271092, + "grad_norm": 1.5136066296614852, + "learning_rate": 1.9400228948804777e-05, + "loss": 0.7371482849121094, + "step": 1063 + }, + { + "epoch": 0.3111566018423746, + "grad_norm": 1.3637094061000257, + "learning_rate": 1.9398578676762243e-05, + "loss": 0.6954984664916992, + "step": 1064 + }, + { + "epoch": 0.31144904225764003, + "grad_norm": 1.197618668709007, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.604501485824585, + "step": 1065 + }, + { + "epoch": 0.3117414826729054, + "grad_norm": 1.4637648544146704, + "learning_rate": 1.939527154248156e-05, + "loss": 0.7580305337905884, + "step": 1066 + }, + { + "epoch": 0.3120339230881708, + "grad_norm": 1.2774221611024956, + "learning_rate": 1.9393614681016443e-05, + "loss": 0.5996969938278198, + "step": 1067 + }, + { + "epoch": 0.3123263635034362, + "grad_norm": 1.2247945329694363, + "learning_rate": 1.9391955623851e-05, + "loss": 0.5939687490463257, + "step": 1068 + }, + { + "epoch": 0.31261880391870156, + "grad_norm": 1.2833481425507127, + "learning_rate": 1.939029437137304e-05, + "loss": 0.6194947957992554, + "step": 1069 + }, + { + "epoch": 0.31291124433396694, + "grad_norm": 1.406800587144287, + "learning_rate": 1.9388630923970862e-05, + "loss": 0.7419420480728149, + "step": 1070 + }, + { + "epoch": 0.3132036847492323, + "grad_norm": 1.4290715744520364, + "learning_rate": 1.938696528203329e-05, + "loss": 0.6950613856315613, + "step": 1071 + }, + { + 
"epoch": 0.3134961251644977, + "grad_norm": 1.542135386244918, + "learning_rate": 1.9385297445949657e-05, + "loss": 0.7376282215118408, + "step": 1072 + }, + { + "epoch": 0.31378856557976315, + "grad_norm": 1.4197281288148755, + "learning_rate": 1.938362741610981e-05, + "loss": 0.800892174243927, + "step": 1073 + }, + { + "epoch": 0.31408100599502853, + "grad_norm": 1.3238571566647774, + "learning_rate": 1.938195519290411e-05, + "loss": 0.5747013688087463, + "step": 1074 + }, + { + "epoch": 0.3143734464102939, + "grad_norm": 1.3986418419585354, + "learning_rate": 1.9380280776723422e-05, + "loss": 0.7341697216033936, + "step": 1075 + }, + { + "epoch": 0.3146658868255593, + "grad_norm": 1.2703636347468634, + "learning_rate": 1.9378604167959138e-05, + "loss": 0.6229791641235352, + "step": 1076 + }, + { + "epoch": 0.3149583272408247, + "grad_norm": 1.2978515497126824, + "learning_rate": 1.937692536700315e-05, + "loss": 0.7266645431518555, + "step": 1077 + }, + { + "epoch": 0.31525076765609006, + "grad_norm": 1.1885775086050685, + "learning_rate": 1.937524437424787e-05, + "loss": 0.6163127422332764, + "step": 1078 + }, + { + "epoch": 0.31554320807135544, + "grad_norm": 1.3182925237610392, + "learning_rate": 1.9373561190086225e-05, + "loss": 0.6609925031661987, + "step": 1079 + }, + { + "epoch": 0.3158356484866208, + "grad_norm": 1.305005533703013, + "learning_rate": 1.937187581491164e-05, + "loss": 0.7157741785049438, + "step": 1080 + }, + { + "epoch": 0.31612808890188626, + "grad_norm": 1.286980397276467, + "learning_rate": 1.937018824911807e-05, + "loss": 0.6486212015151978, + "step": 1081 + }, + { + "epoch": 0.31642052931715164, + "grad_norm": 1.3383942599839993, + "learning_rate": 1.9368498493099963e-05, + "loss": 0.6931928396224976, + "step": 1082 + }, + { + "epoch": 0.316712969732417, + "grad_norm": 1.3241410530363713, + "learning_rate": 1.9366806547252295e-05, + "loss": 0.9291354417800903, + "step": 1083 + }, + { + "epoch": 0.3170054101476824, + "grad_norm": 
1.247174604159187, + "learning_rate": 1.936511241197055e-05, + "loss": 0.6276642084121704, + "step": 1084 + }, + { + "epoch": 0.3172978505629478, + "grad_norm": 1.5622111951472988, + "learning_rate": 1.936341608765072e-05, + "loss": 0.7836581468582153, + "step": 1085 + }, + { + "epoch": 0.3175902909782132, + "grad_norm": 1.3508415072117352, + "learning_rate": 1.9361717574689308e-05, + "loss": 0.6785084009170532, + "step": 1086 + }, + { + "epoch": 0.31788273139347856, + "grad_norm": 1.2099617708073434, + "learning_rate": 1.936001687348333e-05, + "loss": 0.5715218782424927, + "step": 1087 + }, + { + "epoch": 0.318175171808744, + "grad_norm": 1.4697249093694587, + "learning_rate": 1.9358313984430324e-05, + "loss": 0.8417775630950928, + "step": 1088 + }, + { + "epoch": 0.3184676122240094, + "grad_norm": 1.2670961493626953, + "learning_rate": 1.935660890792832e-05, + "loss": 0.598076343536377, + "step": 1089 + }, + { + "epoch": 0.31876005263927476, + "grad_norm": 1.1923598198798329, + "learning_rate": 1.9354901644375876e-05, + "loss": 0.5830154418945312, + "step": 1090 + }, + { + "epoch": 0.31905249305454014, + "grad_norm": 1.307163759376097, + "learning_rate": 1.935319219417205e-05, + "loss": 0.5746437311172485, + "step": 1091 + }, + { + "epoch": 0.3193449334698055, + "grad_norm": 1.1091668518622428, + "learning_rate": 1.9351480557716414e-05, + "loss": 0.5520191788673401, + "step": 1092 + }, + { + "epoch": 0.3196373738850709, + "grad_norm": 1.218411442512637, + "learning_rate": 1.9349766735409058e-05, + "loss": 0.5847123861312866, + "step": 1093 + }, + { + "epoch": 0.3199298143003363, + "grad_norm": 1.568693203893066, + "learning_rate": 1.9348050727650577e-05, + "loss": 0.7390924692153931, + "step": 1094 + }, + { + "epoch": 0.3202222547156017, + "grad_norm": 1.4427842898109178, + "learning_rate": 1.9346332534842074e-05, + "loss": 0.5812145471572876, + "step": 1095 + }, + { + "epoch": 0.3205146951308671, + "grad_norm": 1.615769461575852, + "learning_rate": 
1.9344612157385166e-05, + "loss": 0.6958816647529602, + "step": 1096 + }, + { + "epoch": 0.3208071355461325, + "grad_norm": 1.3481467288956208, + "learning_rate": 1.9342889595681986e-05, + "loss": 0.5618177652359009, + "step": 1097 + }, + { + "epoch": 0.3210995759613979, + "grad_norm": 1.2846515235734224, + "learning_rate": 1.9341164850135163e-05, + "loss": 0.6099411845207214, + "step": 1098 + }, + { + "epoch": 0.32139201637666326, + "grad_norm": 1.1242331249756639, + "learning_rate": 1.9339437921147854e-05, + "loss": 0.6772094964981079, + "step": 1099 + }, + { + "epoch": 0.32168445679192864, + "grad_norm": 1.4006184046576602, + "learning_rate": 1.9337708809123718e-05, + "loss": 0.6916643381118774, + "step": 1100 + }, + { + "epoch": 0.321976897207194, + "grad_norm": 1.199381767960838, + "learning_rate": 1.933597751446692e-05, + "loss": 0.5716762542724609, + "step": 1101 + }, + { + "epoch": 0.3222693376224594, + "grad_norm": 1.511781401125701, + "learning_rate": 1.9334244037582143e-05, + "loss": 0.68224036693573, + "step": 1102 + }, + { + "epoch": 0.3225617780377248, + "grad_norm": 1.3199204633429549, + "learning_rate": 1.933250837887457e-05, + "loss": 0.6888231635093689, + "step": 1103 + }, + { + "epoch": 0.3228542184529902, + "grad_norm": 1.4809797608653643, + "learning_rate": 1.933077053874991e-05, + "loss": 0.6469036340713501, + "step": 1104 + }, + { + "epoch": 0.3231466588682556, + "grad_norm": 1.5099365665086963, + "learning_rate": 1.932903051761437e-05, + "loss": 0.6202501058578491, + "step": 1105 + }, + { + "epoch": 0.323439099283521, + "grad_norm": 1.454362918518285, + "learning_rate": 1.932728831587467e-05, + "loss": 0.6041314601898193, + "step": 1106 + }, + { + "epoch": 0.3237315396987864, + "grad_norm": 1.3479422917529533, + "learning_rate": 1.9325543933938034e-05, + "loss": 0.7081667184829712, + "step": 1107 + }, + { + "epoch": 0.32402398011405176, + "grad_norm": 1.477559211803618, + "learning_rate": 1.9323797372212204e-05, + "loss": 0.7743494510650635, 
+ "step": 1108 + }, + { + "epoch": 0.32431642052931714, + "grad_norm": 1.3188148010775738, + "learning_rate": 1.9322048631105428e-05, + "loss": 0.6122584342956543, + "step": 1109 + }, + { + "epoch": 0.3246088609445825, + "grad_norm": 1.279178726850882, + "learning_rate": 1.932029771102646e-05, + "loss": 0.6106122732162476, + "step": 1110 + }, + { + "epoch": 0.3249013013598479, + "grad_norm": 1.1897376224269591, + "learning_rate": 1.9318544612384572e-05, + "loss": 0.5082784295082092, + "step": 1111 + }, + { + "epoch": 0.32519374177511334, + "grad_norm": 1.3081590787355515, + "learning_rate": 1.9316789335589542e-05, + "loss": 0.6845188140869141, + "step": 1112 + }, + { + "epoch": 0.3254861821903787, + "grad_norm": 1.343292960468675, + "learning_rate": 1.9315031881051653e-05, + "loss": 0.5972481966018677, + "step": 1113 + }, + { + "epoch": 0.3257786226056441, + "grad_norm": 1.372744387816622, + "learning_rate": 1.931327224918169e-05, + "loss": 0.6312427520751953, + "step": 1114 + }, + { + "epoch": 0.3260710630209095, + "grad_norm": 1.6334469145871557, + "learning_rate": 1.9311510440390973e-05, + "loss": 0.7904551029205322, + "step": 1115 + }, + { + "epoch": 0.3263635034361749, + "grad_norm": 1.4496533611968336, + "learning_rate": 1.9309746455091302e-05, + "loss": 0.6513646841049194, + "step": 1116 + }, + { + "epoch": 0.32665594385144026, + "grad_norm": 1.2559107839078971, + "learning_rate": 1.9307980293694997e-05, + "loss": 0.5349715948104858, + "step": 1117 + }, + { + "epoch": 0.32694838426670564, + "grad_norm": 1.122130050588245, + "learning_rate": 1.93062119566149e-05, + "loss": 0.5815087556838989, + "step": 1118 + }, + { + "epoch": 0.327240824681971, + "grad_norm": 1.3638823451289013, + "learning_rate": 1.9304441444264335e-05, + "loss": 0.6380286812782288, + "step": 1119 + }, + { + "epoch": 0.32753326509723646, + "grad_norm": 1.2646470578382853, + "learning_rate": 1.9302668757057157e-05, + "loss": 0.7222728729248047, + "step": 1120 + }, + { + "epoch": 
0.32782570551250184, + "grad_norm": 1.1611022114208025, + "learning_rate": 1.9300893895407715e-05, + "loss": 0.6262868642807007, + "step": 1121 + }, + { + "epoch": 0.3281181459277672, + "grad_norm": 1.522127361747668, + "learning_rate": 1.929911685973088e-05, + "loss": 0.6387197971343994, + "step": 1122 + }, + { + "epoch": 0.3284105863430326, + "grad_norm": 1.3879760662124887, + "learning_rate": 1.9297337650442015e-05, + "loss": 0.77378249168396, + "step": 1123 + }, + { + "epoch": 0.328703026758298, + "grad_norm": 1.3651260322738243, + "learning_rate": 1.9295556267957004e-05, + "loss": 0.7589142322540283, + "step": 1124 + }, + { + "epoch": 0.32899546717356337, + "grad_norm": 1.481283036614999, + "learning_rate": 1.9293772712692233e-05, + "loss": 0.7153090238571167, + "step": 1125 + }, + { + "epoch": 0.32928790758882875, + "grad_norm": 1.2572705841763243, + "learning_rate": 1.9291986985064595e-05, + "loss": 0.5738104581832886, + "step": 1126 + }, + { + "epoch": 0.3295803480040942, + "grad_norm": 1.2803221849130417, + "learning_rate": 1.92901990854915e-05, + "loss": 0.6530819535255432, + "step": 1127 + }, + { + "epoch": 0.3298727884193596, + "grad_norm": 1.221270033991816, + "learning_rate": 1.9288409014390854e-05, + "loss": 0.59107506275177, + "step": 1128 + }, + { + "epoch": 0.33016522883462496, + "grad_norm": 1.4658806562930384, + "learning_rate": 1.9286616772181072e-05, + "loss": 0.5798863172531128, + "step": 1129 + }, + { + "epoch": 0.33045766924989034, + "grad_norm": 1.689951660615568, + "learning_rate": 1.9284822359281085e-05, + "loss": 0.6957223415374756, + "step": 1130 + }, + { + "epoch": 0.3307501096651557, + "grad_norm": 1.3614959188818774, + "learning_rate": 1.9283025776110326e-05, + "loss": 0.6933379173278809, + "step": 1131 + }, + { + "epoch": 0.3310425500804211, + "grad_norm": 1.2712490538707164, + "learning_rate": 1.928122702308873e-05, + "loss": 0.527482271194458, + "step": 1132 + }, + { + "epoch": 0.3313349904956865, + "grad_norm": 
1.2166131933862214, + "learning_rate": 1.927942610063675e-05, + "loss": 0.7244399785995483, + "step": 1133 + }, + { + "epoch": 0.33162743091095187, + "grad_norm": 1.4636848406157517, + "learning_rate": 1.9277623009175338e-05, + "loss": 0.7881563901901245, + "step": 1134 + }, + { + "epoch": 0.3319198713262173, + "grad_norm": 1.415089568819196, + "learning_rate": 1.9275817749125956e-05, + "loss": 0.7523232698440552, + "step": 1135 + }, + { + "epoch": 0.3322123117414827, + "grad_norm": 1.5537538186729503, + "learning_rate": 1.9274010320910575e-05, + "loss": 0.7226657867431641, + "step": 1136 + }, + { + "epoch": 0.3325047521567481, + "grad_norm": 1.4230293062648038, + "learning_rate": 1.9272200724951666e-05, + "loss": 0.6461686491966248, + "step": 1137 + }, + { + "epoch": 0.33279719257201346, + "grad_norm": 1.1785466753796996, + "learning_rate": 1.9270388961672214e-05, + "loss": 0.6343599557876587, + "step": 1138 + }, + { + "epoch": 0.33308963298727884, + "grad_norm": 1.2762072218920462, + "learning_rate": 1.926857503149571e-05, + "loss": 0.5510993599891663, + "step": 1139 + }, + { + "epoch": 0.3333820734025442, + "grad_norm": 1.3887196408907312, + "learning_rate": 1.9266758934846142e-05, + "loss": 0.6022439002990723, + "step": 1140 + }, + { + "epoch": 0.3336745138178096, + "grad_norm": 1.2716403438701216, + "learning_rate": 1.9264940672148018e-05, + "loss": 0.708207368850708, + "step": 1141 + }, + { + "epoch": 0.333966954233075, + "grad_norm": 1.3137035916667523, + "learning_rate": 1.9263120243826345e-05, + "loss": 0.566935122013092, + "step": 1142 + }, + { + "epoch": 0.3342593946483404, + "grad_norm": 1.634118861015607, + "learning_rate": 1.9261297650306635e-05, + "loss": 0.6848355531692505, + "step": 1143 + }, + { + "epoch": 0.3345518350636058, + "grad_norm": 1.3058715943169161, + "learning_rate": 1.9259472892014907e-05, + "loss": 0.7335090637207031, + "step": 1144 + }, + { + "epoch": 0.3348442754788712, + "grad_norm": 1.425387946547354, + "learning_rate": 
1.925764596937769e-05, + "loss": 0.7323876023292542, + "step": 1145 + }, + { + "epoch": 0.33513671589413657, + "grad_norm": 1.377083094919456, + "learning_rate": 1.9255816882822017e-05, + "loss": 0.5564731955528259, + "step": 1146 + }, + { + "epoch": 0.33542915630940195, + "grad_norm": 1.5323554162589257, + "learning_rate": 1.925398563277542e-05, + "loss": 0.7699049711227417, + "step": 1147 + }, + { + "epoch": 0.33572159672466734, + "grad_norm": 1.4580663324783634, + "learning_rate": 1.925215221966595e-05, + "loss": 0.688602089881897, + "step": 1148 + }, + { + "epoch": 0.3360140371399327, + "grad_norm": 1.4706838569192882, + "learning_rate": 1.9250316643922153e-05, + "loss": 0.7103208899497986, + "step": 1149 + }, + { + "epoch": 0.3363064775551981, + "grad_norm": 1.2723502109555263, + "learning_rate": 1.9248478905973078e-05, + "loss": 0.6313603520393372, + "step": 1150 + }, + { + "epoch": 0.33659891797046354, + "grad_norm": 1.4985289931464978, + "learning_rate": 1.9246639006248294e-05, + "loss": 0.8420118093490601, + "step": 1151 + }, + { + "epoch": 0.3368913583857289, + "grad_norm": 1.4358130705661303, + "learning_rate": 1.9244796945177864e-05, + "loss": 0.6566640734672546, + "step": 1152 + }, + { + "epoch": 0.3371837988009943, + "grad_norm": 1.3408154011751006, + "learning_rate": 1.9242952723192357e-05, + "loss": 0.6455206274986267, + "step": 1153 + }, + { + "epoch": 0.3374762392162597, + "grad_norm": 1.3469873034007918, + "learning_rate": 1.924110634072285e-05, + "loss": 0.7348071336746216, + "step": 1154 + }, + { + "epoch": 0.33776867963152507, + "grad_norm": 1.7471975705727423, + "learning_rate": 1.9239257798200918e-05, + "loss": 0.7187973260879517, + "step": 1155 + }, + { + "epoch": 0.33806112004679045, + "grad_norm": 1.2712100505239146, + "learning_rate": 1.9237407096058655e-05, + "loss": 0.683181643486023, + "step": 1156 + }, + { + "epoch": 0.33835356046205584, + "grad_norm": 1.2445760635583791, + "learning_rate": 1.9235554234728646e-05, + "loss": 
0.7296931743621826, + "step": 1157 + }, + { + "epoch": 0.3386460008773212, + "grad_norm": 1.1890040509691011, + "learning_rate": 1.923369921464399e-05, + "loss": 0.6656480431556702, + "step": 1158 + }, + { + "epoch": 0.33893844129258666, + "grad_norm": 1.3248976863888173, + "learning_rate": 1.923184203623828e-05, + "loss": 0.6284874677658081, + "step": 1159 + }, + { + "epoch": 0.33923088170785204, + "grad_norm": 1.29677623825286, + "learning_rate": 1.922998269994563e-05, + "loss": 0.7065030336380005, + "step": 1160 + }, + { + "epoch": 0.3395233221231174, + "grad_norm": 1.3728212504218815, + "learning_rate": 1.9228121206200637e-05, + "loss": 0.7077580690383911, + "step": 1161 + }, + { + "epoch": 0.3398157625383828, + "grad_norm": 1.4855746260471363, + "learning_rate": 1.9226257555438428e-05, + "loss": 0.6012637615203857, + "step": 1162 + }, + { + "epoch": 0.3401082029536482, + "grad_norm": 1.304745585816947, + "learning_rate": 1.9224391748094607e-05, + "loss": 0.8166115283966064, + "step": 1163 + }, + { + "epoch": 0.34040064336891357, + "grad_norm": 1.502407347484804, + "learning_rate": 1.92225237846053e-05, + "loss": 0.6066576242446899, + "step": 1164 + }, + { + "epoch": 0.34069308378417895, + "grad_norm": 1.2739635711375565, + "learning_rate": 1.922065366540713e-05, + "loss": 0.7226361632347107, + "step": 1165 + }, + { + "epoch": 0.3409855241994444, + "grad_norm": 1.6597939275709621, + "learning_rate": 1.9218781390937233e-05, + "loss": 0.7786005139350891, + "step": 1166 + }, + { + "epoch": 0.3412779646147098, + "grad_norm": 1.4812475412632635, + "learning_rate": 1.9216906961633234e-05, + "loss": 0.6534268856048584, + "step": 1167 + }, + { + "epoch": 0.34157040502997515, + "grad_norm": 1.2208380860350694, + "learning_rate": 1.9215030377933274e-05, + "loss": 0.6048434376716614, + "step": 1168 + }, + { + "epoch": 0.34186284544524054, + "grad_norm": 1.356773734579803, + "learning_rate": 1.921315164027599e-05, + "loss": 0.8321201205253601, + "step": 1169 + }, + { + 
"epoch": 0.3421552858605059, + "grad_norm": 1.5327102242092, + "learning_rate": 1.9211270749100527e-05, + "loss": 0.7142379283905029, + "step": 1170 + }, + { + "epoch": 0.3424477262757713, + "grad_norm": 1.205344060714777, + "learning_rate": 1.9209387704846535e-05, + "loss": 0.6262812614440918, + "step": 1171 + }, + { + "epoch": 0.3427401666910367, + "grad_norm": 1.4345003452190939, + "learning_rate": 1.920750250795416e-05, + "loss": 0.7242386341094971, + "step": 1172 + }, + { + "epoch": 0.34303260710630207, + "grad_norm": 1.3077522067723235, + "learning_rate": 1.9205615158864054e-05, + "loss": 0.6064128875732422, + "step": 1173 + }, + { + "epoch": 0.3433250475215675, + "grad_norm": 1.3868970600691566, + "learning_rate": 1.9203725658017374e-05, + "loss": 0.6720623970031738, + "step": 1174 + }, + { + "epoch": 0.3436174879368329, + "grad_norm": 1.3281016407079367, + "learning_rate": 1.9201834005855785e-05, + "loss": 0.745712161064148, + "step": 1175 + }, + { + "epoch": 0.34390992835209827, + "grad_norm": 1.3336156391355163, + "learning_rate": 1.9199940202821445e-05, + "loss": 0.6387969255447388, + "step": 1176 + }, + { + "epoch": 0.34420236876736365, + "grad_norm": 1.3104807608053473, + "learning_rate": 1.9198044249357018e-05, + "loss": 0.6634984612464905, + "step": 1177 + }, + { + "epoch": 0.34449480918262904, + "grad_norm": 1.0968916991502757, + "learning_rate": 1.919614614590567e-05, + "loss": 0.4732145667076111, + "step": 1178 + }, + { + "epoch": 0.3447872495978944, + "grad_norm": 1.4315145497505135, + "learning_rate": 1.9194245892911077e-05, + "loss": 0.6621897220611572, + "step": 1179 + }, + { + "epoch": 0.3450796900131598, + "grad_norm": 1.3492357768294603, + "learning_rate": 1.9192343490817412e-05, + "loss": 0.5691112279891968, + "step": 1180 + }, + { + "epoch": 0.3453721304284252, + "grad_norm": 1.5009979829344267, + "learning_rate": 1.919043894006934e-05, + "loss": 0.6326683759689331, + "step": 1181 + }, + { + "epoch": 0.3456645708436906, + "grad_norm": 
1.3965991672121214, + "learning_rate": 1.9188532241112047e-05, + "loss": 0.6068567037582397, + "step": 1182 + }, + { + "epoch": 0.345957011258956, + "grad_norm": 1.5425955582670972, + "learning_rate": 1.918662339439121e-05, + "loss": 0.707065761089325, + "step": 1183 + }, + { + "epoch": 0.3462494516742214, + "grad_norm": 1.4506511105853803, + "learning_rate": 1.9184712400353008e-05, + "loss": 0.7821887135505676, + "step": 1184 + }, + { + "epoch": 0.34654189208948677, + "grad_norm": 1.3192596730278041, + "learning_rate": 1.918279925944413e-05, + "loss": 0.6759425401687622, + "step": 1185 + }, + { + "epoch": 0.34683433250475215, + "grad_norm": 1.2819153702423505, + "learning_rate": 1.9180883972111756e-05, + "loss": 0.5660048127174377, + "step": 1186 + }, + { + "epoch": 0.34712677292001753, + "grad_norm": 1.392117573401842, + "learning_rate": 1.9178966538803574e-05, + "loss": 0.708798885345459, + "step": 1187 + }, + { + "epoch": 0.3474192133352829, + "grad_norm": 1.0828895012382165, + "learning_rate": 1.9177046959967774e-05, + "loss": 0.603208065032959, + "step": 1188 + }, + { + "epoch": 0.3477116537505483, + "grad_norm": 1.2856052178527815, + "learning_rate": 1.9175125236053043e-05, + "loss": 0.8259323835372925, + "step": 1189 + }, + { + "epoch": 0.34800409416581374, + "grad_norm": 1.2349901090123199, + "learning_rate": 1.9173201367508572e-05, + "loss": 0.573014497756958, + "step": 1190 + }, + { + "epoch": 0.3482965345810791, + "grad_norm": 1.49130421629148, + "learning_rate": 1.9171275354784062e-05, + "loss": 0.8202974200248718, + "step": 1191 + }, + { + "epoch": 0.3485889749963445, + "grad_norm": 1.313328733803151, + "learning_rate": 1.9169347198329693e-05, + "loss": 0.5352192521095276, + "step": 1192 + }, + { + "epoch": 0.3488814154116099, + "grad_norm": 1.4707600848748155, + "learning_rate": 1.916741689859617e-05, + "loss": 0.7303881645202637, + "step": 1193 + }, + { + "epoch": 0.34917385582687527, + "grad_norm": 1.136402601726834, + "learning_rate": 
1.9165484456034683e-05, + "loss": 0.670224666595459, + "step": 1194 + }, + { + "epoch": 0.34946629624214065, + "grad_norm": 1.213410956274994, + "learning_rate": 1.9163549871096934e-05, + "loss": 0.7311158776283264, + "step": 1195 + }, + { + "epoch": 0.34975873665740603, + "grad_norm": 1.3163563045896416, + "learning_rate": 1.9161613144235117e-05, + "loss": 0.6346032619476318, + "step": 1196 + }, + { + "epoch": 0.3500511770726714, + "grad_norm": 1.3538502473866518, + "learning_rate": 1.9159674275901932e-05, + "loss": 0.66914302110672, + "step": 1197 + }, + { + "epoch": 0.35034361748793685, + "grad_norm": 1.408804907617288, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.6775194406509399, + "step": 1198 + }, + { + "epoch": 0.35063605790320224, + "grad_norm": 1.3219370751555166, + "learning_rate": 1.915579011663475e-05, + "loss": 0.6887085437774658, + "step": 1199 + }, + { + "epoch": 0.3509284983184676, + "grad_norm": 1.4120877262018603, + "learning_rate": 1.9153844826608652e-05, + "loss": 0.7474929690361023, + "step": 1200 + }, + { + "epoch": 0.351220938733733, + "grad_norm": 1.3551417524104399, + "learning_rate": 1.915189739692698e-05, + "loss": 0.5665907859802246, + "step": 1201 + }, + { + "epoch": 0.3515133791489984, + "grad_norm": 1.4582334765772325, + "learning_rate": 1.9149947828044938e-05, + "loss": 0.6044580340385437, + "step": 1202 + }, + { + "epoch": 0.35180581956426377, + "grad_norm": 1.1481279810019642, + "learning_rate": 1.914799612041822e-05, + "loss": 0.6590601205825806, + "step": 1203 + }, + { + "epoch": 0.35209825997952915, + "grad_norm": 1.1796025597233206, + "learning_rate": 1.9146042274503033e-05, + "loss": 0.5204451084136963, + "step": 1204 + }, + { + "epoch": 0.3523907003947946, + "grad_norm": 1.3267878452954167, + "learning_rate": 1.9144086290756077e-05, + "loss": 0.6036473512649536, + "step": 1205 + }, + { + "epoch": 0.35268314081005997, + "grad_norm": 1.438922587418907, + "learning_rate": 1.914212816963454e-05, + "loss": 
0.5652757883071899, + "step": 1206 + }, + { + "epoch": 0.35297558122532535, + "grad_norm": 1.3041918712359999, + "learning_rate": 1.9140167911596133e-05, + "loss": 0.707310676574707, + "step": 1207 + }, + { + "epoch": 0.35326802164059073, + "grad_norm": 1.4881761799215045, + "learning_rate": 1.9138205517099048e-05, + "loss": 0.8539729714393616, + "step": 1208 + }, + { + "epoch": 0.3535604620558561, + "grad_norm": 1.6882139973772572, + "learning_rate": 1.9136240986601986e-05, + "loss": 0.6502546072006226, + "step": 1209 + }, + { + "epoch": 0.3538529024711215, + "grad_norm": 1.3852727639404194, + "learning_rate": 1.9134274320564145e-05, + "loss": 0.7279889583587646, + "step": 1210 + }, + { + "epoch": 0.3541453428863869, + "grad_norm": 1.380784482123245, + "learning_rate": 1.9132305519445215e-05, + "loss": 0.6916895508766174, + "step": 1211 + }, + { + "epoch": 0.35443778330165226, + "grad_norm": 1.3426787464995344, + "learning_rate": 1.9130334583705395e-05, + "loss": 0.6941961050033569, + "step": 1212 + }, + { + "epoch": 0.3547302237169177, + "grad_norm": 1.211958694677935, + "learning_rate": 1.912836151380538e-05, + "loss": 0.6686822175979614, + "step": 1213 + }, + { + "epoch": 0.3550226641321831, + "grad_norm": 1.4335891633323221, + "learning_rate": 1.912638631020636e-05, + "loss": 0.818913459777832, + "step": 1214 + }, + { + "epoch": 0.35531510454744847, + "grad_norm": 1.3902796641746433, + "learning_rate": 1.9124408973370034e-05, + "loss": 0.6461240649223328, + "step": 1215 + }, + { + "epoch": 0.35560754496271385, + "grad_norm": 1.3521177557458626, + "learning_rate": 1.9122429503758586e-05, + "loss": 0.6982225179672241, + "step": 1216 + }, + { + "epoch": 0.35589998537797923, + "grad_norm": 1.2726818955529642, + "learning_rate": 1.9120447901834708e-05, + "loss": 0.6319124698638916, + "step": 1217 + }, + { + "epoch": 0.3561924257932446, + "grad_norm": 1.4379853975185637, + "learning_rate": 1.9118464168061584e-05, + "loss": 0.7092441320419312, + "step": 1218 + }, + { 
+ "epoch": 0.35648486620851, + "grad_norm": 1.4989677994022448, + "learning_rate": 1.9116478302902904e-05, + "loss": 0.7696874141693115, + "step": 1219 + }, + { + "epoch": 0.3567773066237754, + "grad_norm": 1.348418923049424, + "learning_rate": 1.9114490306822846e-05, + "loss": 0.6944275498390198, + "step": 1220 + }, + { + "epoch": 0.3570697470390408, + "grad_norm": 1.4736146352332777, + "learning_rate": 1.9112500180286098e-05, + "loss": 0.6179015636444092, + "step": 1221 + }, + { + "epoch": 0.3573621874543062, + "grad_norm": 1.4832162039625727, + "learning_rate": 1.911050792375784e-05, + "loss": 0.6964149475097656, + "step": 1222 + }, + { + "epoch": 0.3576546278695716, + "grad_norm": 1.314680516503926, + "learning_rate": 1.9108513537703746e-05, + "loss": 0.6923096776008606, + "step": 1223 + }, + { + "epoch": 0.35794706828483697, + "grad_norm": 1.3108509564109556, + "learning_rate": 1.9106517022589993e-05, + "loss": 0.5205660462379456, + "step": 1224 + }, + { + "epoch": 0.35823950870010235, + "grad_norm": 1.604738205722927, + "learning_rate": 1.910451837888325e-05, + "loss": 0.7488006353378296, + "step": 1225 + }, + { + "epoch": 0.35853194911536773, + "grad_norm": 1.1847506052614252, + "learning_rate": 1.91025176070507e-05, + "loss": 0.5414390563964844, + "step": 1226 + }, + { + "epoch": 0.3588243895306331, + "grad_norm": 1.2745914596652235, + "learning_rate": 1.910051470756e-05, + "loss": 0.6891577839851379, + "step": 1227 + }, + { + "epoch": 0.3591168299458985, + "grad_norm": 1.3018823092824294, + "learning_rate": 1.9098509680879318e-05, + "loss": 0.6496376991271973, + "step": 1228 + }, + { + "epoch": 0.35940927036116394, + "grad_norm": 1.386313672695145, + "learning_rate": 1.909650252747732e-05, + "loss": 0.758609414100647, + "step": 1229 + }, + { + "epoch": 0.3597017107764293, + "grad_norm": 1.544442120518355, + "learning_rate": 1.9094493247823164e-05, + "loss": 0.7509145736694336, + "step": 1230 + }, + { + "epoch": 0.3599941511916947, + "grad_norm": 
1.2125512669659357, + "learning_rate": 1.9092481842386506e-05, + "loss": 0.7432405352592468, + "step": 1231 + }, + { + "epoch": 0.3602865916069601, + "grad_norm": 1.4492900887661606, + "learning_rate": 1.90904683116375e-05, + "loss": 0.7208698391914368, + "step": 1232 + }, + { + "epoch": 0.36057903202222547, + "grad_norm": 1.4245050002638069, + "learning_rate": 1.9088452656046798e-05, + "loss": 0.638593852519989, + "step": 1233 + }, + { + "epoch": 0.36087147243749085, + "grad_norm": 1.4428449097608804, + "learning_rate": 1.9086434876085548e-05, + "loss": 0.6663007736206055, + "step": 1234 + }, + { + "epoch": 0.36116391285275623, + "grad_norm": 1.4112526680406456, + "learning_rate": 1.908441497222539e-05, + "loss": 0.7132781744003296, + "step": 1235 + }, + { + "epoch": 0.3614563532680216, + "grad_norm": 1.3024532647304885, + "learning_rate": 1.9082392944938467e-05, + "loss": 0.6545308828353882, + "step": 1236 + }, + { + "epoch": 0.36174879368328705, + "grad_norm": 1.1385624733680002, + "learning_rate": 1.908036879469741e-05, + "loss": 0.7525626420974731, + "step": 1237 + }, + { + "epoch": 0.36204123409855243, + "grad_norm": 1.4286424106237192, + "learning_rate": 1.9078342521975365e-05, + "loss": 0.7336804866790771, + "step": 1238 + }, + { + "epoch": 0.3623336745138178, + "grad_norm": 1.3025101490885231, + "learning_rate": 1.907631412724595e-05, + "loss": 0.5822359323501587, + "step": 1239 + }, + { + "epoch": 0.3626261149290832, + "grad_norm": 1.1928464678887247, + "learning_rate": 1.907428361098329e-05, + "loss": 0.6110040545463562, + "step": 1240 + }, + { + "epoch": 0.3629185553443486, + "grad_norm": 1.3329812952112776, + "learning_rate": 1.9072250973662008e-05, + "loss": 0.5363205671310425, + "step": 1241 + }, + { + "epoch": 0.36321099575961396, + "grad_norm": 1.298737392722519, + "learning_rate": 1.9070216215757225e-05, + "loss": 0.6804911494255066, + "step": 1242 + }, + { + "epoch": 0.36350343617487935, + "grad_norm": 1.306574555012534, + "learning_rate": 
1.906817933774455e-05, + "loss": 0.5670056343078613, + "step": 1243 + }, + { + "epoch": 0.3637958765901448, + "grad_norm": 1.6342501045897717, + "learning_rate": 1.9066140340100086e-05, + "loss": 0.6839423775672913, + "step": 1244 + }, + { + "epoch": 0.36408831700541017, + "grad_norm": 1.3102468000864722, + "learning_rate": 1.906409922330044e-05, + "loss": 0.6512447595596313, + "step": 1245 + }, + { + "epoch": 0.36438075742067555, + "grad_norm": 1.3767881480650324, + "learning_rate": 1.9062055987822713e-05, + "loss": 0.6602088212966919, + "step": 1246 + }, + { + "epoch": 0.36467319783594093, + "grad_norm": 1.3684046563228518, + "learning_rate": 1.9060010634144502e-05, + "loss": 0.6859074831008911, + "step": 1247 + }, + { + "epoch": 0.3649656382512063, + "grad_norm": 1.4029132597681886, + "learning_rate": 1.9057963162743888e-05, + "loss": 0.6871531009674072, + "step": 1248 + }, + { + "epoch": 0.3652580786664717, + "grad_norm": 1.2778457575589584, + "learning_rate": 1.9055913574099454e-05, + "loss": 0.7396048307418823, + "step": 1249 + }, + { + "epoch": 0.3655505190817371, + "grad_norm": 2.313748947770577, + "learning_rate": 1.9053861868690283e-05, + "loss": 0.7013602256774902, + "step": 1250 + }, + { + "epoch": 0.36584295949700246, + "grad_norm": 1.2854553849472183, + "learning_rate": 1.905180804699595e-05, + "loss": 0.6355527639389038, + "step": 1251 + }, + { + "epoch": 0.3661353999122679, + "grad_norm": 1.1923686434429392, + "learning_rate": 1.9049752109496526e-05, + "loss": 0.6869304180145264, + "step": 1252 + }, + { + "epoch": 0.3664278403275333, + "grad_norm": 1.2404032301108463, + "learning_rate": 1.9047694056672566e-05, + "loss": 0.5267671346664429, + "step": 1253 + }, + { + "epoch": 0.36672028074279867, + "grad_norm": 1.2479293372256655, + "learning_rate": 1.9045633889005134e-05, + "loss": 0.6586635112762451, + "step": 1254 + }, + { + "epoch": 0.36701272115806405, + "grad_norm": 1.2783901733768512, + "learning_rate": 1.9043571606975776e-05, + "loss": 
0.6743361949920654, + "step": 1255 + }, + { + "epoch": 0.36730516157332943, + "grad_norm": 1.267912865737822, + "learning_rate": 1.9041507211066543e-05, + "loss": 0.5779668688774109, + "step": 1256 + }, + { + "epoch": 0.3675976019885948, + "grad_norm": 1.240910914837657, + "learning_rate": 1.9039440701759972e-05, + "loss": 0.693313479423523, + "step": 1257 + }, + { + "epoch": 0.3678900424038602, + "grad_norm": 1.2581810913293596, + "learning_rate": 1.9037372079539096e-05, + "loss": 0.6314960718154907, + "step": 1258 + }, + { + "epoch": 0.3681824828191256, + "grad_norm": 1.4026915606466803, + "learning_rate": 1.9035301344887445e-05, + "loss": 0.6483266949653625, + "step": 1259 + }, + { + "epoch": 0.368474923234391, + "grad_norm": 1.1963714897771014, + "learning_rate": 1.903322849828904e-05, + "loss": 0.5896739959716797, + "step": 1260 + }, + { + "epoch": 0.3687673636496564, + "grad_norm": 1.3246139419549132, + "learning_rate": 1.9031153540228398e-05, + "loss": 0.6760983467102051, + "step": 1261 + }, + { + "epoch": 0.3690598040649218, + "grad_norm": 1.409129098147532, + "learning_rate": 1.9029076471190525e-05, + "loss": 0.7453440427780151, + "step": 1262 + }, + { + "epoch": 0.36935224448018716, + "grad_norm": 1.4768395375517958, + "learning_rate": 1.9026997291660926e-05, + "loss": 0.7382408380508423, + "step": 1263 + }, + { + "epoch": 0.36964468489545255, + "grad_norm": 1.3416426687197567, + "learning_rate": 1.9024916002125594e-05, + "loss": 0.6420471668243408, + "step": 1264 + }, + { + "epoch": 0.36993712531071793, + "grad_norm": 1.406350116015231, + "learning_rate": 1.9022832603071017e-05, + "loss": 0.6436389684677124, + "step": 1265 + }, + { + "epoch": 0.3702295657259833, + "grad_norm": 1.3047843220477244, + "learning_rate": 1.9020747094984182e-05, + "loss": 0.689171314239502, + "step": 1266 + }, + { + "epoch": 0.3705220061412487, + "grad_norm": 1.2640328794263636, + "learning_rate": 1.9018659478352556e-05, + "loss": 0.6704196333885193, + "step": 1267 + }, + { + 
"epoch": 0.37081444655651413, + "grad_norm": 2.0690106215423536, + "learning_rate": 1.9016569753664118e-05, + "loss": 0.6598329544067383, + "step": 1268 + }, + { + "epoch": 0.3711068869717795, + "grad_norm": 1.8262603065561684, + "learning_rate": 1.901447792140732e-05, + "loss": 0.7353986501693726, + "step": 1269 + }, + { + "epoch": 0.3713993273870449, + "grad_norm": 1.4285098808767827, + "learning_rate": 1.9012383982071112e-05, + "loss": 0.666167140007019, + "step": 1270 + }, + { + "epoch": 0.3716917678023103, + "grad_norm": 1.2598465904930443, + "learning_rate": 1.9010287936144948e-05, + "loss": 0.6097015738487244, + "step": 1271 + }, + { + "epoch": 0.37198420821757566, + "grad_norm": 1.107025542737965, + "learning_rate": 1.9008189784118764e-05, + "loss": 0.6352437138557434, + "step": 1272 + }, + { + "epoch": 0.37227664863284105, + "grad_norm": 1.5662430122293758, + "learning_rate": 1.9006089526482982e-05, + "loss": 0.6686104536056519, + "step": 1273 + }, + { + "epoch": 0.3725690890481064, + "grad_norm": 1.1719719158143125, + "learning_rate": 1.9003987163728535e-05, + "loss": 0.6504377126693726, + "step": 1274 + }, + { + "epoch": 0.3728615294633718, + "grad_norm": 1.2550627286183815, + "learning_rate": 1.9001882696346835e-05, + "loss": 0.5834585428237915, + "step": 1275 + }, + { + "epoch": 0.37315396987863725, + "grad_norm": 1.5420452194055032, + "learning_rate": 1.8999776124829788e-05, + "loss": 0.665432870388031, + "step": 1276 + }, + { + "epoch": 0.37344641029390263, + "grad_norm": 1.4022956370096276, + "learning_rate": 1.899766744966979e-05, + "loss": 0.659697949886322, + "step": 1277 + }, + { + "epoch": 0.373738850709168, + "grad_norm": 1.194404836566078, + "learning_rate": 1.899555667135973e-05, + "loss": 0.5703476071357727, + "step": 1278 + }, + { + "epoch": 0.3740312911244334, + "grad_norm": 1.5482464090140011, + "learning_rate": 1.8993443790392994e-05, + "loss": 0.809308648109436, + "step": 1279 + }, + { + "epoch": 0.3743237315396988, + "grad_norm": 
1.293354946450912, + "learning_rate": 1.8991328807263455e-05, + "loss": 0.7120508551597595, + "step": 1280 + }, + { + "epoch": 0.37461617195496416, + "grad_norm": 1.2261555314771986, + "learning_rate": 1.898921172246547e-05, + "loss": 0.625985860824585, + "step": 1281 + }, + { + "epoch": 0.37490861237022954, + "grad_norm": 1.3045001966325798, + "learning_rate": 1.898709253649391e-05, + "loss": 0.637261152267456, + "step": 1282 + }, + { + "epoch": 0.375201052785495, + "grad_norm": 1.3550273094265433, + "learning_rate": 1.89849712498441e-05, + "loss": 0.7420133352279663, + "step": 1283 + }, + { + "epoch": 0.37549349320076036, + "grad_norm": 1.2854448504545577, + "learning_rate": 1.8982847863011898e-05, + "loss": 0.6230417490005493, + "step": 1284 + }, + { + "epoch": 0.37578593361602575, + "grad_norm": 1.2127007776565961, + "learning_rate": 1.8980722376493622e-05, + "loss": 0.6896604299545288, + "step": 1285 + }, + { + "epoch": 0.37607837403129113, + "grad_norm": 1.3900367736992565, + "learning_rate": 1.8978594790786092e-05, + "loss": 0.5767710208892822, + "step": 1286 + }, + { + "epoch": 0.3763708144465565, + "grad_norm": 1.3829044432724817, + "learning_rate": 1.8976465106386625e-05, + "loss": 0.6945392489433289, + "step": 1287 + }, + { + "epoch": 0.3766632548618219, + "grad_norm": 1.215943914903153, + "learning_rate": 1.8974333323793014e-05, + "loss": 0.7208314538002014, + "step": 1288 + }, + { + "epoch": 0.3769556952770873, + "grad_norm": 1.5769052361743978, + "learning_rate": 1.8972199443503556e-05, + "loss": 0.7201139330863953, + "step": 1289 + }, + { + "epoch": 0.37724813569235266, + "grad_norm": 1.3366567930451483, + "learning_rate": 1.8970063466017028e-05, + "loss": 0.6791107654571533, + "step": 1290 + }, + { + "epoch": 0.3775405761076181, + "grad_norm": 1.2566261030582595, + "learning_rate": 1.89679253918327e-05, + "loss": 0.6535364389419556, + "step": 1291 + }, + { + "epoch": 0.3778330165228835, + "grad_norm": 1.3498011568256927, + "learning_rate": 
1.8965785221450343e-05, + "loss": 0.5910370349884033, + "step": 1292 + }, + { + "epoch": 0.37812545693814886, + "grad_norm": 1.273886266732073, + "learning_rate": 1.8963642955370203e-05, + "loss": 0.7025415897369385, + "step": 1293 + }, + { + "epoch": 0.37841789735341425, + "grad_norm": 1.3743129752593892, + "learning_rate": 1.8961498594093018e-05, + "loss": 0.8007702827453613, + "step": 1294 + }, + { + "epoch": 0.37871033776867963, + "grad_norm": 1.3170193178053329, + "learning_rate": 1.895935213812003e-05, + "loss": 0.6947172284126282, + "step": 1295 + }, + { + "epoch": 0.379002778183945, + "grad_norm": 1.1178211676030798, + "learning_rate": 1.895720358795295e-05, + "loss": 0.7024818658828735, + "step": 1296 + }, + { + "epoch": 0.3792952185992104, + "grad_norm": 1.3447819598276562, + "learning_rate": 1.895505294409399e-05, + "loss": 0.8202607035636902, + "step": 1297 + }, + { + "epoch": 0.3795876590144758, + "grad_norm": 1.3114070841017331, + "learning_rate": 1.8952900207045853e-05, + "loss": 0.8001795411109924, + "step": 1298 + }, + { + "epoch": 0.3798800994297412, + "grad_norm": 1.4499936638579116, + "learning_rate": 1.895074537731173e-05, + "loss": 0.8068668842315674, + "step": 1299 + }, + { + "epoch": 0.3801725398450066, + "grad_norm": 1.5847094151692727, + "learning_rate": 1.8948588455395294e-05, + "loss": 0.7685220241546631, + "step": 1300 + }, + { + "epoch": 0.380464980260272, + "grad_norm": 1.5870604224504243, + "learning_rate": 1.8946429441800715e-05, + "loss": 0.695665717124939, + "step": 1301 + }, + { + "epoch": 0.38075742067553736, + "grad_norm": 1.2451750201018865, + "learning_rate": 1.894426833703265e-05, + "loss": 0.6073132753372192, + "step": 1302 + }, + { + "epoch": 0.38104986109080274, + "grad_norm": 1.3373381283666117, + "learning_rate": 1.894210514159624e-05, + "loss": 0.6334577798843384, + "step": 1303 + }, + { + "epoch": 0.3813423015060681, + "grad_norm": 1.519675902061051, + "learning_rate": 1.8939939855997125e-05, + "loss": 
0.6448806524276733, + "step": 1304 + }, + { + "epoch": 0.3816347419213335, + "grad_norm": 1.438744535892561, + "learning_rate": 1.8937772480741427e-05, + "loss": 0.7587993144989014, + "step": 1305 + }, + { + "epoch": 0.3819271823365989, + "grad_norm": 1.3270277700231368, + "learning_rate": 1.8935603016335752e-05, + "loss": 0.6924787759780884, + "step": 1306 + }, + { + "epoch": 0.38221962275186433, + "grad_norm": 1.4268553989545638, + "learning_rate": 1.8933431463287197e-05, + "loss": 0.678055465221405, + "step": 1307 + }, + { + "epoch": 0.3825120631671297, + "grad_norm": 1.5391207675187488, + "learning_rate": 1.8931257822103357e-05, + "loss": 0.7519007325172424, + "step": 1308 + }, + { + "epoch": 0.3828045035823951, + "grad_norm": 1.3654471111870499, + "learning_rate": 1.8929082093292306e-05, + "loss": 0.6905468702316284, + "step": 1309 + }, + { + "epoch": 0.3830969439976605, + "grad_norm": 1.2938870443591295, + "learning_rate": 1.8926904277362603e-05, + "loss": 0.6718122363090515, + "step": 1310 + }, + { + "epoch": 0.38338938441292586, + "grad_norm": 1.2471147738993698, + "learning_rate": 1.89247243748233e-05, + "loss": 0.6903961896896362, + "step": 1311 + }, + { + "epoch": 0.38368182482819124, + "grad_norm": 1.2542516264352948, + "learning_rate": 1.8922542386183942e-05, + "loss": 0.6947582960128784, + "step": 1312 + }, + { + "epoch": 0.3839742652434566, + "grad_norm": 1.188134072228004, + "learning_rate": 1.8920358311954548e-05, + "loss": 0.5850759148597717, + "step": 1313 + }, + { + "epoch": 0.38426670565872206, + "grad_norm": 1.3420186334522382, + "learning_rate": 1.891817215264564e-05, + "loss": 0.6512178778648376, + "step": 1314 + }, + { + "epoch": 0.38455914607398745, + "grad_norm": 1.217758250797112, + "learning_rate": 1.891598390876821e-05, + "loss": 0.5910850167274475, + "step": 1315 + }, + { + "epoch": 0.38485158648925283, + "grad_norm": 1.5593571397265127, + "learning_rate": 1.891379358083375e-05, + "loss": 0.7113536596298218, + "step": 1316 + }, + { + 
"epoch": 0.3851440269045182, + "grad_norm": 1.293224731928583, + "learning_rate": 1.891160116935424e-05, + "loss": 0.705318808555603, + "step": 1317 + }, + { + "epoch": 0.3854364673197836, + "grad_norm": 1.3559272013315313, + "learning_rate": 1.890940667484214e-05, + "loss": 0.7524716258049011, + "step": 1318 + }, + { + "epoch": 0.385728907735049, + "grad_norm": 1.4516012043532711, + "learning_rate": 1.89072100978104e-05, + "loss": 0.6130248308181763, + "step": 1319 + }, + { + "epoch": 0.38602134815031436, + "grad_norm": 1.4354959704098622, + "learning_rate": 1.8905011438772455e-05, + "loss": 0.6535071134567261, + "step": 1320 + }, + { + "epoch": 0.38631378856557974, + "grad_norm": 1.3663991139031981, + "learning_rate": 1.890281069824223e-05, + "loss": 0.7027082443237305, + "step": 1321 + }, + { + "epoch": 0.3866062289808452, + "grad_norm": 1.4293858623040305, + "learning_rate": 1.8900607876734133e-05, + "loss": 0.7055719494819641, + "step": 1322 + }, + { + "epoch": 0.38689866939611056, + "grad_norm": 1.3719150653410752, + "learning_rate": 1.8898402974763063e-05, + "loss": 0.7403384447097778, + "step": 1323 + }, + { + "epoch": 0.38719110981137594, + "grad_norm": 1.4816215708629428, + "learning_rate": 1.88961959928444e-05, + "loss": 0.6771470308303833, + "step": 1324 + }, + { + "epoch": 0.3874835502266413, + "grad_norm": 1.552809609148836, + "learning_rate": 1.8893986931494015e-05, + "loss": 0.7258767485618591, + "step": 1325 + }, + { + "epoch": 0.3877759906419067, + "grad_norm": 1.4168779145398758, + "learning_rate": 1.889177579122826e-05, + "loss": 0.7587069869041443, + "step": 1326 + }, + { + "epoch": 0.3880684310571721, + "grad_norm": 1.0432394702021985, + "learning_rate": 1.888956257256398e-05, + "loss": 0.5434668660163879, + "step": 1327 + }, + { + "epoch": 0.3883608714724375, + "grad_norm": 1.2927507112469059, + "learning_rate": 1.8887347276018496e-05, + "loss": 0.5311154127120972, + "step": 1328 + }, + { + "epoch": 0.38865331188770286, + "grad_norm": 
1.2533915775325788, + "learning_rate": 1.888512990210962e-05, + "loss": 0.5651747584342957, + "step": 1329 + }, + { + "epoch": 0.3889457523029683, + "grad_norm": 1.1863162008873491, + "learning_rate": 1.8882910451355654e-05, + "loss": 0.628046989440918, + "step": 1330 + }, + { + "epoch": 0.3892381927182337, + "grad_norm": 1.362511771688155, + "learning_rate": 1.888068892427538e-05, + "loss": 0.644639253616333, + "step": 1331 + }, + { + "epoch": 0.38953063313349906, + "grad_norm": 1.2081658901416763, + "learning_rate": 1.887846532138806e-05, + "loss": 0.6290382742881775, + "step": 1332 + }, + { + "epoch": 0.38982307354876444, + "grad_norm": 1.319310840364395, + "learning_rate": 1.8876239643213456e-05, + "loss": 0.6881425380706787, + "step": 1333 + }, + { + "epoch": 0.3901155139640298, + "grad_norm": 1.398816649776473, + "learning_rate": 1.8874011890271807e-05, + "loss": 0.645643949508667, + "step": 1334 + }, + { + "epoch": 0.3904079543792952, + "grad_norm": 1.3694554603281324, + "learning_rate": 1.887178206308383e-05, + "loss": 0.6965867280960083, + "step": 1335 + }, + { + "epoch": 0.3907003947945606, + "grad_norm": 1.1683610353079796, + "learning_rate": 1.886955016217074e-05, + "loss": 0.7326550483703613, + "step": 1336 + }, + { + "epoch": 0.390992835209826, + "grad_norm": 1.1781300264884254, + "learning_rate": 1.886731618805422e-05, + "loss": 0.6570208668708801, + "step": 1337 + }, + { + "epoch": 0.3912852756250914, + "grad_norm": 1.3563073747469718, + "learning_rate": 1.886508014125646e-05, + "loss": 0.7391610145568848, + "step": 1338 + }, + { + "epoch": 0.3915777160403568, + "grad_norm": 1.2946978380119605, + "learning_rate": 1.8862842022300124e-05, + "loss": 0.651665985584259, + "step": 1339 + }, + { + "epoch": 0.3918701564556222, + "grad_norm": 1.383137909559831, + "learning_rate": 1.8860601831708346e-05, + "loss": 0.695915699005127, + "step": 1340 + }, + { + "epoch": 0.39216259687088756, + "grad_norm": 1.4373340263094165, + "learning_rate": 
1.885835957000476e-05, + "loss": 0.7209347486495972, + "step": 1341 + }, + { + "epoch": 0.39245503728615294, + "grad_norm": 1.3210736597005808, + "learning_rate": 1.885611523771349e-05, + "loss": 0.6083317995071411, + "step": 1342 + }, + { + "epoch": 0.3927474777014183, + "grad_norm": 1.1271235346878163, + "learning_rate": 1.8853868835359127e-05, + "loss": 0.5544713735580444, + "step": 1343 + }, + { + "epoch": 0.3930399181166837, + "grad_norm": 1.5309462253741093, + "learning_rate": 1.8851620363466756e-05, + "loss": 0.6333836317062378, + "step": 1344 + }, + { + "epoch": 0.3933323585319491, + "grad_norm": 1.2454908963117732, + "learning_rate": 1.8849369822561943e-05, + "loss": 0.554995059967041, + "step": 1345 + }, + { + "epoch": 0.3936247989472145, + "grad_norm": 1.1941759610212306, + "learning_rate": 1.884711721317074e-05, + "loss": 0.6696420907974243, + "step": 1346 + }, + { + "epoch": 0.3939172393624799, + "grad_norm": 1.3685512221597254, + "learning_rate": 1.8844862535819682e-05, + "loss": 0.6031695604324341, + "step": 1347 + }, + { + "epoch": 0.3942096797777453, + "grad_norm": 1.2956806020723108, + "learning_rate": 1.884260579103578e-05, + "loss": 0.686814546585083, + "step": 1348 + }, + { + "epoch": 0.3945021201930107, + "grad_norm": 1.3341047094387086, + "learning_rate": 1.884034697934654e-05, + "loss": 0.8113317489624023, + "step": 1349 + }, + { + "epoch": 0.39479456060827606, + "grad_norm": 1.2860694131453334, + "learning_rate": 1.8838086101279946e-05, + "loss": 0.5645952820777893, + "step": 1350 + }, + { + "epoch": 0.39508700102354144, + "grad_norm": 1.2016385972759884, + "learning_rate": 1.883582315736446e-05, + "loss": 0.6431643962860107, + "step": 1351 + }, + { + "epoch": 0.3953794414388068, + "grad_norm": 1.5868415992731069, + "learning_rate": 1.8833558148129034e-05, + "loss": 0.7691985368728638, + "step": 1352 + }, + { + "epoch": 0.39567188185407226, + "grad_norm": 1.2630640231572245, + "learning_rate": 1.88312910741031e-05, + "loss": 
0.5951793789863586, + "step": 1353 + }, + { + "epoch": 0.39596432226933764, + "grad_norm": 1.4300765958854422, + "learning_rate": 1.8829021935816572e-05, + "loss": 0.671844482421875, + "step": 1354 + }, + { + "epoch": 0.396256762684603, + "grad_norm": 1.1270834377200167, + "learning_rate": 1.8826750733799845e-05, + "loss": 0.5290843844413757, + "step": 1355 + }, + { + "epoch": 0.3965492030998684, + "grad_norm": 1.5593842090138308, + "learning_rate": 1.8824477468583806e-05, + "loss": 0.6492103934288025, + "step": 1356 + }, + { + "epoch": 0.3968416435151338, + "grad_norm": 1.5289372619537642, + "learning_rate": 1.882220214069981e-05, + "loss": 0.6111055016517639, + "step": 1357 + }, + { + "epoch": 0.3971340839303992, + "grad_norm": 1.5921417426189186, + "learning_rate": 1.8819924750679702e-05, + "loss": 0.8123398423194885, + "step": 1358 + }, + { + "epoch": 0.39742652434566456, + "grad_norm": 1.1798948792158594, + "learning_rate": 1.8817645299055815e-05, + "loss": 0.6118077039718628, + "step": 1359 + }, + { + "epoch": 0.39771896476092994, + "grad_norm": 1.3941391170101545, + "learning_rate": 1.8815363786360948e-05, + "loss": 0.6916248798370361, + "step": 1360 + }, + { + "epoch": 0.3980114051761954, + "grad_norm": 1.2139550861968382, + "learning_rate": 1.8813080213128394e-05, + "loss": 0.5586028099060059, + "step": 1361 + }, + { + "epoch": 0.39830384559146076, + "grad_norm": 1.4269154081314215, + "learning_rate": 1.8810794579891925e-05, + "loss": 0.6132841110229492, + "step": 1362 + }, + { + "epoch": 0.39859628600672614, + "grad_norm": 1.6891257084549798, + "learning_rate": 1.8808506887185793e-05, + "loss": 0.7329133749008179, + "step": 1363 + }, + { + "epoch": 0.3988887264219915, + "grad_norm": 1.2704606915664418, + "learning_rate": 1.8806217135544736e-05, + "loss": 0.5975138545036316, + "step": 1364 + }, + { + "epoch": 0.3991811668372569, + "grad_norm": 1.315662529163245, + "learning_rate": 1.8803925325503963e-05, + "loss": 0.5790295600891113, + "step": 1365 + }, + 
{ + "epoch": 0.3994736072525223, + "grad_norm": 1.2020939373145192, + "learning_rate": 1.8801631457599173e-05, + "loss": 0.6506124138832092, + "step": 1366 + }, + { + "epoch": 0.39976604766778767, + "grad_norm": 1.1981976421950613, + "learning_rate": 1.8799335532366547e-05, + "loss": 0.5577528476715088, + "step": 1367 + }, + { + "epoch": 0.40005848808305305, + "grad_norm": 1.4216027512167424, + "learning_rate": 1.879703755034274e-05, + "loss": 0.675471305847168, + "step": 1368 + }, + { + "epoch": 0.4003509284983185, + "grad_norm": 1.2857175045016282, + "learning_rate": 1.879473751206489e-05, + "loss": 0.5826357007026672, + "step": 1369 + }, + { + "epoch": 0.4006433689135839, + "grad_norm": 1.31090591162355, + "learning_rate": 1.8792435418070623e-05, + "loss": 0.5146772265434265, + "step": 1370 + }, + { + "epoch": 0.40093580932884926, + "grad_norm": 1.224194477069696, + "learning_rate": 1.879013126889804e-05, + "loss": 0.6049208641052246, + "step": 1371 + }, + { + "epoch": 0.40122824974411464, + "grad_norm": 1.4047657351006413, + "learning_rate": 1.878782506508571e-05, + "loss": 0.7058207392692566, + "step": 1372 + }, + { + "epoch": 0.40152069015938, + "grad_norm": 1.3782924104285919, + "learning_rate": 1.8785516807172704e-05, + "loss": 0.6281940937042236, + "step": 1373 + }, + { + "epoch": 0.4018131305746454, + "grad_norm": 1.5710053658084482, + "learning_rate": 1.878320649569856e-05, + "loss": 0.6318703889846802, + "step": 1374 + }, + { + "epoch": 0.4021055709899108, + "grad_norm": 1.369183615763356, + "learning_rate": 1.87808941312033e-05, + "loss": 0.6595311164855957, + "step": 1375 + }, + { + "epoch": 0.40239801140517617, + "grad_norm": 1.2850685362090555, + "learning_rate": 1.8778579714227433e-05, + "loss": 0.6964930295944214, + "step": 1376 + }, + { + "epoch": 0.4026904518204416, + "grad_norm": 1.338060741631637, + "learning_rate": 1.8776263245311926e-05, + "loss": 0.6093966364860535, + "step": 1377 + }, + { + "epoch": 0.402982892235707, + "grad_norm": 
1.2438362189425571, + "learning_rate": 1.8773944724998248e-05, + "loss": 0.5337893962860107, + "step": 1378 + }, + { + "epoch": 0.4032753326509724, + "grad_norm": 1.3665212711176857, + "learning_rate": 1.8771624153828336e-05, + "loss": 0.5899128317832947, + "step": 1379 + }, + { + "epoch": 0.40356777306623776, + "grad_norm": 1.41983053503157, + "learning_rate": 1.876930153234461e-05, + "loss": 0.7054699659347534, + "step": 1380 + }, + { + "epoch": 0.40386021348150314, + "grad_norm": 1.4240217447880834, + "learning_rate": 1.876697686108997e-05, + "loss": 0.6910602450370789, + "step": 1381 + }, + { + "epoch": 0.4041526538967685, + "grad_norm": 1.383183489617276, + "learning_rate": 1.876465014060779e-05, + "loss": 0.605659008026123, + "step": 1382 + }, + { + "epoch": 0.4044450943120339, + "grad_norm": 1.4798727853043008, + "learning_rate": 1.8762321371441934e-05, + "loss": 0.7159937620162964, + "step": 1383 + }, + { + "epoch": 0.4047375347272993, + "grad_norm": 1.394589338486404, + "learning_rate": 1.8759990554136733e-05, + "loss": 0.7568333148956299, + "step": 1384 + }, + { + "epoch": 0.4050299751425647, + "grad_norm": 1.613153659624872, + "learning_rate": 1.8757657689236998e-05, + "loss": 0.7117356657981873, + "step": 1385 + }, + { + "epoch": 0.4053224155578301, + "grad_norm": 1.4078248670298101, + "learning_rate": 1.8755322777288027e-05, + "loss": 0.6429109573364258, + "step": 1386 + }, + { + "epoch": 0.4056148559730955, + "grad_norm": 1.571804966063755, + "learning_rate": 1.875298581883559e-05, + "loss": 0.643811821937561, + "step": 1387 + }, + { + "epoch": 0.40590729638836087, + "grad_norm": 1.439883937827134, + "learning_rate": 1.875064681442594e-05, + "loss": 0.7143295407295227, + "step": 1388 + }, + { + "epoch": 0.40619973680362625, + "grad_norm": 1.5655377581350667, + "learning_rate": 1.8748305764605798e-05, + "loss": 0.732312023639679, + "step": 1389 + }, + { + "epoch": 0.40649217721889164, + "grad_norm": 1.2430462426936875, + "learning_rate": 
1.8745962669922375e-05, + "loss": 0.6878848075866699, + "step": 1390 + }, + { + "epoch": 0.406784617634157, + "grad_norm": 1.40828278888595, + "learning_rate": 1.8743617530923356e-05, + "loss": 0.6716262698173523, + "step": 1391 + }, + { + "epoch": 0.40707705804942246, + "grad_norm": 1.346254534859124, + "learning_rate": 1.87412703481569e-05, + "loss": 0.4990834593772888, + "step": 1392 + }, + { + "epoch": 0.40736949846468784, + "grad_norm": 1.4289189141042684, + "learning_rate": 1.8738921122171647e-05, + "loss": 0.6541857719421387, + "step": 1393 + }, + { + "epoch": 0.4076619388799532, + "grad_norm": 1.4587922420879296, + "learning_rate": 1.8736569853516715e-05, + "loss": 0.6310811042785645, + "step": 1394 + }, + { + "epoch": 0.4079543792952186, + "grad_norm": 1.5328348699666439, + "learning_rate": 1.8734216542741702e-05, + "loss": 0.6335423588752747, + "step": 1395 + }, + { + "epoch": 0.408246819710484, + "grad_norm": 1.4540701020587141, + "learning_rate": 1.873186119039667e-05, + "loss": 0.6315034627914429, + "step": 1396 + }, + { + "epoch": 0.40853926012574937, + "grad_norm": 1.2591166704430221, + "learning_rate": 1.872950379703218e-05, + "loss": 0.6822362542152405, + "step": 1397 + }, + { + "epoch": 0.40883170054101475, + "grad_norm": 1.3128671260601936, + "learning_rate": 1.8727144363199257e-05, + "loss": 0.699965238571167, + "step": 1398 + }, + { + "epoch": 0.40912414095628014, + "grad_norm": 1.376512615463435, + "learning_rate": 1.8724782889449397e-05, + "loss": 0.6769841313362122, + "step": 1399 + }, + { + "epoch": 0.4094165813715456, + "grad_norm": 1.242831468646962, + "learning_rate": 1.8722419376334584e-05, + "loss": 0.5219473838806152, + "step": 1400 + }, + { + "epoch": 0.40970902178681096, + "grad_norm": 1.1119386554431685, + "learning_rate": 1.872005382440728e-05, + "loss": 0.6091574430465698, + "step": 1401 + }, + { + "epoch": 0.41000146220207634, + "grad_norm": 1.3005524040148213, + "learning_rate": 1.8717686234220406e-05, + "loss": 
0.7589390277862549, + "step": 1402 + }, + { + "epoch": 0.4102939026173417, + "grad_norm": 1.2931582987016021, + "learning_rate": 1.8715316606327384e-05, + "loss": 0.7042895555496216, + "step": 1403 + }, + { + "epoch": 0.4105863430326071, + "grad_norm": 1.2509877991876854, + "learning_rate": 1.8712944941282095e-05, + "loss": 0.6490949988365173, + "step": 1404 + }, + { + "epoch": 0.4108787834478725, + "grad_norm": 1.3726951776657805, + "learning_rate": 1.87105712396389e-05, + "loss": 0.6614132523536682, + "step": 1405 + }, + { + "epoch": 0.41117122386313787, + "grad_norm": 1.3416970895813871, + "learning_rate": 1.8708195501952637e-05, + "loss": 0.666157603263855, + "step": 1406 + }, + { + "epoch": 0.41146366427840325, + "grad_norm": 1.5455429688837699, + "learning_rate": 1.8705817728778626e-05, + "loss": 0.7347884178161621, + "step": 1407 + }, + { + "epoch": 0.4117561046936687, + "grad_norm": 1.6323767587093516, + "learning_rate": 1.8703437920672652e-05, + "loss": 0.8129836320877075, + "step": 1408 + }, + { + "epoch": 0.4120485451089341, + "grad_norm": 1.2690047775005027, + "learning_rate": 1.870105607819098e-05, + "loss": 0.645210862159729, + "step": 1409 + }, + { + "epoch": 0.41234098552419945, + "grad_norm": 1.367879279910813, + "learning_rate": 1.8698672201890355e-05, + "loss": 0.6716916561126709, + "step": 1410 + }, + { + "epoch": 0.41263342593946484, + "grad_norm": 1.66933384894401, + "learning_rate": 1.869628629232799e-05, + "loss": 0.8190855383872986, + "step": 1411 + }, + { + "epoch": 0.4129258663547302, + "grad_norm": 1.3900061091611966, + "learning_rate": 1.8693898350061582e-05, + "loss": 0.7618075609207153, + "step": 1412 + }, + { + "epoch": 0.4132183067699956, + "grad_norm": 1.3486290329442485, + "learning_rate": 1.869150837564929e-05, + "loss": 0.719980001449585, + "step": 1413 + }, + { + "epoch": 0.413510747185261, + "grad_norm": 1.2278158990840933, + "learning_rate": 1.8689116369649763e-05, + "loss": 0.6601548194885254, + "step": 1414 + }, + { + 
"epoch": 0.41380318760052637, + "grad_norm": 1.4157736896401232, + "learning_rate": 1.8686722332622112e-05, + "loss": 0.5991787314414978, + "step": 1415 + }, + { + "epoch": 0.4140956280157918, + "grad_norm": 1.2916299361998576, + "learning_rate": 1.8684326265125935e-05, + "loss": 0.6089641451835632, + "step": 1416 + }, + { + "epoch": 0.4143880684310572, + "grad_norm": 1.4857622706167455, + "learning_rate": 1.8681928167721297e-05, + "loss": 0.8143327236175537, + "step": 1417 + }, + { + "epoch": 0.41468050884632257, + "grad_norm": 1.1137129272750816, + "learning_rate": 1.8679528040968733e-05, + "loss": 0.6127045154571533, + "step": 1418 + }, + { + "epoch": 0.41497294926158795, + "grad_norm": 1.2684856043432204, + "learning_rate": 1.8677125885429262e-05, + "loss": 0.659069299697876, + "step": 1419 + }, + { + "epoch": 0.41526538967685334, + "grad_norm": 1.3122733176612695, + "learning_rate": 1.8674721701664377e-05, + "loss": 0.7277505397796631, + "step": 1420 + }, + { + "epoch": 0.4155578300921187, + "grad_norm": 1.2107555045955465, + "learning_rate": 1.8672315490236034e-05, + "loss": 0.6128710508346558, + "step": 1421 + }, + { + "epoch": 0.4158502705073841, + "grad_norm": 1.7155790773588848, + "learning_rate": 1.866990725170667e-05, + "loss": 0.7439340949058533, + "step": 1422 + }, + { + "epoch": 0.4161427109226495, + "grad_norm": 1.1423179387443951, + "learning_rate": 1.8667496986639206e-05, + "loss": 0.5855459570884705, + "step": 1423 + }, + { + "epoch": 0.4164351513379149, + "grad_norm": 1.3335637335552337, + "learning_rate": 1.866508469559702e-05, + "loss": 0.6865170001983643, + "step": 1424 + }, + { + "epoch": 0.4167275917531803, + "grad_norm": 1.3191415692644766, + "learning_rate": 1.866267037914397e-05, + "loss": 0.6648446917533875, + "step": 1425 + }, + { + "epoch": 0.4170200321684457, + "grad_norm": 1.5198580088053322, + "learning_rate": 1.866025403784439e-05, + "loss": 0.6919275522232056, + "step": 1426 + }, + { + "epoch": 0.41731247258371107, + "grad_norm": 
1.3309988770277923, + "learning_rate": 1.865783567226308e-05, + "loss": 0.7270313501358032, + "step": 1427 + }, + { + "epoch": 0.41760491299897645, + "grad_norm": 1.2814450794742573, + "learning_rate": 1.8655415282965327e-05, + "loss": 0.5938387513160706, + "step": 1428 + }, + { + "epoch": 0.41789735341424183, + "grad_norm": 1.378984312222445, + "learning_rate": 1.8652992870516872e-05, + "loss": 0.6517149209976196, + "step": 1429 + }, + { + "epoch": 0.4181897938295072, + "grad_norm": 1.1862439123900306, + "learning_rate": 1.8650568435483948e-05, + "loss": 0.6688356399536133, + "step": 1430 + }, + { + "epoch": 0.41848223424477266, + "grad_norm": 1.153419964025717, + "learning_rate": 1.864814197843325e-05, + "loss": 0.5300855040550232, + "step": 1431 + }, + { + "epoch": 0.41877467466003804, + "grad_norm": 1.270766161064103, + "learning_rate": 1.8645713499931943e-05, + "loss": 0.6404704451560974, + "step": 1432 + }, + { + "epoch": 0.4190671150753034, + "grad_norm": 1.5430855845367462, + "learning_rate": 1.8643283000547673e-05, + "loss": 0.6758813858032227, + "step": 1433 + }, + { + "epoch": 0.4193595554905688, + "grad_norm": 1.3783205387944717, + "learning_rate": 1.8640850480848552e-05, + "loss": 0.6328250169754028, + "step": 1434 + }, + { + "epoch": 0.4196519959058342, + "grad_norm": 1.26459826517306, + "learning_rate": 1.863841594140317e-05, + "loss": 0.6747157573699951, + "step": 1435 + }, + { + "epoch": 0.41994443632109957, + "grad_norm": 1.3504769695047412, + "learning_rate": 1.8635979382780584e-05, + "loss": 0.5314475893974304, + "step": 1436 + }, + { + "epoch": 0.42023687673636495, + "grad_norm": 1.345420561697831, + "learning_rate": 1.863354080555033e-05, + "loss": 0.478320837020874, + "step": 1437 + }, + { + "epoch": 0.42052931715163033, + "grad_norm": 1.7039787917499718, + "learning_rate": 1.86311002102824e-05, + "loss": 0.7389972805976868, + "step": 1438 + }, + { + "epoch": 0.42082175756689577, + "grad_norm": 1.5752835852867741, + "learning_rate": 
1.8628657597547273e-05, + "loss": 0.5449938178062439, + "step": 1439 + }, + { + "epoch": 0.42111419798216115, + "grad_norm": 1.417913338852298, + "learning_rate": 1.8626212967915897e-05, + "loss": 0.6752811670303345, + "step": 1440 + }, + { + "epoch": 0.42140663839742654, + "grad_norm": 1.3268814915367182, + "learning_rate": 1.862376632195969e-05, + "loss": 0.7750412821769714, + "step": 1441 + }, + { + "epoch": 0.4216990788126919, + "grad_norm": 1.4150998717703018, + "learning_rate": 1.8621317660250535e-05, + "loss": 0.5967680215835571, + "step": 1442 + }, + { + "epoch": 0.4219915192279573, + "grad_norm": 1.2836272802739963, + "learning_rate": 1.86188669833608e-05, + "loss": 0.6781327724456787, + "step": 1443 + }, + { + "epoch": 0.4222839596432227, + "grad_norm": 1.421988457915262, + "learning_rate": 1.8616414291863307e-05, + "loss": 0.7539681196212769, + "step": 1444 + }, + { + "epoch": 0.42257640005848807, + "grad_norm": 1.5265432564271315, + "learning_rate": 1.8613959586331364e-05, + "loss": 0.6976957321166992, + "step": 1445 + }, + { + "epoch": 0.42286884047375345, + "grad_norm": 1.3365892238255053, + "learning_rate": 1.861150286733874e-05, + "loss": 0.6616528034210205, + "step": 1446 + }, + { + "epoch": 0.4231612808890189, + "grad_norm": 1.4482994306877846, + "learning_rate": 1.860904413545968e-05, + "loss": 0.6407957077026367, + "step": 1447 + }, + { + "epoch": 0.42345372130428427, + "grad_norm": 1.4193133822561126, + "learning_rate": 1.86065833912689e-05, + "loss": 0.5918550491333008, + "step": 1448 + }, + { + "epoch": 0.42374616171954965, + "grad_norm": 1.421765780188314, + "learning_rate": 1.8604120635341574e-05, + "loss": 0.6142056584358215, + "step": 1449 + }, + { + "epoch": 0.42403860213481503, + "grad_norm": 1.4371201128611453, + "learning_rate": 1.8601655868253368e-05, + "loss": 0.6359597444534302, + "step": 1450 + }, + { + "epoch": 0.4243310425500804, + "grad_norm": 1.2914617625794835, + "learning_rate": 1.8599189090580402e-05, + "loss": 
0.7149467468261719, + "step": 1451 + }, + { + "epoch": 0.4246234829653458, + "grad_norm": 1.2900964447275098, + "learning_rate": 1.8596720302899272e-05, + "loss": 0.6015822887420654, + "step": 1452 + }, + { + "epoch": 0.4249159233806112, + "grad_norm": 1.1866564154864978, + "learning_rate": 1.8594249505787035e-05, + "loss": 0.6389881372451782, + "step": 1453 + }, + { + "epoch": 0.42520836379587656, + "grad_norm": 1.381321058965008, + "learning_rate": 1.8591776699821235e-05, + "loss": 0.7479783892631531, + "step": 1454 + }, + { + "epoch": 0.425500804211142, + "grad_norm": 1.2271977568055246, + "learning_rate": 1.8589301885579866e-05, + "loss": 0.6574498414993286, + "step": 1455 + }, + { + "epoch": 0.4257932446264074, + "grad_norm": 1.3187836865578064, + "learning_rate": 1.858682506364141e-05, + "loss": 0.6314088702201843, + "step": 1456 + }, + { + "epoch": 0.42608568504167277, + "grad_norm": 1.4747450600155867, + "learning_rate": 1.85843462345848e-05, + "loss": 0.605385959148407, + "step": 1457 + }, + { + "epoch": 0.42637812545693815, + "grad_norm": 1.280849948973879, + "learning_rate": 1.8581865398989452e-05, + "loss": 0.6355551481246948, + "step": 1458 + }, + { + "epoch": 0.42667056587220353, + "grad_norm": 1.3012840164028812, + "learning_rate": 1.8579382557435247e-05, + "loss": 0.6303017139434814, + "step": 1459 + }, + { + "epoch": 0.4269630062874689, + "grad_norm": 1.2629380280411955, + "learning_rate": 1.8576897710502532e-05, + "loss": 0.5916526317596436, + "step": 1460 + }, + { + "epoch": 0.4272554467027343, + "grad_norm": 1.2467440963341316, + "learning_rate": 1.8574410858772126e-05, + "loss": 0.5709279179573059, + "step": 1461 + }, + { + "epoch": 0.4275478871179997, + "grad_norm": 1.2909430743502928, + "learning_rate": 1.8571922002825317e-05, + "loss": 0.571231484413147, + "step": 1462 + }, + { + "epoch": 0.4278403275332651, + "grad_norm": 1.310017395907512, + "learning_rate": 1.8569431143243856e-05, + "loss": 0.6352202892303467, + "step": 1463 + }, + { + 
"epoch": 0.4281327679485305, + "grad_norm": 1.316165374470179, + "learning_rate": 1.8566938280609965e-05, + "loss": 0.553265392780304, + "step": 1464 + }, + { + "epoch": 0.4284252083637959, + "grad_norm": 1.1127868543655046, + "learning_rate": 1.8564443415506343e-05, + "loss": 0.4913727045059204, + "step": 1465 + }, + { + "epoch": 0.42871764877906127, + "grad_norm": 1.4457215110099157, + "learning_rate": 1.8561946548516143e-05, + "loss": 0.542539119720459, + "step": 1466 + }, + { + "epoch": 0.42901008919432665, + "grad_norm": 1.5261496853017646, + "learning_rate": 1.8559447680222994e-05, + "loss": 0.719292163848877, + "step": 1467 + }, + { + "epoch": 0.42930252960959203, + "grad_norm": 1.4842625427656275, + "learning_rate": 1.8556946811210993e-05, + "loss": 0.8443170785903931, + "step": 1468 + }, + { + "epoch": 0.4295949700248574, + "grad_norm": 1.4024545882927506, + "learning_rate": 1.8554443942064705e-05, + "loss": 0.7899821996688843, + "step": 1469 + }, + { + "epoch": 0.42988741044012285, + "grad_norm": 1.3637198474337424, + "learning_rate": 1.8551939073369155e-05, + "loss": 0.617426872253418, + "step": 1470 + }, + { + "epoch": 0.43017985085538823, + "grad_norm": 1.284473833943433, + "learning_rate": 1.8549432205709842e-05, + "loss": 0.5573505163192749, + "step": 1471 + }, + { + "epoch": 0.4304722912706536, + "grad_norm": 1.2050796372555104, + "learning_rate": 1.8546923339672734e-05, + "loss": 0.5571975111961365, + "step": 1472 + }, + { + "epoch": 0.430764731685919, + "grad_norm": 1.2452948917501594, + "learning_rate": 1.854441247584426e-05, + "loss": 0.6411981582641602, + "step": 1473 + }, + { + "epoch": 0.4310571721011844, + "grad_norm": 1.4342124934143161, + "learning_rate": 1.8541899614811323e-05, + "loss": 0.4766804277896881, + "step": 1474 + }, + { + "epoch": 0.43134961251644977, + "grad_norm": 1.5114551227786939, + "learning_rate": 1.8539384757161285e-05, + "loss": 0.7479405403137207, + "step": 1475 + }, + { + "epoch": 0.43164205293171515, + "grad_norm": 
1.3476436799817348, + "learning_rate": 1.8536867903481983e-05, + "loss": 0.6848211288452148, + "step": 1476 + }, + { + "epoch": 0.43193449334698053, + "grad_norm": 1.2973665530504777, + "learning_rate": 1.8534349054361708e-05, + "loss": 0.7413634061813354, + "step": 1477 + }, + { + "epoch": 0.43222693376224597, + "grad_norm": 1.1870657052305638, + "learning_rate": 1.8531828210389236e-05, + "loss": 0.5880843997001648, + "step": 1478 + }, + { + "epoch": 0.43251937417751135, + "grad_norm": 1.16075786792099, + "learning_rate": 1.852930537215379e-05, + "loss": 0.5885627269744873, + "step": 1479 + }, + { + "epoch": 0.43281181459277673, + "grad_norm": 1.3270242768891243, + "learning_rate": 1.8526780540245077e-05, + "loss": 0.706636905670166, + "step": 1480 + }, + { + "epoch": 0.4331042550080421, + "grad_norm": 1.3793959384028218, + "learning_rate": 1.8524253715253255e-05, + "loss": 0.6521843075752258, + "step": 1481 + }, + { + "epoch": 0.4333966954233075, + "grad_norm": 1.3825746336646279, + "learning_rate": 1.8521724897768955e-05, + "loss": 0.6231021881103516, + "step": 1482 + }, + { + "epoch": 0.4336891358385729, + "grad_norm": 1.4460679872410762, + "learning_rate": 1.851919408838327e-05, + "loss": 0.6859451532363892, + "step": 1483 + }, + { + "epoch": 0.43398157625383826, + "grad_norm": 1.2507527028404273, + "learning_rate": 1.851666128768777e-05, + "loss": 0.7948323488235474, + "step": 1484 + }, + { + "epoch": 0.43427401666910365, + "grad_norm": 1.3631419376990976, + "learning_rate": 1.8514126496274473e-05, + "loss": 0.7815203070640564, + "step": 1485 + }, + { + "epoch": 0.4345664570843691, + "grad_norm": 1.2904619284943133, + "learning_rate": 1.8511589714735875e-05, + "loss": 0.6941452622413635, + "step": 1486 + }, + { + "epoch": 0.43485889749963447, + "grad_norm": 1.41567858231915, + "learning_rate": 1.850905094366493e-05, + "loss": 0.5500549674034119, + "step": 1487 + }, + { + "epoch": 0.43515133791489985, + "grad_norm": 1.2918667262960315, + "learning_rate": 
1.8506510183655066e-05, + "loss": 0.6616400480270386, + "step": 1488 + }, + { + "epoch": 0.43544377833016523, + "grad_norm": 1.2491627898498192, + "learning_rate": 1.8503967435300166e-05, + "loss": 0.6920043230056763, + "step": 1489 + }, + { + "epoch": 0.4357362187454306, + "grad_norm": 1.215912086863742, + "learning_rate": 1.8501422699194584e-05, + "loss": 0.6080813407897949, + "step": 1490 + }, + { + "epoch": 0.436028659160696, + "grad_norm": 1.2215283867587456, + "learning_rate": 1.8498875975933135e-05, + "loss": 0.576184868812561, + "step": 1491 + }, + { + "epoch": 0.4363210995759614, + "grad_norm": 1.3544983329172053, + "learning_rate": 1.84963272661111e-05, + "loss": 0.6647310256958008, + "step": 1492 + }, + { + "epoch": 0.43661353999122676, + "grad_norm": 1.5126248587795905, + "learning_rate": 1.8493776570324224e-05, + "loss": 0.6738306283950806, + "step": 1493 + }, + { + "epoch": 0.4369059804064922, + "grad_norm": 1.306695091605799, + "learning_rate": 1.849122388916872e-05, + "loss": 0.681056022644043, + "step": 1494 + }, + { + "epoch": 0.4371984208217576, + "grad_norm": 1.2802492616875505, + "learning_rate": 1.848866922324126e-05, + "loss": 0.7844547033309937, + "step": 1495 + }, + { + "epoch": 0.43749086123702297, + "grad_norm": 1.278338668380481, + "learning_rate": 1.8486112573138977e-05, + "loss": 0.6478928327560425, + "step": 1496 + }, + { + "epoch": 0.43778330165228835, + "grad_norm": 1.1565510309984284, + "learning_rate": 1.8483553939459477e-05, + "loss": 0.6035341024398804, + "step": 1497 + }, + { + "epoch": 0.43807574206755373, + "grad_norm": 1.5407821231530743, + "learning_rate": 1.8480993322800826e-05, + "loss": 0.6664912700653076, + "step": 1498 + }, + { + "epoch": 0.4383681824828191, + "grad_norm": 1.2757017491830842, + "learning_rate": 1.847843072376155e-05, + "loss": 0.7171953916549683, + "step": 1499 + }, + { + "epoch": 0.4386606228980845, + "grad_norm": 1.6930649567828897, + "learning_rate": 1.8475866142940646e-05, + "loss": 
0.8400344848632812, + "step": 1500 + }, + { + "epoch": 0.4389530633133499, + "grad_norm": 1.4411024776302432, + "learning_rate": 1.8473299580937563e-05, + "loss": 0.5119056701660156, + "step": 1501 + }, + { + "epoch": 0.4392455037286153, + "grad_norm": 1.2781692932924433, + "learning_rate": 1.847073103835222e-05, + "loss": 0.5864866375923157, + "step": 1502 + }, + { + "epoch": 0.4395379441438807, + "grad_norm": 1.1391351003013295, + "learning_rate": 1.8468160515785e-05, + "loss": 0.6389576196670532, + "step": 1503 + }, + { + "epoch": 0.4398303845591461, + "grad_norm": 1.3447539998849671, + "learning_rate": 1.846558801383675e-05, + "loss": 0.6745110750198364, + "step": 1504 + }, + { + "epoch": 0.44012282497441146, + "grad_norm": 1.4359844129069297, + "learning_rate": 1.846301353310877e-05, + "loss": 0.6207559704780579, + "step": 1505 + }, + { + "epoch": 0.44041526538967685, + "grad_norm": 1.4143769366285628, + "learning_rate": 1.8460437074202832e-05, + "loss": 0.6818139553070068, + "step": 1506 + }, + { + "epoch": 0.44070770580494223, + "grad_norm": 1.4877202307925406, + "learning_rate": 1.845785863772117e-05, + "loss": 0.652062714099884, + "step": 1507 + }, + { + "epoch": 0.4410001462202076, + "grad_norm": 1.340284980688535, + "learning_rate": 1.8455278224266476e-05, + "loss": 0.6842166185379028, + "step": 1508 + }, + { + "epoch": 0.44129258663547305, + "grad_norm": 1.3899905625699573, + "learning_rate": 1.8452695834441904e-05, + "loss": 0.6459342837333679, + "step": 1509 + }, + { + "epoch": 0.44158502705073843, + "grad_norm": 1.3677235686172902, + "learning_rate": 1.8450111468851078e-05, + "loss": 0.6036739349365234, + "step": 1510 + }, + { + "epoch": 0.4418774674660038, + "grad_norm": 1.401326082704981, + "learning_rate": 1.844752512809807e-05, + "loss": 0.7530199289321899, + "step": 1511 + }, + { + "epoch": 0.4421699078812692, + "grad_norm": 1.249585374389202, + "learning_rate": 1.8444936812787428e-05, + "loss": 0.6098290085792542, + "step": 1512 + }, + { + 
"epoch": 0.4424623482965346, + "grad_norm": 1.6252323705163014, + "learning_rate": 1.844234652352415e-05, + "loss": 0.7142464518547058, + "step": 1513 + }, + { + "epoch": 0.44275478871179996, + "grad_norm": 1.3215155589821708, + "learning_rate": 1.8439754260913703e-05, + "loss": 0.4895970821380615, + "step": 1514 + }, + { + "epoch": 0.44304722912706535, + "grad_norm": 1.2855871920553614, + "learning_rate": 1.8437160025562012e-05, + "loss": 0.6166520118713379, + "step": 1515 + }, + { + "epoch": 0.4433396695423307, + "grad_norm": 1.3621423468696194, + "learning_rate": 1.8434563818075462e-05, + "loss": 0.6020585894584656, + "step": 1516 + }, + { + "epoch": 0.44363210995759617, + "grad_norm": 1.3215872914676274, + "learning_rate": 1.8431965639060904e-05, + "loss": 0.6879030466079712, + "step": 1517 + }, + { + "epoch": 0.44392455037286155, + "grad_norm": 1.2000763930073624, + "learning_rate": 1.8429365489125644e-05, + "loss": 0.5753897428512573, + "step": 1518 + }, + { + "epoch": 0.44421699078812693, + "grad_norm": 1.2916902596192155, + "learning_rate": 1.8426763368877455e-05, + "loss": 0.5165301561355591, + "step": 1519 + }, + { + "epoch": 0.4445094312033923, + "grad_norm": 1.630208225804633, + "learning_rate": 1.842415927892456e-05, + "loss": 0.6377310752868652, + "step": 1520 + }, + { + "epoch": 0.4448018716186577, + "grad_norm": 1.4221002668397775, + "learning_rate": 1.842155321987566e-05, + "loss": 0.7429912090301514, + "step": 1521 + }, + { + "epoch": 0.4450943120339231, + "grad_norm": 1.5079395076396265, + "learning_rate": 1.8418945192339892e-05, + "loss": 0.6177542209625244, + "step": 1522 + }, + { + "epoch": 0.44538675244918846, + "grad_norm": 1.2784904022569494, + "learning_rate": 1.8416335196926877e-05, + "loss": 0.662541389465332, + "step": 1523 + }, + { + "epoch": 0.44567919286445384, + "grad_norm": 1.2782173083325044, + "learning_rate": 1.841372323424668e-05, + "loss": 0.6026759743690491, + "step": 1524 + }, + { + "epoch": 0.4459716332797193, + 
"grad_norm": 1.5759742604234355, + "learning_rate": 1.8411109304909837e-05, + "loss": 0.7902384400367737, + "step": 1525 + }, + { + "epoch": 0.44626407369498466, + "grad_norm": 1.4904175669631523, + "learning_rate": 1.840849340952733e-05, + "loss": 0.6588590145111084, + "step": 1526 + }, + { + "epoch": 0.44655651411025005, + "grad_norm": 1.1682358413615135, + "learning_rate": 1.8405875548710614e-05, + "loss": 0.49133825302124023, + "step": 1527 + }, + { + "epoch": 0.44684895452551543, + "grad_norm": 1.4464174570347765, + "learning_rate": 1.8403255723071597e-05, + "loss": 0.6644654273986816, + "step": 1528 + }, + { + "epoch": 0.4471413949407808, + "grad_norm": 1.2325053536943291, + "learning_rate": 1.8400633933222647e-05, + "loss": 0.6257454752922058, + "step": 1529 + }, + { + "epoch": 0.4474338353560462, + "grad_norm": 1.4100106920950097, + "learning_rate": 1.8398010179776597e-05, + "loss": 0.6671919226646423, + "step": 1530 + }, + { + "epoch": 0.4477262757713116, + "grad_norm": 1.1625081058782702, + "learning_rate": 1.839538446334672e-05, + "loss": 0.6001447439193726, + "step": 1531 + }, + { + "epoch": 0.44801871618657696, + "grad_norm": 1.6509081383772402, + "learning_rate": 1.8392756784546775e-05, + "loss": 0.8103213310241699, + "step": 1532 + }, + { + "epoch": 0.4483111566018424, + "grad_norm": 1.1675484766628168, + "learning_rate": 1.839012714399096e-05, + "loss": 0.7010835409164429, + "step": 1533 + }, + { + "epoch": 0.4486035970171078, + "grad_norm": 1.0773967688725017, + "learning_rate": 1.8387495542293935e-05, + "loss": 0.5709215402603149, + "step": 1534 + }, + { + "epoch": 0.44889603743237316, + "grad_norm": 1.3558935245332375, + "learning_rate": 1.8384861980070826e-05, + "loss": 0.6410949230194092, + "step": 1535 + }, + { + "epoch": 0.44918847784763855, + "grad_norm": 1.358963272892771, + "learning_rate": 1.838222645793721e-05, + "loss": 0.8036839962005615, + "step": 1536 + }, + { + "epoch": 0.44948091826290393, + "grad_norm": 1.1470889977158967, + 
"learning_rate": 1.8379588976509123e-05, + "loss": 0.49213099479675293, + "step": 1537 + }, + { + "epoch": 0.4497733586781693, + "grad_norm": 1.5829843161961048, + "learning_rate": 1.8376949536403063e-05, + "loss": 0.7111018896102905, + "step": 1538 + }, + { + "epoch": 0.4500657990934347, + "grad_norm": 1.313995907545699, + "learning_rate": 1.837430813823598e-05, + "loss": 0.8506999015808105, + "step": 1539 + }, + { + "epoch": 0.4503582395087001, + "grad_norm": 1.2175571229137518, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.7369798421859741, + "step": 1540 + }, + { + "epoch": 0.4506506799239655, + "grad_norm": 1.3435168892785054, + "learning_rate": 1.8369019470188855e-05, + "loss": 0.5982831120491028, + "step": 1541 + }, + { + "epoch": 0.4509431203392309, + "grad_norm": 1.2303590063922416, + "learning_rate": 1.8366372201545002e-05, + "loss": 0.6129144430160522, + "step": 1542 + }, + { + "epoch": 0.4512355607544963, + "grad_norm": 1.5191607059455674, + "learning_rate": 1.8363722977312512e-05, + "loss": 0.7142921686172485, + "step": 1543 + }, + { + "epoch": 0.45152800116976166, + "grad_norm": 1.1545455601160404, + "learning_rate": 1.8361071798110635e-05, + "loss": 0.515651524066925, + "step": 1544 + }, + { + "epoch": 0.45182044158502704, + "grad_norm": 1.3144713138844157, + "learning_rate": 1.8358418664559058e-05, + "loss": 0.5544168949127197, + "step": 1545 + }, + { + "epoch": 0.4521128820002924, + "grad_norm": 1.2540637765053078, + "learning_rate": 1.8355763577277938e-05, + "loss": 0.6801918745040894, + "step": 1546 + }, + { + "epoch": 0.4524053224155578, + "grad_norm": 1.3664850716479517, + "learning_rate": 1.835310653688789e-05, + "loss": 0.683785080909729, + "step": 1547 + }, + { + "epoch": 0.45269776283082325, + "grad_norm": 1.363558169999723, + "learning_rate": 1.835044754400997e-05, + "loss": 0.5689892172813416, + "step": 1548 + }, + { + "epoch": 0.45299020324608863, + "grad_norm": 1.1621305276584806, + "learning_rate": 1.8347786599265713e-05, + 
"loss": 0.5260726809501648, + "step": 1549 + }, + { + "epoch": 0.453282643661354, + "grad_norm": 1.2201116845769602, + "learning_rate": 1.834512370327709e-05, + "loss": 0.6792432069778442, + "step": 1550 + }, + { + "epoch": 0.4535750840766194, + "grad_norm": 1.198643016289117, + "learning_rate": 1.8342458856666545e-05, + "loss": 0.6336524486541748, + "step": 1551 + }, + { + "epoch": 0.4538675244918848, + "grad_norm": 1.3472994421503108, + "learning_rate": 1.8339792060056965e-05, + "loss": 0.5929614901542664, + "step": 1552 + }, + { + "epoch": 0.45415996490715016, + "grad_norm": 1.2599505430948363, + "learning_rate": 1.8337123314071696e-05, + "loss": 0.6683382391929626, + "step": 1553 + }, + { + "epoch": 0.45445240532241554, + "grad_norm": 1.2860246628200298, + "learning_rate": 1.833445261933454e-05, + "loss": 0.6256811618804932, + "step": 1554 + }, + { + "epoch": 0.4547448457376809, + "grad_norm": 1.3499468606960694, + "learning_rate": 1.8331779976469765e-05, + "loss": 0.5974653959274292, + "step": 1555 + }, + { + "epoch": 0.45503728615294636, + "grad_norm": 1.2078321854850618, + "learning_rate": 1.8329105386102074e-05, + "loss": 0.5471535325050354, + "step": 1556 + }, + { + "epoch": 0.45532972656821175, + "grad_norm": 1.284169615938693, + "learning_rate": 1.832642884885664e-05, + "loss": 0.5751267075538635, + "step": 1557 + }, + { + "epoch": 0.45562216698347713, + "grad_norm": 1.3234326952626145, + "learning_rate": 1.8323750365359092e-05, + "loss": 0.7003380060195923, + "step": 1558 + }, + { + "epoch": 0.4559146073987425, + "grad_norm": 1.3333099062603002, + "learning_rate": 1.8321069936235503e-05, + "loss": 0.6351351737976074, + "step": 1559 + }, + { + "epoch": 0.4562070478140079, + "grad_norm": 1.4452410048586575, + "learning_rate": 1.8318387562112407e-05, + "loss": 0.6083345413208008, + "step": 1560 + }, + { + "epoch": 0.4564994882292733, + "grad_norm": 1.230127453588353, + "learning_rate": 1.83157032436168e-05, + "loss": 0.589935302734375, + "step": 1561 + }, 
+ { + "epoch": 0.45679192864453866, + "grad_norm": 1.3491229847821233, + "learning_rate": 1.8313016981376116e-05, + "loss": 0.7648014426231384, + "step": 1562 + }, + { + "epoch": 0.45708436905980404, + "grad_norm": 1.2461686063365083, + "learning_rate": 1.831032877601826e-05, + "loss": 0.7309973239898682, + "step": 1563 + }, + { + "epoch": 0.4573768094750695, + "grad_norm": 1.4691097869713072, + "learning_rate": 1.8307638628171575e-05, + "loss": 0.7231593728065491, + "step": 1564 + }, + { + "epoch": 0.45766924989033486, + "grad_norm": 1.4770239307253334, + "learning_rate": 1.8304946538464876e-05, + "loss": 0.7321262359619141, + "step": 1565 + }, + { + "epoch": 0.45796169030560024, + "grad_norm": 1.1157038717428966, + "learning_rate": 1.830225250752742e-05, + "loss": 0.5866271257400513, + "step": 1566 + }, + { + "epoch": 0.4582541307208656, + "grad_norm": 1.4899327841327124, + "learning_rate": 1.8299556535988917e-05, + "loss": 0.7146202325820923, + "step": 1567 + }, + { + "epoch": 0.458546571136131, + "grad_norm": 1.0989226716242009, + "learning_rate": 1.8296858624479536e-05, + "loss": 0.4600168466567993, + "step": 1568 + }, + { + "epoch": 0.4588390115513964, + "grad_norm": 1.5647421342147445, + "learning_rate": 1.8294158773629896e-05, + "loss": 0.5710705518722534, + "step": 1569 + }, + { + "epoch": 0.4591314519666618, + "grad_norm": 1.4737029572986353, + "learning_rate": 1.8291456984071073e-05, + "loss": 0.7075216770172119, + "step": 1570 + }, + { + "epoch": 0.45942389238192716, + "grad_norm": 1.2087048615463696, + "learning_rate": 1.828875325643459e-05, + "loss": 0.5262739062309265, + "step": 1571 + }, + { + "epoch": 0.4597163327971926, + "grad_norm": 1.2732843462549814, + "learning_rate": 1.8286047591352436e-05, + "loss": 0.724657416343689, + "step": 1572 + }, + { + "epoch": 0.460008773212458, + "grad_norm": 1.2778614004914874, + "learning_rate": 1.8283339989457033e-05, + "loss": 0.6047587394714355, + "step": 1573 + }, + { + "epoch": 0.46030121362772336, + 
"grad_norm": 1.481028950467352, + "learning_rate": 1.828063045138127e-05, + "loss": 0.6647980213165283, + "step": 1574 + }, + { + "epoch": 0.46059365404298874, + "grad_norm": 1.3031844151965102, + "learning_rate": 1.827791897775849e-05, + "loss": 0.6081969738006592, + "step": 1575 + }, + { + "epoch": 0.4608860944582541, + "grad_norm": 1.2574668609577524, + "learning_rate": 1.827520556922248e-05, + "loss": 0.6815003156661987, + "step": 1576 + }, + { + "epoch": 0.4611785348735195, + "grad_norm": 1.25588669780601, + "learning_rate": 1.8272490226407476e-05, + "loss": 0.5571715235710144, + "step": 1577 + }, + { + "epoch": 0.4614709752887849, + "grad_norm": 1.241115553107667, + "learning_rate": 1.8269772949948185e-05, + "loss": 0.7562757730484009, + "step": 1578 + }, + { + "epoch": 0.4617634157040503, + "grad_norm": 1.3753582703744767, + "learning_rate": 1.8267053740479745e-05, + "loss": 0.6330382227897644, + "step": 1579 + }, + { + "epoch": 0.4620558561193157, + "grad_norm": 1.5331426598457012, + "learning_rate": 1.826433259863776e-05, + "loss": 0.7696597576141357, + "step": 1580 + }, + { + "epoch": 0.4623482965345811, + "grad_norm": 1.3594821877317964, + "learning_rate": 1.8261609525058275e-05, + "loss": 0.6953772306442261, + "step": 1581 + }, + { + "epoch": 0.4626407369498465, + "grad_norm": 1.3957443557298115, + "learning_rate": 1.8258884520377797e-05, + "loss": 0.5856037735939026, + "step": 1582 + }, + { + "epoch": 0.46293317736511186, + "grad_norm": 1.3245931479550002, + "learning_rate": 1.8256157585233277e-05, + "loss": 0.5988172888755798, + "step": 1583 + }, + { + "epoch": 0.46322561778037724, + "grad_norm": 1.3153037118046438, + "learning_rate": 1.8253428720262117e-05, + "loss": 0.6320241689682007, + "step": 1584 + }, + { + "epoch": 0.4635180581956426, + "grad_norm": 1.1680775814478943, + "learning_rate": 1.8250697926102182e-05, + "loss": 0.5758935213088989, + "step": 1585 + }, + { + "epoch": 0.463810498610908, + "grad_norm": 1.4295465315991271, + 
"learning_rate": 1.8247965203391763e-05, + "loss": 0.7104986906051636, + "step": 1586 + }, + { + "epoch": 0.46410293902617344, + "grad_norm": 1.4739846709331708, + "learning_rate": 1.8245230552769634e-05, + "loss": 0.6322015523910522, + "step": 1587 + }, + { + "epoch": 0.4643953794414388, + "grad_norm": 1.4263760736603013, + "learning_rate": 1.824249397487499e-05, + "loss": 0.5881235003471375, + "step": 1588 + }, + { + "epoch": 0.4646878198567042, + "grad_norm": 1.5652864190332019, + "learning_rate": 1.8239755470347497e-05, + "loss": 0.8097240924835205, + "step": 1589 + }, + { + "epoch": 0.4649802602719696, + "grad_norm": 1.4192861983980027, + "learning_rate": 1.823701503982726e-05, + "loss": 0.6538649201393127, + "step": 1590 + }, + { + "epoch": 0.465272700687235, + "grad_norm": 1.2329012857349442, + "learning_rate": 1.8234272683954842e-05, + "loss": 0.5868922472000122, + "step": 1591 + }, + { + "epoch": 0.46556514110250036, + "grad_norm": 1.3076575357637654, + "learning_rate": 1.8231528403371248e-05, + "loss": 0.6747265458106995, + "step": 1592 + }, + { + "epoch": 0.46585758151776574, + "grad_norm": 1.2961728564371904, + "learning_rate": 1.8228782198717936e-05, + "loss": 0.6519996523857117, + "step": 1593 + }, + { + "epoch": 0.4661500219330311, + "grad_norm": 1.2212124627082057, + "learning_rate": 1.822603407063682e-05, + "loss": 0.7268975973129272, + "step": 1594 + }, + { + "epoch": 0.46644246234829656, + "grad_norm": 1.1603454255193932, + "learning_rate": 1.8223284019770252e-05, + "loss": 0.6554980278015137, + "step": 1595 + }, + { + "epoch": 0.46673490276356194, + "grad_norm": 1.350233636463654, + "learning_rate": 1.8220532046761047e-05, + "loss": 0.7014105319976807, + "step": 1596 + }, + { + "epoch": 0.4670273431788273, + "grad_norm": 1.4228663397014873, + "learning_rate": 1.821777815225245e-05, + "loss": 0.5766602158546448, + "step": 1597 + }, + { + "epoch": 0.4673197835940927, + "grad_norm": 1.304159292005938, + "learning_rate": 1.8215022336888182e-05, + 
"loss": 0.5106521844863892, + "step": 1598 + }, + { + "epoch": 0.4676122240093581, + "grad_norm": 1.419250792414019, + "learning_rate": 1.821226460131239e-05, + "loss": 0.801375150680542, + "step": 1599 + }, + { + "epoch": 0.4679046644246235, + "grad_norm": 1.2845833863087142, + "learning_rate": 1.8209504946169677e-05, + "loss": 0.6189062595367432, + "step": 1600 + }, + { + "epoch": 0.46819710483988886, + "grad_norm": 1.2468379881228138, + "learning_rate": 1.8206743372105098e-05, + "loss": 0.6719359159469604, + "step": 1601 + }, + { + "epoch": 0.46848954525515424, + "grad_norm": 1.4660205035921348, + "learning_rate": 1.8203979879764153e-05, + "loss": 0.7437123656272888, + "step": 1602 + }, + { + "epoch": 0.4687819856704197, + "grad_norm": 1.3639008290802046, + "learning_rate": 1.8201214469792793e-05, + "loss": 0.7273217439651489, + "step": 1603 + }, + { + "epoch": 0.46907442608568506, + "grad_norm": 1.332814377531963, + "learning_rate": 1.8198447142837416e-05, + "loss": 0.6467087268829346, + "step": 1604 + }, + { + "epoch": 0.46936686650095044, + "grad_norm": 1.1167815102053054, + "learning_rate": 1.8195677899544866e-05, + "loss": 0.5764428973197937, + "step": 1605 + }, + { + "epoch": 0.4696593069162158, + "grad_norm": 1.4761144768835275, + "learning_rate": 1.8192906740562437e-05, + "loss": 0.5969977378845215, + "step": 1606 + }, + { + "epoch": 0.4699517473314812, + "grad_norm": 1.3424638711815577, + "learning_rate": 1.819013366653787e-05, + "loss": 0.7237746119499207, + "step": 1607 + }, + { + "epoch": 0.4702441877467466, + "grad_norm": 1.4494789457227795, + "learning_rate": 1.8187358678119355e-05, + "loss": 0.6289568543434143, + "step": 1608 + }, + { + "epoch": 0.47053662816201197, + "grad_norm": 1.1494676131886132, + "learning_rate": 1.8184581775955533e-05, + "loss": 0.5773013234138489, + "step": 1609 + }, + { + "epoch": 0.47082906857727735, + "grad_norm": 1.3055308518970814, + "learning_rate": 1.818180296069548e-05, + "loss": 0.5940284729003906, + "step": 1610 
+ }, + { + "epoch": 0.4711215089925428, + "grad_norm": 1.506634303312927, + "learning_rate": 1.8179022232988735e-05, + "loss": 0.7051881551742554, + "step": 1611 + }, + { + "epoch": 0.4714139494078082, + "grad_norm": 1.2817274142705404, + "learning_rate": 1.8176239593485267e-05, + "loss": 0.6427813768386841, + "step": 1612 + }, + { + "epoch": 0.47170638982307356, + "grad_norm": 1.3150009445137423, + "learning_rate": 1.817345504283551e-05, + "loss": 0.7041782736778259, + "step": 1613 + }, + { + "epoch": 0.47199883023833894, + "grad_norm": 1.1960422316530261, + "learning_rate": 1.817066858169033e-05, + "loss": 0.6568688154220581, + "step": 1614 + }, + { + "epoch": 0.4722912706536043, + "grad_norm": 1.1082706297141673, + "learning_rate": 1.816788021070105e-05, + "loss": 0.4784452021121979, + "step": 1615 + }, + { + "epoch": 0.4725837110688697, + "grad_norm": 1.403652579196444, + "learning_rate": 1.816508993051943e-05, + "loss": 0.6012705564498901, + "step": 1616 + }, + { + "epoch": 0.4728761514841351, + "grad_norm": 1.441258763214559, + "learning_rate": 1.8162297741797685e-05, + "loss": 0.6414428949356079, + "step": 1617 + }, + { + "epoch": 0.47316859189940047, + "grad_norm": 1.4131643644174843, + "learning_rate": 1.815950364518847e-05, + "loss": 0.6446187496185303, + "step": 1618 + }, + { + "epoch": 0.4734610323146659, + "grad_norm": 1.2552495046018781, + "learning_rate": 1.8156707641344885e-05, + "loss": 0.5153034329414368, + "step": 1619 + }, + { + "epoch": 0.4737534727299313, + "grad_norm": 1.5159052607593526, + "learning_rate": 1.8153909730920485e-05, + "loss": 0.7209463715553284, + "step": 1620 + }, + { + "epoch": 0.4740459131451967, + "grad_norm": 1.2933785450044248, + "learning_rate": 1.8151109914569267e-05, + "loss": 0.5990744829177856, + "step": 1621 + }, + { + "epoch": 0.47433835356046206, + "grad_norm": 1.3033668993107679, + "learning_rate": 1.814830819294566e-05, + "loss": 0.5706672668457031, + "step": 1622 + }, + { + "epoch": 0.47463079397572744, + 
"grad_norm": 1.1946317041445573, + "learning_rate": 1.814550456670456e-05, + "loss": 0.538548469543457, + "step": 1623 + }, + { + "epoch": 0.4749232343909928, + "grad_norm": 1.3282078081285205, + "learning_rate": 1.8142699036501288e-05, + "loss": 0.6450623273849487, + "step": 1624 + }, + { + "epoch": 0.4752156748062582, + "grad_norm": 1.336508209824809, + "learning_rate": 1.813989160299163e-05, + "loss": 0.6537624597549438, + "step": 1625 + }, + { + "epoch": 0.47550811522152364, + "grad_norm": 1.2777879020397362, + "learning_rate": 1.8137082266831794e-05, + "loss": 0.7126362323760986, + "step": 1626 + }, + { + "epoch": 0.475800555636789, + "grad_norm": 1.4542616967071014, + "learning_rate": 1.813427102867846e-05, + "loss": 0.6686921119689941, + "step": 1627 + }, + { + "epoch": 0.4760929960520544, + "grad_norm": 1.4231643377055359, + "learning_rate": 1.8131457889188723e-05, + "loss": 0.5925619602203369, + "step": 1628 + }, + { + "epoch": 0.4763854364673198, + "grad_norm": 1.2702390975554385, + "learning_rate": 1.8128642849020147e-05, + "loss": 0.7251017689704895, + "step": 1629 + }, + { + "epoch": 0.47667787688258517, + "grad_norm": 1.5675645867645378, + "learning_rate": 1.8125825908830733e-05, + "loss": 0.7524283528327942, + "step": 1630 + }, + { + "epoch": 0.47697031729785055, + "grad_norm": 1.2843975237623166, + "learning_rate": 1.8123007069278914e-05, + "loss": 0.7593197226524353, + "step": 1631 + }, + { + "epoch": 0.47726275771311594, + "grad_norm": 1.2304771008785658, + "learning_rate": 1.812018633102358e-05, + "loss": 0.43353578448295593, + "step": 1632 + }, + { + "epoch": 0.4775551981283813, + "grad_norm": 1.1488804965894268, + "learning_rate": 1.8117363694724063e-05, + "loss": 0.6254708766937256, + "step": 1633 + }, + { + "epoch": 0.47784763854364676, + "grad_norm": 1.2467231401784862, + "learning_rate": 1.811453916104014e-05, + "loss": 0.5970091223716736, + "step": 1634 + }, + { + "epoch": 0.47814007895891214, + "grad_norm": 1.2798152763028137, + 
"learning_rate": 1.8111712730632024e-05, + "loss": 0.6299331188201904, + "step": 1635 + }, + { + "epoch": 0.4784325193741775, + "grad_norm": 1.4325282365212126, + "learning_rate": 1.810888440416038e-05, + "loss": 0.7461789846420288, + "step": 1636 + }, + { + "epoch": 0.4787249597894429, + "grad_norm": 1.2539146793136515, + "learning_rate": 1.8106054182286305e-05, + "loss": 0.5053290724754333, + "step": 1637 + }, + { + "epoch": 0.4790174002047083, + "grad_norm": 1.2809048918941985, + "learning_rate": 1.810322206567135e-05, + "loss": 0.6853327751159668, + "step": 1638 + }, + { + "epoch": 0.47930984061997367, + "grad_norm": 1.4027979186429358, + "learning_rate": 1.8100388054977508e-05, + "loss": 0.5337134599685669, + "step": 1639 + }, + { + "epoch": 0.47960228103523905, + "grad_norm": 1.357622845311743, + "learning_rate": 1.809755215086721e-05, + "loss": 0.7082560062408447, + "step": 1640 + }, + { + "epoch": 0.47989472145050444, + "grad_norm": 1.3590974916852807, + "learning_rate": 1.8094714354003325e-05, + "loss": 0.680424153804779, + "step": 1641 + }, + { + "epoch": 0.4801871618657699, + "grad_norm": 1.1398057291819046, + "learning_rate": 1.8091874665049183e-05, + "loss": 0.5235139727592468, + "step": 1642 + }, + { + "epoch": 0.48047960228103526, + "grad_norm": 1.3822416905178454, + "learning_rate": 1.8089033084668535e-05, + "loss": 0.7843992114067078, + "step": 1643 + }, + { + "epoch": 0.48077204269630064, + "grad_norm": 1.4941957252025324, + "learning_rate": 1.8086189613525587e-05, + "loss": 0.6736497282981873, + "step": 1644 + }, + { + "epoch": 0.481064483111566, + "grad_norm": 1.3326594399820286, + "learning_rate": 1.808334425228498e-05, + "loss": 0.6898948550224304, + "step": 1645 + }, + { + "epoch": 0.4813569235268314, + "grad_norm": 1.3419429940100798, + "learning_rate": 1.80804970016118e-05, + "loss": 0.6719726324081421, + "step": 1646 + }, + { + "epoch": 0.4816493639420968, + "grad_norm": 1.316270232362313, + "learning_rate": 1.807764786217158e-05, + 
"loss": 0.6904356479644775, + "step": 1647 + }, + { + "epoch": 0.48194180435736217, + "grad_norm": 1.3009257254922486, + "learning_rate": 1.8074796834630285e-05, + "loss": 0.5956645011901855, + "step": 1648 + }, + { + "epoch": 0.48223424477262755, + "grad_norm": 1.162557710559535, + "learning_rate": 1.8071943919654323e-05, + "loss": 0.5676499009132385, + "step": 1649 + }, + { + "epoch": 0.482526685187893, + "grad_norm": 1.3145895725362904, + "learning_rate": 1.8069089117910547e-05, + "loss": 0.6006937026977539, + "step": 1650 + }, + { + "epoch": 0.48281912560315837, + "grad_norm": 1.3694341047830378, + "learning_rate": 1.806623243006625e-05, + "loss": 0.6241977214813232, + "step": 1651 + }, + { + "epoch": 0.48311156601842375, + "grad_norm": 1.4152304986784254, + "learning_rate": 1.806337385678917e-05, + "loss": 0.7359870672225952, + "step": 1652 + }, + { + "epoch": 0.48340400643368914, + "grad_norm": 1.155725074088707, + "learning_rate": 1.806051339874748e-05, + "loss": 0.6113119125366211, + "step": 1653 + }, + { + "epoch": 0.4836964468489545, + "grad_norm": 1.3288798785197886, + "learning_rate": 1.8057651056609784e-05, + "loss": 0.642951488494873, + "step": 1654 + }, + { + "epoch": 0.4839888872642199, + "grad_norm": 1.3081605749498326, + "learning_rate": 1.8054786831045147e-05, + "loss": 0.7020113468170166, + "step": 1655 + }, + { + "epoch": 0.4842813276794853, + "grad_norm": 1.355302216036822, + "learning_rate": 1.8051920722723063e-05, + "loss": 0.678231418132782, + "step": 1656 + }, + { + "epoch": 0.48457376809475067, + "grad_norm": 1.2407750790627203, + "learning_rate": 1.8049052732313466e-05, + "loss": 0.604765772819519, + "step": 1657 + }, + { + "epoch": 0.4848662085100161, + "grad_norm": 1.501775861517808, + "learning_rate": 1.8046182860486735e-05, + "loss": 0.6812270879745483, + "step": 1658 + }, + { + "epoch": 0.4851586489252815, + "grad_norm": 1.329019452940817, + "learning_rate": 1.8043311107913675e-05, + "loss": 0.6284930109977722, + "step": 1659 + }, + 
{ + "epoch": 0.48545108934054687, + "grad_norm": 1.4460160298748268, + "learning_rate": 1.8040437475265554e-05, + "loss": 0.665177583694458, + "step": 1660 + }, + { + "epoch": 0.48574352975581225, + "grad_norm": 1.365611165893268, + "learning_rate": 1.8037561963214058e-05, + "loss": 0.7628738284111023, + "step": 1661 + }, + { + "epoch": 0.48603597017107764, + "grad_norm": 1.4917601408905583, + "learning_rate": 1.8034684572431322e-05, + "loss": 0.6372654438018799, + "step": 1662 + }, + { + "epoch": 0.486328410586343, + "grad_norm": 1.2986927468884095, + "learning_rate": 1.803180530358992e-05, + "loss": 0.5915756225585938, + "step": 1663 + }, + { + "epoch": 0.4866208510016084, + "grad_norm": 1.3509164579114188, + "learning_rate": 1.802892415736286e-05, + "loss": 0.6821908950805664, + "step": 1664 + }, + { + "epoch": 0.48691329141687384, + "grad_norm": 1.3857679722145793, + "learning_rate": 1.80260411344236e-05, + "loss": 0.6418279409408569, + "step": 1665 + }, + { + "epoch": 0.4872057318321392, + "grad_norm": 1.154306591574384, + "learning_rate": 1.802315623544602e-05, + "loss": 0.5582526922225952, + "step": 1666 + }, + { + "epoch": 0.4874981722474046, + "grad_norm": 1.3431793608397968, + "learning_rate": 1.8020269461104448e-05, + "loss": 0.7145007848739624, + "step": 1667 + }, + { + "epoch": 0.48779061266267, + "grad_norm": 1.2110741699326812, + "learning_rate": 1.8017380812073658e-05, + "loss": 0.5415871739387512, + "step": 1668 + }, + { + "epoch": 0.48808305307793537, + "grad_norm": 1.488356994545647, + "learning_rate": 1.801449028902885e-05, + "loss": 0.728327751159668, + "step": 1669 + }, + { + "epoch": 0.48837549349320075, + "grad_norm": 1.3273378299589804, + "learning_rate": 1.8011597892645665e-05, + "loss": 0.6469160914421082, + "step": 1670 + }, + { + "epoch": 0.48866793390846613, + "grad_norm": 1.3096259850876997, + "learning_rate": 1.8008703623600185e-05, + "loss": 0.7107353210449219, + "step": 1671 + }, + { + "epoch": 0.4889603743237315, + "grad_norm": 
1.4201847213896843, + "learning_rate": 1.8005807482568926e-05, + "loss": 0.6918982267379761, + "step": 1672 + }, + { + "epoch": 0.48925281473899696, + "grad_norm": 1.4096024584844806, + "learning_rate": 1.800290947022884e-05, + "loss": 0.661738932132721, + "step": 1673 + }, + { + "epoch": 0.48954525515426234, + "grad_norm": 1.4938181766281158, + "learning_rate": 1.800000958725733e-05, + "loss": 0.6816283464431763, + "step": 1674 + }, + { + "epoch": 0.4898376955695277, + "grad_norm": 1.348689926804817, + "learning_rate": 1.7997107834332217e-05, + "loss": 0.6988941431045532, + "step": 1675 + }, + { + "epoch": 0.4901301359847931, + "grad_norm": 1.5696470599370025, + "learning_rate": 1.799420421213177e-05, + "loss": 0.7997519969940186, + "step": 1676 + }, + { + "epoch": 0.4904225764000585, + "grad_norm": 1.3512394042939826, + "learning_rate": 1.7991298721334697e-05, + "loss": 0.6552794575691223, + "step": 1677 + }, + { + "epoch": 0.49071501681532387, + "grad_norm": 1.2446219807906005, + "learning_rate": 1.7988391362620135e-05, + "loss": 0.6144021153450012, + "step": 1678 + }, + { + "epoch": 0.49100745723058925, + "grad_norm": 1.2086851376188177, + "learning_rate": 1.798548213666766e-05, + "loss": 0.5036276578903198, + "step": 1679 + }, + { + "epoch": 0.49129989764585463, + "grad_norm": 1.1620444251602322, + "learning_rate": 1.7982571044157288e-05, + "loss": 0.5152162313461304, + "step": 1680 + }, + { + "epoch": 0.49159233806112007, + "grad_norm": 1.4266855366652862, + "learning_rate": 1.797965808576947e-05, + "loss": 0.7249797582626343, + "step": 1681 + }, + { + "epoch": 0.49188477847638545, + "grad_norm": 1.138885414798186, + "learning_rate": 1.7976743262185094e-05, + "loss": 0.5769079923629761, + "step": 1682 + }, + { + "epoch": 0.49217721889165084, + "grad_norm": 1.2523240509929359, + "learning_rate": 1.797382657408548e-05, + "loss": 0.7017331123352051, + "step": 1683 + }, + { + "epoch": 0.4924696593069162, + "grad_norm": 1.3095438640742119, + "learning_rate": 
1.797090802215238e-05, + "loss": 0.788599967956543, + "step": 1684 + }, + { + "epoch": 0.4927620997221816, + "grad_norm": 1.3652642181905799, + "learning_rate": 1.7967987607067997e-05, + "loss": 0.5716612935066223, + "step": 1685 + }, + { + "epoch": 0.493054540137447, + "grad_norm": 1.396592202891807, + "learning_rate": 1.796506532951496e-05, + "loss": 0.6808345913887024, + "step": 1686 + }, + { + "epoch": 0.49334698055271237, + "grad_norm": 1.421363062787346, + "learning_rate": 1.7962141190176326e-05, + "loss": 0.6540817022323608, + "step": 1687 + }, + { + "epoch": 0.49363942096797775, + "grad_norm": 1.3162774070898267, + "learning_rate": 1.7959215189735604e-05, + "loss": 0.6522870063781738, + "step": 1688 + }, + { + "epoch": 0.4939318613832432, + "grad_norm": 1.2120992084575881, + "learning_rate": 1.7956287328876724e-05, + "loss": 0.5217882990837097, + "step": 1689 + }, + { + "epoch": 0.49422430179850857, + "grad_norm": 1.1456971313507769, + "learning_rate": 1.795335760828405e-05, + "loss": 0.6985372304916382, + "step": 1690 + }, + { + "epoch": 0.49451674221377395, + "grad_norm": 1.6308222645679713, + "learning_rate": 1.7950426028642397e-05, + "loss": 0.7199063301086426, + "step": 1691 + }, + { + "epoch": 0.49480918262903933, + "grad_norm": 1.2503132677681021, + "learning_rate": 1.7947492590636998e-05, + "loss": 0.5810575485229492, + "step": 1692 + }, + { + "epoch": 0.4951016230443047, + "grad_norm": 1.5393913616038981, + "learning_rate": 1.7944557294953528e-05, + "loss": 0.7443726658821106, + "step": 1693 + }, + { + "epoch": 0.4953940634595701, + "grad_norm": 1.4257690332105803, + "learning_rate": 1.7941620142278092e-05, + "loss": 0.6774560213088989, + "step": 1694 + }, + { + "epoch": 0.4956865038748355, + "grad_norm": 1.4876883296800856, + "learning_rate": 1.793868113329724e-05, + "loss": 0.6983137726783752, + "step": 1695 + }, + { + "epoch": 0.49597894429010086, + "grad_norm": 1.500775887710686, + "learning_rate": 1.793574026869793e-05, + "loss": 
0.6481274366378784, + "step": 1696 + }, + { + "epoch": 0.4962713847053663, + "grad_norm": 1.5261372345633493, + "learning_rate": 1.793279754916759e-05, + "loss": 0.6489002704620361, + "step": 1697 + }, + { + "epoch": 0.4965638251206317, + "grad_norm": 1.200851338265551, + "learning_rate": 1.7929852975394056e-05, + "loss": 0.7054505348205566, + "step": 1698 + }, + { + "epoch": 0.49685626553589707, + "grad_norm": 1.1948769153228862, + "learning_rate": 1.79269065480656e-05, + "loss": 0.5257681608200073, + "step": 1699 + }, + { + "epoch": 0.49714870595116245, + "grad_norm": 1.2760885846913066, + "learning_rate": 1.7923958267870936e-05, + "loss": 0.8625251054763794, + "step": 1700 + }, + { + "epoch": 0.49744114636642783, + "grad_norm": 1.223950331700182, + "learning_rate": 1.7921008135499205e-05, + "loss": 0.6736147999763489, + "step": 1701 + }, + { + "epoch": 0.4977335867816932, + "grad_norm": 1.351351583663473, + "learning_rate": 1.7918056151639985e-05, + "loss": 0.5079643130302429, + "step": 1702 + }, + { + "epoch": 0.4980260271969586, + "grad_norm": 1.2324398794203584, + "learning_rate": 1.791510231698328e-05, + "loss": 0.597242534160614, + "step": 1703 + }, + { + "epoch": 0.49831846761222404, + "grad_norm": 1.3776511171825507, + "learning_rate": 1.791214663221953e-05, + "loss": 0.6695376038551331, + "step": 1704 + }, + { + "epoch": 0.4986109080274894, + "grad_norm": 1.2400454845090276, + "learning_rate": 1.7909189098039616e-05, + "loss": 0.6411684155464172, + "step": 1705 + }, + { + "epoch": 0.4989033484427548, + "grad_norm": 1.3917271277458743, + "learning_rate": 1.790622971513484e-05, + "loss": 0.6671754121780396, + "step": 1706 + }, + { + "epoch": 0.4991957888580202, + "grad_norm": 1.1384272276613905, + "learning_rate": 1.7903268484196936e-05, + "loss": 0.5312573909759521, + "step": 1707 + }, + { + "epoch": 0.49948822927328557, + "grad_norm": 1.3626241120949947, + "learning_rate": 1.7900305405918076e-05, + "loss": 0.643236517906189, + "step": 1708 + }, + { + 
"epoch": 0.49978066968855095, + "grad_norm": 1.4093385837144417, + "learning_rate": 1.7897340480990863e-05, + "loss": 0.7942951321601868, + "step": 1709 + }, + { + "epoch": 0.5000731101038164, + "grad_norm": 1.3198251548980515, + "learning_rate": 1.789437371010833e-05, + "loss": 0.701362133026123, + "step": 1710 + }, + { + "epoch": 0.5003655505190817, + "grad_norm": 1.3304955567316399, + "learning_rate": 1.789140509396394e-05, + "loss": 0.6993157863616943, + "step": 1711 + }, + { + "epoch": 0.5006579909343472, + "grad_norm": 1.0719148279657758, + "learning_rate": 1.788843463325159e-05, + "loss": 0.568405270576477, + "step": 1712 + }, + { + "epoch": 0.5009504313496125, + "grad_norm": 0.976150644308567, + "learning_rate": 1.7885462328665605e-05, + "loss": 0.4948374032974243, + "step": 1713 + }, + { + "epoch": 0.5012428717648779, + "grad_norm": 1.4692514127239873, + "learning_rate": 1.7882488180900743e-05, + "loss": 0.6679480671882629, + "step": 1714 + }, + { + "epoch": 0.5015353121801432, + "grad_norm": 1.5018221461401142, + "learning_rate": 1.78795121906522e-05, + "loss": 0.706131100654602, + "step": 1715 + }, + { + "epoch": 0.5018277525954087, + "grad_norm": 1.207740414795638, + "learning_rate": 1.787653435861559e-05, + "loss": 0.6691830158233643, + "step": 1716 + }, + { + "epoch": 0.5021201930106741, + "grad_norm": 1.163150990025552, + "learning_rate": 1.787355468548696e-05, + "loss": 0.5624213218688965, + "step": 1717 + }, + { + "epoch": 0.5024126334259394, + "grad_norm": 1.3394004970303723, + "learning_rate": 1.78705731719628e-05, + "loss": 0.4589618444442749, + "step": 1718 + }, + { + "epoch": 0.5027050738412049, + "grad_norm": 1.384883869852314, + "learning_rate": 1.7867589818740012e-05, + "loss": 0.571403980255127, + "step": 1719 + }, + { + "epoch": 0.5029975142564702, + "grad_norm": 1.0668853872947273, + "learning_rate": 1.786460462651594e-05, + "loss": 0.5395561456680298, + "step": 1720 + }, + { + "epoch": 0.5032899546717357, + "grad_norm": 
1.243223907233259, + "learning_rate": 1.7861617595988355e-05, + "loss": 0.6166945695877075, + "step": 1721 + }, + { + "epoch": 0.503582395087001, + "grad_norm": 1.4857752879775032, + "learning_rate": 1.7858628727855458e-05, + "loss": 0.6812523603439331, + "step": 1722 + }, + { + "epoch": 0.5038748355022664, + "grad_norm": 1.2390654420633957, + "learning_rate": 1.7855638022815872e-05, + "loss": 0.6602752208709717, + "step": 1723 + }, + { + "epoch": 0.5041672759175319, + "grad_norm": 1.0873682718880517, + "learning_rate": 1.7852645481568665e-05, + "loss": 0.49925822019577026, + "step": 1724 + }, + { + "epoch": 0.5044597163327972, + "grad_norm": 1.3265310908908576, + "learning_rate": 1.784965110481332e-05, + "loss": 0.5557682514190674, + "step": 1725 + }, + { + "epoch": 0.5047521567480626, + "grad_norm": 1.2775644185514514, + "learning_rate": 1.7846654893249756e-05, + "loss": 0.6576372981071472, + "step": 1726 + }, + { + "epoch": 0.505044597163328, + "grad_norm": 2.047704943438843, + "learning_rate": 1.7843656847578317e-05, + "loss": 0.5266367197036743, + "step": 1727 + }, + { + "epoch": 0.5053370375785934, + "grad_norm": 1.6086224094226402, + "learning_rate": 1.7840656968499782e-05, + "loss": 0.7368261218070984, + "step": 1728 + }, + { + "epoch": 0.5056294779938587, + "grad_norm": 1.2755318597370908, + "learning_rate": 1.7837655256715355e-05, + "loss": 0.6583619117736816, + "step": 1729 + }, + { + "epoch": 0.5059219184091241, + "grad_norm": 1.4196511617190575, + "learning_rate": 1.7834651712926662e-05, + "loss": 0.7323073148727417, + "step": 1730 + }, + { + "epoch": 0.5062143588243895, + "grad_norm": 1.540686270234863, + "learning_rate": 1.783164633783577e-05, + "loss": 0.6059812307357788, + "step": 1731 + }, + { + "epoch": 0.5065067992396549, + "grad_norm": 1.451028079648097, + "learning_rate": 1.782863913214516e-05, + "loss": 0.5992608070373535, + "step": 1732 + }, + { + "epoch": 0.5067992396549204, + "grad_norm": 1.3452146161553644, + "learning_rate": 
1.7825630096557754e-05, + "loss": 0.5729147791862488, + "step": 1733 + }, + { + "epoch": 0.5070916800701857, + "grad_norm": 1.4383912240083958, + "learning_rate": 1.782261923177689e-05, + "loss": 0.6708269119262695, + "step": 1734 + }, + { + "epoch": 0.5073841204854511, + "grad_norm": 1.0922943221428454, + "learning_rate": 1.7819606538506347e-05, + "loss": 0.5377235412597656, + "step": 1735 + }, + { + "epoch": 0.5076765609007164, + "grad_norm": 1.3060450837457043, + "learning_rate": 1.781659201745032e-05, + "loss": 0.6899171471595764, + "step": 1736 + }, + { + "epoch": 0.5079690013159819, + "grad_norm": 1.2574262616785272, + "learning_rate": 1.7813575669313434e-05, + "loss": 0.6712576150894165, + "step": 1737 + }, + { + "epoch": 0.5082614417312472, + "grad_norm": 1.3797290531865334, + "learning_rate": 1.781055749480074e-05, + "loss": 0.6989667415618896, + "step": 1738 + }, + { + "epoch": 0.5085538821465126, + "grad_norm": 1.4976341004458755, + "learning_rate": 1.7807537494617723e-05, + "loss": 0.6103490591049194, + "step": 1739 + }, + { + "epoch": 0.5088463225617781, + "grad_norm": 1.2059878229475702, + "learning_rate": 1.7804515669470287e-05, + "loss": 0.4882289171218872, + "step": 1740 + }, + { + "epoch": 0.5091387629770434, + "grad_norm": 1.3963253268337052, + "learning_rate": 1.7801492020064764e-05, + "loss": 0.7244713306427002, + "step": 1741 + }, + { + "epoch": 0.5094312033923089, + "grad_norm": 1.2588544303384788, + "learning_rate": 1.7798466547107918e-05, + "loss": 0.6055952310562134, + "step": 1742 + }, + { + "epoch": 0.5097236438075742, + "grad_norm": 1.3449125705801426, + "learning_rate": 1.779543925130693e-05, + "loss": 0.5893995761871338, + "step": 1743 + }, + { + "epoch": 0.5100160842228396, + "grad_norm": 1.4169541262971606, + "learning_rate": 1.7792410133369413e-05, + "loss": 0.6154330968856812, + "step": 1744 + }, + { + "epoch": 0.5103085246381049, + "grad_norm": 1.294650393818464, + "learning_rate": 1.778937919400341e-05, + "loss": 
0.6227806806564331, + "step": 1745 + }, + { + "epoch": 0.5106009650533704, + "grad_norm": 1.563882907776874, + "learning_rate": 1.7786346433917376e-05, + "loss": 0.6192313432693481, + "step": 1746 + }, + { + "epoch": 0.5108934054686358, + "grad_norm": 1.324638073205218, + "learning_rate": 1.7783311853820205e-05, + "loss": 0.6175359487533569, + "step": 1747 + }, + { + "epoch": 0.5111858458839011, + "grad_norm": 1.17912928754983, + "learning_rate": 1.7780275454421218e-05, + "loss": 0.5588991641998291, + "step": 1748 + }, + { + "epoch": 0.5114782862991666, + "grad_norm": 1.0201894222615457, + "learning_rate": 1.777723723643014e-05, + "loss": 0.637115478515625, + "step": 1749 + }, + { + "epoch": 0.5117707267144319, + "grad_norm": 1.5101308062255179, + "learning_rate": 1.777419720055715e-05, + "loss": 0.6762860417366028, + "step": 1750 + }, + { + "epoch": 0.5120631671296973, + "grad_norm": 1.5211239881114056, + "learning_rate": 1.7771155347512828e-05, + "loss": 0.6980293989181519, + "step": 1751 + }, + { + "epoch": 0.5123556075449627, + "grad_norm": 1.3145597239587745, + "learning_rate": 1.7768111678008194e-05, + "loss": 0.6587250232696533, + "step": 1752 + }, + { + "epoch": 0.5126480479602281, + "grad_norm": 1.4750219793579704, + "learning_rate": 1.776506619275469e-05, + "loss": 0.6571120619773865, + "step": 1753 + }, + { + "epoch": 0.5129404883754934, + "grad_norm": 1.705487520120489, + "learning_rate": 1.7762018892464172e-05, + "loss": 0.8127633333206177, + "step": 1754 + }, + { + "epoch": 0.5132329287907589, + "grad_norm": 1.4136977790679228, + "learning_rate": 1.7758969777848935e-05, + "loss": 0.6585550308227539, + "step": 1755 + }, + { + "epoch": 0.5135253692060243, + "grad_norm": 1.5019600327645424, + "learning_rate": 1.7755918849621686e-05, + "loss": 0.6347511410713196, + "step": 1756 + }, + { + "epoch": 0.5138178096212896, + "grad_norm": 1.4489353235186164, + "learning_rate": 1.775286610849556e-05, + "loss": 0.5918457508087158, + "step": 1757 + }, + { + 
"epoch": 0.5141102500365551, + "grad_norm": 1.2541802522573693, + "learning_rate": 1.774981155518412e-05, + "loss": 0.7042769193649292, + "step": 1758 + }, + { + "epoch": 0.5144026904518204, + "grad_norm": 1.4327318826910254, + "learning_rate": 1.7746755190401353e-05, + "loss": 0.8014250993728638, + "step": 1759 + }, + { + "epoch": 0.5146951308670858, + "grad_norm": 1.339232110324459, + "learning_rate": 1.774369701486166e-05, + "loss": 0.6703939437866211, + "step": 1760 + }, + { + "epoch": 0.5149875712823512, + "grad_norm": 1.1710558248660605, + "learning_rate": 1.774063702927987e-05, + "loss": 0.6189682483673096, + "step": 1761 + }, + { + "epoch": 0.5152800116976166, + "grad_norm": 1.4110546220906648, + "learning_rate": 1.7737575234371238e-05, + "loss": 0.5386991500854492, + "step": 1762 + }, + { + "epoch": 0.515572452112882, + "grad_norm": 1.4204019461155708, + "learning_rate": 1.773451163085144e-05, + "loss": 0.6389357447624207, + "step": 1763 + }, + { + "epoch": 0.5158648925281474, + "grad_norm": 1.1798787279597898, + "learning_rate": 1.7731446219436577e-05, + "loss": 0.7247746586799622, + "step": 1764 + }, + { + "epoch": 0.5161573329434128, + "grad_norm": 1.2114702713778023, + "learning_rate": 1.7728379000843164e-05, + "loss": 0.5538983941078186, + "step": 1765 + }, + { + "epoch": 0.5164497733586781, + "grad_norm": 1.155329008927324, + "learning_rate": 1.7725309975788155e-05, + "loss": 0.6003320813179016, + "step": 1766 + }, + { + "epoch": 0.5167422137739436, + "grad_norm": 1.4065479816352848, + "learning_rate": 1.7722239144988908e-05, + "loss": 0.603177011013031, + "step": 1767 + }, + { + "epoch": 0.5170346541892089, + "grad_norm": 1.1699743536266287, + "learning_rate": 1.771916650916321e-05, + "loss": 0.6071338653564453, + "step": 1768 + }, + { + "epoch": 0.5173270946044743, + "grad_norm": 1.4268603398797357, + "learning_rate": 1.7716092069029275e-05, + "loss": 0.6148535013198853, + "step": 1769 + }, + { + "epoch": 0.5176195350197397, + "grad_norm": 
1.3460628970570976, + "learning_rate": 1.7713015825305735e-05, + "loss": 0.6236969828605652, + "step": 1770 + }, + { + "epoch": 0.5179119754350051, + "grad_norm": 1.4613715991480511, + "learning_rate": 1.770993777871164e-05, + "loss": 0.5439775586128235, + "step": 1771 + }, + { + "epoch": 0.5182044158502705, + "grad_norm": 1.3246469866549868, + "learning_rate": 1.770685792996647e-05, + "loss": 0.6498249769210815, + "step": 1772 + }, + { + "epoch": 0.5184968562655359, + "grad_norm": 1.307598965769502, + "learning_rate": 1.7703776279790113e-05, + "loss": 0.5838749408721924, + "step": 1773 + }, + { + "epoch": 0.5187892966808013, + "grad_norm": 1.44861400348765, + "learning_rate": 1.770069282890289e-05, + "loss": 0.6467812657356262, + "step": 1774 + }, + { + "epoch": 0.5190817370960666, + "grad_norm": 1.3332181124442455, + "learning_rate": 1.7697607578025543e-05, + "loss": 0.5878627896308899, + "step": 1775 + }, + { + "epoch": 0.5193741775113321, + "grad_norm": 1.2905348700615993, + "learning_rate": 1.7694520527879223e-05, + "loss": 0.6252161264419556, + "step": 1776 + }, + { + "epoch": 0.5196666179265974, + "grad_norm": 1.2071686484495499, + "learning_rate": 1.7691431679185518e-05, + "loss": 0.6098401546478271, + "step": 1777 + }, + { + "epoch": 0.5199590583418628, + "grad_norm": 1.4529959736387221, + "learning_rate": 1.7688341032666415e-05, + "loss": 0.7401748299598694, + "step": 1778 + }, + { + "epoch": 0.5202514987571283, + "grad_norm": 1.278188059333223, + "learning_rate": 1.768524858904435e-05, + "loss": 0.5398571491241455, + "step": 1779 + }, + { + "epoch": 0.5205439391723936, + "grad_norm": 1.211971903081478, + "learning_rate": 1.768215434904215e-05, + "loss": 0.5565935969352722, + "step": 1780 + }, + { + "epoch": 0.520836379587659, + "grad_norm": 1.3982258941889667, + "learning_rate": 1.7679058313383078e-05, + "loss": 0.5510461926460266, + "step": 1781 + }, + { + "epoch": 0.5211288200029244, + "grad_norm": 1.5839871959956162, + "learning_rate": 
1.7675960482790818e-05, + "loss": 0.670242428779602, + "step": 1782 + }, + { + "epoch": 0.5214212604181898, + "grad_norm": 1.309838763427276, + "learning_rate": 1.7672860857989463e-05, + "loss": 0.6556246280670166, + "step": 1783 + }, + { + "epoch": 0.5217137008334551, + "grad_norm": 1.3555406156984307, + "learning_rate": 1.7669759439703537e-05, + "loss": 0.7133421897888184, + "step": 1784 + }, + { + "epoch": 0.5220061412487206, + "grad_norm": 1.340410804208978, + "learning_rate": 1.766665622865797e-05, + "loss": 0.5520647168159485, + "step": 1785 + }, + { + "epoch": 0.522298581663986, + "grad_norm": 1.2754706768801123, + "learning_rate": 1.766355122557813e-05, + "loss": 0.6906430125236511, + "step": 1786 + }, + { + "epoch": 0.5225910220792513, + "grad_norm": 1.331418831759662, + "learning_rate": 1.766044443118978e-05, + "loss": 0.6847748756408691, + "step": 1787 + }, + { + "epoch": 0.5228834624945168, + "grad_norm": 1.6656678493050783, + "learning_rate": 1.7657335846219125e-05, + "loss": 0.6690354347229004, + "step": 1788 + }, + { + "epoch": 0.5231759029097821, + "grad_norm": 1.5097667681145126, + "learning_rate": 1.765422547139277e-05, + "loss": 0.6508032083511353, + "step": 1789 + }, + { + "epoch": 0.5234683433250475, + "grad_norm": 1.3545274700404182, + "learning_rate": 1.7651113307437754e-05, + "loss": 0.7686585187911987, + "step": 1790 + }, + { + "epoch": 0.5237607837403129, + "grad_norm": 1.5694388106807053, + "learning_rate": 1.764799935508152e-05, + "loss": 0.7669490575790405, + "step": 1791 + }, + { + "epoch": 0.5240532241555783, + "grad_norm": 1.3694245126086426, + "learning_rate": 1.7644883615051936e-05, + "loss": 0.6630266308784485, + "step": 1792 + }, + { + "epoch": 0.5243456645708436, + "grad_norm": 1.350854180871217, + "learning_rate": 1.764176608807729e-05, + "loss": 0.6054951548576355, + "step": 1793 + }, + { + "epoch": 0.5246381049861091, + "grad_norm": 1.3573271710882402, + "learning_rate": 1.7638646774886282e-05, + "loss": 0.6519330739974976, + 
"step": 1794 + }, + { + "epoch": 0.5249305454013745, + "grad_norm": 1.3013890836364408, + "learning_rate": 1.7635525676208034e-05, + "loss": 0.6797915101051331, + "step": 1795 + }, + { + "epoch": 0.5252229858166398, + "grad_norm": 1.4138018427804997, + "learning_rate": 1.7632402792772084e-05, + "loss": 0.7296736240386963, + "step": 1796 + }, + { + "epoch": 0.5255154262319053, + "grad_norm": 1.4894816204298726, + "learning_rate": 1.7629278125308388e-05, + "loss": 0.6371006965637207, + "step": 1797 + }, + { + "epoch": 0.5258078666471706, + "grad_norm": 1.1913157227609021, + "learning_rate": 1.762615167454732e-05, + "loss": 0.5315746068954468, + "step": 1798 + }, + { + "epoch": 0.526100307062436, + "grad_norm": 1.115665172593258, + "learning_rate": 1.762302344121966e-05, + "loss": 0.5285685062408447, + "step": 1799 + }, + { + "epoch": 0.5263927474777014, + "grad_norm": 1.269936179033053, + "learning_rate": 1.7619893426056622e-05, + "loss": 0.623146653175354, + "step": 1800 + }, + { + "epoch": 0.5266851878929668, + "grad_norm": 1.3314922698636598, + "learning_rate": 1.7616761629789824e-05, + "loss": 0.5433363318443298, + "step": 1801 + }, + { + "epoch": 0.5269776283082322, + "grad_norm": 1.422200045831386, + "learning_rate": 1.7613628053151307e-05, + "loss": 0.5035480260848999, + "step": 1802 + }, + { + "epoch": 0.5272700687234976, + "grad_norm": 1.3947936859584276, + "learning_rate": 1.7610492696873523e-05, + "loss": 0.678544819355011, + "step": 1803 + }, + { + "epoch": 0.527562509138763, + "grad_norm": 1.2973841494755158, + "learning_rate": 1.7607355561689347e-05, + "loss": 0.6237714290618896, + "step": 1804 + }, + { + "epoch": 0.5278549495540283, + "grad_norm": 1.8411758190439966, + "learning_rate": 1.760421664833206e-05, + "loss": 0.6943943500518799, + "step": 1805 + }, + { + "epoch": 0.5281473899692938, + "grad_norm": 1.1545458109151105, + "learning_rate": 1.7601075957535366e-05, + "loss": 0.5477268695831299, + "step": 1806 + }, + { + "epoch": 0.5284398303845591, 
+ "grad_norm": 1.5589440207416567, + "learning_rate": 1.759793349003338e-05, + "loss": 0.6627641320228577, + "step": 1807 + }, + { + "epoch": 0.5287322707998245, + "grad_norm": 1.169894530317387, + "learning_rate": 1.7594789246560638e-05, + "loss": 0.5394496917724609, + "step": 1808 + }, + { + "epoch": 0.5290247112150899, + "grad_norm": 1.5989109343746286, + "learning_rate": 1.759164322785209e-05, + "loss": 0.7824013233184814, + "step": 1809 + }, + { + "epoch": 0.5293171516303553, + "grad_norm": 1.5859531867022811, + "learning_rate": 1.7588495434643094e-05, + "loss": 0.6959671974182129, + "step": 1810 + }, + { + "epoch": 0.5296095920456207, + "grad_norm": 1.256097179377318, + "learning_rate": 1.7585345867669427e-05, + "loss": 0.7036902904510498, + "step": 1811 + }, + { + "epoch": 0.5299020324608861, + "grad_norm": 1.2520265115718123, + "learning_rate": 1.7582194527667285e-05, + "loss": 0.6700775623321533, + "step": 1812 + }, + { + "epoch": 0.5301944728761515, + "grad_norm": 1.4077714911889505, + "learning_rate": 1.7579041415373273e-05, + "loss": 0.648280918598175, + "step": 1813 + }, + { + "epoch": 0.5304869132914168, + "grad_norm": 1.3424741441047479, + "learning_rate": 1.757588653152441e-05, + "loss": 0.688485324382782, + "step": 1814 + }, + { + "epoch": 0.5307793537066823, + "grad_norm": 1.4718330240816029, + "learning_rate": 1.757272987685813e-05, + "loss": 0.6743370890617371, + "step": 1815 + }, + { + "epoch": 0.5310717941219476, + "grad_norm": 1.2524252340987996, + "learning_rate": 1.7569571452112288e-05, + "loss": 0.5597015619277954, + "step": 1816 + }, + { + "epoch": 0.531364234537213, + "grad_norm": 1.0387462800714626, + "learning_rate": 1.756641125802514e-05, + "loss": 0.48607051372528076, + "step": 1817 + }, + { + "epoch": 0.5316566749524785, + "grad_norm": 1.3375496888713005, + "learning_rate": 1.7563249295335366e-05, + "loss": 0.6712289452552795, + "step": 1818 + }, + { + "epoch": 0.5319491153677438, + "grad_norm": 1.4037646661677698, + 
"learning_rate": 1.7560085564782057e-05, + "loss": 0.5937772989273071, + "step": 1819 + }, + { + "epoch": 0.5322415557830092, + "grad_norm": 1.5529497860681427, + "learning_rate": 1.7556920067104714e-05, + "loss": 0.7416468262672424, + "step": 1820 + }, + { + "epoch": 0.5325339961982746, + "grad_norm": 1.1975217725231788, + "learning_rate": 1.7553752803043247e-05, + "loss": 0.6302096247673035, + "step": 1821 + }, + { + "epoch": 0.53282643661354, + "grad_norm": 1.268842982106158, + "learning_rate": 1.7550583773337992e-05, + "loss": 0.5576045513153076, + "step": 1822 + }, + { + "epoch": 0.5331188770288053, + "grad_norm": 1.3076658324014316, + "learning_rate": 1.7547412978729688e-05, + "loss": 0.5436257123947144, + "step": 1823 + }, + { + "epoch": 0.5334113174440708, + "grad_norm": 1.2387778464918946, + "learning_rate": 1.754424041995949e-05, + "loss": 0.5674831867218018, + "step": 1824 + }, + { + "epoch": 0.5337037578593362, + "grad_norm": 1.3729116406743342, + "learning_rate": 1.7541066097768965e-05, + "loss": 0.7254515290260315, + "step": 1825 + }, + { + "epoch": 0.5339961982746015, + "grad_norm": 1.1721694105309242, + "learning_rate": 1.7537890012900088e-05, + "loss": 0.5706701278686523, + "step": 1826 + }, + { + "epoch": 0.534288638689867, + "grad_norm": 1.4929452380767032, + "learning_rate": 1.7534712166095253e-05, + "loss": 0.6801357269287109, + "step": 1827 + }, + { + "epoch": 0.5345810791051323, + "grad_norm": 1.115878861059579, + "learning_rate": 1.753153255809726e-05, + "loss": 0.6851463317871094, + "step": 1828 + }, + { + "epoch": 0.5348735195203977, + "grad_norm": 1.3277835192492438, + "learning_rate": 1.7528351189649324e-05, + "loss": 0.6475861072540283, + "step": 1829 + }, + { + "epoch": 0.5351659599356631, + "grad_norm": 1.462925601634232, + "learning_rate": 1.752516806149507e-05, + "loss": 0.6953648924827576, + "step": 1830 + }, + { + "epoch": 0.5354584003509285, + "grad_norm": 1.5314952476377168, + "learning_rate": 1.7521983174378537e-05, + "loss": 
0.5128777623176575, + "step": 1831 + }, + { + "epoch": 0.5357508407661938, + "grad_norm": 1.3754167803768682, + "learning_rate": 1.751879652904417e-05, + "loss": 0.5780255198478699, + "step": 1832 + }, + { + "epoch": 0.5360432811814593, + "grad_norm": 1.1326334157819233, + "learning_rate": 1.751560812623683e-05, + "loss": 0.581814169883728, + "step": 1833 + }, + { + "epoch": 0.5363357215967247, + "grad_norm": 1.2244339664502468, + "learning_rate": 1.7512417966701788e-05, + "loss": 0.5609169006347656, + "step": 1834 + }, + { + "epoch": 0.53662816201199, + "grad_norm": 1.2348222464159622, + "learning_rate": 1.7509226051184716e-05, + "loss": 0.6029868125915527, + "step": 1835 + }, + { + "epoch": 0.5369206024272555, + "grad_norm": 1.5575658935823142, + "learning_rate": 1.7506032380431718e-05, + "loss": 0.6749545335769653, + "step": 1836 + }, + { + "epoch": 0.5372130428425208, + "grad_norm": 1.4261868258477342, + "learning_rate": 1.750283695518929e-05, + "loss": 0.7710991501808167, + "step": 1837 + }, + { + "epoch": 0.5375054832577862, + "grad_norm": 1.2797893583505542, + "learning_rate": 1.7499639776204334e-05, + "loss": 0.6330907940864563, + "step": 1838 + }, + { + "epoch": 0.5377979236730516, + "grad_norm": 1.3697405221939354, + "learning_rate": 1.7496440844224186e-05, + "loss": 0.655827522277832, + "step": 1839 + }, + { + "epoch": 0.538090364088317, + "grad_norm": 1.3640883815652403, + "learning_rate": 1.7493240159996565e-05, + "loss": 0.723412275314331, + "step": 1840 + }, + { + "epoch": 0.5383828045035824, + "grad_norm": 1.273855459734962, + "learning_rate": 1.7490037724269618e-05, + "loss": 0.5504157543182373, + "step": 1841 + }, + { + "epoch": 0.5386752449188478, + "grad_norm": 1.3867652356352673, + "learning_rate": 1.7486833537791895e-05, + "loss": 0.6258282661437988, + "step": 1842 + }, + { + "epoch": 0.5389676853341132, + "grad_norm": 1.3063024833172743, + "learning_rate": 1.748362760131235e-05, + "loss": 0.7044231295585632, + "step": 1843 + }, + { + "epoch": 
0.5392601257493785, + "grad_norm": 1.329844005030904, + "learning_rate": 1.7480419915580357e-05, + "loss": 0.5979568362236023, + "step": 1844 + }, + { + "epoch": 0.539552566164644, + "grad_norm": 1.2396904419147898, + "learning_rate": 1.7477210481345686e-05, + "loss": 0.558562159538269, + "step": 1845 + }, + { + "epoch": 0.5398450065799093, + "grad_norm": 1.5914882070233294, + "learning_rate": 1.747399929935853e-05, + "loss": 0.5965149402618408, + "step": 1846 + }, + { + "epoch": 0.5401374469951747, + "grad_norm": 1.2286076413347484, + "learning_rate": 1.7470786370369483e-05, + "loss": 0.6202878355979919, + "step": 1847 + }, + { + "epoch": 0.5404298874104401, + "grad_norm": 1.4696847585462156, + "learning_rate": 1.746757169512954e-05, + "loss": 0.652141273021698, + "step": 1848 + }, + { + "epoch": 0.5407223278257055, + "grad_norm": 1.3491880900702233, + "learning_rate": 1.746435527439012e-05, + "loss": 0.5713402628898621, + "step": 1849 + }, + { + "epoch": 0.541014768240971, + "grad_norm": 1.1036198614058235, + "learning_rate": 1.7461137108903042e-05, + "loss": 0.49776554107666016, + "step": 1850 + }, + { + "epoch": 0.5413072086562363, + "grad_norm": 1.3593053008733638, + "learning_rate": 1.7457917199420525e-05, + "loss": 0.7047991752624512, + "step": 1851 + }, + { + "epoch": 0.5415996490715017, + "grad_norm": 1.249302868601747, + "learning_rate": 1.7454695546695207e-05, + "loss": 0.7019875049591064, + "step": 1852 + }, + { + "epoch": 0.541892089486767, + "grad_norm": 1.1395410254023401, + "learning_rate": 1.745147215148013e-05, + "loss": 0.5448435544967651, + "step": 1853 + }, + { + "epoch": 0.5421845299020325, + "grad_norm": 1.3392616230054089, + "learning_rate": 1.7448247014528745e-05, + "loss": 0.6042202711105347, + "step": 1854 + }, + { + "epoch": 0.5424769703172978, + "grad_norm": 1.6632726033150385, + "learning_rate": 1.744502013659491e-05, + "loss": 0.8448539972305298, + "step": 1855 + }, + { + "epoch": 0.5427694107325632, + "grad_norm": 1.5168637416823716, 
+ "learning_rate": 1.7441791518432877e-05, + "loss": 0.6541755795478821, + "step": 1856 + }, + { + "epoch": 0.5430618511478287, + "grad_norm": 1.3214742528031191, + "learning_rate": 1.7438561160797326e-05, + "loss": 0.6700184345245361, + "step": 1857 + }, + { + "epoch": 0.543354291563094, + "grad_norm": 1.5975598198717695, + "learning_rate": 1.7435329064443335e-05, + "loss": 0.6407896280288696, + "step": 1858 + }, + { + "epoch": 0.5436467319783594, + "grad_norm": 1.1007084555597737, + "learning_rate": 1.7432095230126382e-05, + "loss": 0.5380120277404785, + "step": 1859 + }, + { + "epoch": 0.5439391723936248, + "grad_norm": 1.4184366915429367, + "learning_rate": 1.7428859658602353e-05, + "loss": 0.6561373472213745, + "step": 1860 + }, + { + "epoch": 0.5442316128088902, + "grad_norm": 1.7211281199225186, + "learning_rate": 1.7425622350627545e-05, + "loss": 0.724541962146759, + "step": 1861 + }, + { + "epoch": 0.5445240532241555, + "grad_norm": 1.3361773700031112, + "learning_rate": 1.7422383306958666e-05, + "loss": 0.6258946657180786, + "step": 1862 + }, + { + "epoch": 0.544816493639421, + "grad_norm": 1.4343211647036773, + "learning_rate": 1.7419142528352815e-05, + "loss": 0.560769259929657, + "step": 1863 + }, + { + "epoch": 0.5451089340546864, + "grad_norm": 1.3199774156859019, + "learning_rate": 1.741590001556751e-05, + "loss": 0.7782202363014221, + "step": 1864 + }, + { + "epoch": 0.5454013744699517, + "grad_norm": 1.1330260111547463, + "learning_rate": 1.7412655769360663e-05, + "loss": 0.5956888198852539, + "step": 1865 + }, + { + "epoch": 0.5456938148852172, + "grad_norm": 1.2304180375361309, + "learning_rate": 1.7409409790490602e-05, + "loss": 0.6251999139785767, + "step": 1866 + }, + { + "epoch": 0.5459862553004825, + "grad_norm": 1.201828702533108, + "learning_rate": 1.740616207971605e-05, + "loss": 0.5864061713218689, + "step": 1867 + }, + { + "epoch": 0.5462786957157479, + "grad_norm": 1.1335552643310969, + "learning_rate": 1.7402912637796146e-05, + 
"loss": 0.6241225004196167, + "step": 1868 + }, + { + "epoch": 0.5465711361310133, + "grad_norm": 1.4457655679285375, + "learning_rate": 1.739966146549042e-05, + "loss": 0.7190053462982178, + "step": 1869 + }, + { + "epoch": 0.5468635765462787, + "grad_norm": 1.3107442552185273, + "learning_rate": 1.739640856355882e-05, + "loss": 0.6771985292434692, + "step": 1870 + }, + { + "epoch": 0.547156016961544, + "grad_norm": 1.3163112428890422, + "learning_rate": 1.7393153932761687e-05, + "loss": 0.5480636954307556, + "step": 1871 + }, + { + "epoch": 0.5474484573768095, + "grad_norm": 1.5272520029044583, + "learning_rate": 1.7389897573859773e-05, + "loss": 0.7362977862358093, + "step": 1872 + }, + { + "epoch": 0.5477408977920749, + "grad_norm": 1.3701377425052599, + "learning_rate": 1.7386639487614232e-05, + "loss": 0.6483198404312134, + "step": 1873 + }, + { + "epoch": 0.5480333382073402, + "grad_norm": 1.137215399363759, + "learning_rate": 1.7383379674786622e-05, + "loss": 0.479977548122406, + "step": 1874 + }, + { + "epoch": 0.5483257786226057, + "grad_norm": 1.2815568792711947, + "learning_rate": 1.738011813613891e-05, + "loss": 0.6824718117713928, + "step": 1875 + }, + { + "epoch": 0.548618219037871, + "grad_norm": 1.4252738341228008, + "learning_rate": 1.737685487243345e-05, + "loss": 0.564873218536377, + "step": 1876 + }, + { + "epoch": 0.5489106594531364, + "grad_norm": 1.208162951014484, + "learning_rate": 1.7373589884433015e-05, + "loss": 0.5748772621154785, + "step": 1877 + }, + { + "epoch": 0.5492030998684018, + "grad_norm": 1.329038884364766, + "learning_rate": 1.7370323172900778e-05, + "loss": 0.6403437852859497, + "step": 1878 + }, + { + "epoch": 0.5494955402836672, + "grad_norm": 1.7288162586927747, + "learning_rate": 1.7367054738600312e-05, + "loss": 0.8253078460693359, + "step": 1879 + }, + { + "epoch": 0.5497879806989326, + "grad_norm": 1.204164217164209, + "learning_rate": 1.7363784582295596e-05, + "loss": 0.6823058128356934, + "step": 1880 + }, + { + 
"epoch": 0.550080421114198, + "grad_norm": 1.0289811643005782, + "learning_rate": 1.7360512704751003e-05, + "loss": 0.48659563064575195, + "step": 1881 + }, + { + "epoch": 0.5503728615294634, + "grad_norm": 1.5395158772607802, + "learning_rate": 1.735723910673132e-05, + "loss": 0.6380710601806641, + "step": 1882 + }, + { + "epoch": 0.5506653019447287, + "grad_norm": 1.512121712639047, + "learning_rate": 1.7353963789001723e-05, + "loss": 0.6956683397293091, + "step": 1883 + }, + { + "epoch": 0.5509577423599942, + "grad_norm": 1.2992852551955654, + "learning_rate": 1.735068675232781e-05, + "loss": 0.5751473903656006, + "step": 1884 + }, + { + "epoch": 0.5512501827752595, + "grad_norm": 1.3297689987083825, + "learning_rate": 1.734740799747556e-05, + "loss": 0.7265490293502808, + "step": 1885 + }, + { + "epoch": 0.5515426231905249, + "grad_norm": 1.3317519459591716, + "learning_rate": 1.734412752521136e-05, + "loss": 0.7419843673706055, + "step": 1886 + }, + { + "epoch": 0.5518350636057903, + "grad_norm": 1.3385317545855182, + "learning_rate": 1.734084533630201e-05, + "loss": 0.7381073236465454, + "step": 1887 + }, + { + "epoch": 0.5521275040210557, + "grad_norm": 1.4687535531628584, + "learning_rate": 1.7337561431514692e-05, + "loss": 0.6542054414749146, + "step": 1888 + }, + { + "epoch": 0.5524199444363211, + "grad_norm": 1.3144027889366288, + "learning_rate": 1.7334275811617e-05, + "loss": 0.6283866167068481, + "step": 1889 + }, + { + "epoch": 0.5527123848515865, + "grad_norm": 1.34879443340803, + "learning_rate": 1.7330988477376935e-05, + "loss": 0.6171330809593201, + "step": 1890 + }, + { + "epoch": 0.5530048252668519, + "grad_norm": 1.309075407888037, + "learning_rate": 1.7327699429562887e-05, + "loss": 0.5181430578231812, + "step": 1891 + }, + { + "epoch": 0.5532972656821172, + "grad_norm": 1.4382455208413174, + "learning_rate": 1.7324408668943645e-05, + "loss": 0.7337771058082581, + "step": 1892 + }, + { + "epoch": 0.5535897060973827, + "grad_norm": 
1.3677542553778577, + "learning_rate": 1.7321116196288413e-05, + "loss": 0.5193721055984497, + "step": 1893 + }, + { + "epoch": 0.553882146512648, + "grad_norm": 1.2912522952038028, + "learning_rate": 1.731782201236678e-05, + "loss": 0.7743211388587952, + "step": 1894 + }, + { + "epoch": 0.5541745869279134, + "grad_norm": 1.5457463678190766, + "learning_rate": 1.731452611794875e-05, + "loss": 0.8244242072105408, + "step": 1895 + }, + { + "epoch": 0.5544670273431789, + "grad_norm": 1.3153817051947132, + "learning_rate": 1.7311228513804712e-05, + "loss": 0.6276153326034546, + "step": 1896 + }, + { + "epoch": 0.5547594677584442, + "grad_norm": 1.4741498614217154, + "learning_rate": 1.7307929200705463e-05, + "loss": 0.7919771671295166, + "step": 1897 + }, + { + "epoch": 0.5550519081737096, + "grad_norm": 1.527110359994231, + "learning_rate": 1.7304628179422192e-05, + "loss": 0.6187459230422974, + "step": 1898 + }, + { + "epoch": 0.555344348588975, + "grad_norm": 1.1766146767977552, + "learning_rate": 1.7301325450726497e-05, + "loss": 0.6190363764762878, + "step": 1899 + }, + { + "epoch": 0.5556367890042404, + "grad_norm": 1.209178127119406, + "learning_rate": 1.7298021015390375e-05, + "loss": 0.5537956953048706, + "step": 1900 + }, + { + "epoch": 0.5559292294195057, + "grad_norm": 1.434637926231007, + "learning_rate": 1.729471487418621e-05, + "loss": 0.7164788246154785, + "step": 1901 + }, + { + "epoch": 0.5562216698347712, + "grad_norm": 1.2878374944552806, + "learning_rate": 1.7291407027886796e-05, + "loss": 0.6101689338684082, + "step": 1902 + }, + { + "epoch": 0.5565141102500366, + "grad_norm": 1.4102535348815881, + "learning_rate": 1.7288097477265322e-05, + "loss": 0.7112093567848206, + "step": 1903 + }, + { + "epoch": 0.5568065506653019, + "grad_norm": 1.698804519808014, + "learning_rate": 1.7284786223095376e-05, + "loss": 0.7807149291038513, + "step": 1904 + }, + { + "epoch": 0.5570989910805674, + "grad_norm": 1.3150296925108194, + "learning_rate": 
1.7281473266150942e-05, + "loss": 0.5723121166229248, + "step": 1905 + }, + { + "epoch": 0.5573914314958327, + "grad_norm": 1.4287078485940368, + "learning_rate": 1.7278158607206402e-05, + "loss": 0.6901307106018066, + "step": 1906 + }, + { + "epoch": 0.5576838719110981, + "grad_norm": 1.3895105915390893, + "learning_rate": 1.7274842247036547e-05, + "loss": 0.8247314095497131, + "step": 1907 + }, + { + "epoch": 0.5579763123263635, + "grad_norm": 1.2902939634670878, + "learning_rate": 1.727152418641654e-05, + "loss": 0.758405327796936, + "step": 1908 + }, + { + "epoch": 0.5582687527416289, + "grad_norm": 1.1507745861737273, + "learning_rate": 1.7268204426121967e-05, + "loss": 0.6448276042938232, + "step": 1909 + }, + { + "epoch": 0.5585611931568942, + "grad_norm": 1.4597983603763345, + "learning_rate": 1.7264882966928803e-05, + "loss": 0.6846790313720703, + "step": 1910 + }, + { + "epoch": 0.5588536335721597, + "grad_norm": 1.494960410585431, + "learning_rate": 1.726155980961342e-05, + "loss": 0.6427637338638306, + "step": 1911 + }, + { + "epoch": 0.5591460739874251, + "grad_norm": 1.6049335332675108, + "learning_rate": 1.7258234954952578e-05, + "loss": 0.7105496525764465, + "step": 1912 + }, + { + "epoch": 0.5594385144026904, + "grad_norm": 1.247874236176648, + "learning_rate": 1.7254908403723446e-05, + "loss": 0.6307404041290283, + "step": 1913 + }, + { + "epoch": 0.5597309548179559, + "grad_norm": 1.3460021193743466, + "learning_rate": 1.7251580156703587e-05, + "loss": 0.7194197177886963, + "step": 1914 + }, + { + "epoch": 0.5600233952332212, + "grad_norm": 1.4541814827650097, + "learning_rate": 1.7248250214670955e-05, + "loss": 0.676772952079773, + "step": 1915 + }, + { + "epoch": 0.5603158356484866, + "grad_norm": 1.4231220185819522, + "learning_rate": 1.724491857840391e-05, + "loss": 0.6047924160957336, + "step": 1916 + }, + { + "epoch": 0.560608276063752, + "grad_norm": 1.4639689581400968, + "learning_rate": 1.7241585248681192e-05, + "loss": 
0.7412474155426025, + "step": 1917 + }, + { + "epoch": 0.5609007164790174, + "grad_norm": 1.3634846491128696, + "learning_rate": 1.7238250226281952e-05, + "loss": 0.6337922215461731, + "step": 1918 + }, + { + "epoch": 0.5611931568942828, + "grad_norm": 1.325394488194612, + "learning_rate": 1.7234913511985733e-05, + "loss": 0.7192416787147522, + "step": 1919 + }, + { + "epoch": 0.5614855973095482, + "grad_norm": 1.5807591545293311, + "learning_rate": 1.723157510657247e-05, + "loss": 0.6576168537139893, + "step": 1920 + }, + { + "epoch": 0.5617780377248136, + "grad_norm": 1.2677184116479052, + "learning_rate": 1.722823501082249e-05, + "loss": 0.6592451333999634, + "step": 1921 + }, + { + "epoch": 0.5620704781400789, + "grad_norm": 1.3384834377307993, + "learning_rate": 1.722489322551653e-05, + "loss": 0.8042774796485901, + "step": 1922 + }, + { + "epoch": 0.5623629185553444, + "grad_norm": 1.4566017039283872, + "learning_rate": 1.7221549751435706e-05, + "loss": 0.727135181427002, + "step": 1923 + }, + { + "epoch": 0.5626553589706097, + "grad_norm": 1.3099994778880142, + "learning_rate": 1.7218204589361535e-05, + "loss": 0.5641134977340698, + "step": 1924 + }, + { + "epoch": 0.5629477993858751, + "grad_norm": 1.5113194940037022, + "learning_rate": 1.7214857740075924e-05, + "loss": 0.6354084610939026, + "step": 1925 + }, + { + "epoch": 0.5632402398011405, + "grad_norm": 1.3038206210364904, + "learning_rate": 1.7211509204361187e-05, + "loss": 0.6044377088546753, + "step": 1926 + }, + { + "epoch": 0.5635326802164059, + "grad_norm": 1.2045011077136063, + "learning_rate": 1.7208158983000022e-05, + "loss": 0.5519559383392334, + "step": 1927 + }, + { + "epoch": 0.5638251206316713, + "grad_norm": 1.162061868190052, + "learning_rate": 1.7204807076775514e-05, + "loss": 0.4480612277984619, + "step": 1928 + }, + { + "epoch": 0.5641175610469367, + "grad_norm": 1.3899173129631617, + "learning_rate": 1.7201453486471167e-05, + "loss": 0.5929607152938843, + "step": 1929 + }, + { + 
"epoch": 0.5644100014622021, + "grad_norm": 2.021763483016241, + "learning_rate": 1.7198098212870847e-05, + "loss": 0.6863572001457214, + "step": 1930 + }, + { + "epoch": 0.5647024418774674, + "grad_norm": 1.0938398450209694, + "learning_rate": 1.719474125675884e-05, + "loss": 0.5551834106445312, + "step": 1931 + }, + { + "epoch": 0.5649948822927329, + "grad_norm": 1.3644128319132816, + "learning_rate": 1.7191382618919802e-05, + "loss": 0.6113166809082031, + "step": 1932 + }, + { + "epoch": 0.5652873227079982, + "grad_norm": 1.419009993473521, + "learning_rate": 1.7188022300138805e-05, + "loss": 0.7833362817764282, + "step": 1933 + }, + { + "epoch": 0.5655797631232636, + "grad_norm": 1.3899666208681147, + "learning_rate": 1.71846603012013e-05, + "loss": 0.5981882810592651, + "step": 1934 + }, + { + "epoch": 0.5658722035385291, + "grad_norm": 1.3211180154101085, + "learning_rate": 1.7181296622893132e-05, + "loss": 0.6009912490844727, + "step": 1935 + }, + { + "epoch": 0.5661646439537944, + "grad_norm": 1.5201002205446237, + "learning_rate": 1.717793126600054e-05, + "loss": 0.5605272054672241, + "step": 1936 + }, + { + "epoch": 0.5664570843690598, + "grad_norm": 1.9581129231236365, + "learning_rate": 1.717456423131016e-05, + "loss": 0.6310821771621704, + "step": 1937 + }, + { + "epoch": 0.5667495247843252, + "grad_norm": 1.3290964241159713, + "learning_rate": 1.7171195519609013e-05, + "loss": 0.6776266694068909, + "step": 1938 + }, + { + "epoch": 0.5670419651995906, + "grad_norm": 1.5744599660597636, + "learning_rate": 1.7167825131684516e-05, + "loss": 0.6369091868400574, + "step": 1939 + }, + { + "epoch": 0.5673344056148559, + "grad_norm": 1.5708596771950396, + "learning_rate": 1.7164453068324472e-05, + "loss": 0.6241647005081177, + "step": 1940 + }, + { + "epoch": 0.5676268460301214, + "grad_norm": 1.1863544042032323, + "learning_rate": 1.7161079330317086e-05, + "loss": 0.6411961317062378, + "step": 1941 + }, + { + "epoch": 0.5679192864453868, + "grad_norm": 
1.4635134179889109, + "learning_rate": 1.7157703918450942e-05, + "loss": 0.6148936152458191, + "step": 1942 + }, + { + "epoch": 0.5682117268606521, + "grad_norm": 1.3183225060577142, + "learning_rate": 1.7154326833515034e-05, + "loss": 0.5006934404373169, + "step": 1943 + }, + { + "epoch": 0.5685041672759176, + "grad_norm": 1.462356689812602, + "learning_rate": 1.7150948076298722e-05, + "loss": 0.7446701526641846, + "step": 1944 + }, + { + "epoch": 0.5687966076911829, + "grad_norm": 1.2052848826016378, + "learning_rate": 1.7147567647591777e-05, + "loss": 0.6159533262252808, + "step": 1945 + }, + { + "epoch": 0.5690890481064483, + "grad_norm": 1.4298530885651661, + "learning_rate": 1.7144185548184355e-05, + "loss": 0.6437554359436035, + "step": 1946 + }, + { + "epoch": 0.5693814885217137, + "grad_norm": 1.3361469734250542, + "learning_rate": 1.7140801778866995e-05, + "loss": 0.6229397654533386, + "step": 1947 + }, + { + "epoch": 0.5696739289369791, + "grad_norm": 1.4197238006731758, + "learning_rate": 1.7137416340430636e-05, + "loss": 0.5777184963226318, + "step": 1948 + }, + { + "epoch": 0.5699663693522444, + "grad_norm": 1.543436374887725, + "learning_rate": 1.7134029233666603e-05, + "loss": 0.7817827463150024, + "step": 1949 + }, + { + "epoch": 0.5702588097675099, + "grad_norm": 1.3527927450904613, + "learning_rate": 1.713064045936662e-05, + "loss": 0.6784861087799072, + "step": 1950 + }, + { + "epoch": 0.5705512501827753, + "grad_norm": 1.2839254399050724, + "learning_rate": 1.7127250018322777e-05, + "loss": 0.6883150339126587, + "step": 1951 + }, + { + "epoch": 0.5708436905980406, + "grad_norm": 1.093202890209594, + "learning_rate": 1.712385791132758e-05, + "loss": 0.5464504957199097, + "step": 1952 + }, + { + "epoch": 0.5711361310133061, + "grad_norm": 1.2617859237604026, + "learning_rate": 1.7120464139173908e-05, + "loss": 0.5950040817260742, + "step": 1953 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 1.374864335037442, + "learning_rate": 
1.7117068702655034e-05, + "loss": 0.6381576061248779, + "step": 1954 + }, + { + "epoch": 0.5717210118438368, + "grad_norm": 1.2624571465966312, + "learning_rate": 1.7113671602564628e-05, + "loss": 0.6611777544021606, + "step": 1955 + }, + { + "epoch": 0.5720134522591022, + "grad_norm": 1.2625162580462326, + "learning_rate": 1.7110272839696735e-05, + "loss": 0.5057446956634521, + "step": 1956 + }, + { + "epoch": 0.5723058926743676, + "grad_norm": 1.3802970727547992, + "learning_rate": 1.7106872414845798e-05, + "loss": 0.6095671653747559, + "step": 1957 + }, + { + "epoch": 0.572598333089633, + "grad_norm": 1.4171107803407814, + "learning_rate": 1.710347032880664e-05, + "loss": 0.5514808893203735, + "step": 1958 + }, + { + "epoch": 0.5728907735048984, + "grad_norm": 2.1059044775107516, + "learning_rate": 1.7100066582374487e-05, + "loss": 0.6491304039955139, + "step": 1959 + }, + { + "epoch": 0.5731832139201638, + "grad_norm": 1.2887931231971388, + "learning_rate": 1.7096661176344936e-05, + "loss": 0.6759692430496216, + "step": 1960 + }, + { + "epoch": 0.5734756543354291, + "grad_norm": 1.4738884192318065, + "learning_rate": 1.709325411151399e-05, + "loss": 0.5897858142852783, + "step": 1961 + }, + { + "epoch": 0.5737680947506946, + "grad_norm": 1.537196415964603, + "learning_rate": 1.7089845388678015e-05, + "loss": 0.6822922229766846, + "step": 1962 + }, + { + "epoch": 0.5740605351659599, + "grad_norm": 1.2963583337618676, + "learning_rate": 1.7086435008633792e-05, + "loss": 0.7694820165634155, + "step": 1963 + }, + { + "epoch": 0.5743529755812253, + "grad_norm": 1.5109651591265172, + "learning_rate": 1.7083022972178473e-05, + "loss": 0.702151358127594, + "step": 1964 + }, + { + "epoch": 0.5746454159964907, + "grad_norm": 1.564445011536072, + "learning_rate": 1.7079609280109597e-05, + "loss": 0.768844485282898, + "step": 1965 + }, + { + "epoch": 0.5749378564117561, + "grad_norm": 1.4251497195478635, + "learning_rate": 1.7076193933225097e-05, + "loss": 
0.6641331911087036, + "step": 1966 + }, + { + "epoch": 0.5752302968270215, + "grad_norm": 1.3577479649866828, + "learning_rate": 1.707277693232329e-05, + "loss": 0.7176777124404907, + "step": 1967 + }, + { + "epoch": 0.5755227372422869, + "grad_norm": 1.4539026175393464, + "learning_rate": 1.7069358278202877e-05, + "loss": 0.6543929576873779, + "step": 1968 + }, + { + "epoch": 0.5758151776575523, + "grad_norm": 1.422676342883674, + "learning_rate": 1.7065937971662953e-05, + "loss": 0.7501214742660522, + "step": 1969 + }, + { + "epoch": 0.5761076180728176, + "grad_norm": 1.1830543705848042, + "learning_rate": 1.7062516013502984e-05, + "loss": 0.6013212203979492, + "step": 1970 + }, + { + "epoch": 0.5764000584880831, + "grad_norm": 1.489892931502725, + "learning_rate": 1.7059092404522843e-05, + "loss": 0.5920547246932983, + "step": 1971 + }, + { + "epoch": 0.5766924989033484, + "grad_norm": 1.1082983109051399, + "learning_rate": 1.7055667145522767e-05, + "loss": 0.6720744371414185, + "step": 1972 + }, + { + "epoch": 0.5769849393186138, + "grad_norm": 1.3476214386922525, + "learning_rate": 1.70522402373034e-05, + "loss": 0.6938234567642212, + "step": 1973 + }, + { + "epoch": 0.5772773797338793, + "grad_norm": 1.321699429936501, + "learning_rate": 1.704881168066575e-05, + "loss": 0.6430555582046509, + "step": 1974 + }, + { + "epoch": 0.5775698201491446, + "grad_norm": 1.331724408429167, + "learning_rate": 1.7045381476411234e-05, + "loss": 0.7738221883773804, + "step": 1975 + }, + { + "epoch": 0.57786226056441, + "grad_norm": 1.2033511527827634, + "learning_rate": 1.704194962534163e-05, + "loss": 0.5335453748703003, + "step": 1976 + }, + { + "epoch": 0.5781547009796754, + "grad_norm": 1.4123366931040846, + "learning_rate": 1.7038516128259118e-05, + "loss": 0.691404402256012, + "step": 1977 + }, + { + "epoch": 0.5784471413949408, + "grad_norm": 1.6032589522393152, + "learning_rate": 1.7035080985966253e-05, + "loss": 0.7371880412101746, + "step": 1978 + }, + { + "epoch": 
0.5787395818102061, + "grad_norm": 1.356558066648364, + "learning_rate": 1.7031644199265987e-05, + "loss": 0.5661574602127075, + "step": 1979 + }, + { + "epoch": 0.5790320222254716, + "grad_norm": 1.069750621474732, + "learning_rate": 1.702820576896164e-05, + "loss": 0.5823863744735718, + "step": 1980 + }, + { + "epoch": 0.579324462640737, + "grad_norm": 1.608685609966537, + "learning_rate": 1.7024765695856924e-05, + "loss": 0.6228796243667603, + "step": 1981 + }, + { + "epoch": 0.5796169030560023, + "grad_norm": 1.3395261062815815, + "learning_rate": 1.702132398075594e-05, + "loss": 0.5788040161132812, + "step": 1982 + }, + { + "epoch": 0.5799093434712678, + "grad_norm": 1.1540676629937416, + "learning_rate": 1.701788062446317e-05, + "loss": 0.5950253009796143, + "step": 1983 + }, + { + "epoch": 0.5802017838865331, + "grad_norm": 1.2446098890682338, + "learning_rate": 1.7014435627783466e-05, + "loss": 0.5672034025192261, + "step": 1984 + }, + { + "epoch": 0.5804942243017985, + "grad_norm": 1.329055336569987, + "learning_rate": 1.7010988991522085e-05, + "loss": 0.6646316051483154, + "step": 1985 + }, + { + "epoch": 0.5807866647170639, + "grad_norm": 1.2423480846022465, + "learning_rate": 1.7007540716484657e-05, + "loss": 0.6430097818374634, + "step": 1986 + }, + { + "epoch": 0.5810791051323293, + "grad_norm": 1.2889752174339557, + "learning_rate": 1.700409080347719e-05, + "loss": 0.5803329348564148, + "step": 1987 + }, + { + "epoch": 0.5813715455475946, + "grad_norm": 1.613226747300198, + "learning_rate": 1.7000639253306085e-05, + "loss": 0.7526525259017944, + "step": 1988 + }, + { + "epoch": 0.5816639859628601, + "grad_norm": 1.329271357875936, + "learning_rate": 1.6997186066778118e-05, + "loss": 0.6679468750953674, + "step": 1989 + }, + { + "epoch": 0.5819564263781255, + "grad_norm": 1.5773364597040387, + "learning_rate": 1.6993731244700454e-05, + "loss": 0.7233256101608276, + "step": 1990 + }, + { + "epoch": 0.5822488667933908, + "grad_norm": 1.3632345541871926, 
+ "learning_rate": 1.6990274787880633e-05, + "loss": 0.5986290574073792, + "step": 1991 + }, + { + "epoch": 0.5825413072086563, + "grad_norm": 1.3136772281139917, + "learning_rate": 1.6986816697126583e-05, + "loss": 0.6898672580718994, + "step": 1992 + }, + { + "epoch": 0.5828337476239216, + "grad_norm": 1.6057802032529045, + "learning_rate": 1.698335697324661e-05, + "loss": 0.6888613104820251, + "step": 1993 + }, + { + "epoch": 0.583126188039187, + "grad_norm": 1.3469913891844598, + "learning_rate": 1.6979895617049404e-05, + "loss": 0.6002428531646729, + "step": 1994 + }, + { + "epoch": 0.5834186284544524, + "grad_norm": 1.3517104173069454, + "learning_rate": 1.6976432629344036e-05, + "loss": 0.6372438669204712, + "step": 1995 + }, + { + "epoch": 0.5837110688697178, + "grad_norm": 1.0868680846473084, + "learning_rate": 1.6972968010939953e-05, + "loss": 0.529569149017334, + "step": 1996 + }, + { + "epoch": 0.5840035092849832, + "grad_norm": 1.415626330345063, + "learning_rate": 1.6969501762647002e-05, + "loss": 0.5534025430679321, + "step": 1997 + }, + { + "epoch": 0.5842959497002486, + "grad_norm": 1.5855609078257513, + "learning_rate": 1.6966033885275384e-05, + "loss": 0.8105937242507935, + "step": 1998 + }, + { + "epoch": 0.584588390115514, + "grad_norm": 1.4927698791899027, + "learning_rate": 1.6962564379635702e-05, + "loss": 0.7657530903816223, + "step": 1999 + }, + { + "epoch": 0.5848808305307793, + "grad_norm": 1.2186213815751603, + "learning_rate": 1.6959093246538927e-05, + "loss": 0.5941641330718994, + "step": 2000 + }, + { + "epoch": 0.5851732709460448, + "grad_norm": 1.2310851342087676, + "learning_rate": 1.695562048679642e-05, + "loss": 0.6130149364471436, + "step": 2001 + }, + { + "epoch": 0.5854657113613101, + "grad_norm": 1.4904324383349616, + "learning_rate": 1.6952146101219914e-05, + "loss": 0.7078043222427368, + "step": 2002 + }, + { + "epoch": 0.5857581517765755, + "grad_norm": 1.4412882425814895, + "learning_rate": 1.6948670090621528e-05, + 
"loss": 0.6330863237380981, + "step": 2003 + }, + { + "epoch": 0.5860505921918409, + "grad_norm": 1.2835823957491164, + "learning_rate": 1.6945192455813755e-05, + "loss": 0.6631220579147339, + "step": 2004 + }, + { + "epoch": 0.5863430326071063, + "grad_norm": 1.411600977622384, + "learning_rate": 1.6941713197609476e-05, + "loss": 0.6669473648071289, + "step": 2005 + }, + { + "epoch": 0.5866354730223717, + "grad_norm": 1.370088328820007, + "learning_rate": 1.6938232316821938e-05, + "loss": 0.608252763748169, + "step": 2006 + }, + { + "epoch": 0.5869279134376371, + "grad_norm": 1.3777699704962545, + "learning_rate": 1.6934749814264786e-05, + "loss": 0.5979427695274353, + "step": 2007 + }, + { + "epoch": 0.5872203538529025, + "grad_norm": 1.541200433158731, + "learning_rate": 1.6931265690752027e-05, + "loss": 0.5653454661369324, + "step": 2008 + }, + { + "epoch": 0.5875127942681678, + "grad_norm": 1.1212005773159774, + "learning_rate": 1.6927779947098052e-05, + "loss": 0.6399147510528564, + "step": 2009 + }, + { + "epoch": 0.5878052346834333, + "grad_norm": 1.1797468758477498, + "learning_rate": 1.6924292584117642e-05, + "loss": 0.41824793815612793, + "step": 2010 + }, + { + "epoch": 0.5880976750986986, + "grad_norm": 1.917297128854583, + "learning_rate": 1.6920803602625938e-05, + "loss": 0.8881042003631592, + "step": 2011 + }, + { + "epoch": 0.588390115513964, + "grad_norm": 1.331713386917835, + "learning_rate": 1.6917313003438473e-05, + "loss": 0.636030912399292, + "step": 2012 + }, + { + "epoch": 0.5886825559292295, + "grad_norm": 1.4002891525649699, + "learning_rate": 1.6913820787371147e-05, + "loss": 0.6038305759429932, + "step": 2013 + }, + { + "epoch": 0.5889749963444948, + "grad_norm": 1.1435051787090085, + "learning_rate": 1.6910326955240252e-05, + "loss": 0.7073840498924255, + "step": 2014 + }, + { + "epoch": 0.5892674367597602, + "grad_norm": 1.4386346426993692, + "learning_rate": 1.6906831507862446e-05, + "loss": 0.5804994106292725, + "step": 2015 + }, + 
{ + "epoch": 0.5895598771750256, + "grad_norm": 1.5741785374654678, + "learning_rate": 1.6903334446054768e-05, + "loss": 0.8194780349731445, + "step": 2016 + }, + { + "epoch": 0.589852317590291, + "grad_norm": 1.812303850133564, + "learning_rate": 1.689983577063464e-05, + "loss": 0.7348685264587402, + "step": 2017 + }, + { + "epoch": 0.5901447580055563, + "grad_norm": 1.1971589423872142, + "learning_rate": 1.689633548241985e-05, + "loss": 0.5855007171630859, + "step": 2018 + }, + { + "epoch": 0.5904371984208218, + "grad_norm": 1.3707253561652837, + "learning_rate": 1.689283358222857e-05, + "loss": 0.7387616634368896, + "step": 2019 + }, + { + "epoch": 0.5907296388360872, + "grad_norm": 1.1680954205847025, + "learning_rate": 1.688933007087935e-05, + "loss": 0.688759446144104, + "step": 2020 + }, + { + "epoch": 0.5910220792513525, + "grad_norm": 1.5341989172452428, + "learning_rate": 1.6885824949191117e-05, + "loss": 0.7203953266143799, + "step": 2021 + }, + { + "epoch": 0.591314519666618, + "grad_norm": 1.2850552689542662, + "learning_rate": 1.6882318217983165e-05, + "loss": 0.6465663909912109, + "step": 2022 + }, + { + "epoch": 0.5916069600818833, + "grad_norm": 1.1725524993946357, + "learning_rate": 1.6878809878075176e-05, + "loss": 0.6625394821166992, + "step": 2023 + }, + { + "epoch": 0.5918994004971487, + "grad_norm": 1.1518619162929866, + "learning_rate": 1.68752999302872e-05, + "loss": 0.6577074527740479, + "step": 2024 + }, + { + "epoch": 0.5921918409124141, + "grad_norm": 1.2660442226503865, + "learning_rate": 1.6871788375439667e-05, + "loss": 0.50509113073349, + "step": 2025 + }, + { + "epoch": 0.5924842813276795, + "grad_norm": 1.1506732126554624, + "learning_rate": 1.6868275214353387e-05, + "loss": 0.5723974704742432, + "step": 2026 + }, + { + "epoch": 0.5927767217429448, + "grad_norm": 1.5630741195611901, + "learning_rate": 1.6864760447849533e-05, + "loss": 0.6383459568023682, + "step": 2027 + }, + { + "epoch": 0.5930691621582103, + "grad_norm": 
1.5937791400894217, + "learning_rate": 1.6861244076749663e-05, + "loss": 0.5307388305664062, + "step": 2028 + }, + { + "epoch": 0.5933616025734757, + "grad_norm": 1.3756662975981515, + "learning_rate": 1.6857726101875706e-05, + "loss": 0.8009265661239624, + "step": 2029 + }, + { + "epoch": 0.593654042988741, + "grad_norm": 1.3635510886639874, + "learning_rate": 1.685420652404997e-05, + "loss": 0.5505321025848389, + "step": 2030 + }, + { + "epoch": 0.5939464834040065, + "grad_norm": 1.2645625310092812, + "learning_rate": 1.6850685344095134e-05, + "loss": 0.680927038192749, + "step": 2031 + }, + { + "epoch": 0.5942389238192718, + "grad_norm": 1.419624052256642, + "learning_rate": 1.684716256283425e-05, + "loss": 0.7357309460639954, + "step": 2032 + }, + { + "epoch": 0.5945313642345372, + "grad_norm": 1.2277919560967578, + "learning_rate": 1.6843638181090748e-05, + "loss": 0.5896620750427246, + "step": 2033 + }, + { + "epoch": 0.5948238046498026, + "grad_norm": 1.261982037348603, + "learning_rate": 1.6840112199688432e-05, + "loss": 0.5567387342453003, + "step": 2034 + }, + { + "epoch": 0.595116245065068, + "grad_norm": 1.2606984508496513, + "learning_rate": 1.6836584619451478e-05, + "loss": 0.6428712606430054, + "step": 2035 + }, + { + "epoch": 0.5954086854803334, + "grad_norm": 1.3387753764851709, + "learning_rate": 1.6833055441204436e-05, + "loss": 0.7430459260940552, + "step": 2036 + }, + { + "epoch": 0.5957011258955988, + "grad_norm": 1.250181817593343, + "learning_rate": 1.682952466577223e-05, + "loss": 0.5982654690742493, + "step": 2037 + }, + { + "epoch": 0.5959935663108642, + "grad_norm": 1.2721973260460164, + "learning_rate": 1.6825992293980158e-05, + "loss": 0.5807450413703918, + "step": 2038 + }, + { + "epoch": 0.5962860067261295, + "grad_norm": 1.4202543697420538, + "learning_rate": 1.6822458326653888e-05, + "loss": 0.7667814493179321, + "step": 2039 + }, + { + "epoch": 0.596578447141395, + "grad_norm": 1.4555539952275451, + "learning_rate": 
1.6818922764619467e-05, + "loss": 0.8192781805992126, + "step": 2040 + }, + { + "epoch": 0.5968708875566603, + "grad_norm": 1.3146767820144227, + "learning_rate": 1.681538560870331e-05, + "loss": 0.6652504205703735, + "step": 2041 + }, + { + "epoch": 0.5971633279719257, + "grad_norm": 1.4465108366403951, + "learning_rate": 1.6811846859732207e-05, + "loss": 0.6227332353591919, + "step": 2042 + }, + { + "epoch": 0.597455768387191, + "grad_norm": 1.1394575473936808, + "learning_rate": 1.6808306518533315e-05, + "loss": 0.5459558963775635, + "step": 2043 + }, + { + "epoch": 0.5977482088024565, + "grad_norm": 1.3498516241816683, + "learning_rate": 1.6804764585934167e-05, + "loss": 0.5176202058792114, + "step": 2044 + }, + { + "epoch": 0.5980406492177219, + "grad_norm": 1.5025501377940633, + "learning_rate": 1.6801221062762677e-05, + "loss": 0.5818016529083252, + "step": 2045 + }, + { + "epoch": 0.5983330896329873, + "grad_norm": 1.3397658451047565, + "learning_rate": 1.679767594984711e-05, + "loss": 0.622256875038147, + "step": 2046 + }, + { + "epoch": 0.5986255300482527, + "grad_norm": 1.2198859984633783, + "learning_rate": 1.6794129248016124e-05, + "loss": 0.5538911819458008, + "step": 2047 + }, + { + "epoch": 0.598917970463518, + "grad_norm": 1.1939205886096602, + "learning_rate": 1.6790580958098733e-05, + "loss": 0.4934890568256378, + "step": 2048 + }, + { + "epoch": 0.5992104108787835, + "grad_norm": 1.5628453531282531, + "learning_rate": 1.678703108092433e-05, + "loss": 0.6754223108291626, + "step": 2049 + }, + { + "epoch": 0.5995028512940488, + "grad_norm": 1.3047429440272302, + "learning_rate": 1.678347961732268e-05, + "loss": 0.48618268966674805, + "step": 2050 + }, + { + "epoch": 0.5997952917093142, + "grad_norm": 2.239352665042965, + "learning_rate": 1.6779926568123913e-05, + "loss": 0.6844758987426758, + "step": 2051 + }, + { + "epoch": 0.6000877321245797, + "grad_norm": 1.222439693123936, + "learning_rate": 1.677637193415853e-05, + "loss": 
0.5258621573448181, + "step": 2052 + }, + { + "epoch": 0.600380172539845, + "grad_norm": 1.5856950316684058, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.5571128129959106, + "step": 2053 + }, + { + "epoch": 0.6006726129551104, + "grad_norm": 1.514916071293939, + "learning_rate": 1.67692579152518e-05, + "loss": 0.5881344079971313, + "step": 2054 + }, + { + "epoch": 0.6009650533703758, + "grad_norm": 1.5804701546241575, + "learning_rate": 1.6765698531973305e-05, + "loss": 0.7162419557571411, + "step": 2055 + }, + { + "epoch": 0.6012574937856412, + "grad_norm": 1.487082432347586, + "learning_rate": 1.6762137567253917e-05, + "loss": 0.7470849752426147, + "step": 2056 + }, + { + "epoch": 0.6015499342009065, + "grad_norm": 1.4154424289161787, + "learning_rate": 1.6758575021925987e-05, + "loss": 0.6043628454208374, + "step": 2057 + }, + { + "epoch": 0.601842374616172, + "grad_norm": 1.4033964052969388, + "learning_rate": 1.6755010896822237e-05, + "loss": 0.6574143171310425, + "step": 2058 + }, + { + "epoch": 0.6021348150314374, + "grad_norm": 1.3508419478610747, + "learning_rate": 1.675144519277576e-05, + "loss": 0.605838418006897, + "step": 2059 + }, + { + "epoch": 0.6024272554467027, + "grad_norm": 1.2890691190480261, + "learning_rate": 1.6747877910620022e-05, + "loss": 0.5859218239784241, + "step": 2060 + }, + { + "epoch": 0.6027196958619682, + "grad_norm": 1.3985794655421304, + "learning_rate": 1.674430905118885e-05, + "loss": 0.7272971868515015, + "step": 2061 + }, + { + "epoch": 0.6030121362772335, + "grad_norm": 1.2870566467248659, + "learning_rate": 1.674073861531644e-05, + "loss": 0.606023907661438, + "step": 2062 + }, + { + "epoch": 0.6033045766924989, + "grad_norm": 1.345090429761192, + "learning_rate": 1.6737166603837364e-05, + "loss": 0.6029521822929382, + "step": 2063 + }, + { + "epoch": 0.6035970171077643, + "grad_norm": 1.1860277395685632, + "learning_rate": 1.673359301758656e-05, + "loss": 0.7544999122619629, + "step": 2064 + }, + { + "epoch": 
0.6038894575230297, + "grad_norm": 1.3953376279645262, + "learning_rate": 1.6730017857399327e-05, + "loss": 0.7487601637840271, + "step": 2065 + }, + { + "epoch": 0.604181897938295, + "grad_norm": 1.3904468062872732, + "learning_rate": 1.672644112411134e-05, + "loss": 0.6429200172424316, + "step": 2066 + }, + { + "epoch": 0.6044743383535605, + "grad_norm": 1.4246263416975375, + "learning_rate": 1.6722862818558635e-05, + "loss": 0.7337179183959961, + "step": 2067 + }, + { + "epoch": 0.6047667787688259, + "grad_norm": 1.432290850861675, + "learning_rate": 1.671928294157762e-05, + "loss": 0.6644014120101929, + "step": 2068 + }, + { + "epoch": 0.6050592191840912, + "grad_norm": 1.3048966935224826, + "learning_rate": 1.6715701494005078e-05, + "loss": 0.5987672805786133, + "step": 2069 + }, + { + "epoch": 0.6053516595993567, + "grad_norm": 1.5176113056744007, + "learning_rate": 1.671211847667814e-05, + "loss": 0.5878695845603943, + "step": 2070 + }, + { + "epoch": 0.605644100014622, + "grad_norm": 1.3348485026555847, + "learning_rate": 1.670853389043432e-05, + "loss": 0.540128231048584, + "step": 2071 + }, + { + "epoch": 0.6059365404298874, + "grad_norm": 1.3888450119982874, + "learning_rate": 1.670494773611149e-05, + "loss": 0.667206346988678, + "step": 2072 + }, + { + "epoch": 0.6062289808451528, + "grad_norm": 1.5911825658421195, + "learning_rate": 1.6701360014547896e-05, + "loss": 0.6433641910552979, + "step": 2073 + }, + { + "epoch": 0.6065214212604182, + "grad_norm": 1.447981653333928, + "learning_rate": 1.669777072658214e-05, + "loss": 0.5803529024124146, + "step": 2074 + }, + { + "epoch": 0.6068138616756836, + "grad_norm": 1.125005009009719, + "learning_rate": 1.6694179873053202e-05, + "loss": 0.6203820705413818, + "step": 2075 + }, + { + "epoch": 0.607106302090949, + "grad_norm": 1.3092542979615172, + "learning_rate": 1.669058745480042e-05, + "loss": 0.6194918155670166, + "step": 2076 + }, + { + "epoch": 0.6073987425062144, + "grad_norm": 1.593480689755987, + 
"learning_rate": 1.66869934726635e-05, + "loss": 0.6797547936439514, + "step": 2077 + }, + { + "epoch": 0.6076911829214797, + "grad_norm": 1.3923211889522802, + "learning_rate": 1.6683397927482512e-05, + "loss": 0.6076459884643555, + "step": 2078 + }, + { + "epoch": 0.6079836233367452, + "grad_norm": 1.3874225830336557, + "learning_rate": 1.6679800820097895e-05, + "loss": 0.6958068609237671, + "step": 2079 + }, + { + "epoch": 0.6082760637520105, + "grad_norm": 1.3355509335032223, + "learning_rate": 1.6676202151350453e-05, + "loss": 0.5819929242134094, + "step": 2080 + }, + { + "epoch": 0.6085685041672759, + "grad_norm": 1.3476445996808082, + "learning_rate": 1.6672601922081347e-05, + "loss": 0.7125047445297241, + "step": 2081 + }, + { + "epoch": 0.6088609445825413, + "grad_norm": 1.4432332437479862, + "learning_rate": 1.6669000133132108e-05, + "loss": 0.8046560287475586, + "step": 2082 + }, + { + "epoch": 0.6091533849978067, + "grad_norm": 1.192025927247586, + "learning_rate": 1.666539678534464e-05, + "loss": 0.5468478202819824, + "step": 2083 + }, + { + "epoch": 0.6094458254130721, + "grad_norm": 1.3403719695971306, + "learning_rate": 1.6661791879561204e-05, + "loss": 0.6387852430343628, + "step": 2084 + }, + { + "epoch": 0.6097382658283375, + "grad_norm": 1.3327872578740647, + "learning_rate": 1.6658185416624415e-05, + "loss": 0.643539547920227, + "step": 2085 + }, + { + "epoch": 0.6100307062436029, + "grad_norm": 1.2236148701775094, + "learning_rate": 1.6654577397377266e-05, + "loss": 0.5031965374946594, + "step": 2086 + }, + { + "epoch": 0.6103231466588682, + "grad_norm": 1.507439246425782, + "learning_rate": 1.6650967822663115e-05, + "loss": 0.6690273284912109, + "step": 2087 + }, + { + "epoch": 0.6106155870741337, + "grad_norm": 1.2924449065282086, + "learning_rate": 1.6647356693325672e-05, + "loss": 0.6396887302398682, + "step": 2088 + }, + { + "epoch": 0.610908027489399, + "grad_norm": 1.4444361497865652, + "learning_rate": 1.664374401020902e-05, + "loss": 
0.6306549310684204, + "step": 2089 + }, + { + "epoch": 0.6112004679046644, + "grad_norm": 1.3565777173208147, + "learning_rate": 1.66401297741576e-05, + "loss": 0.5936366319656372, + "step": 2090 + }, + { + "epoch": 0.6114929083199299, + "grad_norm": 1.1669567203268514, + "learning_rate": 1.6636513986016215e-05, + "loss": 0.6153277158737183, + "step": 2091 + }, + { + "epoch": 0.6117853487351952, + "grad_norm": 1.2085146124175858, + "learning_rate": 1.663289664663004e-05, + "loss": 0.6361621618270874, + "step": 2092 + }, + { + "epoch": 0.6120777891504606, + "grad_norm": 1.2163858440552462, + "learning_rate": 1.6629277756844603e-05, + "loss": 0.6511524319648743, + "step": 2093 + }, + { + "epoch": 0.612370229565726, + "grad_norm": 1.2219001757495958, + "learning_rate": 1.6625657317505792e-05, + "loss": 0.5811333656311035, + "step": 2094 + }, + { + "epoch": 0.6126626699809914, + "grad_norm": 1.4531007944498606, + "learning_rate": 1.6622035329459872e-05, + "loss": 0.6935377717018127, + "step": 2095 + }, + { + "epoch": 0.6129551103962567, + "grad_norm": 1.3697721797296887, + "learning_rate": 1.6618411793553455e-05, + "loss": 0.6363199949264526, + "step": 2096 + }, + { + "epoch": 0.6132475508115222, + "grad_norm": 1.6107434013725794, + "learning_rate": 1.6614786710633525e-05, + "loss": 0.7325713634490967, + "step": 2097 + }, + { + "epoch": 0.6135399912267876, + "grad_norm": 1.3944095356365322, + "learning_rate": 1.6611160081547414e-05, + "loss": 0.5739182829856873, + "step": 2098 + }, + { + "epoch": 0.6138324316420529, + "grad_norm": 1.4193388816384238, + "learning_rate": 1.6607531907142835e-05, + "loss": 0.611133873462677, + "step": 2099 + }, + { + "epoch": 0.6141248720573184, + "grad_norm": 1.579788361702439, + "learning_rate": 1.6603902188267842e-05, + "loss": 0.6419532299041748, + "step": 2100 + }, + { + "epoch": 0.6144173124725837, + "grad_norm": 1.482873128334509, + "learning_rate": 1.660027092577087e-05, + "loss": 0.7736743688583374, + "step": 2101 + }, + { + 
"epoch": 0.6147097528878491, + "grad_norm": 1.199857125427724, + "learning_rate": 1.6596638120500696e-05, + "loss": 0.5249119400978088, + "step": 2102 + }, + { + "epoch": 0.6150021933031145, + "grad_norm": 2.505852142425954, + "learning_rate": 1.6593003773306475e-05, + "loss": 0.7145636081695557, + "step": 2103 + }, + { + "epoch": 0.6152946337183799, + "grad_norm": 1.3335089477583737, + "learning_rate": 1.65893678850377e-05, + "loss": 0.5807666182518005, + "step": 2104 + }, + { + "epoch": 0.6155870741336452, + "grad_norm": 1.2437068513912055, + "learning_rate": 1.6585730456544255e-05, + "loss": 0.5049663782119751, + "step": 2105 + }, + { + "epoch": 0.6158795145489107, + "grad_norm": 1.4826397888996732, + "learning_rate": 1.658209148867635e-05, + "loss": 0.6744092702865601, + "step": 2106 + }, + { + "epoch": 0.6161719549641761, + "grad_norm": 1.4821897923446594, + "learning_rate": 1.6578450982284584e-05, + "loss": 0.605404794216156, + "step": 2107 + }, + { + "epoch": 0.6164643953794414, + "grad_norm": 1.1917544416711534, + "learning_rate": 1.6574808938219894e-05, + "loss": 0.6074866056442261, + "step": 2108 + }, + { + "epoch": 0.6167568357947069, + "grad_norm": 1.284543555588908, + "learning_rate": 1.6571165357333594e-05, + "loss": 0.6758207082748413, + "step": 2109 + }, + { + "epoch": 0.6170492762099722, + "grad_norm": 1.580962080275822, + "learning_rate": 1.6567520240477344e-05, + "loss": 0.7669274806976318, + "step": 2110 + }, + { + "epoch": 0.6173417166252376, + "grad_norm": 1.3997913559025885, + "learning_rate": 1.6563873588503173e-05, + "loss": 0.497562050819397, + "step": 2111 + }, + { + "epoch": 0.617634157040503, + "grad_norm": 1.6655652024231358, + "learning_rate": 1.656022540226345e-05, + "loss": 0.6398104429244995, + "step": 2112 + }, + { + "epoch": 0.6179265974557684, + "grad_norm": 1.4155810596985208, + "learning_rate": 1.6556575682610935e-05, + "loss": 0.6739988327026367, + "step": 2113 + }, + { + "epoch": 0.6182190378710338, + "grad_norm": 
1.3164921836609038, + "learning_rate": 1.6552924430398716e-05, + "loss": 0.5710165500640869, + "step": 2114 + }, + { + "epoch": 0.6185114782862992, + "grad_norm": 1.1567442833736337, + "learning_rate": 1.6549271646480253e-05, + "loss": 0.6087738871574402, + "step": 2115 + }, + { + "epoch": 0.6188039187015646, + "grad_norm": 1.1877649418617353, + "learning_rate": 1.6545617331709364e-05, + "loss": 0.5300824642181396, + "step": 2116 + }, + { + "epoch": 0.6190963591168299, + "grad_norm": 1.3759503189909044, + "learning_rate": 1.6541961486940222e-05, + "loss": 0.7384774684906006, + "step": 2117 + }, + { + "epoch": 0.6193887995320954, + "grad_norm": 1.1608035895573054, + "learning_rate": 1.6538304113027356e-05, + "loss": 0.5867838263511658, + "step": 2118 + }, + { + "epoch": 0.6196812399473607, + "grad_norm": 1.4435135524238625, + "learning_rate": 1.653464521082566e-05, + "loss": 0.617068886756897, + "step": 2119 + }, + { + "epoch": 0.6199736803626261, + "grad_norm": 1.2420433862943483, + "learning_rate": 1.6530984781190374e-05, + "loss": 0.7316439151763916, + "step": 2120 + }, + { + "epoch": 0.6202661207778914, + "grad_norm": 1.3153827472233475, + "learning_rate": 1.6527322824977104e-05, + "loss": 0.5469995737075806, + "step": 2121 + }, + { + "epoch": 0.6205585611931569, + "grad_norm": 1.4608354678316708, + "learning_rate": 1.6523659343041815e-05, + "loss": 0.6577411890029907, + "step": 2122 + }, + { + "epoch": 0.6208510016084223, + "grad_norm": 1.5130442860821829, + "learning_rate": 1.6519994336240816e-05, + "loss": 0.7425049543380737, + "step": 2123 + }, + { + "epoch": 0.6211434420236877, + "grad_norm": 1.7408354143028393, + "learning_rate": 1.6516327805430785e-05, + "loss": 0.7894090414047241, + "step": 2124 + }, + { + "epoch": 0.6214358824389531, + "grad_norm": 1.2267269656084083, + "learning_rate": 1.651265975146875e-05, + "loss": 0.5739543437957764, + "step": 2125 + }, + { + "epoch": 0.6217283228542184, + "grad_norm": 1.2973694692382243, + "learning_rate": 
1.6508990175212092e-05, + "loss": 0.6987308263778687, + "step": 2126 + }, + { + "epoch": 0.6220207632694839, + "grad_norm": 1.237403110571432, + "learning_rate": 1.650531907751856e-05, + "loss": 0.5956544280052185, + "step": 2127 + }, + { + "epoch": 0.6223132036847492, + "grad_norm": 1.3646659152675398, + "learning_rate": 1.6501646459246245e-05, + "loss": 0.582348108291626, + "step": 2128 + }, + { + "epoch": 0.6226056441000146, + "grad_norm": 1.327256978138479, + "learning_rate": 1.64979723212536e-05, + "loss": 0.8057917356491089, + "step": 2129 + }, + { + "epoch": 0.6228980845152801, + "grad_norm": 1.1623408864017983, + "learning_rate": 1.6494296664399428e-05, + "loss": 0.6237305402755737, + "step": 2130 + }, + { + "epoch": 0.6231905249305454, + "grad_norm": 1.3152067943219485, + "learning_rate": 1.6490619489542905e-05, + "loss": 0.6445767879486084, + "step": 2131 + }, + { + "epoch": 0.6234829653458108, + "grad_norm": 1.4611569228302668, + "learning_rate": 1.648694079754354e-05, + "loss": 0.6397994160652161, + "step": 2132 + }, + { + "epoch": 0.6237754057610762, + "grad_norm": 1.3955823025243248, + "learning_rate": 1.64832605892612e-05, + "loss": 0.8216533660888672, + "step": 2133 + }, + { + "epoch": 0.6240678461763416, + "grad_norm": 1.3134524569329014, + "learning_rate": 1.6479578865556115e-05, + "loss": 0.6894406080245972, + "step": 2134 + }, + { + "epoch": 0.6243602865916069, + "grad_norm": 1.2940264658828888, + "learning_rate": 1.6475895627288873e-05, + "loss": 0.6608946323394775, + "step": 2135 + }, + { + "epoch": 0.6246527270068724, + "grad_norm": 1.4094544295935185, + "learning_rate": 1.6472210875320397e-05, + "loss": 0.6070076823234558, + "step": 2136 + }, + { + "epoch": 0.6249451674221378, + "grad_norm": 1.4359082412623407, + "learning_rate": 1.6468524610511982e-05, + "loss": 0.7357348799705505, + "step": 2137 + }, + { + "epoch": 0.6252376078374031, + "grad_norm": 1.201965871501085, + "learning_rate": 1.6464836833725267e-05, + "loss": 0.5959880352020264, 
+ "step": 2138 + }, + { + "epoch": 0.6255300482526686, + "grad_norm": 1.3046810888024383, + "learning_rate": 1.646114754582225e-05, + "loss": 0.7812649011611938, + "step": 2139 + }, + { + "epoch": 0.6258224886679339, + "grad_norm": 1.6609760293820528, + "learning_rate": 1.6457456747665282e-05, + "loss": 0.5985091924667358, + "step": 2140 + }, + { + "epoch": 0.6261149290831993, + "grad_norm": 1.5609316045902142, + "learning_rate": 1.645376444011706e-05, + "loss": 0.6610564589500427, + "step": 2141 + }, + { + "epoch": 0.6264073694984647, + "grad_norm": 1.3917319855245425, + "learning_rate": 1.6450070624040636e-05, + "loss": 0.6876299381256104, + "step": 2142 + }, + { + "epoch": 0.6266998099137301, + "grad_norm": 1.3567193814213938, + "learning_rate": 1.6446375300299425e-05, + "loss": 0.6715782284736633, + "step": 2143 + }, + { + "epoch": 0.6269922503289954, + "grad_norm": 1.6061237563072754, + "learning_rate": 1.644267846975718e-05, + "loss": 0.6066923141479492, + "step": 2144 + }, + { + "epoch": 0.6272846907442609, + "grad_norm": 1.2493532553829008, + "learning_rate": 1.6438980133278017e-05, + "loss": 0.5642968416213989, + "step": 2145 + }, + { + "epoch": 0.6275771311595263, + "grad_norm": 1.0703284322753808, + "learning_rate": 1.6435280291726394e-05, + "loss": 0.604590654373169, + "step": 2146 + }, + { + "epoch": 0.6278695715747916, + "grad_norm": 1.3292746736885825, + "learning_rate": 1.643157894596713e-05, + "loss": 0.6313889026641846, + "step": 2147 + }, + { + "epoch": 0.6281620119900571, + "grad_norm": 1.0767305616181233, + "learning_rate": 1.6427876096865394e-05, + "loss": 0.5084092617034912, + "step": 2148 + }, + { + "epoch": 0.6284544524053224, + "grad_norm": 1.250433663172197, + "learning_rate": 1.6424171745286704e-05, + "loss": 0.5191931128501892, + "step": 2149 + }, + { + "epoch": 0.6287468928205878, + "grad_norm": 1.3567625810681667, + "learning_rate": 1.6420465892096924e-05, + "loss": 0.7397615909576416, + "step": 2150 + }, + { + "epoch": 
0.6290393332358531, + "grad_norm": 1.1359315638082286, + "learning_rate": 1.641675853816228e-05, + "loss": 0.622586727142334, + "step": 2151 + }, + { + "epoch": 0.6293317736511186, + "grad_norm": 1.433028642480203, + "learning_rate": 1.6413049684349344e-05, + "loss": 0.7894928455352783, + "step": 2152 + }, + { + "epoch": 0.629624214066384, + "grad_norm": 1.4395392231763253, + "learning_rate": 1.640933933152504e-05, + "loss": 0.5752773284912109, + "step": 2153 + }, + { + "epoch": 0.6299166544816494, + "grad_norm": 1.3952520818076775, + "learning_rate": 1.640562748055663e-05, + "loss": 0.6738473176956177, + "step": 2154 + }, + { + "epoch": 0.6302090948969148, + "grad_norm": 1.2597002399242925, + "learning_rate": 1.6401914132311745e-05, + "loss": 0.5789517164230347, + "step": 2155 + }, + { + "epoch": 0.6305015353121801, + "grad_norm": 1.2840904364476742, + "learning_rate": 1.6398199287658358e-05, + "loss": 0.5925524830818176, + "step": 2156 + }, + { + "epoch": 0.6307939757274456, + "grad_norm": 1.4374336859820211, + "learning_rate": 1.6394482947464784e-05, + "loss": 0.6949414610862732, + "step": 2157 + }, + { + "epoch": 0.6310864161427109, + "grad_norm": 1.3617313094593515, + "learning_rate": 1.6390765112599705e-05, + "loss": 0.7435301542282104, + "step": 2158 + }, + { + "epoch": 0.6313788565579763, + "grad_norm": 1.5109256996682827, + "learning_rate": 1.6387045783932137e-05, + "loss": 0.6931856274604797, + "step": 2159 + }, + { + "epoch": 0.6316712969732416, + "grad_norm": 1.4369843702380298, + "learning_rate": 1.638332496233145e-05, + "loss": 0.7856471538543701, + "step": 2160 + }, + { + "epoch": 0.6319637373885071, + "grad_norm": 1.460850634730034, + "learning_rate": 1.6379602648667362e-05, + "loss": 0.6299946308135986, + "step": 2161 + }, + { + "epoch": 0.6322561778037725, + "grad_norm": 1.5299113211206812, + "learning_rate": 1.6375878843809946e-05, + "loss": 0.6209328174591064, + "step": 2162 + }, + { + "epoch": 0.6325486182190379, + "grad_norm": 
1.4269696757613273, + "learning_rate": 1.6372153548629617e-05, + "loss": 0.6498390436172485, + "step": 2163 + }, + { + "epoch": 0.6328410586343033, + "grad_norm": 2.1028833494160573, + "learning_rate": 1.6368426763997137e-05, + "loss": 0.6757122278213501, + "step": 2164 + }, + { + "epoch": 0.6331334990495686, + "grad_norm": 1.289589419762841, + "learning_rate": 1.6364698490783623e-05, + "loss": 0.5137026906013489, + "step": 2165 + }, + { + "epoch": 0.633425939464834, + "grad_norm": 1.3914324771074273, + "learning_rate": 1.6360968729860536e-05, + "loss": 0.5876519680023193, + "step": 2166 + }, + { + "epoch": 0.6337183798800994, + "grad_norm": 1.2533286000898018, + "learning_rate": 1.6357237482099682e-05, + "loss": 0.5804057717323303, + "step": 2167 + }, + { + "epoch": 0.6340108202953648, + "grad_norm": 1.361440329822907, + "learning_rate": 1.635350474837322e-05, + "loss": 0.6186444759368896, + "step": 2168 + }, + { + "epoch": 0.6343032607106303, + "grad_norm": 1.4479908785794617, + "learning_rate": 1.6349770529553654e-05, + "loss": 0.6358560919761658, + "step": 2169 + }, + { + "epoch": 0.6345957011258956, + "grad_norm": 1.2507636068938528, + "learning_rate": 1.6346034826513834e-05, + "loss": 0.64283686876297, + "step": 2170 + }, + { + "epoch": 0.634888141541161, + "grad_norm": 1.3854516647796151, + "learning_rate": 1.6342297640126955e-05, + "loss": 0.5269169807434082, + "step": 2171 + }, + { + "epoch": 0.6351805819564263, + "grad_norm": 1.3233372829927026, + "learning_rate": 1.6338558971266563e-05, + "loss": 0.5338561534881592, + "step": 2172 + }, + { + "epoch": 0.6354730223716918, + "grad_norm": 1.365606957045604, + "learning_rate": 1.6334818820806555e-05, + "loss": 0.5587184429168701, + "step": 2173 + }, + { + "epoch": 0.6357654627869571, + "grad_norm": 1.2288709810094502, + "learning_rate": 1.633107718962116e-05, + "loss": 0.6468764543533325, + "step": 2174 + }, + { + "epoch": 0.6360579032022226, + "grad_norm": 1.4431243955955453, + "learning_rate": 
1.6327334078584967e-05, + "loss": 0.7305203676223755, + "step": 2175 + }, + { + "epoch": 0.636350343617488, + "grad_norm": 1.3207763162749322, + "learning_rate": 1.6323589488572908e-05, + "loss": 0.6226189136505127, + "step": 2176 + }, + { + "epoch": 0.6366427840327533, + "grad_norm": 1.4828987038724675, + "learning_rate": 1.631984342046025e-05, + "loss": 0.6552053093910217, + "step": 2177 + }, + { + "epoch": 0.6369352244480188, + "grad_norm": 1.6836072588979352, + "learning_rate": 1.6316095875122617e-05, + "loss": 0.8121978044509888, + "step": 2178 + }, + { + "epoch": 0.6372276648632841, + "grad_norm": 1.3359221660901908, + "learning_rate": 1.6312346853435976e-05, + "loss": 0.5826296806335449, + "step": 2179 + }, + { + "epoch": 0.6375201052785495, + "grad_norm": 1.3567795832303162, + "learning_rate": 1.630859635627664e-05, + "loss": 0.5862709283828735, + "step": 2180 + }, + { + "epoch": 0.6378125456938148, + "grad_norm": 1.2132204868801326, + "learning_rate": 1.6304844384521263e-05, + "loss": 0.7081524133682251, + "step": 2181 + }, + { + "epoch": 0.6381049861090803, + "grad_norm": 1.2359384159808198, + "learning_rate": 1.6301090939046843e-05, + "loss": 0.6394449472427368, + "step": 2182 + }, + { + "epoch": 0.6383974265243456, + "grad_norm": 1.25131780401235, + "learning_rate": 1.6297336020730727e-05, + "loss": 0.6184799075126648, + "step": 2183 + }, + { + "epoch": 0.638689866939611, + "grad_norm": 1.3090426226978378, + "learning_rate": 1.6293579630450606e-05, + "loss": 0.6877666711807251, + "step": 2184 + }, + { + "epoch": 0.6389823073548765, + "grad_norm": 1.3648594367613462, + "learning_rate": 1.6289821769084512e-05, + "loss": 0.5596371293067932, + "step": 2185 + }, + { + "epoch": 0.6392747477701418, + "grad_norm": 1.1779148594123119, + "learning_rate": 1.6286062437510823e-05, + "loss": 0.5378291010856628, + "step": 2186 + }, + { + "epoch": 0.6395671881854073, + "grad_norm": 1.2132664638530417, + "learning_rate": 1.6282301636608256e-05, + "loss": 
0.6965627670288086, + "step": 2187 + }, + { + "epoch": 0.6398596286006726, + "grad_norm": 1.3017112466193883, + "learning_rate": 1.6278539367255885e-05, + "loss": 0.5939220190048218, + "step": 2188 + }, + { + "epoch": 0.640152069015938, + "grad_norm": 1.3743138396251577, + "learning_rate": 1.6274775630333104e-05, + "loss": 0.6225341558456421, + "step": 2189 + }, + { + "epoch": 0.6404445094312033, + "grad_norm": 1.103061387587319, + "learning_rate": 1.6271010426719672e-05, + "loss": 0.471333384513855, + "step": 2190 + }, + { + "epoch": 0.6407369498464688, + "grad_norm": 1.3505910885858836, + "learning_rate": 1.626724375729568e-05, + "loss": 0.6066263914108276, + "step": 2191 + }, + { + "epoch": 0.6410293902617342, + "grad_norm": 1.2842885881869934, + "learning_rate": 1.626347562294157e-05, + "loss": 0.6525982618331909, + "step": 2192 + }, + { + "epoch": 0.6413218306769995, + "grad_norm": 1.375624970339684, + "learning_rate": 1.6259706024538113e-05, + "loss": 0.7395817041397095, + "step": 2193 + }, + { + "epoch": 0.641614271092265, + "grad_norm": 1.326045982489242, + "learning_rate": 1.6255934962966432e-05, + "loss": 0.720014214515686, + "step": 2194 + }, + { + "epoch": 0.6419067115075303, + "grad_norm": 1.4102074363113735, + "learning_rate": 1.625216243910799e-05, + "loss": 0.6905295252799988, + "step": 2195 + }, + { + "epoch": 0.6421991519227958, + "grad_norm": 1.3533501829991437, + "learning_rate": 1.6248388453844596e-05, + "loss": 0.6877295970916748, + "step": 2196 + }, + { + "epoch": 0.6424915923380611, + "grad_norm": 1.414790050061214, + "learning_rate": 1.6244613008058386e-05, + "loss": 0.5782181024551392, + "step": 2197 + }, + { + "epoch": 0.6427840327533265, + "grad_norm": 1.2129092557671588, + "learning_rate": 1.6240836102631856e-05, + "loss": 0.5253425240516663, + "step": 2198 + }, + { + "epoch": 0.6430764731685918, + "grad_norm": 1.2461747547364295, + "learning_rate": 1.623705773844783e-05, + "loss": 0.6631319522857666, + "step": 2199 + }, + { + "epoch": 
0.6433689135838573, + "grad_norm": 1.6130890971192966, + "learning_rate": 1.6233277916389482e-05, + "loss": 0.6458526849746704, + "step": 2200 + }, + { + "epoch": 0.6436613539991227, + "grad_norm": 1.5712729506149452, + "learning_rate": 1.622949663734032e-05, + "loss": 0.5723023414611816, + "step": 2201 + }, + { + "epoch": 0.643953794414388, + "grad_norm": 1.4119455791937807, + "learning_rate": 1.6225713902184193e-05, + "loss": 0.6852096319198608, + "step": 2202 + }, + { + "epoch": 0.6442462348296535, + "grad_norm": 1.460558869527006, + "learning_rate": 1.6221929711805297e-05, + "loss": 0.6343507170677185, + "step": 2203 + }, + { + "epoch": 0.6445386752449188, + "grad_norm": 1.217897103510346, + "learning_rate": 1.6218144067088157e-05, + "loss": 0.6378631591796875, + "step": 2204 + }, + { + "epoch": 0.6448311156601843, + "grad_norm": 1.1203441428966674, + "learning_rate": 1.621435696891765e-05, + "loss": 0.6550023555755615, + "step": 2205 + }, + { + "epoch": 0.6451235560754496, + "grad_norm": 1.3522778560223117, + "learning_rate": 1.6210568418178983e-05, + "loss": 0.5555052757263184, + "step": 2206 + }, + { + "epoch": 0.645415996490715, + "grad_norm": 1.330819772406298, + "learning_rate": 1.6206778415757715e-05, + "loss": 0.7171934247016907, + "step": 2207 + }, + { + "epoch": 0.6457084369059805, + "grad_norm": 1.2953726655501339, + "learning_rate": 1.6202986962539726e-05, + "loss": 0.6464889049530029, + "step": 2208 + }, + { + "epoch": 0.6460008773212458, + "grad_norm": 1.5324773487302452, + "learning_rate": 1.619919405941125e-05, + "loss": 0.6316033601760864, + "step": 2209 + }, + { + "epoch": 0.6462933177365112, + "grad_norm": 1.2083095479015487, + "learning_rate": 1.6195399707258855e-05, + "loss": 0.5548732876777649, + "step": 2210 + }, + { + "epoch": 0.6465857581517765, + "grad_norm": 1.088879983740594, + "learning_rate": 1.6191603906969447e-05, + "loss": 0.5055203437805176, + "step": 2211 + }, + { + "epoch": 0.646878198567042, + "grad_norm": 
1.3416079726495937, + "learning_rate": 1.6187806659430268e-05, + "loss": 0.7010073661804199, + "step": 2212 + }, + { + "epoch": 0.6471706389823073, + "grad_norm": 1.39696751963916, + "learning_rate": 1.6184007965528908e-05, + "loss": 0.6188487410545349, + "step": 2213 + }, + { + "epoch": 0.6474630793975727, + "grad_norm": 1.1122504211535682, + "learning_rate": 1.6180207826153284e-05, + "loss": 0.46920153498649597, + "step": 2214 + }, + { + "epoch": 0.6477555198128382, + "grad_norm": 1.1420938414191775, + "learning_rate": 1.617640624219166e-05, + "loss": 0.6811172962188721, + "step": 2215 + }, + { + "epoch": 0.6480479602281035, + "grad_norm": 1.456471656413964, + "learning_rate": 1.617260321453263e-05, + "loss": 0.6425800323486328, + "step": 2216 + }, + { + "epoch": 0.648340400643369, + "grad_norm": 1.5968265799871777, + "learning_rate": 1.6168798744065123e-05, + "loss": 0.7020897269248962, + "step": 2217 + }, + { + "epoch": 0.6486328410586343, + "grad_norm": 1.1227944263783516, + "learning_rate": 1.6164992831678422e-05, + "loss": 0.5872179865837097, + "step": 2218 + }, + { + "epoch": 0.6489252814738997, + "grad_norm": 1.6374275819992907, + "learning_rate": 1.6161185478262127e-05, + "loss": 0.7414118647575378, + "step": 2219 + }, + { + "epoch": 0.649217721889165, + "grad_norm": 1.2707285395428818, + "learning_rate": 1.615737668470619e-05, + "loss": 0.5408385396003723, + "step": 2220 + }, + { + "epoch": 0.6495101623044305, + "grad_norm": 1.2587309097221344, + "learning_rate": 1.6153566451900887e-05, + "loss": 0.6145513653755188, + "step": 2221 + }, + { + "epoch": 0.6498026027196958, + "grad_norm": 1.1746181148032837, + "learning_rate": 1.6149754780736847e-05, + "loss": 0.556422233581543, + "step": 2222 + }, + { + "epoch": 0.6500950431349612, + "grad_norm": 1.4903419319059785, + "learning_rate": 1.614594167210501e-05, + "loss": 0.7155405282974243, + "step": 2223 + }, + { + "epoch": 0.6503874835502267, + "grad_norm": 1.2945043385192228, + "learning_rate": 
1.6142127126896682e-05, + "loss": 0.4988427758216858, + "step": 2224 + }, + { + "epoch": 0.650679923965492, + "grad_norm": 1.3962995233264988, + "learning_rate": 1.6138311146003477e-05, + "loss": 0.6187007427215576, + "step": 2225 + }, + { + "epoch": 0.6509723643807575, + "grad_norm": 1.329312474096709, + "learning_rate": 1.6134493730317364e-05, + "loss": 0.5668798685073853, + "step": 2226 + }, + { + "epoch": 0.6512648047960228, + "grad_norm": 1.2528148742640925, + "learning_rate": 1.6130674880730642e-05, + "loss": 0.6354215145111084, + "step": 2227 + }, + { + "epoch": 0.6515572452112882, + "grad_norm": 1.3738601794334195, + "learning_rate": 1.612685459813594e-05, + "loss": 0.5409573912620544, + "step": 2228 + }, + { + "epoch": 0.6518496856265535, + "grad_norm": 1.24582725943008, + "learning_rate": 1.612303288342623e-05, + "loss": 0.5622435808181763, + "step": 2229 + }, + { + "epoch": 0.652142126041819, + "grad_norm": 1.3303126336426627, + "learning_rate": 1.6119209737494814e-05, + "loss": 0.786159873008728, + "step": 2230 + }, + { + "epoch": 0.6524345664570844, + "grad_norm": 1.3038971892359654, + "learning_rate": 1.611538516123532e-05, + "loss": 0.6359272003173828, + "step": 2231 + }, + { + "epoch": 0.6527270068723497, + "grad_norm": 1.2508619512631416, + "learning_rate": 1.6111559155541732e-05, + "loss": 0.5688974261283875, + "step": 2232 + }, + { + "epoch": 0.6530194472876152, + "grad_norm": 1.1877745994435736, + "learning_rate": 1.610773172130835e-05, + "loss": 0.581497311592102, + "step": 2233 + }, + { + "epoch": 0.6533118877028805, + "grad_norm": 1.6577687870030173, + "learning_rate": 1.6103902859429812e-05, + "loss": 0.674004316329956, + "step": 2234 + }, + { + "epoch": 0.653604328118146, + "grad_norm": 1.4167456148188138, + "learning_rate": 1.6100072570801092e-05, + "loss": 0.6798728108406067, + "step": 2235 + }, + { + "epoch": 0.6538967685334113, + "grad_norm": 1.245467514643811, + "learning_rate": 1.60962408563175e-05, + "loss": 0.5742023587226868, + 
"step": 2236 + }, + { + "epoch": 0.6541892089486767, + "grad_norm": 1.1993067492933944, + "learning_rate": 1.6092407716874674e-05, + "loss": 0.470009446144104, + "step": 2237 + }, + { + "epoch": 0.654481649363942, + "grad_norm": 1.3725626324774514, + "learning_rate": 1.6088573153368586e-05, + "loss": 0.8113270998001099, + "step": 2238 + }, + { + "epoch": 0.6547740897792075, + "grad_norm": 1.4825942391015299, + "learning_rate": 1.6084737166695542e-05, + "loss": 0.7737559676170349, + "step": 2239 + }, + { + "epoch": 0.6550665301944729, + "grad_norm": 1.5932921988768602, + "learning_rate": 1.6080899757752183e-05, + "loss": 0.6499667167663574, + "step": 2240 + }, + { + "epoch": 0.6553589706097382, + "grad_norm": 1.5295213411109583, + "learning_rate": 1.6077060927435476e-05, + "loss": 0.6898500323295593, + "step": 2241 + }, + { + "epoch": 0.6556514110250037, + "grad_norm": 1.264521733401818, + "learning_rate": 1.6073220676642724e-05, + "loss": 0.5933262705802917, + "step": 2242 + }, + { + "epoch": 0.655943851440269, + "grad_norm": 1.6150723182894215, + "learning_rate": 1.606937900627157e-05, + "loss": 0.6566172242164612, + "step": 2243 + }, + { + "epoch": 0.6562362918555344, + "grad_norm": 1.5267009306631556, + "learning_rate": 1.606553591721997e-05, + "loss": 0.6955286264419556, + "step": 2244 + }, + { + "epoch": 0.6565287322707998, + "grad_norm": 1.2904648803296817, + "learning_rate": 1.6061691410386234e-05, + "loss": 0.6905182600021362, + "step": 2245 + }, + { + "epoch": 0.6568211726860652, + "grad_norm": 1.3780634556903595, + "learning_rate": 1.6057845486668984e-05, + "loss": 0.6733677387237549, + "step": 2246 + }, + { + "epoch": 0.6571136131013307, + "grad_norm": 1.2340466884298544, + "learning_rate": 1.6053998146967186e-05, + "loss": 0.5368545055389404, + "step": 2247 + }, + { + "epoch": 0.657406053516596, + "grad_norm": 1.4627351725055429, + "learning_rate": 1.6050149392180125e-05, + "loss": 0.6995619535446167, + "step": 2248 + }, + { + "epoch": 
0.6576984939318614, + "grad_norm": 1.2552392614352392, + "learning_rate": 1.6046299223207432e-05, + "loss": 0.6637085676193237, + "step": 2249 + }, + { + "epoch": 0.6579909343471267, + "grad_norm": 1.3894808498189977, + "learning_rate": 1.6042447640949058e-05, + "loss": 0.5834380388259888, + "step": 2250 + }, + { + "epoch": 0.6582833747623922, + "grad_norm": 1.1700440243092598, + "learning_rate": 1.6038594646305285e-05, + "loss": 0.5735288858413696, + "step": 2251 + }, + { + "epoch": 0.6585758151776575, + "grad_norm": 1.274727070163542, + "learning_rate": 1.6034740240176728e-05, + "loss": 0.6227413415908813, + "step": 2252 + }, + { + "epoch": 0.658868255592923, + "grad_norm": 1.5091805441488135, + "learning_rate": 1.6030884423464336e-05, + "loss": 0.6881246566772461, + "step": 2253 + }, + { + "epoch": 0.6591606960081884, + "grad_norm": 1.3237201049051734, + "learning_rate": 1.6027027197069376e-05, + "loss": 0.6059132814407349, + "step": 2254 + }, + { + "epoch": 0.6594531364234537, + "grad_norm": 1.5070949945133527, + "learning_rate": 1.6023168561893453e-05, + "loss": 0.5829097032546997, + "step": 2255 + }, + { + "epoch": 0.6597455768387191, + "grad_norm": 1.1821076640408643, + "learning_rate": 1.60193085188385e-05, + "loss": 0.5173588991165161, + "step": 2256 + }, + { + "epoch": 0.6600380172539845, + "grad_norm": 1.0404057140160172, + "learning_rate": 1.601544706880678e-05, + "loss": 0.5128534436225891, + "step": 2257 + }, + { + "epoch": 0.6603304576692499, + "grad_norm": 1.4274902732235735, + "learning_rate": 1.601158421270088e-05, + "loss": 0.5472848415374756, + "step": 2258 + }, + { + "epoch": 0.6606228980845152, + "grad_norm": 1.2505155913554076, + "learning_rate": 1.6007719951423725e-05, + "loss": 0.5775434970855713, + "step": 2259 + }, + { + "epoch": 0.6609153384997807, + "grad_norm": 1.2760490287043558, + "learning_rate": 1.6003854285878558e-05, + "loss": 0.5529654622077942, + "step": 2260 + }, + { + "epoch": 0.661207778915046, + "grad_norm": 
1.2950239037035343, + "learning_rate": 1.5999987216968954e-05, + "loss": 0.5295222997665405, + "step": 2261 + }, + { + "epoch": 0.6615002193303114, + "grad_norm": 1.42880093351922, + "learning_rate": 1.5996118745598817e-05, + "loss": 0.6782759428024292, + "step": 2262 + }, + { + "epoch": 0.6617926597455769, + "grad_norm": 1.5123560217291456, + "learning_rate": 1.5992248872672384e-05, + "loss": 0.7698723077774048, + "step": 2263 + }, + { + "epoch": 0.6620851001608422, + "grad_norm": 1.224014553870767, + "learning_rate": 1.5988377599094208e-05, + "loss": 0.5056325793266296, + "step": 2264 + }, + { + "epoch": 0.6623775405761076, + "grad_norm": 1.2811286417806291, + "learning_rate": 1.598450492576918e-05, + "loss": 0.6748740673065186, + "step": 2265 + }, + { + "epoch": 0.662669980991373, + "grad_norm": 1.4413699029522251, + "learning_rate": 1.598063085360251e-05, + "loss": 0.6594111919403076, + "step": 2266 + }, + { + "epoch": 0.6629624214066384, + "grad_norm": 1.490546706478741, + "learning_rate": 1.5976755383499743e-05, + "loss": 0.5942472815513611, + "step": 2267 + }, + { + "epoch": 0.6632548618219037, + "grad_norm": 1.4166382340274284, + "learning_rate": 1.5972878516366742e-05, + "loss": 0.6956725120544434, + "step": 2268 + }, + { + "epoch": 0.6635473022371692, + "grad_norm": 1.5479108671282409, + "learning_rate": 1.5969000253109707e-05, + "loss": 0.6743103265762329, + "step": 2269 + }, + { + "epoch": 0.6638397426524346, + "grad_norm": 1.2415014970437994, + "learning_rate": 1.596512059463515e-05, + "loss": 0.5452187061309814, + "step": 2270 + }, + { + "epoch": 0.6641321830676999, + "grad_norm": 1.305856048148522, + "learning_rate": 1.5961239541849923e-05, + "loss": 0.6064754128456116, + "step": 2271 + }, + { + "epoch": 0.6644246234829654, + "grad_norm": 1.1672873660489786, + "learning_rate": 1.59573570956612e-05, + "loss": 0.5879498720169067, + "step": 2272 + }, + { + "epoch": 0.6647170638982307, + "grad_norm": 1.2464190562799757, + "learning_rate": 
1.595347325697648e-05, + "loss": 0.6610721945762634, + "step": 2273 + }, + { + "epoch": 0.6650095043134961, + "grad_norm": 1.5001752360693776, + "learning_rate": 1.594958802670358e-05, + "loss": 0.6674839854240417, + "step": 2274 + }, + { + "epoch": 0.6653019447287615, + "grad_norm": 1.2669024802691538, + "learning_rate": 1.5945701405750654e-05, + "loss": 0.5189186334609985, + "step": 2275 + }, + { + "epoch": 0.6655943851440269, + "grad_norm": 1.096047033017533, + "learning_rate": 1.5941813395026174e-05, + "loss": 0.5225304365158081, + "step": 2276 + }, + { + "epoch": 0.6658868255592922, + "grad_norm": 1.1982797539630743, + "learning_rate": 1.5937923995438942e-05, + "loss": 0.5426747798919678, + "step": 2277 + }, + { + "epoch": 0.6661792659745577, + "grad_norm": 1.1331316680397499, + "learning_rate": 1.593403320789808e-05, + "loss": 0.6408158540725708, + "step": 2278 + }, + { + "epoch": 0.6664717063898231, + "grad_norm": 1.2777185085969938, + "learning_rate": 1.5930141033313034e-05, + "loss": 0.6213311553001404, + "step": 2279 + }, + { + "epoch": 0.6667641468050884, + "grad_norm": 1.2938845863415658, + "learning_rate": 1.5926247472593575e-05, + "loss": 0.6538233757019043, + "step": 2280 + }, + { + "epoch": 0.6670565872203539, + "grad_norm": 1.4396815547692279, + "learning_rate": 1.5922352526649803e-05, + "loss": 0.6714701056480408, + "step": 2281 + }, + { + "epoch": 0.6673490276356192, + "grad_norm": 1.2875131974555427, + "learning_rate": 1.5918456196392137e-05, + "loss": 0.501068115234375, + "step": 2282 + }, + { + "epoch": 0.6676414680508846, + "grad_norm": 1.483722651200639, + "learning_rate": 1.5914558482731317e-05, + "loss": 0.6551339626312256, + "step": 2283 + }, + { + "epoch": 0.66793390846615, + "grad_norm": 1.575561891265528, + "learning_rate": 1.5910659386578415e-05, + "loss": 0.666611909866333, + "step": 2284 + }, + { + "epoch": 0.6682263488814154, + "grad_norm": 1.3058077151253007, + "learning_rate": 1.590675890884482e-05, + "loss": 0.6612483859062195, 
+ "step": 2285 + }, + { + "epoch": 0.6685187892966808, + "grad_norm": 1.535602248808955, + "learning_rate": 1.590285705044224e-05, + "loss": 0.5299272537231445, + "step": 2286 + }, + { + "epoch": 0.6688112297119462, + "grad_norm": 1.5209550044520355, + "learning_rate": 1.589895381228272e-05, + "loss": 0.6873815655708313, + "step": 2287 + }, + { + "epoch": 0.6691036701272116, + "grad_norm": 1.333463107294571, + "learning_rate": 1.5895049195278608e-05, + "loss": 0.6473613977432251, + "step": 2288 + }, + { + "epoch": 0.6693961105424769, + "grad_norm": 1.4389212790848083, + "learning_rate": 1.589114320034259e-05, + "loss": 0.6600902080535889, + "step": 2289 + }, + { + "epoch": 0.6696885509577424, + "grad_norm": 1.7581559017014303, + "learning_rate": 1.5887235828387667e-05, + "loss": 0.6066039800643921, + "step": 2290 + }, + { + "epoch": 0.6699809913730077, + "grad_norm": 1.2475073124572584, + "learning_rate": 1.5883327080327165e-05, + "loss": 0.5411461591720581, + "step": 2291 + }, + { + "epoch": 0.6702734317882731, + "grad_norm": 1.3264098990068387, + "learning_rate": 1.587941695707473e-05, + "loss": 0.5678138136863708, + "step": 2292 + }, + { + "epoch": 0.6705658722035386, + "grad_norm": 1.2017893940389541, + "learning_rate": 1.5875505459544327e-05, + "loss": 0.6175323724746704, + "step": 2293 + }, + { + "epoch": 0.6708583126188039, + "grad_norm": 1.2255154092981597, + "learning_rate": 1.587159258865025e-05, + "loss": 0.5790976285934448, + "step": 2294 + }, + { + "epoch": 0.6711507530340693, + "grad_norm": 1.4070059880127774, + "learning_rate": 1.58676783453071e-05, + "loss": 0.5891247391700745, + "step": 2295 + }, + { + "epoch": 0.6714431934493347, + "grad_norm": 1.3680740765730994, + "learning_rate": 1.5863762730429817e-05, + "loss": 0.5604299902915955, + "step": 2296 + }, + { + "epoch": 0.6717356338646001, + "grad_norm": 1.156075846793115, + "learning_rate": 1.585984574493365e-05, + "loss": 0.5402317047119141, + "step": 2297 + }, + { + "epoch": 0.6720280742798654, 
+ "grad_norm": 1.2729484704762741, + "learning_rate": 1.5855927389734163e-05, + "loss": 0.5569097995758057, + "step": 2298 + }, + { + "epoch": 0.6723205146951309, + "grad_norm": 1.792109537125727, + "learning_rate": 1.5852007665747255e-05, + "loss": 0.6754734516143799, + "step": 2299 + }, + { + "epoch": 0.6726129551103962, + "grad_norm": 1.2015482502693244, + "learning_rate": 1.584808657388914e-05, + "loss": 0.5555064678192139, + "step": 2300 + }, + { + "epoch": 0.6729053955256616, + "grad_norm": 1.2978798977032824, + "learning_rate": 1.584416411507634e-05, + "loss": 0.5735480785369873, + "step": 2301 + }, + { + "epoch": 0.6731978359409271, + "grad_norm": 1.3948021707686127, + "learning_rate": 1.5840240290225713e-05, + "loss": 0.6084697842597961, + "step": 2302 + }, + { + "epoch": 0.6734902763561924, + "grad_norm": 1.3972987341637648, + "learning_rate": 1.5836315100254427e-05, + "loss": 0.5747361779212952, + "step": 2303 + }, + { + "epoch": 0.6737827167714578, + "grad_norm": 1.3042539657521541, + "learning_rate": 1.583238854607997e-05, + "loss": 0.6597394943237305, + "step": 2304 + }, + { + "epoch": 0.6740751571867232, + "grad_norm": 1.2885200657030746, + "learning_rate": 1.582846062862016e-05, + "loss": 0.6054418087005615, + "step": 2305 + }, + { + "epoch": 0.6743675976019886, + "grad_norm": 1.4670353156004656, + "learning_rate": 1.5824531348793106e-05, + "loss": 0.6897715330123901, + "step": 2306 + }, + { + "epoch": 0.6746600380172539, + "grad_norm": 1.2379672312585208, + "learning_rate": 1.5820600707517265e-05, + "loss": 0.5438888072967529, + "step": 2307 + }, + { + "epoch": 0.6749524784325194, + "grad_norm": 1.3511076823584265, + "learning_rate": 1.5816668705711402e-05, + "loss": 0.5139850378036499, + "step": 2308 + }, + { + "epoch": 0.6752449188477848, + "grad_norm": 1.3878243291723096, + "learning_rate": 1.5812735344294594e-05, + "loss": 0.5970615744590759, + "step": 2309 + }, + { + "epoch": 0.6755373592630501, + "grad_norm": 1.5290136714699685, + 
"learning_rate": 1.580880062418624e-05, + "loss": 0.6206730604171753, + "step": 2310 + }, + { + "epoch": 0.6758297996783156, + "grad_norm": 1.5283867982171593, + "learning_rate": 1.580486454630606e-05, + "loss": 0.6545864939689636, + "step": 2311 + }, + { + "epoch": 0.6761222400935809, + "grad_norm": 1.6726831788405112, + "learning_rate": 1.5800927111574084e-05, + "loss": 0.6284571290016174, + "step": 2312 + }, + { + "epoch": 0.6764146805088463, + "grad_norm": 1.3062366838416066, + "learning_rate": 1.5796988320910665e-05, + "loss": 0.6662822365760803, + "step": 2313 + }, + { + "epoch": 0.6767071209241117, + "grad_norm": 1.4857961720461585, + "learning_rate": 1.5793048175236477e-05, + "loss": 0.6952080130577087, + "step": 2314 + }, + { + "epoch": 0.6769995613393771, + "grad_norm": 1.1527122349254486, + "learning_rate": 1.5789106675472496e-05, + "loss": 0.55562424659729, + "step": 2315 + }, + { + "epoch": 0.6772920017546424, + "grad_norm": 1.417075363017466, + "learning_rate": 1.578516382254003e-05, + "loss": 0.696354866027832, + "step": 2316 + }, + { + "epoch": 0.6775844421699079, + "grad_norm": 1.2481046919985836, + "learning_rate": 1.5781219617360695e-05, + "loss": 0.5764954686164856, + "step": 2317 + }, + { + "epoch": 0.6778768825851733, + "grad_norm": 1.5617477082955222, + "learning_rate": 1.577727406085642e-05, + "loss": 0.6944533586502075, + "step": 2318 + }, + { + "epoch": 0.6781693230004386, + "grad_norm": 1.5273473613933928, + "learning_rate": 1.5773327153949465e-05, + "loss": 0.5517882704734802, + "step": 2319 + }, + { + "epoch": 0.6784617634157041, + "grad_norm": 1.3495609581159556, + "learning_rate": 1.576937889756239e-05, + "loss": 0.6151533126831055, + "step": 2320 + }, + { + "epoch": 0.6787542038309694, + "grad_norm": 1.3729348393231853, + "learning_rate": 1.5765429292618075e-05, + "loss": 0.6221417784690857, + "step": 2321 + }, + { + "epoch": 0.6790466442462348, + "grad_norm": 1.5561656408525308, + "learning_rate": 1.576147834003972e-05, + "loss": 
0.6218827962875366, + "step": 2322 + }, + { + "epoch": 0.6793390846615002, + "grad_norm": 1.2844085482190328, + "learning_rate": 1.575752604075083e-05, + "loss": 0.689696192741394, + "step": 2323 + }, + { + "epoch": 0.6796315250767656, + "grad_norm": 1.459910366351317, + "learning_rate": 1.5753572395675234e-05, + "loss": 0.6457825899124146, + "step": 2324 + }, + { + "epoch": 0.679923965492031, + "grad_norm": 1.660980107305809, + "learning_rate": 1.5749617405737075e-05, + "loss": 0.6261845827102661, + "step": 2325 + }, + { + "epoch": 0.6802164059072964, + "grad_norm": 1.5113706854166593, + "learning_rate": 1.5745661071860802e-05, + "loss": 0.6631760597229004, + "step": 2326 + }, + { + "epoch": 0.6805088463225618, + "grad_norm": 1.4700703601826162, + "learning_rate": 1.574170339497119e-05, + "loss": 0.6223125457763672, + "step": 2327 + }, + { + "epoch": 0.6808012867378271, + "grad_norm": 1.4289384563362724, + "learning_rate": 1.5737744375993318e-05, + "loss": 0.5649152398109436, + "step": 2328 + }, + { + "epoch": 0.6810937271530926, + "grad_norm": 1.3637036537520066, + "learning_rate": 1.573378401585259e-05, + "loss": 0.6822011470794678, + "step": 2329 + }, + { + "epoch": 0.6813861675683579, + "grad_norm": 1.243454490323945, + "learning_rate": 1.5729822315474704e-05, + "loss": 0.4853206276893616, + "step": 2330 + }, + { + "epoch": 0.6816786079836233, + "grad_norm": 1.3491879449563893, + "learning_rate": 1.572585927578569e-05, + "loss": 0.6410783529281616, + "step": 2331 + }, + { + "epoch": 0.6819710483988888, + "grad_norm": 1.2349335330440738, + "learning_rate": 1.572189489771189e-05, + "loss": 0.607154369354248, + "step": 2332 + }, + { + "epoch": 0.6822634888141541, + "grad_norm": 1.2303800918258645, + "learning_rate": 1.571792918217994e-05, + "loss": 0.5079061388969421, + "step": 2333 + }, + { + "epoch": 0.6825559292294195, + "grad_norm": 1.355109139858454, + "learning_rate": 1.5713962130116812e-05, + "loss": 0.534178614616394, + "step": 2334 + }, + { + "epoch": 
0.6828483696446849, + "grad_norm": 1.099124567807314, + "learning_rate": 1.5709993742449777e-05, + "loss": 0.6172807812690735, + "step": 2335 + }, + { + "epoch": 0.6831408100599503, + "grad_norm": 1.468863618054796, + "learning_rate": 1.5706024020106425e-05, + "loss": 0.6863975524902344, + "step": 2336 + }, + { + "epoch": 0.6834332504752156, + "grad_norm": 1.3542187494807805, + "learning_rate": 1.570205296401465e-05, + "loss": 0.6314880847930908, + "step": 2337 + }, + { + "epoch": 0.6837256908904811, + "grad_norm": 1.4888474767820694, + "learning_rate": 1.5698080575102662e-05, + "loss": 0.5420910120010376, + "step": 2338 + }, + { + "epoch": 0.6840181313057464, + "grad_norm": 1.545548665208996, + "learning_rate": 1.5694106854298988e-05, + "loss": 0.6598352789878845, + "step": 2339 + }, + { + "epoch": 0.6843105717210118, + "grad_norm": 1.1855737189309028, + "learning_rate": 1.5690131802532454e-05, + "loss": 0.49957770109176636, + "step": 2340 + }, + { + "epoch": 0.6846030121362773, + "grad_norm": 1.3910703437631544, + "learning_rate": 1.568615542073221e-05, + "loss": 0.7217017412185669, + "step": 2341 + }, + { + "epoch": 0.6848954525515426, + "grad_norm": 1.383168011584397, + "learning_rate": 1.5682177709827705e-05, + "loss": 0.5824606418609619, + "step": 2342 + }, + { + "epoch": 0.685187892966808, + "grad_norm": 1.4861418668417947, + "learning_rate": 1.567819867074871e-05, + "loss": 0.5932704210281372, + "step": 2343 + }, + { + "epoch": 0.6854803333820734, + "grad_norm": 1.1927307747773088, + "learning_rate": 1.5674218304425304e-05, + "loss": 0.6098836660385132, + "step": 2344 + }, + { + "epoch": 0.6857727737973388, + "grad_norm": 1.3302018518433079, + "learning_rate": 1.5670236611787865e-05, + "loss": 0.5158270597457886, + "step": 2345 + }, + { + "epoch": 0.6860652142126041, + "grad_norm": 1.431950758183516, + "learning_rate": 1.5666253593767095e-05, + "loss": 0.7840174436569214, + "step": 2346 + }, + { + "epoch": 0.6863576546278696, + "grad_norm": 
1.3462478651155303, + "learning_rate": 1.5662269251294e-05, + "loss": 0.5665150880813599, + "step": 2347 + }, + { + "epoch": 0.686650095043135, + "grad_norm": 1.2308130347699304, + "learning_rate": 1.5658283585299894e-05, + "loss": 0.5801588296890259, + "step": 2348 + }, + { + "epoch": 0.6869425354584003, + "grad_norm": 1.487298330014143, + "learning_rate": 1.56542965967164e-05, + "loss": 0.759188175201416, + "step": 2349 + }, + { + "epoch": 0.6872349758736658, + "grad_norm": 1.5717076197736846, + "learning_rate": 1.565030828647546e-05, + "loss": 0.7182703018188477, + "step": 2350 + }, + { + "epoch": 0.6875274162889311, + "grad_norm": 1.3681215378392677, + "learning_rate": 1.564631865550931e-05, + "loss": 0.7172018885612488, + "step": 2351 + }, + { + "epoch": 0.6878198567041965, + "grad_norm": 1.3897042930637002, + "learning_rate": 1.5642327704750502e-05, + "loss": 0.5959519743919373, + "step": 2352 + }, + { + "epoch": 0.6881122971194619, + "grad_norm": 1.3686338632915553, + "learning_rate": 1.5638335435131902e-05, + "loss": 0.5531836748123169, + "step": 2353 + }, + { + "epoch": 0.6884047375347273, + "grad_norm": 1.2097339017222586, + "learning_rate": 1.5634341847586676e-05, + "loss": 0.672225296497345, + "step": 2354 + }, + { + "epoch": 0.6886971779499926, + "grad_norm": 1.3740176007353215, + "learning_rate": 1.5630346943048297e-05, + "loss": 0.5721465349197388, + "step": 2355 + }, + { + "epoch": 0.6889896183652581, + "grad_norm": 1.2416767467837069, + "learning_rate": 1.5626350722450555e-05, + "loss": 0.6357900500297546, + "step": 2356 + }, + { + "epoch": 0.6892820587805235, + "grad_norm": 1.241847883566859, + "learning_rate": 1.5622353186727542e-05, + "loss": 0.6348878145217896, + "step": 2357 + }, + { + "epoch": 0.6895744991957888, + "grad_norm": 1.390537638221337, + "learning_rate": 1.5618354336813656e-05, + "loss": 0.5473623275756836, + "step": 2358 + }, + { + "epoch": 0.6898669396110543, + "grad_norm": 1.4299851255948683, + "learning_rate": 
1.5614354173643606e-05, + "loss": 0.8284158706665039, + "step": 2359 + }, + { + "epoch": 0.6901593800263196, + "grad_norm": 1.3561063303885135, + "learning_rate": 1.5610352698152396e-05, + "loss": 0.5915359854698181, + "step": 2360 + }, + { + "epoch": 0.690451820441585, + "grad_norm": 1.434488423567872, + "learning_rate": 1.560634991127536e-05, + "loss": 0.6173555254936218, + "step": 2361 + }, + { + "epoch": 0.6907442608568504, + "grad_norm": 1.2348756002421877, + "learning_rate": 1.560234581394812e-05, + "loss": 0.5551577806472778, + "step": 2362 + }, + { + "epoch": 0.6910367012721158, + "grad_norm": 1.6912535037446208, + "learning_rate": 1.559834040710661e-05, + "loss": 0.7160264253616333, + "step": 2363 + }, + { + "epoch": 0.6913291416873812, + "grad_norm": 1.4348139771874249, + "learning_rate": 1.5594333691687062e-05, + "loss": 0.5986248850822449, + "step": 2364 + }, + { + "epoch": 0.6916215821026466, + "grad_norm": 1.6827348555719241, + "learning_rate": 1.559032566862603e-05, + "loss": 0.7347019910812378, + "step": 2365 + }, + { + "epoch": 0.691914022517912, + "grad_norm": 1.1496166027771255, + "learning_rate": 1.5586316338860363e-05, + "loss": 0.502663791179657, + "step": 2366 + }, + { + "epoch": 0.6922064629331773, + "grad_norm": 1.1610976211375774, + "learning_rate": 1.558230570332722e-05, + "loss": 0.5026617050170898, + "step": 2367 + }, + { + "epoch": 0.6924989033484428, + "grad_norm": 1.3196703072069724, + "learning_rate": 1.5578293762964057e-05, + "loss": 0.6091101169586182, + "step": 2368 + }, + { + "epoch": 0.6927913437637081, + "grad_norm": 1.1607138049044183, + "learning_rate": 1.5574280518708645e-05, + "loss": 0.6202579736709595, + "step": 2369 + }, + { + "epoch": 0.6930837841789735, + "grad_norm": 1.3867301068189375, + "learning_rate": 1.557026597149905e-05, + "loss": 0.6532948017120361, + "step": 2370 + }, + { + "epoch": 0.693376224594239, + "grad_norm": 1.2799465632685962, + "learning_rate": 1.5566250122273658e-05, + "loss": 0.6197448372840881, 
+ "step": 2371 + }, + { + "epoch": 0.6936686650095043, + "grad_norm": 1.330123548058068, + "learning_rate": 1.556223297197114e-05, + "loss": 0.6181553602218628, + "step": 2372 + }, + { + "epoch": 0.6939611054247697, + "grad_norm": 1.3757625130132767, + "learning_rate": 1.5558214521530482e-05, + "loss": 0.6015427112579346, + "step": 2373 + }, + { + "epoch": 0.6942535458400351, + "grad_norm": 1.4511778478720454, + "learning_rate": 1.555419477189098e-05, + "loss": 0.6204534769058228, + "step": 2374 + }, + { + "epoch": 0.6945459862553005, + "grad_norm": 1.2237746404921626, + "learning_rate": 1.5550173723992218e-05, + "loss": 0.5914584994316101, + "step": 2375 + }, + { + "epoch": 0.6948384266705658, + "grad_norm": 1.2633817911858796, + "learning_rate": 1.554615137877409e-05, + "loss": 0.5077188611030579, + "step": 2376 + }, + { + "epoch": 0.6951308670858313, + "grad_norm": 1.1523903505061626, + "learning_rate": 1.55421277371768e-05, + "loss": 0.5560270547866821, + "step": 2377 + }, + { + "epoch": 0.6954233075010966, + "grad_norm": 1.6214020445600121, + "learning_rate": 1.553810280014085e-05, + "loss": 0.7064549922943115, + "step": 2378 + }, + { + "epoch": 0.695715747916362, + "grad_norm": 1.4249847873824701, + "learning_rate": 1.5534076568607043e-05, + "loss": 0.7433110475540161, + "step": 2379 + }, + { + "epoch": 0.6960081883316275, + "grad_norm": 1.4661372034410074, + "learning_rate": 1.553004904351648e-05, + "loss": 0.6061110496520996, + "step": 2380 + }, + { + "epoch": 0.6963006287468928, + "grad_norm": 1.3530915937691412, + "learning_rate": 1.5526020225810583e-05, + "loss": 0.604006290435791, + "step": 2381 + }, + { + "epoch": 0.6965930691621582, + "grad_norm": 1.3193058416919141, + "learning_rate": 1.5521990116431052e-05, + "loss": 0.6221635341644287, + "step": 2382 + }, + { + "epoch": 0.6968855095774236, + "grad_norm": 1.17260855579956, + "learning_rate": 1.551795871631991e-05, + "loss": 0.5848093032836914, + "step": 2383 + }, + { + "epoch": 0.697177949992689, + 
"grad_norm": 1.3909866883805502, + "learning_rate": 1.5513926026419464e-05, + "loss": 0.6451606154441833, + "step": 2384 + }, + { + "epoch": 0.6974703904079543, + "grad_norm": 1.2515682694896817, + "learning_rate": 1.5509892047672336e-05, + "loss": 0.7922245264053345, + "step": 2385 + }, + { + "epoch": 0.6977628308232198, + "grad_norm": 1.501698757307051, + "learning_rate": 1.5505856781021443e-05, + "loss": 0.6458885073661804, + "step": 2386 + }, + { + "epoch": 0.6980552712384852, + "grad_norm": 1.3253141303151825, + "learning_rate": 1.5501820227410002e-05, + "loss": 0.5989570617675781, + "step": 2387 + }, + { + "epoch": 0.6983477116537505, + "grad_norm": 1.4240123629840666, + "learning_rate": 1.5497782387781536e-05, + "loss": 0.740998387336731, + "step": 2388 + }, + { + "epoch": 0.698640152069016, + "grad_norm": 1.4547948512453808, + "learning_rate": 1.5493743263079866e-05, + "loss": 0.63981032371521, + "step": 2389 + }, + { + "epoch": 0.6989325924842813, + "grad_norm": 1.325001348454028, + "learning_rate": 1.5489702854249106e-05, + "loss": 0.766716480255127, + "step": 2390 + }, + { + "epoch": 0.6992250328995467, + "grad_norm": 1.541044208915787, + "learning_rate": 1.5485661162233684e-05, + "loss": 0.7879365086555481, + "step": 2391 + }, + { + "epoch": 0.6995174733148121, + "grad_norm": 1.3532949065271656, + "learning_rate": 1.5481618187978322e-05, + "loss": 0.6005786657333374, + "step": 2392 + }, + { + "epoch": 0.6998099137300775, + "grad_norm": 1.2952910023515818, + "learning_rate": 1.5477573932428033e-05, + "loss": 0.6207927465438843, + "step": 2393 + }, + { + "epoch": 0.7001023541453428, + "grad_norm": 1.4490674696543298, + "learning_rate": 1.5473528396528144e-05, + "loss": 0.5582053661346436, + "step": 2394 + }, + { + "epoch": 0.7003947945606083, + "grad_norm": 1.6315416515790502, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.5701307058334351, + "step": 2395 + }, + { + "epoch": 0.7006872349758737, + "grad_norm": 1.3804181292115258, + "learning_rate": 
1.546543348746233e-05, + "loss": 0.6201068162918091, + "step": 2396 + }, + { + "epoch": 0.700979675391139, + "grad_norm": 1.3282086716914991, + "learning_rate": 1.5461384116188546e-05, + "loss": 0.6102321147918701, + "step": 2397 + }, + { + "epoch": 0.7012721158064045, + "grad_norm": 1.361382387889105, + "learning_rate": 1.545733346834943e-05, + "loss": 0.5445820093154907, + "step": 2398 + }, + { + "epoch": 0.7015645562216698, + "grad_norm": 1.3134018034606705, + "learning_rate": 1.5453281544891797e-05, + "loss": 0.5278012752532959, + "step": 2399 + }, + { + "epoch": 0.7018569966369352, + "grad_norm": 1.6159840401286016, + "learning_rate": 1.544922834676276e-05, + "loss": 0.7051252126693726, + "step": 2400 + }, + { + "epoch": 0.7021494370522006, + "grad_norm": 1.3552623655435003, + "learning_rate": 1.544517387490973e-05, + "loss": 0.6024646759033203, + "step": 2401 + }, + { + "epoch": 0.702441877467466, + "grad_norm": 1.3323978020414873, + "learning_rate": 1.5441118130280406e-05, + "loss": 0.5563746094703674, + "step": 2402 + }, + { + "epoch": 0.7027343178827314, + "grad_norm": 1.3671297363224464, + "learning_rate": 1.5437061113822805e-05, + "loss": 0.5971669554710388, + "step": 2403 + }, + { + "epoch": 0.7030267582979968, + "grad_norm": 1.5082475685517047, + "learning_rate": 1.5433002826485234e-05, + "loss": 0.5846019983291626, + "step": 2404 + }, + { + "epoch": 0.7033191987132622, + "grad_norm": 1.2921876796744827, + "learning_rate": 1.5428943269216278e-05, + "loss": 0.5571885108947754, + "step": 2405 + }, + { + "epoch": 0.7036116391285275, + "grad_norm": 1.15652993390593, + "learning_rate": 1.542488244296484e-05, + "loss": 0.4770846962928772, + "step": 2406 + }, + { + "epoch": 0.703904079543793, + "grad_norm": 1.6398352091801953, + "learning_rate": 1.542082034868012e-05, + "loss": 0.636760950088501, + "step": 2407 + }, + { + "epoch": 0.7041965199590583, + "grad_norm": 1.6877906333209267, + "learning_rate": 1.5416756987311603e-05, + "loss": 0.7264662981033325, + 
"step": 2408 + }, + { + "epoch": 0.7044889603743237, + "grad_norm": 1.372256728403267, + "learning_rate": 1.5412692359809073e-05, + "loss": 0.6723978519439697, + "step": 2409 + }, + { + "epoch": 0.7047814007895892, + "grad_norm": 1.4362583031777838, + "learning_rate": 1.5408626467122612e-05, + "loss": 0.6205083727836609, + "step": 2410 + }, + { + "epoch": 0.7050738412048545, + "grad_norm": 1.4495567778043355, + "learning_rate": 1.54045593102026e-05, + "loss": 0.5980903506278992, + "step": 2411 + }, + { + "epoch": 0.7053662816201199, + "grad_norm": 1.4897959908790472, + "learning_rate": 1.540049088999971e-05, + "loss": 0.6311691999435425, + "step": 2412 + }, + { + "epoch": 0.7056587220353853, + "grad_norm": 1.428243709143454, + "learning_rate": 1.539642120746491e-05, + "loss": 0.5872593522071838, + "step": 2413 + }, + { + "epoch": 0.7059511624506507, + "grad_norm": 1.351001450570791, + "learning_rate": 1.5392350263549462e-05, + "loss": 0.5037539005279541, + "step": 2414 + }, + { + "epoch": 0.706243602865916, + "grad_norm": 1.4775045660401276, + "learning_rate": 1.538827805920493e-05, + "loss": 0.5917855501174927, + "step": 2415 + }, + { + "epoch": 0.7065360432811815, + "grad_norm": 1.3687769613569196, + "learning_rate": 1.538420459538316e-05, + "loss": 0.6350749731063843, + "step": 2416 + }, + { + "epoch": 0.7068284836964468, + "grad_norm": 1.330110483636511, + "learning_rate": 1.53801298730363e-05, + "loss": 0.6828908920288086, + "step": 2417 + }, + { + "epoch": 0.7071209241117122, + "grad_norm": 1.5864329436081315, + "learning_rate": 1.5376053893116796e-05, + "loss": 0.6307995319366455, + "step": 2418 + }, + { + "epoch": 0.7074133645269777, + "grad_norm": 1.3609756396375527, + "learning_rate": 1.5371976656577385e-05, + "loss": 0.5305014252662659, + "step": 2419 + }, + { + "epoch": 0.707705804942243, + "grad_norm": 1.2953614031977334, + "learning_rate": 1.536789816437109e-05, + "loss": 0.560103178024292, + "step": 2420 + }, + { + "epoch": 0.7079982453575084, + 
"grad_norm": 1.4823675619867462, + "learning_rate": 1.5363818417451236e-05, + "loss": 0.5449249148368835, + "step": 2421 + }, + { + "epoch": 0.7082906857727738, + "grad_norm": 1.575423149049035, + "learning_rate": 1.5359737416771438e-05, + "loss": 0.7456427812576294, + "step": 2422 + }, + { + "epoch": 0.7085831261880392, + "grad_norm": 1.4606336998212586, + "learning_rate": 1.5355655163285607e-05, + "loss": 0.5401932597160339, + "step": 2423 + }, + { + "epoch": 0.7088755666033045, + "grad_norm": 1.4384817217494414, + "learning_rate": 1.5351571657947947e-05, + "loss": 0.6215255856513977, + "step": 2424 + }, + { + "epoch": 0.70916800701857, + "grad_norm": 1.454238489435378, + "learning_rate": 1.5347486901712946e-05, + "loss": 0.724073052406311, + "step": 2425 + }, + { + "epoch": 0.7094604474338354, + "grad_norm": 1.280381472439187, + "learning_rate": 1.5343400895535402e-05, + "loss": 0.6375223398208618, + "step": 2426 + }, + { + "epoch": 0.7097528878491007, + "grad_norm": 1.4740965908748953, + "learning_rate": 1.533931364037038e-05, + "loss": 0.6087045669555664, + "step": 2427 + }, + { + "epoch": 0.7100453282643662, + "grad_norm": 1.4709664710326, + "learning_rate": 1.5335225137173262e-05, + "loss": 0.7927658557891846, + "step": 2428 + }, + { + "epoch": 0.7103377686796315, + "grad_norm": 1.4583904783773962, + "learning_rate": 1.5331135386899702e-05, + "loss": 0.6312417387962341, + "step": 2429 + }, + { + "epoch": 0.7106302090948969, + "grad_norm": 1.3514647325044575, + "learning_rate": 1.5327044390505666e-05, + "loss": 0.6856948137283325, + "step": 2430 + }, + { + "epoch": 0.7109226495101623, + "grad_norm": 1.3486136616110067, + "learning_rate": 1.532295214894739e-05, + "loss": 0.5683865547180176, + "step": 2431 + }, + { + "epoch": 0.7112150899254277, + "grad_norm": 1.5290242403967753, + "learning_rate": 1.5318858663181412e-05, + "loss": 0.6208291053771973, + "step": 2432 + }, + { + "epoch": 0.711507530340693, + "grad_norm": 1.5265891330435364, + "learning_rate": 
1.531476393416456e-05, + "loss": 0.6751389503479004, + "step": 2433 + }, + { + "epoch": 0.7117999707559585, + "grad_norm": 1.1685210774635664, + "learning_rate": 1.5310667962853954e-05, + "loss": 0.422024667263031, + "step": 2434 + }, + { + "epoch": 0.7120924111712239, + "grad_norm": 1.377587949543332, + "learning_rate": 1.5306570750207003e-05, + "loss": 0.6714169979095459, + "step": 2435 + }, + { + "epoch": 0.7123848515864892, + "grad_norm": 1.4808127088080212, + "learning_rate": 1.53024722971814e-05, + "loss": 0.5757386088371277, + "step": 2436 + }, + { + "epoch": 0.7126772920017547, + "grad_norm": 1.4790386820456973, + "learning_rate": 1.529837260473514e-05, + "loss": 0.5686037540435791, + "step": 2437 + }, + { + "epoch": 0.71296973241702, + "grad_norm": 1.570681384959534, + "learning_rate": 1.5294271673826498e-05, + "loss": 0.7601959705352783, + "step": 2438 + }, + { + "epoch": 0.7132621728322854, + "grad_norm": 1.44814607189911, + "learning_rate": 1.529016950541404e-05, + "loss": 0.5654840469360352, + "step": 2439 + }, + { + "epoch": 0.7135546132475508, + "grad_norm": 1.5844428110219366, + "learning_rate": 1.5286066100456623e-05, + "loss": 0.7009234428405762, + "step": 2440 + }, + { + "epoch": 0.7138470536628162, + "grad_norm": 1.166961279939158, + "learning_rate": 1.52819614599134e-05, + "loss": 0.4856370687484741, + "step": 2441 + }, + { + "epoch": 0.7141394940780816, + "grad_norm": 1.2283639685035557, + "learning_rate": 1.52778555847438e-05, + "loss": 0.5135019421577454, + "step": 2442 + }, + { + "epoch": 0.714431934493347, + "grad_norm": 1.3437978900697465, + "learning_rate": 1.5273748475907542e-05, + "loss": 0.7350283861160278, + "step": 2443 + }, + { + "epoch": 0.7147243749086124, + "grad_norm": 1.1274424242274286, + "learning_rate": 1.5269640134364646e-05, + "loss": 0.5985803604125977, + "step": 2444 + }, + { + "epoch": 0.7150168153238777, + "grad_norm": 1.2982732418248375, + "learning_rate": 1.5265530561075407e-05, + "loss": 0.6840892434120178, + 
"step": 2445 + }, + { + "epoch": 0.7153092557391432, + "grad_norm": 1.2979743249484705, + "learning_rate": 1.5261419757000417e-05, + "loss": 0.6921327114105225, + "step": 2446 + }, + { + "epoch": 0.7156016961544085, + "grad_norm": 1.429595570109343, + "learning_rate": 1.525730772310055e-05, + "loss": 0.6428500413894653, + "step": 2447 + }, + { + "epoch": 0.7158941365696739, + "grad_norm": 1.3812578358040712, + "learning_rate": 1.5253194460336964e-05, + "loss": 0.645559549331665, + "step": 2448 + }, + { + "epoch": 0.7161865769849394, + "grad_norm": 1.7222193716043204, + "learning_rate": 1.5249079969671114e-05, + "loss": 0.6211013793945312, + "step": 2449 + }, + { + "epoch": 0.7164790174002047, + "grad_norm": 1.2302035745629583, + "learning_rate": 1.5244964252064737e-05, + "loss": 0.5709721446037292, + "step": 2450 + }, + { + "epoch": 0.7167714578154701, + "grad_norm": 1.4516717315033434, + "learning_rate": 1.5240847308479855e-05, + "loss": 0.6781377196311951, + "step": 2451 + }, + { + "epoch": 0.7170638982307355, + "grad_norm": 1.5070563114338018, + "learning_rate": 1.523672913987878e-05, + "loss": 0.6476876735687256, + "step": 2452 + }, + { + "epoch": 0.7173563386460009, + "grad_norm": 1.5653544129198373, + "learning_rate": 1.523260974722411e-05, + "loss": 0.6564218997955322, + "step": 2453 + }, + { + "epoch": 0.7176487790612662, + "grad_norm": 1.3630096136191563, + "learning_rate": 1.5228489131478722e-05, + "loss": 0.6455773711204529, + "step": 2454 + }, + { + "epoch": 0.7179412194765317, + "grad_norm": 1.393672298684458, + "learning_rate": 1.5224367293605791e-05, + "loss": 0.6039570569992065, + "step": 2455 + }, + { + "epoch": 0.718233659891797, + "grad_norm": 1.337144764968105, + "learning_rate": 1.522024423456877e-05, + "loss": 0.7060747146606445, + "step": 2456 + }, + { + "epoch": 0.7185261003070624, + "grad_norm": 1.3843662041380984, + "learning_rate": 1.52161199553314e-05, + "loss": 0.561469316482544, + "step": 2457 + }, + { + "epoch": 0.7188185407223279, + 
"grad_norm": 1.4419398084710615, + "learning_rate": 1.5211994456857706e-05, + "loss": 0.6682697534561157, + "step": 2458 + }, + { + "epoch": 0.7191109811375932, + "grad_norm": 1.5477256567407798, + "learning_rate": 1.5207867740111994e-05, + "loss": 0.7893983125686646, + "step": 2459 + }, + { + "epoch": 0.7194034215528586, + "grad_norm": 1.3454706918314496, + "learning_rate": 1.5203739806058863e-05, + "loss": 0.617809534072876, + "step": 2460 + }, + { + "epoch": 0.719695861968124, + "grad_norm": 1.2866429351470308, + "learning_rate": 1.5199610655663193e-05, + "loss": 0.5444413423538208, + "step": 2461 + }, + { + "epoch": 0.7199883023833894, + "grad_norm": 1.342633438363169, + "learning_rate": 1.5195480289890146e-05, + "loss": 0.615330696105957, + "step": 2462 + }, + { + "epoch": 0.7202807427986547, + "grad_norm": 1.7658118623485195, + "learning_rate": 1.5191348709705169e-05, + "loss": 0.6811497211456299, + "step": 2463 + }, + { + "epoch": 0.7205731832139202, + "grad_norm": 1.3224396770739022, + "learning_rate": 1.5187215916073997e-05, + "loss": 0.612322211265564, + "step": 2464 + }, + { + "epoch": 0.7208656236291856, + "grad_norm": 1.2201627110269677, + "learning_rate": 1.518308190996264e-05, + "loss": 0.6106880903244019, + "step": 2465 + }, + { + "epoch": 0.7211580640444509, + "grad_norm": 1.2431923365136468, + "learning_rate": 1.5178946692337405e-05, + "loss": 0.4901464581489563, + "step": 2466 + }, + { + "epoch": 0.7214505044597164, + "grad_norm": 2.053814058775723, + "learning_rate": 1.5174810264164865e-05, + "loss": 0.6777167320251465, + "step": 2467 + }, + { + "epoch": 0.7217429448749817, + "grad_norm": 1.4212256530727148, + "learning_rate": 1.5170672626411888e-05, + "loss": 0.6353746056556702, + "step": 2468 + }, + { + "epoch": 0.7220353852902471, + "grad_norm": 1.4867453474426244, + "learning_rate": 1.516653378004563e-05, + "loss": 0.6218847632408142, + "step": 2469 + }, + { + "epoch": 0.7223278257055125, + "grad_norm": 1.2225434595050702, + "learning_rate": 
1.5162393726033508e-05, + "loss": 0.5001585483551025, + "step": 2470 + }, + { + "epoch": 0.7226202661207779, + "grad_norm": 1.5129628743171017, + "learning_rate": 1.5158252465343242e-05, + "loss": 0.6801280975341797, + "step": 2471 + }, + { + "epoch": 0.7229127065360432, + "grad_norm": 1.208746836224967, + "learning_rate": 1.5154109998942823e-05, + "loss": 0.6739565134048462, + "step": 2472 + }, + { + "epoch": 0.7232051469513087, + "grad_norm": 1.1414220178862078, + "learning_rate": 1.5149966327800532e-05, + "loss": 0.5970213413238525, + "step": 2473 + }, + { + "epoch": 0.7234975873665741, + "grad_norm": 1.295455082889375, + "learning_rate": 1.5145821452884923e-05, + "loss": 0.7367317080497742, + "step": 2474 + }, + { + "epoch": 0.7237900277818394, + "grad_norm": 1.3877158266331615, + "learning_rate": 1.5141675375164839e-05, + "loss": 0.6332153677940369, + "step": 2475 + }, + { + "epoch": 0.7240824681971049, + "grad_norm": 1.223786080062607, + "learning_rate": 1.5137528095609395e-05, + "loss": 0.6185739636421204, + "step": 2476 + }, + { + "epoch": 0.7243749086123702, + "grad_norm": 1.436341367228992, + "learning_rate": 1.5133379615187996e-05, + "loss": 0.5982746481895447, + "step": 2477 + }, + { + "epoch": 0.7246673490276356, + "grad_norm": 1.32306496712973, + "learning_rate": 1.512922993487032e-05, + "loss": 0.5946815013885498, + "step": 2478 + }, + { + "epoch": 0.724959789442901, + "grad_norm": 1.2916301226572995, + "learning_rate": 1.5125079055626337e-05, + "loss": 0.5645624399185181, + "step": 2479 + }, + { + "epoch": 0.7252522298581664, + "grad_norm": 1.0689440382368105, + "learning_rate": 1.5120926978426288e-05, + "loss": 0.43329858779907227, + "step": 2480 + }, + { + "epoch": 0.7255446702734318, + "grad_norm": 1.420557871943188, + "learning_rate": 1.5116773704240689e-05, + "loss": 0.64244544506073, + "step": 2481 + }, + { + "epoch": 0.7258371106886972, + "grad_norm": 1.3002221181867923, + "learning_rate": 1.5112619234040348e-05, + "loss": 0.6640222072601318, 
+ "step": 2482 + }, + { + "epoch": 0.7261295511039626, + "grad_norm": 1.4810661665547034, + "learning_rate": 1.5108463568796346e-05, + "loss": 0.6346921324729919, + "step": 2483 + }, + { + "epoch": 0.7264219915192279, + "grad_norm": 1.4101536258246594, + "learning_rate": 1.5104306709480045e-05, + "loss": 0.5891947746276855, + "step": 2484 + }, + { + "epoch": 0.7267144319344934, + "grad_norm": 1.2478330500785222, + "learning_rate": 1.5100148657063089e-05, + "loss": 0.616216242313385, + "step": 2485 + }, + { + "epoch": 0.7270068723497587, + "grad_norm": 1.3541911638943873, + "learning_rate": 1.5095989412517389e-05, + "loss": 0.5961766242980957, + "step": 2486 + }, + { + "epoch": 0.7272993127650241, + "grad_norm": 1.27681624299837, + "learning_rate": 1.509182897681515e-05, + "loss": 0.5629050731658936, + "step": 2487 + }, + { + "epoch": 0.7275917531802896, + "grad_norm": 1.3918382252124497, + "learning_rate": 1.5087667350928844e-05, + "loss": 0.6640661954879761, + "step": 2488 + }, + { + "epoch": 0.7278841935955549, + "grad_norm": 1.1741006713729014, + "learning_rate": 1.5083504535831233e-05, + "loss": 0.5884503126144409, + "step": 2489 + }, + { + "epoch": 0.7281766340108203, + "grad_norm": 1.2387841976936662, + "learning_rate": 1.5079340532495344e-05, + "loss": 0.5395207405090332, + "step": 2490 + }, + { + "epoch": 0.7284690744260857, + "grad_norm": 1.5570127298934886, + "learning_rate": 1.5075175341894487e-05, + "loss": 0.5713212490081787, + "step": 2491 + }, + { + "epoch": 0.7287615148413511, + "grad_norm": 1.8811783299638292, + "learning_rate": 1.5071008965002252e-05, + "loss": 0.5732176303863525, + "step": 2492 + }, + { + "epoch": 0.7290539552566164, + "grad_norm": 1.3314823409610355, + "learning_rate": 1.50668414027925e-05, + "loss": 0.6381006240844727, + "step": 2493 + }, + { + "epoch": 0.7293463956718819, + "grad_norm": 1.5687830928425197, + "learning_rate": 1.5062672656239381e-05, + "loss": 0.6533833742141724, + "step": 2494 + }, + { + "epoch": 
0.7296388360871472, + "grad_norm": 1.213698756503139, + "learning_rate": 1.5058502726317309e-05, + "loss": 0.5919456481933594, + "step": 2495 + }, + { + "epoch": 0.7299312765024126, + "grad_norm": 1.3954865057419796, + "learning_rate": 1.5054331614000984e-05, + "loss": 0.6128921508789062, + "step": 2496 + }, + { + "epoch": 0.7302237169176781, + "grad_norm": 1.3910630571139424, + "learning_rate": 1.5050159320265371e-05, + "loss": 0.5949394702911377, + "step": 2497 + }, + { + "epoch": 0.7305161573329434, + "grad_norm": 1.5386167534502115, + "learning_rate": 1.5045985846085724e-05, + "loss": 0.6262483596801758, + "step": 2498 + }, + { + "epoch": 0.7308085977482088, + "grad_norm": 1.4477928134421267, + "learning_rate": 1.5041811192437563e-05, + "loss": 0.5032243728637695, + "step": 2499 + }, + { + "epoch": 0.7311010381634742, + "grad_norm": 1.31776348667592, + "learning_rate": 1.5037635360296695e-05, + "loss": 0.6721810102462769, + "step": 2500 + }, + { + "epoch": 0.7313934785787396, + "grad_norm": 1.3556666925406757, + "learning_rate": 1.5033458350639185e-05, + "loss": 0.7091001272201538, + "step": 2501 + }, + { + "epoch": 0.7316859189940049, + "grad_norm": 3.8317594491760163, + "learning_rate": 1.5029280164441395e-05, + "loss": 0.5414971113204956, + "step": 2502 + }, + { + "epoch": 0.7319783594092704, + "grad_norm": 1.364096425695391, + "learning_rate": 1.5025100802679944e-05, + "loss": 0.6714789271354675, + "step": 2503 + }, + { + "epoch": 0.7322707998245358, + "grad_norm": 1.3566105024089323, + "learning_rate": 1.5020920266331733e-05, + "loss": 0.5008493065834045, + "step": 2504 + }, + { + "epoch": 0.7325632402398011, + "grad_norm": 1.652155025588763, + "learning_rate": 1.5016738556373936e-05, + "loss": 0.563892126083374, + "step": 2505 + }, + { + "epoch": 0.7328556806550666, + "grad_norm": 1.3313159442091285, + "learning_rate": 1.5012555673784004e-05, + "loss": 0.6371973752975464, + "step": 2506 + }, + { + "epoch": 0.7331481210703319, + "grad_norm": 
1.5289698261558242, + "learning_rate": 1.5008371619539661e-05, + "loss": 0.7365365624427795, + "step": 2507 + }, + { + "epoch": 0.7334405614855973, + "grad_norm": 1.357001447635837, + "learning_rate": 1.5004186394618906e-05, + "loss": 0.5401967763900757, + "step": 2508 + }, + { + "epoch": 0.7337330019008627, + "grad_norm": 1.4338213701683389, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.5827134847640991, + "step": 2509 + }, + { + "epoch": 0.7340254423161281, + "grad_norm": 1.5173171956884226, + "learning_rate": 1.49958124366615e-05, + "loss": 0.7655869126319885, + "step": 2510 + }, + { + "epoch": 0.7343178827313934, + "grad_norm": 1.3360976464033478, + "learning_rate": 1.4991623705582216e-05, + "loss": 0.5410823822021484, + "step": 2511 + }, + { + "epoch": 0.7346103231466589, + "grad_norm": 1.6453007873220271, + "learning_rate": 1.4987433807741242e-05, + "loss": 0.6831178665161133, + "step": 2512 + }, + { + "epoch": 0.7349027635619243, + "grad_norm": 1.2152056235269613, + "learning_rate": 1.498324274411794e-05, + "loss": 0.4952821731567383, + "step": 2513 + }, + { + "epoch": 0.7351952039771896, + "grad_norm": 1.4425254779779118, + "learning_rate": 1.4979050515691944e-05, + "loss": 0.6973339319229126, + "step": 2514 + }, + { + "epoch": 0.7354876443924551, + "grad_norm": 1.428400853551732, + "learning_rate": 1.4974857123443163e-05, + "loss": 0.6604373455047607, + "step": 2515 + }, + { + "epoch": 0.7357800848077204, + "grad_norm": 1.3355207439959806, + "learning_rate": 1.4970662568351776e-05, + "loss": 0.6523034572601318, + "step": 2516 + }, + { + "epoch": 0.7360725252229858, + "grad_norm": 1.2739776061453822, + "learning_rate": 1.4966466851398238e-05, + "loss": 0.6557538509368896, + "step": 2517 + }, + { + "epoch": 0.7363649656382512, + "grad_norm": 1.3243836594251046, + "learning_rate": 1.4962269973563269e-05, + "loss": 0.6993967294692993, + "step": 2518 + }, + { + "epoch": 0.7366574060535166, + "grad_norm": 1.3043008466806634, + "learning_rate": 
1.4958071935827862e-05, + "loss": 0.611979067325592, + "step": 2519 + }, + { + "epoch": 0.736949846468782, + "grad_norm": 1.5837280682600245, + "learning_rate": 1.4953872739173289e-05, + "loss": 0.9108786582946777, + "step": 2520 + }, + { + "epoch": 0.7372422868840474, + "grad_norm": 1.5471791396278156, + "learning_rate": 1.4949672384581082e-05, + "loss": 0.7086392045021057, + "step": 2521 + }, + { + "epoch": 0.7375347272993128, + "grad_norm": 1.341070279173996, + "learning_rate": 1.494547087303305e-05, + "loss": 0.6103025674819946, + "step": 2522 + }, + { + "epoch": 0.7378271677145781, + "grad_norm": 1.223930383405044, + "learning_rate": 1.4941268205511272e-05, + "loss": 0.5597528219223022, + "step": 2523 + }, + { + "epoch": 0.7381196081298436, + "grad_norm": 1.4817126292023657, + "learning_rate": 1.4937064382998091e-05, + "loss": 0.6222598552703857, + "step": 2524 + }, + { + "epoch": 0.7384120485451089, + "grad_norm": 1.4738198225513357, + "learning_rate": 1.4932859406476131e-05, + "loss": 0.6083353757858276, + "step": 2525 + }, + { + "epoch": 0.7387044889603743, + "grad_norm": 1.2716230350108357, + "learning_rate": 1.4928653276928275e-05, + "loss": 0.47920671105384827, + "step": 2526 + }, + { + "epoch": 0.7389969293756398, + "grad_norm": 1.2356122713189879, + "learning_rate": 1.4924445995337685e-05, + "loss": 0.5752983093261719, + "step": 2527 + }, + { + "epoch": 0.7392893697909051, + "grad_norm": 1.3500870063925003, + "learning_rate": 1.4920237562687784e-05, + "loss": 0.6275333762168884, + "step": 2528 + }, + { + "epoch": 0.7395818102061705, + "grad_norm": 1.3423023519178945, + "learning_rate": 1.4916027979962266e-05, + "loss": 0.6362103223800659, + "step": 2529 + }, + { + "epoch": 0.7398742506214359, + "grad_norm": 1.4246415171584412, + "learning_rate": 1.49118172481451e-05, + "loss": 0.5902664661407471, + "step": 2530 + }, + { + "epoch": 0.7401666910367013, + "grad_norm": 1.3036213595476636, + "learning_rate": 1.4907605368220514e-05, + "loss": 
0.5293874740600586, + "step": 2531 + }, + { + "epoch": 0.7404591314519666, + "grad_norm": 1.3590290047464213, + "learning_rate": 1.4903392341173013e-05, + "loss": 0.7298746109008789, + "step": 2532 + }, + { + "epoch": 0.7407515718672321, + "grad_norm": 1.3755489549876734, + "learning_rate": 1.4899178167987367e-05, + "loss": 0.6428382396697998, + "step": 2533 + }, + { + "epoch": 0.7410440122824974, + "grad_norm": 1.3444422145970576, + "learning_rate": 1.489496284964861e-05, + "loss": 0.6204425096511841, + "step": 2534 + }, + { + "epoch": 0.7413364526977628, + "grad_norm": 1.2627663029943075, + "learning_rate": 1.4890746387142052e-05, + "loss": 0.6025601625442505, + "step": 2535 + }, + { + "epoch": 0.7416288931130283, + "grad_norm": 1.212213289149315, + "learning_rate": 1.4886528781453258e-05, + "loss": 0.5570085644721985, + "step": 2536 + }, + { + "epoch": 0.7419213335282936, + "grad_norm": 1.387517207017057, + "learning_rate": 1.4882310033568072e-05, + "loss": 0.6816439628601074, + "step": 2537 + }, + { + "epoch": 0.742213773943559, + "grad_norm": 1.341130650337267, + "learning_rate": 1.4878090144472603e-05, + "loss": 0.5424396991729736, + "step": 2538 + }, + { + "epoch": 0.7425062143588244, + "grad_norm": 1.583973779595893, + "learning_rate": 1.4873869115153223e-05, + "loss": 0.58860182762146, + "step": 2539 + }, + { + "epoch": 0.7427986547740898, + "grad_norm": 1.227937032120959, + "learning_rate": 1.4869646946596568e-05, + "loss": 0.513140857219696, + "step": 2540 + }, + { + "epoch": 0.7430910951893551, + "grad_norm": 1.3321578929704418, + "learning_rate": 1.486542363978955e-05, + "loss": 0.5967035293579102, + "step": 2541 + }, + { + "epoch": 0.7433835356046206, + "grad_norm": 1.2958174333377406, + "learning_rate": 1.4861199195719334e-05, + "loss": 0.6988440752029419, + "step": 2542 + }, + { + "epoch": 0.743675976019886, + "grad_norm": 1.3279731889181368, + "learning_rate": 1.4856973615373366e-05, + "loss": 0.6176164746284485, + "step": 2543 + }, + { + "epoch": 
0.7439684164351513, + "grad_norm": 1.394214331783624, + "learning_rate": 1.4852746899739346e-05, + "loss": 0.5616505742073059, + "step": 2544 + }, + { + "epoch": 0.7442608568504168, + "grad_norm": 1.199172810090394, + "learning_rate": 1.4848519049805243e-05, + "loss": 0.5470465421676636, + "step": 2545 + }, + { + "epoch": 0.7445532972656821, + "grad_norm": 1.393649724579279, + "learning_rate": 1.4844290066559292e-05, + "loss": 0.6362754106521606, + "step": 2546 + }, + { + "epoch": 0.7448457376809475, + "grad_norm": 1.2298975206172837, + "learning_rate": 1.4840059950989992e-05, + "loss": 0.6290515661239624, + "step": 2547 + }, + { + "epoch": 0.7451381780962129, + "grad_norm": 1.4356832247939193, + "learning_rate": 1.4835828704086105e-05, + "loss": 0.7225647568702698, + "step": 2548 + }, + { + "epoch": 0.7454306185114783, + "grad_norm": 1.4603777863967904, + "learning_rate": 1.483159632683666e-05, + "loss": 0.6993023157119751, + "step": 2549 + }, + { + "epoch": 0.7457230589267436, + "grad_norm": 1.5062925776475273, + "learning_rate": 1.482736282023095e-05, + "loss": 0.6960086226463318, + "step": 2550 + }, + { + "epoch": 0.7460154993420091, + "grad_norm": 1.4783046017210701, + "learning_rate": 1.4823128185258535e-05, + "loss": 0.627712607383728, + "step": 2551 + }, + { + "epoch": 0.7463079397572745, + "grad_norm": 1.3756379084869055, + "learning_rate": 1.481889242290923e-05, + "loss": 0.6314729452133179, + "step": 2552 + }, + { + "epoch": 0.7466003801725398, + "grad_norm": 1.293029687195421, + "learning_rate": 1.4814655534173121e-05, + "loss": 0.5948070287704468, + "step": 2553 + }, + { + "epoch": 0.7468928205878053, + "grad_norm": 1.28283626174806, + "learning_rate": 1.4810417520040551e-05, + "loss": 0.6227586269378662, + "step": 2554 + }, + { + "epoch": 0.7471852610030706, + "grad_norm": 1.156874509923564, + "learning_rate": 1.4806178381502139e-05, + "loss": 0.589213490486145, + "step": 2555 + }, + { + "epoch": 0.747477701418336, + "grad_norm": 1.3920763104069633, + 
"learning_rate": 1.4801938119548748e-05, + "loss": 0.6748968362808228, + "step": 2556 + }, + { + "epoch": 0.7477701418336014, + "grad_norm": 1.5278244850962377, + "learning_rate": 1.4797696735171521e-05, + "loss": 0.627450704574585, + "step": 2557 + }, + { + "epoch": 0.7480625822488668, + "grad_norm": 1.3979513679962843, + "learning_rate": 1.479345422936185e-05, + "loss": 0.5816184878349304, + "step": 2558 + }, + { + "epoch": 0.7483550226641322, + "grad_norm": 1.3403975244231432, + "learning_rate": 1.4789210603111399e-05, + "loss": 0.5184855461120605, + "step": 2559 + }, + { + "epoch": 0.7486474630793976, + "grad_norm": 1.3184163367774433, + "learning_rate": 1.4784965857412088e-05, + "loss": 0.5747300982475281, + "step": 2560 + }, + { + "epoch": 0.748939903494663, + "grad_norm": 1.5154750654158269, + "learning_rate": 1.4780719993256104e-05, + "loss": 0.6957682371139526, + "step": 2561 + }, + { + "epoch": 0.7492323439099283, + "grad_norm": 1.3790848349629903, + "learning_rate": 1.4776473011635886e-05, + "loss": 0.5711330771446228, + "step": 2562 + }, + { + "epoch": 0.7495247843251938, + "grad_norm": 1.260228471581513, + "learning_rate": 1.4772224913544142e-05, + "loss": 0.687350869178772, + "step": 2563 + }, + { + "epoch": 0.7498172247404591, + "grad_norm": 1.549796921470129, + "learning_rate": 1.476797569997384e-05, + "loss": 0.71396803855896, + "step": 2564 + }, + { + "epoch": 0.7501096651557245, + "grad_norm": 1.3620133851355087, + "learning_rate": 1.4763725371918209e-05, + "loss": 0.5457814335823059, + "step": 2565 + }, + { + "epoch": 0.75040210557099, + "grad_norm": 1.4687420339775556, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.5889413952827454, + "step": 2566 + }, + { + "epoch": 0.7506945459862553, + "grad_norm": 1.8883582542449355, + "learning_rate": 1.4755221376325171e-05, + "loss": 0.6222226619720459, + "step": 2567 + }, + { + "epoch": 0.7509869864015207, + "grad_norm": 1.17580934018018, + "learning_rate": 1.475096771077552e-05, + "loss": 
0.5273243188858032, + "step": 2568 + }, + { + "epoch": 0.7512794268167861, + "grad_norm": 1.2062680853030614, + "learning_rate": 1.4746712934716055e-05, + "loss": 0.5665162801742554, + "step": 2569 + }, + { + "epoch": 0.7515718672320515, + "grad_norm": 1.6320800654071554, + "learning_rate": 1.4742457049141298e-05, + "loss": 0.5748391151428223, + "step": 2570 + }, + { + "epoch": 0.7518643076473168, + "grad_norm": 1.4197866961281498, + "learning_rate": 1.4738200055046044e-05, + "loss": 0.7002041339874268, + "step": 2571 + }, + { + "epoch": 0.7521567480625823, + "grad_norm": 1.3507056136966096, + "learning_rate": 1.4733941953425337e-05, + "loss": 0.6841630935668945, + "step": 2572 + }, + { + "epoch": 0.7524491884778476, + "grad_norm": 1.6017928671701795, + "learning_rate": 1.4729682745274478e-05, + "loss": 0.7047172784805298, + "step": 2573 + }, + { + "epoch": 0.752741628893113, + "grad_norm": 1.4397980876250445, + "learning_rate": 1.4725422431589035e-05, + "loss": 0.6979919672012329, + "step": 2574 + }, + { + "epoch": 0.7530340693083785, + "grad_norm": 1.3152000128748418, + "learning_rate": 1.4721161013364829e-05, + "loss": 0.6437125205993652, + "step": 2575 + }, + { + "epoch": 0.7533265097236438, + "grad_norm": 1.4573280156715103, + "learning_rate": 1.4716898491597942e-05, + "loss": 0.591254711151123, + "step": 2576 + }, + { + "epoch": 0.7536189501389092, + "grad_norm": 1.592793146861773, + "learning_rate": 1.4712634867284714e-05, + "loss": 0.6276297569274902, + "step": 2577 + }, + { + "epoch": 0.7539113905541746, + "grad_norm": 1.2004846116513588, + "learning_rate": 1.4708370141421737e-05, + "loss": 0.5310626029968262, + "step": 2578 + }, + { + "epoch": 0.75420383096944, + "grad_norm": 1.374287364754045, + "learning_rate": 1.4704104315005864e-05, + "loss": 0.5256849527359009, + "step": 2579 + }, + { + "epoch": 0.7544962713847053, + "grad_norm": 1.4473126972035357, + "learning_rate": 1.4699837389034212e-05, + "loss": 0.6050584316253662, + "step": 2580 + }, + { + 
"epoch": 0.7547887117999708, + "grad_norm": 1.3425248874126274, + "learning_rate": 1.4695569364504144e-05, + "loss": 0.5124386548995972, + "step": 2581 + }, + { + "epoch": 0.7550811522152362, + "grad_norm": 1.1600080124683732, + "learning_rate": 1.4691300242413289e-05, + "loss": 0.5631951093673706, + "step": 2582 + }, + { + "epoch": 0.7553735926305015, + "grad_norm": 1.3017433820111879, + "learning_rate": 1.4687030023759527e-05, + "loss": 0.6352444291114807, + "step": 2583 + }, + { + "epoch": 0.755666033045767, + "grad_norm": 1.4490307646785157, + "learning_rate": 1.4682758709540992e-05, + "loss": 0.6717500686645508, + "step": 2584 + }, + { + "epoch": 0.7559584734610323, + "grad_norm": 3.0905292476778428, + "learning_rate": 1.467848630075608e-05, + "loss": 0.5889217853546143, + "step": 2585 + }, + { + "epoch": 0.7562509138762977, + "grad_norm": 1.258529998432557, + "learning_rate": 1.4674212798403443e-05, + "loss": 0.49069908261299133, + "step": 2586 + }, + { + "epoch": 0.756543354291563, + "grad_norm": 1.1729027861993524, + "learning_rate": 1.4669938203481982e-05, + "loss": 0.6272397041320801, + "step": 2587 + }, + { + "epoch": 0.7568357947068285, + "grad_norm": 1.5090841451643915, + "learning_rate": 1.466566251699086e-05, + "loss": 0.6218451261520386, + "step": 2588 + }, + { + "epoch": 0.7571282351220939, + "grad_norm": 1.4025085245751263, + "learning_rate": 1.4661385739929492e-05, + "loss": 0.6174849271774292, + "step": 2589 + }, + { + "epoch": 0.7574206755373593, + "grad_norm": 1.3554209784525295, + "learning_rate": 1.465710787329755e-05, + "loss": 0.5595160126686096, + "step": 2590 + }, + { + "epoch": 0.7577131159526247, + "grad_norm": 1.5657464206953444, + "learning_rate": 1.4652828918094954e-05, + "loss": 0.757240891456604, + "step": 2591 + }, + { + "epoch": 0.75800555636789, + "grad_norm": 1.3337551846990978, + "learning_rate": 1.4648548875321893e-05, + "loss": 0.630811333656311, + "step": 2592 + }, + { + "epoch": 0.7582979967831555, + "grad_norm": 
1.208341715070646, + "learning_rate": 1.4644267745978797e-05, + "loss": 0.5857812762260437, + "step": 2593 + }, + { + "epoch": 0.7585904371984208, + "grad_norm": 1.1785954348430454, + "learning_rate": 1.463998553106635e-05, + "loss": 0.5869519710540771, + "step": 2594 + }, + { + "epoch": 0.7588828776136862, + "grad_norm": 1.2035584714461103, + "learning_rate": 1.4635702231585498e-05, + "loss": 0.5610413551330566, + "step": 2595 + }, + { + "epoch": 0.7591753180289516, + "grad_norm": 1.255732340436211, + "learning_rate": 1.4631417848537435e-05, + "loss": 0.5634676218032837, + "step": 2596 + }, + { + "epoch": 0.759467758444217, + "grad_norm": 1.2847976698363035, + "learning_rate": 1.4627132382923607e-05, + "loss": 0.6813392639160156, + "step": 2597 + }, + { + "epoch": 0.7597601988594824, + "grad_norm": 1.5611350123657577, + "learning_rate": 1.4622845835745723e-05, + "loss": 0.644945502281189, + "step": 2598 + }, + { + "epoch": 0.7600526392747478, + "grad_norm": 1.4458723370490596, + "learning_rate": 1.461855820800573e-05, + "loss": 0.7432133555412292, + "step": 2599 + }, + { + "epoch": 0.7603450796900132, + "grad_norm": 1.1406983279122715, + "learning_rate": 1.4614269500705832e-05, + "loss": 0.4729112982749939, + "step": 2600 + }, + { + "epoch": 0.7606375201052785, + "grad_norm": 1.4806970647351285, + "learning_rate": 1.4609979714848499e-05, + "loss": 0.7146443128585815, + "step": 2601 + }, + { + "epoch": 0.760929960520544, + "grad_norm": 1.4348530933940364, + "learning_rate": 1.4605688851436436e-05, + "loss": 0.5959945917129517, + "step": 2602 + }, + { + "epoch": 0.7612224009358093, + "grad_norm": 1.3380784718799885, + "learning_rate": 1.4601396911472605e-05, + "loss": 0.6091525554656982, + "step": 2603 + }, + { + "epoch": 0.7615148413510747, + "grad_norm": 1.3043703832448297, + "learning_rate": 1.4597103895960228e-05, + "loss": 0.5101523399353027, + "step": 2604 + }, + { + "epoch": 0.7618072817663402, + "grad_norm": 1.3937793894568855, + "learning_rate": 
1.4592809805902762e-05, + "loss": 0.6036165952682495, + "step": 2605 + }, + { + "epoch": 0.7620997221816055, + "grad_norm": 1.361507946530242, + "learning_rate": 1.4588514642303928e-05, + "loss": 0.6094970703125, + "step": 2606 + }, + { + "epoch": 0.7623921625968709, + "grad_norm": 1.3770518433820003, + "learning_rate": 1.4584218406167697e-05, + "loss": 0.49754881858825684, + "step": 2607 + }, + { + "epoch": 0.7626846030121363, + "grad_norm": 1.3703785644048119, + "learning_rate": 1.4579921098498285e-05, + "loss": 0.6066807508468628, + "step": 2608 + }, + { + "epoch": 0.7629770434274017, + "grad_norm": 1.4768479795454132, + "learning_rate": 1.4575622720300162e-05, + "loss": 0.5758910179138184, + "step": 2609 + }, + { + "epoch": 0.763269483842667, + "grad_norm": 1.4281250780822374, + "learning_rate": 1.457132327257805e-05, + "loss": 0.6641621589660645, + "step": 2610 + }, + { + "epoch": 0.7635619242579325, + "grad_norm": 1.506727865728889, + "learning_rate": 1.4567022756336916e-05, + "loss": 0.7024788856506348, + "step": 2611 + }, + { + "epoch": 0.7638543646731978, + "grad_norm": 1.2921755321984356, + "learning_rate": 1.4562721172581982e-05, + "loss": 0.6066344380378723, + "step": 2612 + }, + { + "epoch": 0.7641468050884632, + "grad_norm": 1.3533854830579282, + "learning_rate": 1.4558418522318713e-05, + "loss": 0.566038966178894, + "step": 2613 + }, + { + "epoch": 0.7644392455037287, + "grad_norm": 1.3370326372322123, + "learning_rate": 1.4554114806552833e-05, + "loss": 0.5817335844039917, + "step": 2614 + }, + { + "epoch": 0.764731685918994, + "grad_norm": 1.2813703243908812, + "learning_rate": 1.4549810026290305e-05, + "loss": 0.6001763343811035, + "step": 2615 + }, + { + "epoch": 0.7650241263342594, + "grad_norm": 1.617460530676573, + "learning_rate": 1.4545504182537346e-05, + "loss": 0.6363068222999573, + "step": 2616 + }, + { + "epoch": 0.7653165667495248, + "grad_norm": 1.4805158326873171, + "learning_rate": 1.4541197276300424e-05, + "loss": 0.669566810131073, 
+ "step": 2617 + }, + { + "epoch": 0.7656090071647902, + "grad_norm": 1.2122677055370945, + "learning_rate": 1.4536889308586245e-05, + "loss": 0.47967004776000977, + "step": 2618 + }, + { + "epoch": 0.7659014475800555, + "grad_norm": 1.310958704364757, + "learning_rate": 1.4532580280401777e-05, + "loss": 0.5803399085998535, + "step": 2619 + }, + { + "epoch": 0.766193887995321, + "grad_norm": 1.3185113057937472, + "learning_rate": 1.452827019275423e-05, + "loss": 0.6870115995407104, + "step": 2620 + }, + { + "epoch": 0.7664863284105864, + "grad_norm": 1.307156915151953, + "learning_rate": 1.4523959046651058e-05, + "loss": 0.6190885901451111, + "step": 2621 + }, + { + "epoch": 0.7667787688258517, + "grad_norm": 1.4891479565012034, + "learning_rate": 1.4519646843099961e-05, + "loss": 0.6624859571456909, + "step": 2622 + }, + { + "epoch": 0.7670712092411172, + "grad_norm": 1.253302711959068, + "learning_rate": 1.4515333583108896e-05, + "loss": 0.5770546197891235, + "step": 2623 + }, + { + "epoch": 0.7673636496563825, + "grad_norm": 1.3410371709150275, + "learning_rate": 1.451101926768606e-05, + "loss": 0.6843355894088745, + "step": 2624 + }, + { + "epoch": 0.7676560900716479, + "grad_norm": 1.0930173610522418, + "learning_rate": 1.4506703897839895e-05, + "loss": 0.5293717384338379, + "step": 2625 + }, + { + "epoch": 0.7679485304869133, + "grad_norm": 1.1789701874259584, + "learning_rate": 1.45023874745791e-05, + "loss": 0.44534316658973694, + "step": 2626 + }, + { + "epoch": 0.7682409709021787, + "grad_norm": 4.2234169958332295, + "learning_rate": 1.4498069998912603e-05, + "loss": 0.7279446721076965, + "step": 2627 + }, + { + "epoch": 0.7685334113174441, + "grad_norm": 1.3924343198630234, + "learning_rate": 1.4493751471849596e-05, + "loss": 0.6990453600883484, + "step": 2628 + }, + { + "epoch": 0.7688258517327095, + "grad_norm": 1.3337373981179779, + "learning_rate": 1.44894318943995e-05, + "loss": 0.6610965728759766, + "step": 2629 + }, + { + "epoch": 
0.7691182921479749, + "grad_norm": 1.285212706548779, + "learning_rate": 1.4485111267571999e-05, + "loss": 0.5124749541282654, + "step": 2630 + }, + { + "epoch": 0.7694107325632402, + "grad_norm": 1.3445630320041935, + "learning_rate": 1.448078959237701e-05, + "loss": 0.7191518545150757, + "step": 2631 + }, + { + "epoch": 0.7697031729785057, + "grad_norm": 1.1499690572165278, + "learning_rate": 1.4476466869824694e-05, + "loss": 0.5798880457878113, + "step": 2632 + }, + { + "epoch": 0.769995613393771, + "grad_norm": 1.3900006441925277, + "learning_rate": 1.4472143100925467e-05, + "loss": 0.5187106728553772, + "step": 2633 + }, + { + "epoch": 0.7702880538090364, + "grad_norm": 1.1672945310140501, + "learning_rate": 1.4467818286689981e-05, + "loss": 0.5794588327407837, + "step": 2634 + }, + { + "epoch": 0.7705804942243017, + "grad_norm": 1.2435528275045493, + "learning_rate": 1.4463492428129133e-05, + "loss": 0.4884936809539795, + "step": 2635 + }, + { + "epoch": 0.7708729346395672, + "grad_norm": 1.3037745440935204, + "learning_rate": 1.4459165526254074e-05, + "loss": 0.5782946348190308, + "step": 2636 + }, + { + "epoch": 0.7711653750548326, + "grad_norm": 1.2531837165046444, + "learning_rate": 1.445483758207618e-05, + "loss": 0.5173349380493164, + "step": 2637 + }, + { + "epoch": 0.771457815470098, + "grad_norm": 1.4752149684021225, + "learning_rate": 1.4450508596607087e-05, + "loss": 0.616407573223114, + "step": 2638 + }, + { + "epoch": 0.7717502558853634, + "grad_norm": 1.4855666629653779, + "learning_rate": 1.4446178570858672e-05, + "loss": 0.537878155708313, + "step": 2639 + }, + { + "epoch": 0.7720426963006287, + "grad_norm": 1.2968861628303388, + "learning_rate": 1.4441847505843048e-05, + "loss": 0.674277663230896, + "step": 2640 + }, + { + "epoch": 0.7723351367158942, + "grad_norm": 1.440782866010467, + "learning_rate": 1.4437515402572576e-05, + "loss": 0.5064860582351685, + "step": 2641 + }, + { + "epoch": 0.7726275771311595, + "grad_norm": 
1.2859384806045262, + "learning_rate": 1.4433182262059861e-05, + "loss": 0.6256883144378662, + "step": 2642 + }, + { + "epoch": 0.7729200175464249, + "grad_norm": 1.2490391757844836, + "learning_rate": 1.4428848085317744e-05, + "loss": 0.6023700833320618, + "step": 2643 + }, + { + "epoch": 0.7732124579616904, + "grad_norm": 1.5137270909206324, + "learning_rate": 1.4424512873359316e-05, + "loss": 0.5670932531356812, + "step": 2644 + }, + { + "epoch": 0.7735048983769557, + "grad_norm": 1.406486208295682, + "learning_rate": 1.4420176627197906e-05, + "loss": 0.760460376739502, + "step": 2645 + }, + { + "epoch": 0.7737973387922211, + "grad_norm": 1.3383411751300025, + "learning_rate": 1.4415839347847082e-05, + "loss": 0.5680848956108093, + "step": 2646 + }, + { + "epoch": 0.7740897792074865, + "grad_norm": 1.2948318300140997, + "learning_rate": 1.4411501036320661e-05, + "loss": 0.5962368249893188, + "step": 2647 + }, + { + "epoch": 0.7743822196227519, + "grad_norm": 1.3851281269469669, + "learning_rate": 1.4407161693632697e-05, + "loss": 0.7149791121482849, + "step": 2648 + }, + { + "epoch": 0.7746746600380172, + "grad_norm": 1.4438569377090373, + "learning_rate": 1.440282132079748e-05, + "loss": 0.5943992733955383, + "step": 2649 + }, + { + "epoch": 0.7749671004532827, + "grad_norm": 1.681920535370579, + "learning_rate": 1.439847991882955e-05, + "loss": 0.7265899181365967, + "step": 2650 + }, + { + "epoch": 0.775259540868548, + "grad_norm": 1.257384791880329, + "learning_rate": 1.4394137488743682e-05, + "loss": 0.6011309027671814, + "step": 2651 + }, + { + "epoch": 0.7755519812838134, + "grad_norm": 1.4419500386554907, + "learning_rate": 1.4389794031554894e-05, + "loss": 0.6853964328765869, + "step": 2652 + }, + { + "epoch": 0.7758444216990789, + "grad_norm": 1.4140520249216477, + "learning_rate": 1.438544954827844e-05, + "loss": 0.6598547697067261, + "step": 2653 + }, + { + "epoch": 0.7761368621143442, + "grad_norm": 1.3919438302264315, + "learning_rate": 
1.4381104039929819e-05, + "loss": 0.5776119232177734, + "step": 2654 + }, + { + "epoch": 0.7764293025296096, + "grad_norm": 1.182931573556341, + "learning_rate": 1.4376757507524766e-05, + "loss": 0.6026376485824585, + "step": 2655 + }, + { + "epoch": 0.776721742944875, + "grad_norm": 1.2883148172478378, + "learning_rate": 1.4372409952079256e-05, + "loss": 0.5776997804641724, + "step": 2656 + }, + { + "epoch": 0.7770141833601404, + "grad_norm": 1.5317545348037325, + "learning_rate": 1.4368061374609505e-05, + "loss": 0.5766068696975708, + "step": 2657 + }, + { + "epoch": 0.7773066237754057, + "grad_norm": 1.0428168520269592, + "learning_rate": 1.4363711776131966e-05, + "loss": 0.4783105254173279, + "step": 2658 + }, + { + "epoch": 0.7775990641906712, + "grad_norm": 1.4837098758543301, + "learning_rate": 1.4359361157663332e-05, + "loss": 0.6563695073127747, + "step": 2659 + }, + { + "epoch": 0.7778915046059366, + "grad_norm": 1.0898257169197185, + "learning_rate": 1.4355009520220531e-05, + "loss": 0.5177119374275208, + "step": 2660 + }, + { + "epoch": 0.7781839450212019, + "grad_norm": 1.3520526907259511, + "learning_rate": 1.4350656864820733e-05, + "loss": 0.6590641736984253, + "step": 2661 + }, + { + "epoch": 0.7784763854364674, + "grad_norm": 1.2923155412118275, + "learning_rate": 1.4346303192481348e-05, + "loss": 0.6012274622917175, + "step": 2662 + }, + { + "epoch": 0.7787688258517327, + "grad_norm": 1.439032337982527, + "learning_rate": 1.4341948504220016e-05, + "loss": 0.6731704473495483, + "step": 2663 + }, + { + "epoch": 0.7790612662669981, + "grad_norm": 1.4598986218346195, + "learning_rate": 1.4337592801054623e-05, + "loss": 0.6827171444892883, + "step": 2664 + }, + { + "epoch": 0.7793537066822634, + "grad_norm": 1.3963311439466064, + "learning_rate": 1.4333236084003282e-05, + "loss": 0.6654937267303467, + "step": 2665 + }, + { + "epoch": 0.7796461470975289, + "grad_norm": 1.276825216432019, + "learning_rate": 1.4328878354084355e-05, + "loss": 
0.5673532485961914, + "step": 2666 + }, + { + "epoch": 0.7799385875127943, + "grad_norm": 1.3049192363130713, + "learning_rate": 1.432451961231643e-05, + "loss": 0.5401986241340637, + "step": 2667 + }, + { + "epoch": 0.7802310279280597, + "grad_norm": 1.2877259559166432, + "learning_rate": 1.4320159859718341e-05, + "loss": 0.6134701371192932, + "step": 2668 + }, + { + "epoch": 0.7805234683433251, + "grad_norm": 1.5022932512908924, + "learning_rate": 1.4315799097309152e-05, + "loss": 0.6913554668426514, + "step": 2669 + }, + { + "epoch": 0.7808159087585904, + "grad_norm": 1.6126405133572825, + "learning_rate": 1.4311437326108167e-05, + "loss": 0.6969482898712158, + "step": 2670 + }, + { + "epoch": 0.7811083491738559, + "grad_norm": 1.343855488902383, + "learning_rate": 1.4307074547134918e-05, + "loss": 0.6612537503242493, + "step": 2671 + }, + { + "epoch": 0.7814007895891212, + "grad_norm": 1.1627822310905236, + "learning_rate": 1.430271076140918e-05, + "loss": 0.5545899868011475, + "step": 2672 + }, + { + "epoch": 0.7816932300043866, + "grad_norm": 1.1885930128001867, + "learning_rate": 1.4298345969950965e-05, + "loss": 0.6635574698448181, + "step": 2673 + }, + { + "epoch": 0.781985670419652, + "grad_norm": 1.4316816688950922, + "learning_rate": 1.4293980173780514e-05, + "loss": 0.5859510898590088, + "step": 2674 + }, + { + "epoch": 0.7822781108349174, + "grad_norm": 1.246244040215616, + "learning_rate": 1.4289613373918304e-05, + "loss": 0.5839825868606567, + "step": 2675 + }, + { + "epoch": 0.7825705512501828, + "grad_norm": 1.7192756445293216, + "learning_rate": 1.428524557138505e-05, + "loss": 0.6376889944076538, + "step": 2676 + }, + { + "epoch": 0.7828629916654481, + "grad_norm": 1.2061132029389496, + "learning_rate": 1.4280876767201696e-05, + "loss": 0.5473129749298096, + "step": 2677 + }, + { + "epoch": 0.7831554320807136, + "grad_norm": 1.2355367438994083, + "learning_rate": 1.4276506962389429e-05, + "loss": 0.6723904609680176, + "step": 2678 + }, + { + 
"epoch": 0.7834478724959789, + "grad_norm": 1.318329485547163, + "learning_rate": 1.4272136157969658e-05, + "loss": 0.6036845445632935, + "step": 2679 + }, + { + "epoch": 0.7837403129112444, + "grad_norm": 1.4527977807212105, + "learning_rate": 1.4267764354964038e-05, + "loss": 0.5993655920028687, + "step": 2680 + }, + { + "epoch": 0.7840327533265097, + "grad_norm": 1.5159579383707373, + "learning_rate": 1.4263391554394448e-05, + "loss": 0.6678075194358826, + "step": 2681 + }, + { + "epoch": 0.7843251937417751, + "grad_norm": 1.2588619303254647, + "learning_rate": 1.4259017757283003e-05, + "loss": 0.5627151727676392, + "step": 2682 + }, + { + "epoch": 0.7846176341570406, + "grad_norm": 1.2632820141578516, + "learning_rate": 1.4254642964652053e-05, + "loss": 0.6060316562652588, + "step": 2683 + }, + { + "epoch": 0.7849100745723059, + "grad_norm": 1.590473454276912, + "learning_rate": 1.4250267177524177e-05, + "loss": 0.6535854935646057, + "step": 2684 + }, + { + "epoch": 0.7852025149875713, + "grad_norm": 1.499355267260573, + "learning_rate": 1.4245890396922195e-05, + "loss": 0.7141643762588501, + "step": 2685 + }, + { + "epoch": 0.7854949554028366, + "grad_norm": 1.5067703709229516, + "learning_rate": 1.4241512623869143e-05, + "loss": 0.6685847640037537, + "step": 2686 + }, + { + "epoch": 0.7857873958181021, + "grad_norm": 1.4195544467165693, + "learning_rate": 1.4237133859388305e-05, + "loss": 0.6745196580886841, + "step": 2687 + }, + { + "epoch": 0.7860798362333674, + "grad_norm": 1.5617010746630147, + "learning_rate": 1.423275410450319e-05, + "loss": 0.6891968250274658, + "step": 2688 + }, + { + "epoch": 0.7863722766486329, + "grad_norm": 1.3584703297700564, + "learning_rate": 1.422837336023754e-05, + "loss": 0.5614763498306274, + "step": 2689 + }, + { + "epoch": 0.7866647170638982, + "grad_norm": 1.3595148335065306, + "learning_rate": 1.4223991627615324e-05, + "loss": 0.5867494344711304, + "step": 2690 + }, + { + "epoch": 0.7869571574791636, + "grad_norm": 
1.453264768444311, + "learning_rate": 1.421960890766075e-05, + "loss": 0.644777774810791, + "step": 2691 + }, + { + "epoch": 0.787249597894429, + "grad_norm": 1.3023857436912896, + "learning_rate": 1.4215225201398249e-05, + "loss": 0.7237588167190552, + "step": 2692 + }, + { + "epoch": 0.7875420383096944, + "grad_norm": 1.45851809360972, + "learning_rate": 1.4210840509852484e-05, + "loss": 0.6314423680305481, + "step": 2693 + }, + { + "epoch": 0.7878344787249598, + "grad_norm": 1.2286351961246127, + "learning_rate": 1.4206454834048353e-05, + "loss": 0.5298433303833008, + "step": 2694 + }, + { + "epoch": 0.7881269191402251, + "grad_norm": 1.1185262454319822, + "learning_rate": 1.420206817501098e-05, + "loss": 0.507548451423645, + "step": 2695 + }, + { + "epoch": 0.7884193595554906, + "grad_norm": 1.7207072983596743, + "learning_rate": 1.4197680533765721e-05, + "loss": 0.7742520570755005, + "step": 2696 + }, + { + "epoch": 0.7887117999707559, + "grad_norm": 1.3752660802878722, + "learning_rate": 1.4193291911338161e-05, + "loss": 0.6261187195777893, + "step": 2697 + }, + { + "epoch": 0.7890042403860213, + "grad_norm": 1.521521524262885, + "learning_rate": 1.4188902308754108e-05, + "loss": 0.7501171827316284, + "step": 2698 + }, + { + "epoch": 0.7892966808012868, + "grad_norm": 1.3001128857102173, + "learning_rate": 1.4184511727039612e-05, + "loss": 0.5590647459030151, + "step": 2699 + }, + { + "epoch": 0.7895891212165521, + "grad_norm": 1.4479349527989895, + "learning_rate": 1.4180120167220941e-05, + "loss": 0.586786150932312, + "step": 2700 + }, + { + "epoch": 0.7898815616318176, + "grad_norm": 1.2133244570308048, + "learning_rate": 1.4175727630324598e-05, + "loss": 0.5208219289779663, + "step": 2701 + }, + { + "epoch": 0.7901740020470829, + "grad_norm": 1.2365924450408214, + "learning_rate": 1.4171334117377312e-05, + "loss": 0.5925623178482056, + "step": 2702 + }, + { + "epoch": 0.7904664424623483, + "grad_norm": 1.5006045037979843, + "learning_rate": 
1.4166939629406034e-05, + "loss": 0.7095032930374146, + "step": 2703 + }, + { + "epoch": 0.7907588828776136, + "grad_norm": 1.167282378609361, + "learning_rate": 1.4162544167437955e-05, + "loss": 0.5683872699737549, + "step": 2704 + }, + { + "epoch": 0.7910513232928791, + "grad_norm": 1.2605941476894575, + "learning_rate": 1.4158147732500482e-05, + "loss": 0.7079274654388428, + "step": 2705 + }, + { + "epoch": 0.7913437637081445, + "grad_norm": 1.3186161570017685, + "learning_rate": 1.415375032562126e-05, + "loss": 0.6336439847946167, + "step": 2706 + }, + { + "epoch": 0.7916362041234098, + "grad_norm": 1.14446239802259, + "learning_rate": 1.414935194782816e-05, + "loss": 0.4842381477355957, + "step": 2707 + }, + { + "epoch": 0.7919286445386753, + "grad_norm": 1.4296190875249344, + "learning_rate": 1.4144952600149267e-05, + "loss": 0.5439653396606445, + "step": 2708 + }, + { + "epoch": 0.7922210849539406, + "grad_norm": 1.2988205927389838, + "learning_rate": 1.4140552283612906e-05, + "loss": 0.6365468502044678, + "step": 2709 + }, + { + "epoch": 0.792513525369206, + "grad_norm": 1.3854921286863888, + "learning_rate": 1.4136150999247623e-05, + "loss": 0.6192438006401062, + "step": 2710 + }, + { + "epoch": 0.7928059657844714, + "grad_norm": 1.2293031316317269, + "learning_rate": 1.4131748748082191e-05, + "loss": 0.5695269703865051, + "step": 2711 + }, + { + "epoch": 0.7930984061997368, + "grad_norm": 1.3405661548900325, + "learning_rate": 1.4127345531145614e-05, + "loss": 0.6892319321632385, + "step": 2712 + }, + { + "epoch": 0.7933908466150021, + "grad_norm": 1.5220370415080073, + "learning_rate": 1.4122941349467109e-05, + "loss": 0.6294678449630737, + "step": 2713 + }, + { + "epoch": 0.7936832870302676, + "grad_norm": 1.2086123903849104, + "learning_rate": 1.4118536204076135e-05, + "loss": 0.6666272878646851, + "step": 2714 + }, + { + "epoch": 0.793975727445533, + "grad_norm": 1.2066166036349477, + "learning_rate": 1.4114130096002363e-05, + "loss": 
0.5981796383857727, + "step": 2715 + }, + { + "epoch": 0.7942681678607983, + "grad_norm": 1.5676320725913573, + "learning_rate": 1.4109723026275695e-05, + "loss": 0.6120023131370544, + "step": 2716 + }, + { + "epoch": 0.7945606082760638, + "grad_norm": 1.536602454646116, + "learning_rate": 1.4105314995926257e-05, + "loss": 0.5892866849899292, + "step": 2717 + }, + { + "epoch": 0.7948530486913291, + "grad_norm": 1.504529299257153, + "learning_rate": 1.4100906005984404e-05, + "loss": 0.7625553607940674, + "step": 2718 + }, + { + "epoch": 0.7951454891065945, + "grad_norm": 1.4565362056936688, + "learning_rate": 1.40964960574807e-05, + "loss": 0.643633246421814, + "step": 2719 + }, + { + "epoch": 0.7954379295218599, + "grad_norm": 1.2108583839611744, + "learning_rate": 1.4092085151445953e-05, + "loss": 0.46422284841537476, + "step": 2720 + }, + { + "epoch": 0.7957303699371253, + "grad_norm": 1.2654408745652597, + "learning_rate": 1.4087673288911182e-05, + "loss": 0.6290001273155212, + "step": 2721 + }, + { + "epoch": 0.7960228103523908, + "grad_norm": 1.2400549293858325, + "learning_rate": 1.4083260470907632e-05, + "loss": 0.5175197124481201, + "step": 2722 + }, + { + "epoch": 0.7963152507676561, + "grad_norm": 1.4748861405916942, + "learning_rate": 1.4078846698466776e-05, + "loss": 0.6475427150726318, + "step": 2723 + }, + { + "epoch": 0.7966076911829215, + "grad_norm": 1.3254407316825372, + "learning_rate": 1.40744319726203e-05, + "loss": 0.5978254079818726, + "step": 2724 + }, + { + "epoch": 0.7969001315981868, + "grad_norm": 1.2991181525686113, + "learning_rate": 1.4070016294400124e-05, + "loss": 0.5738629102706909, + "step": 2725 + }, + { + "epoch": 0.7971925720134523, + "grad_norm": 1.3493198611941248, + "learning_rate": 1.4065599664838388e-05, + "loss": 0.5809024572372437, + "step": 2726 + }, + { + "epoch": 0.7974850124287176, + "grad_norm": 1.1539725667160117, + "learning_rate": 1.4061182084967446e-05, + "loss": 0.5907782316207886, + "step": 2727 + }, + { + 
"epoch": 0.797777452843983, + "grad_norm": 1.4493981600012322, + "learning_rate": 1.4056763555819887e-05, + "loss": 0.7640036344528198, + "step": 2728 + }, + { + "epoch": 0.7980698932592484, + "grad_norm": 1.5601806517528776, + "learning_rate": 1.4052344078428513e-05, + "loss": 0.7472168207168579, + "step": 2729 + }, + { + "epoch": 0.7983623336745138, + "grad_norm": 1.6018546047693625, + "learning_rate": 1.4047923653826347e-05, + "loss": 0.6726990342140198, + "step": 2730 + }, + { + "epoch": 0.7986547740897793, + "grad_norm": 1.3791137229331067, + "learning_rate": 1.404350228304664e-05, + "loss": 0.5949650406837463, + "step": 2731 + }, + { + "epoch": 0.7989472145050446, + "grad_norm": 1.386756095528374, + "learning_rate": 1.403907996712286e-05, + "loss": 0.5578774213790894, + "step": 2732 + }, + { + "epoch": 0.79923965492031, + "grad_norm": 1.5271585141569006, + "learning_rate": 1.4034656707088692e-05, + "loss": 0.6092333197593689, + "step": 2733 + }, + { + "epoch": 0.7995320953355753, + "grad_norm": 1.3098390209876276, + "learning_rate": 1.4030232503978053e-05, + "loss": 0.5095718502998352, + "step": 2734 + }, + { + "epoch": 0.7998245357508408, + "grad_norm": 1.3675399597044373, + "learning_rate": 1.4025807358825072e-05, + "loss": 0.5155727863311768, + "step": 2735 + }, + { + "epoch": 0.8001169761661061, + "grad_norm": 1.3309663791332569, + "learning_rate": 1.4021381272664094e-05, + "loss": 0.5752589702606201, + "step": 2736 + }, + { + "epoch": 0.8004094165813715, + "grad_norm": 1.3619611747950222, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.6334787607192993, + "step": 2737 + }, + { + "epoch": 0.800701856996637, + "grad_norm": 1.3830503239164076, + "learning_rate": 1.4012526281456666e-05, + "loss": 0.7406032085418701, + "step": 2738 + }, + { + "epoch": 0.8009942974119023, + "grad_norm": 1.2904369174268238, + "learning_rate": 1.4008097378480014e-05, + "loss": 0.5805078744888306, + "step": 2739 + }, + { + "epoch": 0.8012867378271677, + "grad_norm": 
1.3584200788658642, + "learning_rate": 1.4003667538634972e-05, + "loss": 0.6849163770675659, + "step": 2740 + }, + { + "epoch": 0.8015791782424331, + "grad_norm": 1.5354340760410032, + "learning_rate": 1.3999236762956985e-05, + "loss": 0.7707695960998535, + "step": 2741 + }, + { + "epoch": 0.8018716186576985, + "grad_norm": 1.426293329050591, + "learning_rate": 1.3994805052481715e-05, + "loss": 0.6253059506416321, + "step": 2742 + }, + { + "epoch": 0.8021640590729638, + "grad_norm": 1.274928204575108, + "learning_rate": 1.3990372408245057e-05, + "loss": 0.6450316905975342, + "step": 2743 + }, + { + "epoch": 0.8024564994882293, + "grad_norm": 1.2867865996346037, + "learning_rate": 1.398593883128311e-05, + "loss": 0.672899603843689, + "step": 2744 + }, + { + "epoch": 0.8027489399034947, + "grad_norm": 1.38176481949922, + "learning_rate": 1.3981504322632198e-05, + "loss": 0.6203787326812744, + "step": 2745 + }, + { + "epoch": 0.80304138031876, + "grad_norm": 1.296034523853111, + "learning_rate": 1.3977068883328854e-05, + "loss": 0.541740894317627, + "step": 2746 + }, + { + "epoch": 0.8033338207340255, + "grad_norm": 1.3608273440615848, + "learning_rate": 1.3972632514409843e-05, + "loss": 0.5566504001617432, + "step": 2747 + }, + { + "epoch": 0.8036262611492908, + "grad_norm": 1.378445494532888, + "learning_rate": 1.3968195216912135e-05, + "loss": 0.6911404728889465, + "step": 2748 + }, + { + "epoch": 0.8039187015645562, + "grad_norm": 1.3758218413869647, + "learning_rate": 1.3963756991872921e-05, + "loss": 0.6744735240936279, + "step": 2749 + }, + { + "epoch": 0.8042111419798216, + "grad_norm": 1.3810636187989935, + "learning_rate": 1.3959317840329613e-05, + "loss": 0.6660502552986145, + "step": 2750 + }, + { + "epoch": 0.804503582395087, + "grad_norm": 1.611467815082346, + "learning_rate": 1.3954877763319832e-05, + "loss": 0.607395589351654, + "step": 2751 + }, + { + "epoch": 0.8047960228103523, + "grad_norm": 1.3065536354182021, + "learning_rate": 
1.395043676188142e-05, + "loss": 0.53249192237854, + "step": 2752 + }, + { + "epoch": 0.8050884632256178, + "grad_norm": 1.384670069600496, + "learning_rate": 1.394599483705243e-05, + "loss": 0.5728630423545837, + "step": 2753 + }, + { + "epoch": 0.8053809036408832, + "grad_norm": 1.354298055615179, + "learning_rate": 1.3941551989871142e-05, + "loss": 0.6912537813186646, + "step": 2754 + }, + { + "epoch": 0.8056733440561485, + "grad_norm": 1.2211163784496284, + "learning_rate": 1.3937108221376041e-05, + "loss": 0.6002523899078369, + "step": 2755 + }, + { + "epoch": 0.805965784471414, + "grad_norm": 1.165855753943377, + "learning_rate": 1.3932663532605832e-05, + "loss": 0.6573797464370728, + "step": 2756 + }, + { + "epoch": 0.8062582248866793, + "grad_norm": 1.2846173311931015, + "learning_rate": 1.3928217924599433e-05, + "loss": 0.6997278928756714, + "step": 2757 + }, + { + "epoch": 0.8065506653019447, + "grad_norm": 1.3457721921363819, + "learning_rate": 1.3923771398395978e-05, + "loss": 0.565264105796814, + "step": 2758 + }, + { + "epoch": 0.8068431057172101, + "grad_norm": 1.7064740069380804, + "learning_rate": 1.3919323955034815e-05, + "loss": 0.8065239191055298, + "step": 2759 + }, + { + "epoch": 0.8071355461324755, + "grad_norm": 1.4850507802988735, + "learning_rate": 1.3914875595555509e-05, + "loss": 0.556678056716919, + "step": 2760 + }, + { + "epoch": 0.807427986547741, + "grad_norm": 1.653442619870376, + "learning_rate": 1.3910426320997834e-05, + "loss": 0.5528635382652283, + "step": 2761 + }, + { + "epoch": 0.8077204269630063, + "grad_norm": 1.4210714864438183, + "learning_rate": 1.3905976132401785e-05, + "loss": 0.6127038598060608, + "step": 2762 + }, + { + "epoch": 0.8080128673782717, + "grad_norm": 1.4473812948635245, + "learning_rate": 1.390152503080756e-05, + "loss": 0.6311757564544678, + "step": 2763 + }, + { + "epoch": 0.808305307793537, + "grad_norm": 1.256496005559394, + "learning_rate": 1.389707301725558e-05, + "loss": 0.669788122177124, + 
"step": 2764 + }, + { + "epoch": 0.8085977482088025, + "grad_norm": 1.1602455830470428, + "learning_rate": 1.3892620092786477e-05, + "loss": 0.48408570885658264, + "step": 2765 + }, + { + "epoch": 0.8088901886240678, + "grad_norm": 1.3816192110102654, + "learning_rate": 1.3888166258441098e-05, + "loss": 0.5648288726806641, + "step": 2766 + }, + { + "epoch": 0.8091826290393332, + "grad_norm": 1.359222924847667, + "learning_rate": 1.3883711515260497e-05, + "loss": 0.5894806385040283, + "step": 2767 + }, + { + "epoch": 0.8094750694545986, + "grad_norm": 1.609438084965147, + "learning_rate": 1.3879255864285939e-05, + "loss": 0.8325392603874207, + "step": 2768 + }, + { + "epoch": 0.809767509869864, + "grad_norm": 1.3200888192290248, + "learning_rate": 1.387479930655891e-05, + "loss": 0.5282119512557983, + "step": 2769 + }, + { + "epoch": 0.8100599502851294, + "grad_norm": 1.2020970963419326, + "learning_rate": 1.3870341843121104e-05, + "loss": 0.7565277218818665, + "step": 2770 + }, + { + "epoch": 0.8103523907003948, + "grad_norm": 1.20769025145285, + "learning_rate": 1.3865883475014424e-05, + "loss": 0.5767146944999695, + "step": 2771 + }, + { + "epoch": 0.8106448311156602, + "grad_norm": 1.3747646237948088, + "learning_rate": 1.3861424203280987e-05, + "loss": 0.5988898873329163, + "step": 2772 + }, + { + "epoch": 0.8109372715309255, + "grad_norm": 1.2837797411261327, + "learning_rate": 1.3856964028963119e-05, + "loss": 0.5752500295639038, + "step": 2773 + }, + { + "epoch": 0.811229711946191, + "grad_norm": 1.3281997353125305, + "learning_rate": 1.385250295310336e-05, + "loss": 0.6834297776222229, + "step": 2774 + }, + { + "epoch": 0.8115221523614563, + "grad_norm": 1.376792748908409, + "learning_rate": 1.3848040976744459e-05, + "loss": 0.5667037963867188, + "step": 2775 + }, + { + "epoch": 0.8118145927767217, + "grad_norm": 1.33236222276005, + "learning_rate": 1.3843578100929375e-05, + "loss": 0.5618781447410583, + "step": 2776 + }, + { + "epoch": 0.8121070331919872, 
+ "grad_norm": 1.4974631308124338, + "learning_rate": 1.3839114326701281e-05, + "loss": 0.538033664226532, + "step": 2777 + }, + { + "epoch": 0.8123994736072525, + "grad_norm": 1.3236430994846111, + "learning_rate": 1.3834649655103556e-05, + "loss": 0.7218335270881653, + "step": 2778 + }, + { + "epoch": 0.812691914022518, + "grad_norm": 1.3045533775783231, + "learning_rate": 1.383018408717979e-05, + "loss": 0.5979611873626709, + "step": 2779 + }, + { + "epoch": 0.8129843544377833, + "grad_norm": 1.191818251767074, + "learning_rate": 1.3825717623973775e-05, + "loss": 0.4958215355873108, + "step": 2780 + }, + { + "epoch": 0.8132767948530487, + "grad_norm": 1.4132643925978479, + "learning_rate": 1.3821250266529531e-05, + "loss": 0.6759654879570007, + "step": 2781 + }, + { + "epoch": 0.813569235268314, + "grad_norm": 1.1873413404245543, + "learning_rate": 1.3816782015891272e-05, + "loss": 0.5499521493911743, + "step": 2782 + }, + { + "epoch": 0.8138616756835795, + "grad_norm": 1.327517100573182, + "learning_rate": 1.3812312873103425e-05, + "loss": 0.5308753252029419, + "step": 2783 + }, + { + "epoch": 0.8141541160988449, + "grad_norm": 1.4850132833469487, + "learning_rate": 1.3807842839210617e-05, + "loss": 0.585492730140686, + "step": 2784 + }, + { + "epoch": 0.8144465565141102, + "grad_norm": 1.5985853231384999, + "learning_rate": 1.3803371915257702e-05, + "loss": 0.6598281860351562, + "step": 2785 + }, + { + "epoch": 0.8147389969293757, + "grad_norm": 1.2500600856454092, + "learning_rate": 1.3798900102289726e-05, + "loss": 0.6819334030151367, + "step": 2786 + }, + { + "epoch": 0.815031437344641, + "grad_norm": 2.1106639284366877, + "learning_rate": 1.3794427401351946e-05, + "loss": 0.6548545360565186, + "step": 2787 + }, + { + "epoch": 0.8153238777599064, + "grad_norm": 1.4934248295829666, + "learning_rate": 1.3789953813489834e-05, + "loss": 0.7836263179779053, + "step": 2788 + }, + { + "epoch": 0.8156163181751718, + "grad_norm": 1.3092153960785353, + 
"learning_rate": 1.3785479339749062e-05, + "loss": 0.6108324527740479, + "step": 2789 + }, + { + "epoch": 0.8159087585904372, + "grad_norm": 1.4189973842835568, + "learning_rate": 1.378100398117551e-05, + "loss": 0.7079485058784485, + "step": 2790 + }, + { + "epoch": 0.8162011990057025, + "grad_norm": 1.2593140459847156, + "learning_rate": 1.3776527738815264e-05, + "loss": 0.5935578346252441, + "step": 2791 + }, + { + "epoch": 0.816493639420968, + "grad_norm": 1.159439153093783, + "learning_rate": 1.3772050613714623e-05, + "loss": 0.5559983253479004, + "step": 2792 + }, + { + "epoch": 0.8167860798362334, + "grad_norm": 1.2282449471592758, + "learning_rate": 1.3767572606920083e-05, + "loss": 0.6230447292327881, + "step": 2793 + }, + { + "epoch": 0.8170785202514987, + "grad_norm": 1.3750755360912204, + "learning_rate": 1.3763093719478357e-05, + "loss": 0.5672184824943542, + "step": 2794 + }, + { + "epoch": 0.8173709606667642, + "grad_norm": 1.3345649111405589, + "learning_rate": 1.3758613952436353e-05, + "loss": 0.6933468580245972, + "step": 2795 + }, + { + "epoch": 0.8176634010820295, + "grad_norm": 1.299919441217989, + "learning_rate": 1.3754133306841188e-05, + "loss": 0.5873827934265137, + "step": 2796 + }, + { + "epoch": 0.8179558414972949, + "grad_norm": 1.3238138716227077, + "learning_rate": 1.3749651783740188e-05, + "loss": 0.6061393022537231, + "step": 2797 + }, + { + "epoch": 0.8182482819125603, + "grad_norm": 1.3503137209197107, + "learning_rate": 1.3745169384180886e-05, + "loss": 0.6218947768211365, + "step": 2798 + }, + { + "epoch": 0.8185407223278257, + "grad_norm": 1.584036085033884, + "learning_rate": 1.3740686109211008e-05, + "loss": 0.6092264652252197, + "step": 2799 + }, + { + "epoch": 0.8188331627430911, + "grad_norm": 1.4327213465282531, + "learning_rate": 1.3736201959878497e-05, + "loss": 0.6145539283752441, + "step": 2800 + }, + { + "epoch": 0.8191256031583565, + "grad_norm": 1.1433366189059146, + "learning_rate": 1.3731716937231493e-05, + 
"loss": 0.4637746214866638, + "step": 2801 + }, + { + "epoch": 0.8194180435736219, + "grad_norm": 1.2802202387296946, + "learning_rate": 1.3727231042318345e-05, + "loss": 0.6102726459503174, + "step": 2802 + }, + { + "epoch": 0.8197104839888872, + "grad_norm": 1.3432330324336637, + "learning_rate": 1.3722744276187603e-05, + "loss": 0.5885297060012817, + "step": 2803 + }, + { + "epoch": 0.8200029244041527, + "grad_norm": 1.4575985112282515, + "learning_rate": 1.3718256639888021e-05, + "loss": 0.592369019985199, + "step": 2804 + }, + { + "epoch": 0.820295364819418, + "grad_norm": 1.4943856663354038, + "learning_rate": 1.3713768134468557e-05, + "loss": 0.5194098949432373, + "step": 2805 + }, + { + "epoch": 0.8205878052346834, + "grad_norm": 1.3716539173176907, + "learning_rate": 1.370927876097837e-05, + "loss": 0.6033506393432617, + "step": 2806 + }, + { + "epoch": 0.8208802456499488, + "grad_norm": 1.686602588559283, + "learning_rate": 1.3704788520466828e-05, + "loss": 0.6866108179092407, + "step": 2807 + }, + { + "epoch": 0.8211726860652142, + "grad_norm": 1.564205528186879, + "learning_rate": 1.3700297413983492e-05, + "loss": 0.7325261831283569, + "step": 2808 + }, + { + "epoch": 0.8214651264804796, + "grad_norm": 1.531257665763453, + "learning_rate": 1.3695805442578136e-05, + "loss": 0.5422608852386475, + "step": 2809 + }, + { + "epoch": 0.821757566895745, + "grad_norm": 1.5581516895112182, + "learning_rate": 1.369131260730073e-05, + "loss": 0.6124732494354248, + "step": 2810 + }, + { + "epoch": 0.8220500073110104, + "grad_norm": 1.3009124551880797, + "learning_rate": 1.3686818909201442e-05, + "loss": 0.6097716093063354, + "step": 2811 + }, + { + "epoch": 0.8223424477262757, + "grad_norm": 1.302794206877671, + "learning_rate": 1.3682324349330652e-05, + "loss": 0.6283478140830994, + "step": 2812 + }, + { + "epoch": 0.8226348881415412, + "grad_norm": 1.6179042229288885, + "learning_rate": 1.3677828928738934e-05, + "loss": 0.6590027213096619, + "step": 2813 + }, + { 
+ "epoch": 0.8229273285568065, + "grad_norm": 1.5247617474384554, + "learning_rate": 1.3673332648477065e-05, + "loss": 0.6417049169540405, + "step": 2814 + }, + { + "epoch": 0.8232197689720719, + "grad_norm": 1.510678230362789, + "learning_rate": 1.3668835509596023e-05, + "loss": 0.6217149496078491, + "step": 2815 + }, + { + "epoch": 0.8235122093873374, + "grad_norm": 1.9022694632783144, + "learning_rate": 1.3664337513146993e-05, + "loss": 0.7530043125152588, + "step": 2816 + }, + { + "epoch": 0.8238046498026027, + "grad_norm": 1.3235640761468095, + "learning_rate": 1.3659838660181341e-05, + "loss": 0.6690578460693359, + "step": 2817 + }, + { + "epoch": 0.8240970902178681, + "grad_norm": 1.5311368229830338, + "learning_rate": 1.3655338951750657e-05, + "loss": 0.5348777174949646, + "step": 2818 + }, + { + "epoch": 0.8243895306331335, + "grad_norm": 1.494896630136579, + "learning_rate": 1.3650838388906718e-05, + "loss": 0.7076361179351807, + "step": 2819 + }, + { + "epoch": 0.8246819710483989, + "grad_norm": 1.611810759372966, + "learning_rate": 1.3646336972701507e-05, + "loss": 0.6649855375289917, + "step": 2820 + }, + { + "epoch": 0.8249744114636642, + "grad_norm": 1.4188027146347701, + "learning_rate": 1.3641834704187194e-05, + "loss": 0.6484942436218262, + "step": 2821 + }, + { + "epoch": 0.8252668518789297, + "grad_norm": 1.066364944063908, + "learning_rate": 1.3637331584416163e-05, + "loss": 0.5167717337608337, + "step": 2822 + }, + { + "epoch": 0.8255592922941951, + "grad_norm": 1.4320675291883214, + "learning_rate": 1.3632827614440988e-05, + "loss": 0.7808440327644348, + "step": 2823 + }, + { + "epoch": 0.8258517327094604, + "grad_norm": 1.6437853600585473, + "learning_rate": 1.3628322795314449e-05, + "loss": 0.551183819770813, + "step": 2824 + }, + { + "epoch": 0.8261441731247259, + "grad_norm": 1.3439080199790612, + "learning_rate": 1.3623817128089513e-05, + "loss": 0.6084691286087036, + "step": 2825 + }, + { + "epoch": 0.8264366135399912, + "grad_norm": 
1.3974747336185755, + "learning_rate": 1.3619310613819363e-05, + "loss": 0.6251019239425659, + "step": 2826 + }, + { + "epoch": 0.8267290539552566, + "grad_norm": 1.237260204163714, + "learning_rate": 1.3614803253557358e-05, + "loss": 0.5037761926651001, + "step": 2827 + }, + { + "epoch": 0.827021494370522, + "grad_norm": 1.3461097726205675, + "learning_rate": 1.3610295048357072e-05, + "loss": 0.5606831312179565, + "step": 2828 + }, + { + "epoch": 0.8273139347857874, + "grad_norm": 1.3850167464051482, + "learning_rate": 1.360578599927227e-05, + "loss": 0.6664785146713257, + "step": 2829 + }, + { + "epoch": 0.8276063752010527, + "grad_norm": 1.3613746427457352, + "learning_rate": 1.360127610735691e-05, + "loss": 0.7105492353439331, + "step": 2830 + }, + { + "epoch": 0.8278988156163182, + "grad_norm": 1.3577681820511107, + "learning_rate": 1.3596765373665162e-05, + "loss": 0.6255359053611755, + "step": 2831 + }, + { + "epoch": 0.8281912560315836, + "grad_norm": 1.3150522794807806, + "learning_rate": 1.3592253799251377e-05, + "loss": 0.5422149300575256, + "step": 2832 + }, + { + "epoch": 0.8284836964468489, + "grad_norm": 1.4383576380181533, + "learning_rate": 1.3587741385170104e-05, + "loss": 0.6044044494628906, + "step": 2833 + }, + { + "epoch": 0.8287761368621144, + "grad_norm": 1.2478223452248756, + "learning_rate": 1.3583228132476094e-05, + "loss": 0.6256763935089111, + "step": 2834 + }, + { + "epoch": 0.8290685772773797, + "grad_norm": 1.2507601544621354, + "learning_rate": 1.3578714042224297e-05, + "loss": 0.6759064793586731, + "step": 2835 + }, + { + "epoch": 0.8293610176926451, + "grad_norm": 1.3610869198536528, + "learning_rate": 1.3574199115469852e-05, + "loss": 0.5819023251533508, + "step": 2836 + }, + { + "epoch": 0.8296534581079105, + "grad_norm": 1.331505314238688, + "learning_rate": 1.3569683353268098e-05, + "loss": 0.5412642359733582, + "step": 2837 + }, + { + "epoch": 0.8299458985231759, + "grad_norm": 1.0998151045906572, + "learning_rate": 
1.356516675667456e-05, + "loss": 0.5129171013832092, + "step": 2838 + }, + { + "epoch": 0.8302383389384413, + "grad_norm": 1.310393887156268, + "learning_rate": 1.356064932674497e-05, + "loss": 0.5165198445320129, + "step": 2839 + }, + { + "epoch": 0.8305307793537067, + "grad_norm": 1.287643091691659, + "learning_rate": 1.3556131064535249e-05, + "loss": 0.6545724272727966, + "step": 2840 + }, + { + "epoch": 0.8308232197689721, + "grad_norm": 1.2180901867245224, + "learning_rate": 1.3551611971101513e-05, + "loss": 0.5715968608856201, + "step": 2841 + }, + { + "epoch": 0.8311156601842374, + "grad_norm": 1.1619522611517994, + "learning_rate": 1.3547092047500074e-05, + "loss": 0.7063779830932617, + "step": 2842 + }, + { + "epoch": 0.8314081005995029, + "grad_norm": 1.2876429096537105, + "learning_rate": 1.3542571294787437e-05, + "loss": 0.6391212940216064, + "step": 2843 + }, + { + "epoch": 0.8317005410147682, + "grad_norm": 1.3047489403917027, + "learning_rate": 1.3538049714020298e-05, + "loss": 0.7145380973815918, + "step": 2844 + }, + { + "epoch": 0.8319929814300336, + "grad_norm": 1.4749234473747483, + "learning_rate": 1.3533527306255547e-05, + "loss": 0.7262213230133057, + "step": 2845 + }, + { + "epoch": 0.832285421845299, + "grad_norm": 1.5661213009447377, + "learning_rate": 1.3529004072550276e-05, + "loss": 0.7621959447860718, + "step": 2846 + }, + { + "epoch": 0.8325778622605644, + "grad_norm": 1.2349365167185542, + "learning_rate": 1.3524480013961757e-05, + "loss": 0.6372592449188232, + "step": 2847 + }, + { + "epoch": 0.8328703026758298, + "grad_norm": 1.5746526285594844, + "learning_rate": 1.3519955131547469e-05, + "loss": 0.6223774552345276, + "step": 2848 + }, + { + "epoch": 0.8331627430910952, + "grad_norm": 1.3246634087041118, + "learning_rate": 1.3515429426365066e-05, + "loss": 0.6500433683395386, + "step": 2849 + }, + { + "epoch": 0.8334551835063606, + "grad_norm": 1.4424195637381385, + "learning_rate": 1.3510902899472408e-05, + "loss": 
0.6136040687561035, + "step": 2850 + }, + { + "epoch": 0.8337476239216259, + "grad_norm": 1.512738908953339, + "learning_rate": 1.3506375551927546e-05, + "loss": 0.5297173261642456, + "step": 2851 + }, + { + "epoch": 0.8340400643368914, + "grad_norm": 1.4629352546381682, + "learning_rate": 1.3501847384788718e-05, + "loss": 0.6215870976448059, + "step": 2852 + }, + { + "epoch": 0.8343325047521567, + "grad_norm": 1.3184866454725659, + "learning_rate": 1.3497318399114354e-05, + "loss": 0.5507583618164062, + "step": 2853 + }, + { + "epoch": 0.8346249451674221, + "grad_norm": 1.6022185079697295, + "learning_rate": 1.349278859596308e-05, + "loss": 0.6348794102668762, + "step": 2854 + }, + { + "epoch": 0.8349173855826876, + "grad_norm": 1.4038791520130975, + "learning_rate": 1.3488257976393708e-05, + "loss": 0.7009605765342712, + "step": 2855 + }, + { + "epoch": 0.8352098259979529, + "grad_norm": 1.2288500000369813, + "learning_rate": 1.3483726541465238e-05, + "loss": 0.6268658638000488, + "step": 2856 + }, + { + "epoch": 0.8355022664132183, + "grad_norm": 1.1391793971559063, + "learning_rate": 1.3479194292236875e-05, + "loss": 0.7187683582305908, + "step": 2857 + }, + { + "epoch": 0.8357947068284837, + "grad_norm": 1.5724396660128028, + "learning_rate": 1.3474661229768002e-05, + "loss": 0.7016449570655823, + "step": 2858 + }, + { + "epoch": 0.8360871472437491, + "grad_norm": 1.5882858400771258, + "learning_rate": 1.347012735511819e-05, + "loss": 0.5852428674697876, + "step": 2859 + }, + { + "epoch": 0.8363795876590144, + "grad_norm": 1.4143289380031852, + "learning_rate": 1.3465592669347207e-05, + "loss": 0.6232450008392334, + "step": 2860 + }, + { + "epoch": 0.8366720280742799, + "grad_norm": 1.3444277392597084, + "learning_rate": 1.346105717351501e-05, + "loss": 0.526097297668457, + "step": 2861 + }, + { + "epoch": 0.8369644684895453, + "grad_norm": 1.5627282993073515, + "learning_rate": 1.3456520868681741e-05, + "loss": 0.6065535545349121, + "step": 2862 + }, + { + 
"epoch": 0.8372569089048106, + "grad_norm": 1.3941305759607394, + "learning_rate": 1.3451983755907736e-05, + "loss": 0.5836296677589417, + "step": 2863 + }, + { + "epoch": 0.8375493493200761, + "grad_norm": 1.336778139255592, + "learning_rate": 1.3447445836253519e-05, + "loss": 0.678827166557312, + "step": 2864 + }, + { + "epoch": 0.8378417897353414, + "grad_norm": 1.3002974651392025, + "learning_rate": 1.3442907110779794e-05, + "loss": 0.5206096172332764, + "step": 2865 + }, + { + "epoch": 0.8381342301506068, + "grad_norm": 1.3468789034772342, + "learning_rate": 1.3438367580547468e-05, + "loss": 0.6424980163574219, + "step": 2866 + }, + { + "epoch": 0.8384266705658722, + "grad_norm": 1.1467777796306478, + "learning_rate": 1.3433827246617624e-05, + "loss": 0.6293484568595886, + "step": 2867 + }, + { + "epoch": 0.8387191109811376, + "grad_norm": 1.2601562582063903, + "learning_rate": 1.3429286110051539e-05, + "loss": 0.5912167429924011, + "step": 2868 + }, + { + "epoch": 0.8390115513964029, + "grad_norm": 1.5181261084157656, + "learning_rate": 1.342474417191068e-05, + "loss": 0.6571674346923828, + "step": 2869 + }, + { + "epoch": 0.8393039918116684, + "grad_norm": 1.421037061270542, + "learning_rate": 1.342020143325669e-05, + "loss": 0.5519720911979675, + "step": 2870 + }, + { + "epoch": 0.8395964322269338, + "grad_norm": 1.3997247827352193, + "learning_rate": 1.341565789515141e-05, + "loss": 0.6465001106262207, + "step": 2871 + }, + { + "epoch": 0.8398888726421991, + "grad_norm": 1.398359818513133, + "learning_rate": 1.3411113558656865e-05, + "loss": 0.6022073030471802, + "step": 2872 + }, + { + "epoch": 0.8401813130574646, + "grad_norm": 1.361775248337709, + "learning_rate": 1.3406568424835264e-05, + "loss": 0.610893726348877, + "step": 2873 + }, + { + "epoch": 0.8404737534727299, + "grad_norm": 1.427563498701008, + "learning_rate": 1.340202249474901e-05, + "loss": 0.5296563506126404, + "step": 2874 + }, + { + "epoch": 0.8407661938879953, + "grad_norm": 
1.170906744718837, + "learning_rate": 1.3397475769460679e-05, + "loss": 0.6327008605003357, + "step": 2875 + }, + { + "epoch": 0.8410586343032607, + "grad_norm": 1.3517928558744952, + "learning_rate": 1.3392928250033045e-05, + "loss": 0.6437617540359497, + "step": 2876 + }, + { + "epoch": 0.8413510747185261, + "grad_norm": 1.3416431365752262, + "learning_rate": 1.3388379937529063e-05, + "loss": 0.5627291202545166, + "step": 2877 + }, + { + "epoch": 0.8416435151337915, + "grad_norm": 1.3602688623647594, + "learning_rate": 1.3383830833011871e-05, + "loss": 0.5921163558959961, + "step": 2878 + }, + { + "epoch": 0.8419359555490569, + "grad_norm": 1.2033937218328357, + "learning_rate": 1.3379280937544797e-05, + "loss": 0.5749082565307617, + "step": 2879 + }, + { + "epoch": 0.8422283959643223, + "grad_norm": 1.462463173522237, + "learning_rate": 1.3374730252191347e-05, + "loss": 0.6294553279876709, + "step": 2880 + }, + { + "epoch": 0.8425208363795876, + "grad_norm": 1.222130659730857, + "learning_rate": 1.3370178778015223e-05, + "loss": 0.5172078609466553, + "step": 2881 + }, + { + "epoch": 0.8428132767948531, + "grad_norm": 1.3695607626504847, + "learning_rate": 1.3365626516080301e-05, + "loss": 0.44069811701774597, + "step": 2882 + }, + { + "epoch": 0.8431057172101184, + "grad_norm": 1.31704500891114, + "learning_rate": 1.336107346745064e-05, + "loss": 0.72663813829422, + "step": 2883 + }, + { + "epoch": 0.8433981576253838, + "grad_norm": 1.3488066557741722, + "learning_rate": 1.3356519633190495e-05, + "loss": 0.6562269926071167, + "step": 2884 + }, + { + "epoch": 0.8436905980406492, + "grad_norm": 1.3994820366244107, + "learning_rate": 1.3351965014364293e-05, + "loss": 0.699925422668457, + "step": 2885 + }, + { + "epoch": 0.8439830384559146, + "grad_norm": 1.3294441855934318, + "learning_rate": 1.3347409612036651e-05, + "loss": 0.5902425646781921, + "step": 2886 + }, + { + "epoch": 0.84427547887118, + "grad_norm": 1.401705271294413, + "learning_rate": 
1.3342853427272362e-05, + "loss": 0.613966703414917, + "step": 2887 + }, + { + "epoch": 0.8445679192864454, + "grad_norm": 1.2630848315271062, + "learning_rate": 1.333829646113641e-05, + "loss": 0.5864139199256897, + "step": 2888 + }, + { + "epoch": 0.8448603597017108, + "grad_norm": 1.5447722719058155, + "learning_rate": 1.3333738714693958e-05, + "loss": 0.5851572751998901, + "step": 2889 + }, + { + "epoch": 0.8451528001169761, + "grad_norm": 1.4679598706703352, + "learning_rate": 1.3329180189010348e-05, + "loss": 0.6564328074455261, + "step": 2890 + }, + { + "epoch": 0.8454452405322416, + "grad_norm": 1.3794930949186583, + "learning_rate": 1.3324620885151115e-05, + "loss": 0.6745615005493164, + "step": 2891 + }, + { + "epoch": 0.8457376809475069, + "grad_norm": 1.277678612967463, + "learning_rate": 1.3320060804181962e-05, + "loss": 0.5003606081008911, + "step": 2892 + }, + { + "epoch": 0.8460301213627723, + "grad_norm": 1.4995028165986726, + "learning_rate": 1.3315499947168781e-05, + "loss": 0.6646369695663452, + "step": 2893 + }, + { + "epoch": 0.8463225617780378, + "grad_norm": 1.3696086888087433, + "learning_rate": 1.3310938315177647e-05, + "loss": 0.6903572082519531, + "step": 2894 + }, + { + "epoch": 0.8466150021933031, + "grad_norm": 1.633835119151456, + "learning_rate": 1.330637590927481e-05, + "loss": 0.6221956610679626, + "step": 2895 + }, + { + "epoch": 0.8469074426085685, + "grad_norm": 1.5369372818354106, + "learning_rate": 1.3301812730526713e-05, + "loss": 0.5602666139602661, + "step": 2896 + }, + { + "epoch": 0.8471998830238339, + "grad_norm": 1.2910113915198014, + "learning_rate": 1.3297248779999963e-05, + "loss": 0.5843783617019653, + "step": 2897 + }, + { + "epoch": 0.8474923234390993, + "grad_norm": 1.304495064263293, + "learning_rate": 1.3292684058761357e-05, + "loss": 0.5040254592895508, + "step": 2898 + }, + { + "epoch": 0.8477847638543646, + "grad_norm": 1.4968280315795712, + "learning_rate": 1.3288118567877874e-05, + "loss": 
0.6180210709571838, + "step": 2899 + }, + { + "epoch": 0.8480772042696301, + "grad_norm": 1.345230482752467, + "learning_rate": 1.3283552308416668e-05, + "loss": 0.5050851106643677, + "step": 2900 + }, + { + "epoch": 0.8483696446848955, + "grad_norm": 1.604217394640997, + "learning_rate": 1.3278985281445072e-05, + "loss": 0.6627126932144165, + "step": 2901 + }, + { + "epoch": 0.8486620851001608, + "grad_norm": 1.40930260394039, + "learning_rate": 1.3274417488030607e-05, + "loss": 0.5984441041946411, + "step": 2902 + }, + { + "epoch": 0.8489545255154263, + "grad_norm": 1.3584927833580034, + "learning_rate": 1.3269848929240958e-05, + "loss": 0.611599326133728, + "step": 2903 + }, + { + "epoch": 0.8492469659306916, + "grad_norm": 1.4743229169395644, + "learning_rate": 1.3265279606144006e-05, + "loss": 0.6057847142219543, + "step": 2904 + }, + { + "epoch": 0.849539406345957, + "grad_norm": 1.5324921987406994, + "learning_rate": 1.3260709519807797e-05, + "loss": 0.7123644948005676, + "step": 2905 + }, + { + "epoch": 0.8498318467612224, + "grad_norm": 1.4337194400937256, + "learning_rate": 1.3256138671300564e-05, + "loss": 0.6193811893463135, + "step": 2906 + }, + { + "epoch": 0.8501242871764878, + "grad_norm": 1.6102821646068017, + "learning_rate": 1.3251567061690717e-05, + "loss": 0.5775484442710876, + "step": 2907 + }, + { + "epoch": 0.8504167275917531, + "grad_norm": 1.5171257755680165, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.5655511617660522, + "step": 2908 + }, + { + "epoch": 0.8507091680070186, + "grad_norm": 1.8299545213851978, + "learning_rate": 1.3242421563437688e-05, + "loss": 0.6216102838516235, + "step": 2909 + }, + { + "epoch": 0.851001608422284, + "grad_norm": 1.4045274179517395, + "learning_rate": 1.3237847676932217e-05, + "loss": 0.649554967880249, + "step": 2910 + }, + { + "epoch": 0.8512940488375493, + "grad_norm": 1.5965021256139, + "learning_rate": 1.3233273033599534e-05, + "loss": 0.6688281297683716, + "step": 2911 + }, + { + "epoch": 
0.8515864892528148, + "grad_norm": 1.2158705367599922, + "learning_rate": 1.322869763450894e-05, + "loss": 0.664188027381897, + "step": 2912 + }, + { + "epoch": 0.8518789296680801, + "grad_norm": 1.27994094299147, + "learning_rate": 1.3224121480729905e-05, + "loss": 0.47189265489578247, + "step": 2913 + }, + { + "epoch": 0.8521713700833455, + "grad_norm": 1.387813816085696, + "learning_rate": 1.3219544573332075e-05, + "loss": 0.6190480589866638, + "step": 2914 + }, + { + "epoch": 0.8524638104986109, + "grad_norm": 1.3459335682790516, + "learning_rate": 1.3214966913385277e-05, + "loss": 0.6564091444015503, + "step": 2915 + }, + { + "epoch": 0.8527562509138763, + "grad_norm": 1.563994961699158, + "learning_rate": 1.321038850195951e-05, + "loss": 0.6083766222000122, + "step": 2916 + }, + { + "epoch": 0.8530486913291417, + "grad_norm": 1.2689051257322506, + "learning_rate": 1.3205809340124951e-05, + "loss": 0.5262473821640015, + "step": 2917 + }, + { + "epoch": 0.8533411317444071, + "grad_norm": 1.3633671661320785, + "learning_rate": 1.320122942895195e-05, + "loss": 0.6170297861099243, + "step": 2918 + }, + { + "epoch": 0.8536335721596725, + "grad_norm": 1.3838619263880951, + "learning_rate": 1.3196648769511036e-05, + "loss": 0.5791536569595337, + "step": 2919 + }, + { + "epoch": 0.8539260125749378, + "grad_norm": 1.4116909766151964, + "learning_rate": 1.3192067362872904e-05, + "loss": 0.5870766639709473, + "step": 2920 + }, + { + "epoch": 0.8542184529902033, + "grad_norm": 1.5317627298998806, + "learning_rate": 1.3187485210108438e-05, + "loss": 0.604548990726471, + "step": 2921 + }, + { + "epoch": 0.8545108934054686, + "grad_norm": 1.3458362989469688, + "learning_rate": 1.3182902312288682e-05, + "loss": 0.5292568206787109, + "step": 2922 + }, + { + "epoch": 0.854803333820734, + "grad_norm": 1.276264176970529, + "learning_rate": 1.3178318670484862e-05, + "loss": 0.5638582706451416, + "step": 2923 + }, + { + "epoch": 0.8550957742359994, + "grad_norm": 
1.5369089697533718, + "learning_rate": 1.317373428576838e-05, + "loss": 0.5730164051055908, + "step": 2924 + }, + { + "epoch": 0.8553882146512648, + "grad_norm": 1.369500285153578, + "learning_rate": 1.3169149159210803e-05, + "loss": 0.6170799732208252, + "step": 2925 + }, + { + "epoch": 0.8556806550665302, + "grad_norm": 1.3401436683949477, + "learning_rate": 1.3164563291883879e-05, + "loss": 0.591925323009491, + "step": 2926 + }, + { + "epoch": 0.8559730954817956, + "grad_norm": 1.5178314176439451, + "learning_rate": 1.3159976684859528e-05, + "loss": 0.7269439697265625, + "step": 2927 + }, + { + "epoch": 0.856265535897061, + "grad_norm": 1.552203527248451, + "learning_rate": 1.3155389339209839e-05, + "loss": 0.615471363067627, + "step": 2928 + }, + { + "epoch": 0.8565579763123263, + "grad_norm": 1.4397776020126687, + "learning_rate": 1.3150801256007076e-05, + "loss": 0.6264692544937134, + "step": 2929 + }, + { + "epoch": 0.8568504167275918, + "grad_norm": 1.203302342126932, + "learning_rate": 1.314621243632368e-05, + "loss": 0.5729779005050659, + "step": 2930 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 1.3833464526102248, + "learning_rate": 1.314162288123225e-05, + "loss": 0.6462980508804321, + "step": 2931 + }, + { + "epoch": 0.8574352975581225, + "grad_norm": 1.1795102455310789, + "learning_rate": 1.3137032591805577e-05, + "loss": 0.5493176579475403, + "step": 2932 + }, + { + "epoch": 0.857727737973388, + "grad_norm": 1.1422942251299026, + "learning_rate": 1.3132441569116608e-05, + "loss": 0.49161234498023987, + "step": 2933 + }, + { + "epoch": 0.8580201783886533, + "grad_norm": 1.428090020215004, + "learning_rate": 1.312784981423847e-05, + "loss": 0.6724506616592407, + "step": 2934 + }, + { + "epoch": 0.8583126188039187, + "grad_norm": 1.6216709335890533, + "learning_rate": 1.3123257328244455e-05, + "loss": 0.6180965900421143, + "step": 2935 + }, + { + "epoch": 0.8586050592191841, + "grad_norm": 1.6797724821518334, + "learning_rate": 
1.3118664112208027e-05, + "loss": 0.6676491498947144, + "step": 2936 + }, + { + "epoch": 0.8588974996344495, + "grad_norm": 1.1911121778916818, + "learning_rate": 1.3114070167202827e-05, + "loss": 0.5964041948318481, + "step": 2937 + }, + { + "epoch": 0.8591899400497148, + "grad_norm": 1.3660050885815391, + "learning_rate": 1.3109475494302657e-05, + "loss": 0.708328366279602, + "step": 2938 + }, + { + "epoch": 0.8594823804649803, + "grad_norm": 1.6146616988047677, + "learning_rate": 1.3104880094581495e-05, + "loss": 0.6360403299331665, + "step": 2939 + }, + { + "epoch": 0.8597748208802457, + "grad_norm": 1.5628439078603966, + "learning_rate": 1.3100283969113494e-05, + "loss": 0.5450131893157959, + "step": 2940 + }, + { + "epoch": 0.860067261295511, + "grad_norm": 1.2422442713506727, + "learning_rate": 1.3095687118972962e-05, + "loss": 0.4472329020500183, + "step": 2941 + }, + { + "epoch": 0.8603597017107765, + "grad_norm": 1.2824654152788901, + "learning_rate": 1.3091089545234387e-05, + "loss": 0.6853972673416138, + "step": 2942 + }, + { + "epoch": 0.8606521421260418, + "grad_norm": 1.5236765495118778, + "learning_rate": 1.3086491248972429e-05, + "loss": 0.6547979116439819, + "step": 2943 + }, + { + "epoch": 0.8609445825413072, + "grad_norm": 1.2521364069886292, + "learning_rate": 1.3081892231261903e-05, + "loss": 0.46194693446159363, + "step": 2944 + }, + { + "epoch": 0.8612370229565726, + "grad_norm": 1.3749685968664958, + "learning_rate": 1.307729249317781e-05, + "loss": 0.5715345144271851, + "step": 2945 + }, + { + "epoch": 0.861529463371838, + "grad_norm": 1.2925136251134925, + "learning_rate": 1.3072692035795305e-05, + "loss": 0.5590982437133789, + "step": 2946 + }, + { + "epoch": 0.8618219037871033, + "grad_norm": 1.4594997051230878, + "learning_rate": 1.3068090860189719e-05, + "loss": 0.5435009002685547, + "step": 2947 + }, + { + "epoch": 0.8621143442023688, + "grad_norm": 1.1604259212434795, + "learning_rate": 1.3063488967436548e-05, + "loss": 
0.4528965651988983, + "step": 2948 + }, + { + "epoch": 0.8624067846176342, + "grad_norm": 1.1967844606343032, + "learning_rate": 1.3058886358611457e-05, + "loss": 0.5520291328430176, + "step": 2949 + }, + { + "epoch": 0.8626992250328995, + "grad_norm": 1.3959982999797578, + "learning_rate": 1.305428303479028e-05, + "loss": 0.6444021463394165, + "step": 2950 + }, + { + "epoch": 0.862991665448165, + "grad_norm": 1.597979452275331, + "learning_rate": 1.3049678997049016e-05, + "loss": 0.7808041572570801, + "step": 2951 + }, + { + "epoch": 0.8632841058634303, + "grad_norm": 1.6855013913251111, + "learning_rate": 1.3045074246463825e-05, + "loss": 0.6297428607940674, + "step": 2952 + }, + { + "epoch": 0.8635765462786957, + "grad_norm": 1.5203533995419023, + "learning_rate": 1.3040468784111045e-05, + "loss": 0.5776612162590027, + "step": 2953 + }, + { + "epoch": 0.8638689866939611, + "grad_norm": 1.3696314111811954, + "learning_rate": 1.3035862611067169e-05, + "loss": 0.49298524856567383, + "step": 2954 + }, + { + "epoch": 0.8641614271092265, + "grad_norm": 1.7023849342400221, + "learning_rate": 1.303125572840887e-05, + "loss": 0.8061650991439819, + "step": 2955 + }, + { + "epoch": 0.8644538675244919, + "grad_norm": 1.4509821363343893, + "learning_rate": 1.3026648137212976e-05, + "loss": 0.7741662859916687, + "step": 2956 + }, + { + "epoch": 0.8647463079397573, + "grad_norm": 1.350671993753925, + "learning_rate": 1.302203983855648e-05, + "loss": 0.5589889287948608, + "step": 2957 + }, + { + "epoch": 0.8650387483550227, + "grad_norm": 1.400755532782556, + "learning_rate": 1.3017430833516547e-05, + "loss": 0.5801941752433777, + "step": 2958 + }, + { + "epoch": 0.865331188770288, + "grad_norm": 1.3298019485580883, + "learning_rate": 1.30128211231705e-05, + "loss": 0.5874185562133789, + "step": 2959 + }, + { + "epoch": 0.8656236291855535, + "grad_norm": 1.1737111706818832, + "learning_rate": 1.3008210708595837e-05, + "loss": 0.6062727570533752, + "step": 2960 + }, + { + 
"epoch": 0.8659160696008188, + "grad_norm": 1.3334829952801492, + "learning_rate": 1.3003599590870209e-05, + "loss": 0.571448802947998, + "step": 2961 + }, + { + "epoch": 0.8662085100160842, + "grad_norm": 1.3654619359177553, + "learning_rate": 1.2998987771071442e-05, + "loss": 0.7001944780349731, + "step": 2962 + }, + { + "epoch": 0.8665009504313496, + "grad_norm": 1.48577297171421, + "learning_rate": 1.2994375250277516e-05, + "loss": 0.49182790517807007, + "step": 2963 + }, + { + "epoch": 0.866793390846615, + "grad_norm": 1.342673325945858, + "learning_rate": 1.298976202956658e-05, + "loss": 0.5299041271209717, + "step": 2964 + }, + { + "epoch": 0.8670858312618804, + "grad_norm": 1.1975267191215118, + "learning_rate": 1.2985148110016947e-05, + "loss": 0.4955265522003174, + "step": 2965 + }, + { + "epoch": 0.8673782716771458, + "grad_norm": 1.132262479106049, + "learning_rate": 1.2980533492707094e-05, + "loss": 0.6395630836486816, + "step": 2966 + }, + { + "epoch": 0.8676707120924112, + "grad_norm": 1.1303573523984183, + "learning_rate": 1.2975918178715661e-05, + "loss": 0.5926274061203003, + "step": 2967 + }, + { + "epoch": 0.8679631525076765, + "grad_norm": 1.194805436445147, + "learning_rate": 1.2971302169121447e-05, + "loss": 0.5556914806365967, + "step": 2968 + }, + { + "epoch": 0.868255592922942, + "grad_norm": 1.2766981949480176, + "learning_rate": 1.2966685465003415e-05, + "loss": 0.5347195863723755, + "step": 2969 + }, + { + "epoch": 0.8685480333382073, + "grad_norm": 1.3728880032694415, + "learning_rate": 1.2962068067440694e-05, + "loss": 0.6839208006858826, + "step": 2970 + }, + { + "epoch": 0.8688404737534727, + "grad_norm": 1.1132776608061867, + "learning_rate": 1.295744997751257e-05, + "loss": 0.5741337537765503, + "step": 2971 + }, + { + "epoch": 0.8691329141687382, + "grad_norm": 1.536125480269087, + "learning_rate": 1.29528311962985e-05, + "loss": 0.7383404970169067, + "step": 2972 + }, + { + "epoch": 0.8694253545840035, + "grad_norm": 
1.4560088611056379, + "learning_rate": 1.294821172487809e-05, + "loss": 0.5075374245643616, + "step": 2973 + }, + { + "epoch": 0.8697177949992689, + "grad_norm": 1.235849675897421, + "learning_rate": 1.2943591564331113e-05, + "loss": 0.557248592376709, + "step": 2974 + }, + { + "epoch": 0.8700102354145343, + "grad_norm": 1.3655420768672006, + "learning_rate": 1.2938970715737506e-05, + "loss": 0.5687203407287598, + "step": 2975 + }, + { + "epoch": 0.8703026758297997, + "grad_norm": 1.3479345698129241, + "learning_rate": 1.2934349180177364e-05, + "loss": 0.5946108102798462, + "step": 2976 + }, + { + "epoch": 0.870595116245065, + "grad_norm": 1.258994257926457, + "learning_rate": 1.2929726958730942e-05, + "loss": 0.6103173494338989, + "step": 2977 + }, + { + "epoch": 0.8708875566603305, + "grad_norm": 1.4914714674105345, + "learning_rate": 1.2925104052478657e-05, + "loss": 0.7007244825363159, + "step": 2978 + }, + { + "epoch": 0.8711799970755959, + "grad_norm": 1.4140285074261345, + "learning_rate": 1.2920480462501082e-05, + "loss": 0.6157742142677307, + "step": 2979 + }, + { + "epoch": 0.8714724374908612, + "grad_norm": 1.4708644175648395, + "learning_rate": 1.2915856189878956e-05, + "loss": 0.6501113176345825, + "step": 2980 + }, + { + "epoch": 0.8717648779061267, + "grad_norm": 1.2555000815915451, + "learning_rate": 1.2911231235693178e-05, + "loss": 0.5084626078605652, + "step": 2981 + }, + { + "epoch": 0.872057318321392, + "grad_norm": 1.343175395168551, + "learning_rate": 1.2906605601024796e-05, + "loss": 0.5953651666641235, + "step": 2982 + }, + { + "epoch": 0.8723497587366574, + "grad_norm": 1.218776434986359, + "learning_rate": 1.290197928695503e-05, + "loss": 0.5733205676078796, + "step": 2983 + }, + { + "epoch": 0.8726421991519228, + "grad_norm": 1.5420791901099857, + "learning_rate": 1.2897352294565248e-05, + "loss": 0.5976133942604065, + "step": 2984 + }, + { + "epoch": 0.8729346395671882, + "grad_norm": 1.2904353456419873, + "learning_rate": 
1.2892724624936983e-05, + "loss": 0.5092414617538452, + "step": 2985 + }, + { + "epoch": 0.8732270799824535, + "grad_norm": 1.4935525581566107, + "learning_rate": 1.2888096279151926e-05, + "loss": 0.7244688272476196, + "step": 2986 + }, + { + "epoch": 0.873519520397719, + "grad_norm": 1.5818576721862576, + "learning_rate": 1.2883467258291922e-05, + "loss": 0.6943881511688232, + "step": 2987 + }, + { + "epoch": 0.8738119608129844, + "grad_norm": 1.422762914124539, + "learning_rate": 1.287883756343898e-05, + "loss": 0.6484338641166687, + "step": 2988 + }, + { + "epoch": 0.8741044012282497, + "grad_norm": 1.23046146833686, + "learning_rate": 1.2874207195675262e-05, + "loss": 0.620865523815155, + "step": 2989 + }, + { + "epoch": 0.8743968416435152, + "grad_norm": 1.245843663622743, + "learning_rate": 1.2869576156083085e-05, + "loss": 0.5290236473083496, + "step": 2990 + }, + { + "epoch": 0.8746892820587805, + "grad_norm": 1.383695697280258, + "learning_rate": 1.2864944445744932e-05, + "loss": 0.7140257358551025, + "step": 2991 + }, + { + "epoch": 0.8749817224740459, + "grad_norm": 1.3901579888827407, + "learning_rate": 1.286031206574343e-05, + "loss": 0.7167611122131348, + "step": 2992 + }, + { + "epoch": 0.8752741628893113, + "grad_norm": 1.4097752029885913, + "learning_rate": 1.2855679017161372e-05, + "loss": 0.5631322860717773, + "step": 2993 + }, + { + "epoch": 0.8755666033045767, + "grad_norm": 1.4535459078300315, + "learning_rate": 1.2851045301081714e-05, + "loss": 0.6250770092010498, + "step": 2994 + }, + { + "epoch": 0.8758590437198421, + "grad_norm": 1.3041015408341177, + "learning_rate": 1.2846410918587546e-05, + "loss": 0.5121266841888428, + "step": 2995 + }, + { + "epoch": 0.8761514841351075, + "grad_norm": 1.2982813372349626, + "learning_rate": 1.2841775870762134e-05, + "loss": 0.6075780987739563, + "step": 2996 + }, + { + "epoch": 0.8764439245503729, + "grad_norm": 1.2610269556078437, + "learning_rate": 1.283714015868889e-05, + "loss": 0.516838014125824, 
+ "step": 2997 + }, + { + "epoch": 0.8767363649656382, + "grad_norm": 1.3997368275790003, + "learning_rate": 1.2832503783451384e-05, + "loss": 0.6952051520347595, + "step": 2998 + }, + { + "epoch": 0.8770288053809037, + "grad_norm": 1.668277066498958, + "learning_rate": 1.2827866746133342e-05, + "loss": 0.8039685487747192, + "step": 2999 + }, + { + "epoch": 0.877321245796169, + "grad_norm": 1.3275322129226486, + "learning_rate": 1.2823229047818642e-05, + "loss": 0.6200549602508545, + "step": 3000 + }, + { + "epoch": 0.8776136862114344, + "grad_norm": 1.3153882408773916, + "learning_rate": 1.2818590689591315e-05, + "loss": 0.6666116714477539, + "step": 3001 + }, + { + "epoch": 0.8779061266266998, + "grad_norm": 1.4255915593552042, + "learning_rate": 1.2813951672535551e-05, + "loss": 0.566741943359375, + "step": 3002 + }, + { + "epoch": 0.8781985670419652, + "grad_norm": 1.2390037918473238, + "learning_rate": 1.2809311997735697e-05, + "loss": 0.6103402376174927, + "step": 3003 + }, + { + "epoch": 0.8784910074572306, + "grad_norm": 1.1444583076116077, + "learning_rate": 1.280467166627624e-05, + "loss": 0.48296916484832764, + "step": 3004 + }, + { + "epoch": 0.878783447872496, + "grad_norm": 1.4235586871910597, + "learning_rate": 1.2800030679241834e-05, + "loss": 0.5995723605155945, + "step": 3005 + }, + { + "epoch": 0.8790758882877614, + "grad_norm": 1.5173093942193803, + "learning_rate": 1.2795389037717286e-05, + "loss": 0.6199642419815063, + "step": 3006 + }, + { + "epoch": 0.8793683287030267, + "grad_norm": 1.5757356892284924, + "learning_rate": 1.279074674278754e-05, + "loss": 0.6740807294845581, + "step": 3007 + }, + { + "epoch": 0.8796607691182922, + "grad_norm": 1.4923318097982954, + "learning_rate": 1.2786103795537714e-05, + "loss": 0.7330688238143921, + "step": 3008 + }, + { + "epoch": 0.8799532095335575, + "grad_norm": 1.1357910142893406, + "learning_rate": 1.2781460197053066e-05, + "loss": 0.5048441290855408, + "step": 3009 + }, + { + "epoch": 
0.8802456499488229, + "grad_norm": 1.2484561154788956, + "learning_rate": 1.277681594841901e-05, + "loss": 0.6103702187538147, + "step": 3010 + }, + { + "epoch": 0.8805380903640884, + "grad_norm": 1.3117487221252475, + "learning_rate": 1.2772171050721107e-05, + "loss": 0.5223366022109985, + "step": 3011 + }, + { + "epoch": 0.8808305307793537, + "grad_norm": 1.5806437295259135, + "learning_rate": 1.2767525505045078e-05, + "loss": 0.708305835723877, + "step": 3012 + }, + { + "epoch": 0.8811229711946191, + "grad_norm": 1.324207789268205, + "learning_rate": 1.2762879312476785e-05, + "loss": 0.6827911734580994, + "step": 3013 + }, + { + "epoch": 0.8814154116098845, + "grad_norm": 1.7302207886555443, + "learning_rate": 1.2758232474102254e-05, + "loss": 0.6977027654647827, + "step": 3014 + }, + { + "epoch": 0.8817078520251499, + "grad_norm": 1.235299173012923, + "learning_rate": 1.2753584991007654e-05, + "loss": 0.5534720420837402, + "step": 3015 + }, + { + "epoch": 0.8820002924404152, + "grad_norm": 1.1722300923390174, + "learning_rate": 1.2748936864279305e-05, + "loss": 0.541682243347168, + "step": 3016 + }, + { + "epoch": 0.8822927328556807, + "grad_norm": 1.4134630737456748, + "learning_rate": 1.2744288095003674e-05, + "loss": 0.6195456981658936, + "step": 3017 + }, + { + "epoch": 0.8825851732709461, + "grad_norm": 1.1963339495389647, + "learning_rate": 1.2739638684267387e-05, + "loss": 0.5050234794616699, + "step": 3018 + }, + { + "epoch": 0.8828776136862114, + "grad_norm": 1.1967088542641229, + "learning_rate": 1.2734988633157218e-05, + "loss": 0.5397066473960876, + "step": 3019 + }, + { + "epoch": 0.8831700541014769, + "grad_norm": 1.3480056981854442, + "learning_rate": 1.273033794276008e-05, + "loss": 0.5932190418243408, + "step": 3020 + }, + { + "epoch": 0.8834624945167422, + "grad_norm": 1.2383533139434324, + "learning_rate": 1.2725686614163055e-05, + "loss": 0.5780059099197388, + "step": 3021 + }, + { + "epoch": 0.8837549349320076, + "grad_norm": 
1.4379159594856536, + "learning_rate": 1.2721034648453353e-05, + "loss": 0.5850226879119873, + "step": 3022 + }, + { + "epoch": 0.884047375347273, + "grad_norm": 1.351057706249645, + "learning_rate": 1.2716382046718346e-05, + "loss": 0.6684393882751465, + "step": 3023 + }, + { + "epoch": 0.8843398157625384, + "grad_norm": 1.3578422906902012, + "learning_rate": 1.271172881004555e-05, + "loss": 0.6045842170715332, + "step": 3024 + }, + { + "epoch": 0.8846322561778037, + "grad_norm": 1.4246831207517041, + "learning_rate": 1.2707074939522633e-05, + "loss": 0.6769551038742065, + "step": 3025 + }, + { + "epoch": 0.8849246965930692, + "grad_norm": 1.541147063192512, + "learning_rate": 1.2702420436237408e-05, + "loss": 0.5581091642379761, + "step": 3026 + }, + { + "epoch": 0.8852171370083346, + "grad_norm": 1.381695049653859, + "learning_rate": 1.269776530127784e-05, + "loss": 0.5010186433792114, + "step": 3027 + }, + { + "epoch": 0.8855095774235999, + "grad_norm": 1.3620137613749654, + "learning_rate": 1.2693109535732034e-05, + "loss": 0.4537884294986725, + "step": 3028 + }, + { + "epoch": 0.8858020178388654, + "grad_norm": 1.155156838639785, + "learning_rate": 1.2688453140688246e-05, + "loss": 0.5920443534851074, + "step": 3029 + }, + { + "epoch": 0.8860944582541307, + "grad_norm": 1.4193738144287875, + "learning_rate": 1.2683796117234884e-05, + "loss": 0.564072847366333, + "step": 3030 + }, + { + "epoch": 0.8863868986693961, + "grad_norm": 1.3206014730711304, + "learning_rate": 1.26791384664605e-05, + "loss": 0.657585620880127, + "step": 3031 + }, + { + "epoch": 0.8866793390846615, + "grad_norm": 1.4850669504718117, + "learning_rate": 1.2674480189453786e-05, + "loss": 0.6864298582077026, + "step": 3032 + }, + { + "epoch": 0.8869717794999269, + "grad_norm": 1.4143994971740543, + "learning_rate": 1.266982128730359e-05, + "loss": 0.6416069865226746, + "step": 3033 + }, + { + "epoch": 0.8872642199151923, + "grad_norm": 1.3298070008922416, + "learning_rate": 
1.2665161761098899e-05, + "loss": 0.6405118703842163, + "step": 3034 + }, + { + "epoch": 0.8875566603304577, + "grad_norm": 1.4036133965159712, + "learning_rate": 1.266050161192885e-05, + "loss": 0.649673342704773, + "step": 3035 + }, + { + "epoch": 0.8878491007457231, + "grad_norm": 1.359043965576467, + "learning_rate": 1.2655840840882729e-05, + "loss": 0.5914620161056519, + "step": 3036 + }, + { + "epoch": 0.8881415411609884, + "grad_norm": 1.5837746169822255, + "learning_rate": 1.2651179449049958e-05, + "loss": 0.6080621480941772, + "step": 3037 + }, + { + "epoch": 0.8884339815762539, + "grad_norm": 1.5302588008128089, + "learning_rate": 1.264651743752011e-05, + "loss": 0.657015860080719, + "step": 3038 + }, + { + "epoch": 0.8887264219915192, + "grad_norm": 1.3603604072518423, + "learning_rate": 1.26418548073829e-05, + "loss": 0.5384848713874817, + "step": 3039 + }, + { + "epoch": 0.8890188624067846, + "grad_norm": 1.5457096573294893, + "learning_rate": 1.2637191559728195e-05, + "loss": 0.7452554106712341, + "step": 3040 + }, + { + "epoch": 0.88931130282205, + "grad_norm": 1.4411555623785637, + "learning_rate": 1.2632527695645993e-05, + "loss": 0.743236780166626, + "step": 3041 + }, + { + "epoch": 0.8896037432373154, + "grad_norm": 1.5417347407679962, + "learning_rate": 1.2627863216226453e-05, + "loss": 0.557692289352417, + "step": 3042 + }, + { + "epoch": 0.8898961836525808, + "grad_norm": 1.3302198914823486, + "learning_rate": 1.2623198122559863e-05, + "loss": 0.5637259483337402, + "step": 3043 + }, + { + "epoch": 0.8901886240678462, + "grad_norm": 1.4403910054587767, + "learning_rate": 1.261853241573666e-05, + "loss": 0.5217350721359253, + "step": 3044 + }, + { + "epoch": 0.8904810644831116, + "grad_norm": 1.4659582389098327, + "learning_rate": 1.2613866096847423e-05, + "loss": 0.5971624255180359, + "step": 3045 + }, + { + "epoch": 0.8907735048983769, + "grad_norm": 1.5641010174504344, + "learning_rate": 1.260919916698288e-05, + "loss": 0.6586427092552185, + 
"step": 3046 + }, + { + "epoch": 0.8910659453136424, + "grad_norm": 1.8045032510726307, + "learning_rate": 1.2604531627233895e-05, + "loss": 0.7059915661811829, + "step": 3047 + }, + { + "epoch": 0.8913583857289077, + "grad_norm": 1.3406441666811264, + "learning_rate": 1.2599863478691483e-05, + "loss": 0.582252025604248, + "step": 3048 + }, + { + "epoch": 0.8916508261441731, + "grad_norm": 1.2760858553291834, + "learning_rate": 1.2595194722446786e-05, + "loss": 0.6901981830596924, + "step": 3049 + }, + { + "epoch": 0.8919432665594386, + "grad_norm": 1.5789638647855007, + "learning_rate": 1.2590525359591101e-05, + "loss": 0.7462388873100281, + "step": 3050 + }, + { + "epoch": 0.8922357069747039, + "grad_norm": 1.1893369289763132, + "learning_rate": 1.2585855391215866e-05, + "loss": 0.4963245391845703, + "step": 3051 + }, + { + "epoch": 0.8925281473899693, + "grad_norm": 1.427293357699651, + "learning_rate": 1.2581184818412655e-05, + "loss": 0.6408337354660034, + "step": 3052 + }, + { + "epoch": 0.8928205878052347, + "grad_norm": 1.3357664905418998, + "learning_rate": 1.257651364227319e-05, + "loss": 0.44528326392173767, + "step": 3053 + }, + { + "epoch": 0.8931130282205001, + "grad_norm": 1.4527206031665332, + "learning_rate": 1.2571841863889322e-05, + "loss": 0.4595017731189728, + "step": 3054 + }, + { + "epoch": 0.8934054686357654, + "grad_norm": 1.435143014894245, + "learning_rate": 1.2567169484353057e-05, + "loss": 0.6934910416603088, + "step": 3055 + }, + { + "epoch": 0.8936979090510309, + "grad_norm": 1.3543177360296097, + "learning_rate": 1.2562496504756535e-05, + "loss": 0.6392845511436462, + "step": 3056 + }, + { + "epoch": 0.8939903494662963, + "grad_norm": 1.3638361282130094, + "learning_rate": 1.255782292619203e-05, + "loss": 0.5506458878517151, + "step": 3057 + }, + { + "epoch": 0.8942827898815616, + "grad_norm": 1.3861859212756857, + "learning_rate": 1.255314874975197e-05, + "loss": 0.5871223211288452, + "step": 3058 + }, + { + "epoch": 
0.8945752302968271, + "grad_norm": 1.4446737131271559, + "learning_rate": 1.254847397652892e-05, + "loss": 0.603033185005188, + "step": 3059 + }, + { + "epoch": 0.8948676707120924, + "grad_norm": 1.4764688506929942, + "learning_rate": 1.2543798607615566e-05, + "loss": 0.667452335357666, + "step": 3060 + }, + { + "epoch": 0.8951601111273578, + "grad_norm": 1.5052245195755742, + "learning_rate": 1.2539122644104755e-05, + "loss": 0.6264449954032898, + "step": 3061 + }, + { + "epoch": 0.8954525515426232, + "grad_norm": 1.2694525054193362, + "learning_rate": 1.2534446087089465e-05, + "loss": 0.6085609793663025, + "step": 3062 + }, + { + "epoch": 0.8957449919578886, + "grad_norm": 1.5027824768205942, + "learning_rate": 1.252976893766281e-05, + "loss": 0.6414828896522522, + "step": 3063 + }, + { + "epoch": 0.8960374323731539, + "grad_norm": 1.5067492390612103, + "learning_rate": 1.2525091196918049e-05, + "loss": 0.714614987373352, + "step": 3064 + }, + { + "epoch": 0.8963298727884194, + "grad_norm": 1.4473594871396505, + "learning_rate": 1.2520412865948574e-05, + "loss": 0.5966176986694336, + "step": 3065 + }, + { + "epoch": 0.8966223132036848, + "grad_norm": 1.234582474772498, + "learning_rate": 1.2515733945847914e-05, + "loss": 0.5162957906723022, + "step": 3066 + }, + { + "epoch": 0.8969147536189501, + "grad_norm": 1.5378382727824902, + "learning_rate": 1.2511054437709743e-05, + "loss": 0.6460821628570557, + "step": 3067 + }, + { + "epoch": 0.8972071940342156, + "grad_norm": 1.3526579806372556, + "learning_rate": 1.2506374342627861e-05, + "loss": 0.6802507638931274, + "step": 3068 + }, + { + "epoch": 0.8974996344494809, + "grad_norm": 1.4306769896677902, + "learning_rate": 1.2501693661696218e-05, + "loss": 0.5966957807540894, + "step": 3069 + }, + { + "epoch": 0.8977920748647463, + "grad_norm": 1.336293797847081, + "learning_rate": 1.2497012396008893e-05, + "loss": 0.607227087020874, + "step": 3070 + }, + { + "epoch": 0.8980845152800117, + "grad_norm": 
1.360686606627987, + "learning_rate": 1.2492330546660098e-05, + "loss": 0.6544637084007263, + "step": 3071 + }, + { + "epoch": 0.8983769556952771, + "grad_norm": 1.410133865972111, + "learning_rate": 1.2487648114744196e-05, + "loss": 0.5896593332290649, + "step": 3072 + }, + { + "epoch": 0.8986693961105425, + "grad_norm": 1.296908458370691, + "learning_rate": 1.248296510135567e-05, + "loss": 0.5710231065750122, + "step": 3073 + }, + { + "epoch": 0.8989618365258079, + "grad_norm": 1.2057046094411794, + "learning_rate": 1.2478281507589147e-05, + "loss": 0.5918926000595093, + "step": 3074 + }, + { + "epoch": 0.8992542769410733, + "grad_norm": 1.5306817529094334, + "learning_rate": 1.2473597334539392e-05, + "loss": 0.681663453578949, + "step": 3075 + }, + { + "epoch": 0.8995467173563386, + "grad_norm": 1.2671727964507529, + "learning_rate": 1.24689125833013e-05, + "loss": 0.5229436159133911, + "step": 3076 + }, + { + "epoch": 0.8998391577716041, + "grad_norm": 1.5769374861363958, + "learning_rate": 1.2464227254969903e-05, + "loss": 0.7165119051933289, + "step": 3077 + }, + { + "epoch": 0.9001315981868694, + "grad_norm": 1.2324966791017462, + "learning_rate": 1.2459541350640368e-05, + "loss": 0.514594554901123, + "step": 3078 + }, + { + "epoch": 0.9004240386021348, + "grad_norm": 1.4144268048636097, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.6173784732818604, + "step": 3079 + }, + { + "epoch": 0.9007164790174002, + "grad_norm": 1.6555744107314199, + "learning_rate": 1.245016781836822e-05, + "loss": 0.6796407103538513, + "step": 3080 + }, + { + "epoch": 0.9010089194326656, + "grad_norm": 1.3666754181554102, + "learning_rate": 1.2445480192616619e-05, + "loss": 0.6901683807373047, + "step": 3081 + }, + { + "epoch": 0.901301359847931, + "grad_norm": 1.295839204252469, + "learning_rate": 1.2440791995248886e-05, + "loss": 0.6215920448303223, + "step": 3082 + }, + { + "epoch": 0.9015938002631964, + "grad_norm": 1.29381925555321, + "learning_rate": 
1.243610322736087e-05, + "loss": 0.6109690070152283, + "step": 3083 + }, + { + "epoch": 0.9018862406784618, + "grad_norm": 1.3751453546430485, + "learning_rate": 1.2431413890048534e-05, + "loss": 0.5273362398147583, + "step": 3084 + }, + { + "epoch": 0.9021786810937271, + "grad_norm": 1.197511083408015, + "learning_rate": 1.2426723984407982e-05, + "loss": 0.5219408273696899, + "step": 3085 + }, + { + "epoch": 0.9024711215089926, + "grad_norm": 1.4389803986869047, + "learning_rate": 1.2422033511535458e-05, + "loss": 0.6894690990447998, + "step": 3086 + }, + { + "epoch": 0.9027635619242579, + "grad_norm": 1.2949596320128054, + "learning_rate": 1.2417342472527325e-05, + "loss": 0.6135656833648682, + "step": 3087 + }, + { + "epoch": 0.9030560023395233, + "grad_norm": 1.4997841327771624, + "learning_rate": 1.2412650868480088e-05, + "loss": 0.595108151435852, + "step": 3088 + }, + { + "epoch": 0.9033484427547888, + "grad_norm": 1.4068106482758378, + "learning_rate": 1.2407958700490376e-05, + "loss": 0.6445261240005493, + "step": 3089 + }, + { + "epoch": 0.9036408831700541, + "grad_norm": 1.1391728287440939, + "learning_rate": 1.240326596965496e-05, + "loss": 0.5601890087127686, + "step": 3090 + }, + { + "epoch": 0.9039333235853195, + "grad_norm": 1.4556896662499954, + "learning_rate": 1.239857267707074e-05, + "loss": 0.6229134798049927, + "step": 3091 + }, + { + "epoch": 0.9042257640005849, + "grad_norm": 1.3633245090329542, + "learning_rate": 1.2393878823834737e-05, + "loss": 0.5769803524017334, + "step": 3092 + }, + { + "epoch": 0.9045182044158503, + "grad_norm": 1.5373386649577192, + "learning_rate": 1.2389184411044113e-05, + "loss": 0.8101233243942261, + "step": 3093 + }, + { + "epoch": 0.9048106448311156, + "grad_norm": 1.3507156228218853, + "learning_rate": 1.2384489439796159e-05, + "loss": 0.5562945604324341, + "step": 3094 + }, + { + "epoch": 0.9051030852463811, + "grad_norm": 1.6942487879562902, + "learning_rate": 1.2379793911188299e-05, + "loss": 
0.5764975547790527, + "step": 3095 + }, + { + "epoch": 0.9053955256616465, + "grad_norm": 1.4280019855873591, + "learning_rate": 1.2375097826318079e-05, + "loss": 0.5951659083366394, + "step": 3096 + }, + { + "epoch": 0.9056879660769118, + "grad_norm": 1.3804272066554735, + "learning_rate": 1.2370401186283186e-05, + "loss": 0.5550940632820129, + "step": 3097 + }, + { + "epoch": 0.9059804064921773, + "grad_norm": 1.5012418323017303, + "learning_rate": 1.2365703992181425e-05, + "loss": 0.5423737168312073, + "step": 3098 + }, + { + "epoch": 0.9062728469074426, + "grad_norm": 1.3277873552974655, + "learning_rate": 1.236100624511074e-05, + "loss": 0.633366048336029, + "step": 3099 + }, + { + "epoch": 0.906565287322708, + "grad_norm": 1.2113954677804317, + "learning_rate": 1.2356307946169202e-05, + "loss": 0.6067361831665039, + "step": 3100 + }, + { + "epoch": 0.9068577277379734, + "grad_norm": 1.1800329005672614, + "learning_rate": 1.2351609096455006e-05, + "loss": 0.6039519309997559, + "step": 3101 + }, + { + "epoch": 0.9071501681532388, + "grad_norm": 1.3373962705942997, + "learning_rate": 1.2346909697066486e-05, + "loss": 0.5643757581710815, + "step": 3102 + }, + { + "epoch": 0.9074426085685041, + "grad_norm": 1.4963223668806274, + "learning_rate": 1.2342209749102088e-05, + "loss": 0.5406394004821777, + "step": 3103 + }, + { + "epoch": 0.9077350489837696, + "grad_norm": 1.3377232980381308, + "learning_rate": 1.2337509253660404e-05, + "loss": 0.5845915079116821, + "step": 3104 + }, + { + "epoch": 0.908027489399035, + "grad_norm": 1.614536146442758, + "learning_rate": 1.2332808211840147e-05, + "loss": 0.6912981271743774, + "step": 3105 + }, + { + "epoch": 0.9083199298143003, + "grad_norm": 1.2433178855630291, + "learning_rate": 1.2328106624740151e-05, + "loss": 0.5571672320365906, + "step": 3106 + }, + { + "epoch": 0.9086123702295658, + "grad_norm": 1.0866011599268561, + "learning_rate": 1.2323404493459386e-05, + "loss": 0.5219087600708008, + "step": 3107 + }, + { + 
"epoch": 0.9089048106448311, + "grad_norm": 1.3110052749572634, + "learning_rate": 1.2318701819096952e-05, + "loss": 0.5780971050262451, + "step": 3108 + }, + { + "epoch": 0.9091972510600965, + "grad_norm": 1.4311943893173962, + "learning_rate": 1.2313998602752063e-05, + "loss": 0.6206589937210083, + "step": 3109 + }, + { + "epoch": 0.9094896914753618, + "grad_norm": 1.4768884476442792, + "learning_rate": 1.2309294845524068e-05, + "loss": 0.6063584089279175, + "step": 3110 + }, + { + "epoch": 0.9097821318906273, + "grad_norm": 1.7547035202334638, + "learning_rate": 1.2304590548512445e-05, + "loss": 0.5733555555343628, + "step": 3111 + }, + { + "epoch": 0.9100745723058927, + "grad_norm": 1.0786362412869268, + "learning_rate": 1.2299885712816792e-05, + "loss": 0.5227848887443542, + "step": 3112 + }, + { + "epoch": 0.910367012721158, + "grad_norm": 1.3268713618037162, + "learning_rate": 1.2295180339536839e-05, + "loss": 0.6357969045639038, + "step": 3113 + }, + { + "epoch": 0.9106594531364235, + "grad_norm": 1.4243975329678797, + "learning_rate": 1.2290474429772438e-05, + "loss": 0.6194056272506714, + "step": 3114 + }, + { + "epoch": 0.9109518935516888, + "grad_norm": 1.3151715542581663, + "learning_rate": 1.2285767984623563e-05, + "loss": 0.5274733304977417, + "step": 3115 + }, + { + "epoch": 0.9112443339669543, + "grad_norm": 1.370068266036648, + "learning_rate": 1.228106100519032e-05, + "loss": 0.5612698197364807, + "step": 3116 + }, + { + "epoch": 0.9115367743822196, + "grad_norm": 1.578530779654035, + "learning_rate": 1.2276353492572937e-05, + "loss": 0.6261074542999268, + "step": 3117 + }, + { + "epoch": 0.911829214797485, + "grad_norm": 1.2011662273206838, + "learning_rate": 1.2271645447871764e-05, + "loss": 0.6407681703567505, + "step": 3118 + }, + { + "epoch": 0.9121216552127503, + "grad_norm": 1.869370443317622, + "learning_rate": 1.226693687218728e-05, + "loss": 0.7862328290939331, + "step": 3119 + }, + { + "epoch": 0.9124140956280158, + "grad_norm": 
1.4175623746202768, + "learning_rate": 1.2262227766620083e-05, + "loss": 0.5079205632209778, + "step": 3120 + }, + { + "epoch": 0.9127065360432812, + "grad_norm": 1.5666620241066453, + "learning_rate": 1.2257518132270903e-05, + "loss": 0.6074210405349731, + "step": 3121 + }, + { + "epoch": 0.9129989764585466, + "grad_norm": 1.5222891825114737, + "learning_rate": 1.2252807970240582e-05, + "loss": 0.642460823059082, + "step": 3122 + }, + { + "epoch": 0.913291416873812, + "grad_norm": 1.5105961127505823, + "learning_rate": 1.22480972816301e-05, + "loss": 0.5996612310409546, + "step": 3123 + }, + { + "epoch": 0.9135838572890773, + "grad_norm": 1.4191755584361432, + "learning_rate": 1.2243386067540548e-05, + "loss": 0.5629523992538452, + "step": 3124 + }, + { + "epoch": 0.9138762977043428, + "grad_norm": 1.488297008451051, + "learning_rate": 1.223867432907314e-05, + "loss": 0.5794960260391235, + "step": 3125 + }, + { + "epoch": 0.9141687381196081, + "grad_norm": 1.4839380471480481, + "learning_rate": 1.2233962067329217e-05, + "loss": 0.6665213108062744, + "step": 3126 + }, + { + "epoch": 0.9144611785348735, + "grad_norm": 1.7069185609011637, + "learning_rate": 1.2229249283410245e-05, + "loss": 0.6834249496459961, + "step": 3127 + }, + { + "epoch": 0.914753618950139, + "grad_norm": 1.472483487554638, + "learning_rate": 1.2224535978417809e-05, + "loss": 0.5709845423698425, + "step": 3128 + }, + { + "epoch": 0.9150460593654043, + "grad_norm": 1.3783113695609808, + "learning_rate": 1.2219822153453613e-05, + "loss": 0.5455344915390015, + "step": 3129 + }, + { + "epoch": 0.9153384997806697, + "grad_norm": 1.5138708664001599, + "learning_rate": 1.2215107809619483e-05, + "loss": 0.6291406154632568, + "step": 3130 + }, + { + "epoch": 0.915630940195935, + "grad_norm": 1.340686035335307, + "learning_rate": 1.2210392948017371e-05, + "loss": 0.5953069925308228, + "step": 3131 + }, + { + "epoch": 0.9159233806112005, + "grad_norm": 1.3390197673162056, + "learning_rate": 
1.2205677569749347e-05, + "loss": 0.6958901882171631, + "step": 3132 + }, + { + "epoch": 0.9162158210264658, + "grad_norm": 2.251590691230911, + "learning_rate": 1.2200961675917605e-05, + "loss": 0.5867033004760742, + "step": 3133 + }, + { + "epoch": 0.9165082614417313, + "grad_norm": 1.2167957981489814, + "learning_rate": 1.2196245267624449e-05, + "loss": 0.5364042520523071, + "step": 3134 + }, + { + "epoch": 0.9168007018569967, + "grad_norm": 1.0997310314063415, + "learning_rate": 1.2191528345972318e-05, + "loss": 0.5141438841819763, + "step": 3135 + }, + { + "epoch": 0.917093142272262, + "grad_norm": 1.1435709173541644, + "learning_rate": 1.218681091206376e-05, + "loss": 0.5024605393409729, + "step": 3136 + }, + { + "epoch": 0.9173855826875275, + "grad_norm": 1.4583614763595478, + "learning_rate": 1.2182092967001447e-05, + "loss": 0.567114531993866, + "step": 3137 + }, + { + "epoch": 0.9176780231027928, + "grad_norm": 1.4993671644221835, + "learning_rate": 1.217737451188817e-05, + "loss": 0.7224113941192627, + "step": 3138 + }, + { + "epoch": 0.9179704635180582, + "grad_norm": 1.368376715547139, + "learning_rate": 1.2172655547826839e-05, + "loss": 0.6033936738967896, + "step": 3139 + }, + { + "epoch": 0.9182629039333235, + "grad_norm": 1.4327847369216065, + "learning_rate": 1.2167936075920486e-05, + "loss": 0.5555745363235474, + "step": 3140 + }, + { + "epoch": 0.918555344348589, + "grad_norm": 1.1757378939927343, + "learning_rate": 1.2163216097272255e-05, + "loss": 0.5939170718193054, + "step": 3141 + }, + { + "epoch": 0.9188477847638543, + "grad_norm": 1.49535441688526, + "learning_rate": 1.2158495612985415e-05, + "loss": 0.7141895294189453, + "step": 3142 + }, + { + "epoch": 0.9191402251791198, + "grad_norm": 1.5558405168210478, + "learning_rate": 1.2153774624163345e-05, + "loss": 0.585646390914917, + "step": 3143 + }, + { + "epoch": 0.9194326655943852, + "grad_norm": 1.114182805953909, + "learning_rate": 1.2149053131909556e-05, + "loss": 0.5378825068473816, 
+ "step": 3144 + }, + { + "epoch": 0.9197251060096505, + "grad_norm": 1.383902731385194, + "learning_rate": 1.2144331137327663e-05, + "loss": 0.569821834564209, + "step": 3145 + }, + { + "epoch": 0.920017546424916, + "grad_norm": 1.6457891792908532, + "learning_rate": 1.2139608641521406e-05, + "loss": 0.6101462244987488, + "step": 3146 + }, + { + "epoch": 0.9203099868401813, + "grad_norm": 1.2016357640033675, + "learning_rate": 1.2134885645594637e-05, + "loss": 0.5481746792793274, + "step": 3147 + }, + { + "epoch": 0.9206024272554467, + "grad_norm": 1.538402380383642, + "learning_rate": 1.2130162150651326e-05, + "loss": 0.7075197696685791, + "step": 3148 + }, + { + "epoch": 0.920894867670712, + "grad_norm": 1.7217246005422928, + "learning_rate": 1.2125438157795567e-05, + "loss": 0.6375464200973511, + "step": 3149 + }, + { + "epoch": 0.9211873080859775, + "grad_norm": 1.3850395600859229, + "learning_rate": 1.2120713668131558e-05, + "loss": 0.6954327821731567, + "step": 3150 + }, + { + "epoch": 0.9214797485012429, + "grad_norm": 1.3658544095341296, + "learning_rate": 1.2115988682763626e-05, + "loss": 0.5855636596679688, + "step": 3151 + }, + { + "epoch": 0.9217721889165083, + "grad_norm": 1.4751760026778278, + "learning_rate": 1.2111263202796206e-05, + "loss": 0.6056143641471863, + "step": 3152 + }, + { + "epoch": 0.9220646293317737, + "grad_norm": 1.551741495670365, + "learning_rate": 1.2106537229333848e-05, + "loss": 0.7918239831924438, + "step": 3153 + }, + { + "epoch": 0.922357069747039, + "grad_norm": 1.7033588700340108, + "learning_rate": 1.2101810763481218e-05, + "loss": 0.7772212028503418, + "step": 3154 + }, + { + "epoch": 0.9226495101623045, + "grad_norm": 1.511966147005096, + "learning_rate": 1.2097083806343104e-05, + "loss": 0.6332443356513977, + "step": 3155 + }, + { + "epoch": 0.9229419505775698, + "grad_norm": 1.358434184305942, + "learning_rate": 1.2092356359024399e-05, + "loss": 0.6254568099975586, + "step": 3156 + }, + { + "epoch": 
0.9232343909928352, + "grad_norm": 1.5630990314712985, + "learning_rate": 1.208762842263012e-05, + "loss": 0.6178697347640991, + "step": 3157 + }, + { + "epoch": 0.9235268314081005, + "grad_norm": 1.1998616171531247, + "learning_rate": 1.2082899998265387e-05, + "loss": 0.5049355030059814, + "step": 3158 + }, + { + "epoch": 0.923819271823366, + "grad_norm": 1.4513160919924062, + "learning_rate": 1.2078171087035444e-05, + "loss": 0.7013234496116638, + "step": 3159 + }, + { + "epoch": 0.9241117122386314, + "grad_norm": 1.4119575222677514, + "learning_rate": 1.2073441690045647e-05, + "loss": 0.576643705368042, + "step": 3160 + }, + { + "epoch": 0.9244041526538967, + "grad_norm": 1.2307321356514476, + "learning_rate": 1.2068711808401459e-05, + "loss": 0.5163617134094238, + "step": 3161 + }, + { + "epoch": 0.9246965930691622, + "grad_norm": 1.39625806011197, + "learning_rate": 1.2063981443208466e-05, + "loss": 0.571370005607605, + "step": 3162 + }, + { + "epoch": 0.9249890334844275, + "grad_norm": 1.3814954844513003, + "learning_rate": 1.2059250595572358e-05, + "loss": 0.7424927949905396, + "step": 3163 + }, + { + "epoch": 0.925281473899693, + "grad_norm": 1.398481393831642, + "learning_rate": 1.2054519266598946e-05, + "loss": 0.6661131381988525, + "step": 3164 + }, + { + "epoch": 0.9255739143149583, + "grad_norm": 1.382448951979987, + "learning_rate": 1.2049787457394145e-05, + "loss": 0.6416351795196533, + "step": 3165 + }, + { + "epoch": 0.9258663547302237, + "grad_norm": 1.5012000035545232, + "learning_rate": 1.2045055169063988e-05, + "loss": 0.6708394289016724, + "step": 3166 + }, + { + "epoch": 0.9261587951454892, + "grad_norm": 1.5269915566780659, + "learning_rate": 1.2040322402714624e-05, + "loss": 0.536340057849884, + "step": 3167 + }, + { + "epoch": 0.9264512355607545, + "grad_norm": 1.4556897812811458, + "learning_rate": 1.20355891594523e-05, + "loss": 0.5621340274810791, + "step": 3168 + }, + { + "epoch": 0.9267436759760199, + "grad_norm": 1.274628172323648, + 
"learning_rate": 1.2030855440383387e-05, + "loss": 0.5972496271133423, + "step": 3169 + }, + { + "epoch": 0.9270361163912852, + "grad_norm": 1.4230845419048714, + "learning_rate": 1.2026121246614362e-05, + "loss": 0.567542314529419, + "step": 3170 + }, + { + "epoch": 0.9273285568065507, + "grad_norm": 1.092340586033623, + "learning_rate": 1.2021386579251814e-05, + "loss": 0.5487483739852905, + "step": 3171 + }, + { + "epoch": 0.927620997221816, + "grad_norm": 1.6219751059797927, + "learning_rate": 1.2016651439402445e-05, + "loss": 0.7988057136535645, + "step": 3172 + }, + { + "epoch": 0.9279134376370815, + "grad_norm": 1.2231171520157942, + "learning_rate": 1.2011915828173066e-05, + "loss": 0.5333850979804993, + "step": 3173 + }, + { + "epoch": 0.9282058780523469, + "grad_norm": 1.1146388373256622, + "learning_rate": 1.2007179746670592e-05, + "loss": 0.5640296936035156, + "step": 3174 + }, + { + "epoch": 0.9284983184676122, + "grad_norm": 1.7918188640848236, + "learning_rate": 1.2002443196002057e-05, + "loss": 0.7154449820518494, + "step": 3175 + }, + { + "epoch": 0.9287907588828777, + "grad_norm": 1.533684329230312, + "learning_rate": 1.1997706177274597e-05, + "loss": 0.8660446405410767, + "step": 3176 + }, + { + "epoch": 0.929083199298143, + "grad_norm": 1.498753630747748, + "learning_rate": 1.1992968691595465e-05, + "loss": 0.601166307926178, + "step": 3177 + }, + { + "epoch": 0.9293756397134084, + "grad_norm": 1.4563708289231845, + "learning_rate": 1.1988230740072022e-05, + "loss": 0.6197638511657715, + "step": 3178 + }, + { + "epoch": 0.9296680801286737, + "grad_norm": 1.2218794629813654, + "learning_rate": 1.198349232381173e-05, + "loss": 0.5716423988342285, + "step": 3179 + }, + { + "epoch": 0.9299605205439392, + "grad_norm": 1.1601969521725652, + "learning_rate": 1.197875344392217e-05, + "loss": 0.4319373071193695, + "step": 3180 + }, + { + "epoch": 0.9302529609592045, + "grad_norm": 1.3226372570662766, + "learning_rate": 1.1974014101511018e-05, + "loss": 
0.5299028158187866, + "step": 3181 + }, + { + "epoch": 0.93054540137447, + "grad_norm": 1.4024951088839022, + "learning_rate": 1.1969274297686075e-05, + "loss": 0.7085509300231934, + "step": 3182 + }, + { + "epoch": 0.9308378417897354, + "grad_norm": 1.3237854936063287, + "learning_rate": 1.1964534033555237e-05, + "loss": 0.6025770902633667, + "step": 3183 + }, + { + "epoch": 0.9311302822050007, + "grad_norm": 1.2585066067859425, + "learning_rate": 1.1959793310226518e-05, + "loss": 0.5624677538871765, + "step": 3184 + }, + { + "epoch": 0.9314227226202662, + "grad_norm": 1.3607236544497474, + "learning_rate": 1.1955052128808025e-05, + "loss": 0.602645754814148, + "step": 3185 + }, + { + "epoch": 0.9317151630355315, + "grad_norm": 1.5196424442530971, + "learning_rate": 1.1950310490407984e-05, + "loss": 0.6495026350021362, + "step": 3186 + }, + { + "epoch": 0.9320076034507969, + "grad_norm": 1.2037819566859902, + "learning_rate": 1.1945568396134721e-05, + "loss": 0.50370192527771, + "step": 3187 + }, + { + "epoch": 0.9323000438660622, + "grad_norm": 1.4578860564520788, + "learning_rate": 1.1940825847096677e-05, + "loss": 0.5717373490333557, + "step": 3188 + }, + { + "epoch": 0.9325924842813277, + "grad_norm": 1.2463647398252022, + "learning_rate": 1.1936082844402395e-05, + "loss": 0.5863519310951233, + "step": 3189 + }, + { + "epoch": 0.9328849246965931, + "grad_norm": 1.3634372027202455, + "learning_rate": 1.1931339389160516e-05, + "loss": 0.6607284545898438, + "step": 3190 + }, + { + "epoch": 0.9331773651118584, + "grad_norm": 1.2667041686104175, + "learning_rate": 1.1926595482479799e-05, + "loss": 0.5578058958053589, + "step": 3191 + }, + { + "epoch": 0.9334698055271239, + "grad_norm": 1.577459199872034, + "learning_rate": 1.19218511254691e-05, + "loss": 0.6839171648025513, + "step": 3192 + }, + { + "epoch": 0.9337622459423892, + "grad_norm": 1.4197717809462, + "learning_rate": 1.1917106319237386e-05, + "loss": 0.5071141719818115, + "step": 3193 + }, + { + "epoch": 
0.9340546863576547, + "grad_norm": 1.3302825340941604, + "learning_rate": 1.1912361064893726e-05, + "loss": 0.5112525820732117, + "step": 3194 + }, + { + "epoch": 0.93434712677292, + "grad_norm": 1.3701575961238917, + "learning_rate": 1.1907615363547299e-05, + "loss": 0.5661873817443848, + "step": 3195 + }, + { + "epoch": 0.9346395671881854, + "grad_norm": 1.3078991902724904, + "learning_rate": 1.190286921630737e-05, + "loss": 0.5520195364952087, + "step": 3196 + }, + { + "epoch": 0.9349320076034507, + "grad_norm": 1.1923433518822224, + "learning_rate": 1.1898122624283337e-05, + "loss": 0.560089111328125, + "step": 3197 + }, + { + "epoch": 0.9352244480187162, + "grad_norm": 1.3393482355065873, + "learning_rate": 1.1893375588584681e-05, + "loss": 0.6431207656860352, + "step": 3198 + }, + { + "epoch": 0.9355168884339816, + "grad_norm": 1.6025933525200546, + "learning_rate": 1.1888628110320995e-05, + "loss": 0.7365666031837463, + "step": 3199 + }, + { + "epoch": 0.935809328849247, + "grad_norm": 1.5181397488734587, + "learning_rate": 1.1883880190601968e-05, + "loss": 0.5455417633056641, + "step": 3200 + }, + { + "epoch": 0.9361017692645124, + "grad_norm": 1.2648151177686433, + "learning_rate": 1.1879131830537403e-05, + "loss": 0.5749938488006592, + "step": 3201 + }, + { + "epoch": 0.9363942096797777, + "grad_norm": 1.4774526931967815, + "learning_rate": 1.1874383031237196e-05, + "loss": 0.588424563407898, + "step": 3202 + }, + { + "epoch": 0.9366866500950431, + "grad_norm": 1.7045519601542285, + "learning_rate": 1.1869633793811352e-05, + "loss": 0.7039792537689209, + "step": 3203 + }, + { + "epoch": 0.9369790905103085, + "grad_norm": 1.3777530310932211, + "learning_rate": 1.1864884119369977e-05, + "loss": 0.5972777009010315, + "step": 3204 + }, + { + "epoch": 0.9372715309255739, + "grad_norm": 1.5348242749242778, + "learning_rate": 1.1860134009023281e-05, + "loss": 0.6510647535324097, + "step": 3205 + }, + { + "epoch": 0.9375639713408394, + "grad_norm": 
1.3174058455781212, + "learning_rate": 1.1855383463881566e-05, + "loss": 0.606874406337738, + "step": 3206 + }, + { + "epoch": 0.9378564117561047, + "grad_norm": 1.4675285988638056, + "learning_rate": 1.1850632485055247e-05, + "loss": 0.5527048110961914, + "step": 3207 + }, + { + "epoch": 0.9381488521713701, + "grad_norm": 1.3531723389548285, + "learning_rate": 1.1845881073654838e-05, + "loss": 0.6297399997711182, + "step": 3208 + }, + { + "epoch": 0.9384412925866354, + "grad_norm": 1.4561464002236073, + "learning_rate": 1.184112923079095e-05, + "loss": 0.5852634310722351, + "step": 3209 + }, + { + "epoch": 0.9387337330019009, + "grad_norm": 1.276124242645333, + "learning_rate": 1.1836376957574301e-05, + "loss": 0.5648211240768433, + "step": 3210 + }, + { + "epoch": 0.9390261734171662, + "grad_norm": 1.4542765956455581, + "learning_rate": 1.1831624255115703e-05, + "loss": 0.5547506213188171, + "step": 3211 + }, + { + "epoch": 0.9393186138324316, + "grad_norm": 1.3882723904405088, + "learning_rate": 1.1826871124526072e-05, + "loss": 0.5927829146385193, + "step": 3212 + }, + { + "epoch": 0.9396110542476971, + "grad_norm": 1.4870159815211654, + "learning_rate": 1.182211756691642e-05, + "loss": 0.5705278515815735, + "step": 3213 + }, + { + "epoch": 0.9399034946629624, + "grad_norm": 1.3481561389317809, + "learning_rate": 1.1817363583397868e-05, + "loss": 0.547038197517395, + "step": 3214 + }, + { + "epoch": 0.9401959350782279, + "grad_norm": 1.6799026497887648, + "learning_rate": 1.1812609175081626e-05, + "loss": 0.6136760115623474, + "step": 3215 + }, + { + "epoch": 0.9404883754934932, + "grad_norm": 1.3697737055687615, + "learning_rate": 1.1807854343079015e-05, + "loss": 0.5784845352172852, + "step": 3216 + }, + { + "epoch": 0.9407808159087586, + "grad_norm": 1.306268521565337, + "learning_rate": 1.1803099088501439e-05, + "loss": 0.6629599332809448, + "step": 3217 + }, + { + "epoch": 0.9410732563240239, + "grad_norm": 1.3560413521315915, + "learning_rate": 
1.1798343412460416e-05, + "loss": 0.6058052778244019, + "step": 3218 + }, + { + "epoch": 0.9413656967392894, + "grad_norm": 1.236587656133179, + "learning_rate": 1.1793587316067552e-05, + "loss": 0.5689725875854492, + "step": 3219 + }, + { + "epoch": 0.9416581371545547, + "grad_norm": 1.2722209400014248, + "learning_rate": 1.1788830800434561e-05, + "loss": 0.5718861818313599, + "step": 3220 + }, + { + "epoch": 0.9419505775698201, + "grad_norm": 1.4517063699959183, + "learning_rate": 1.1784073866673245e-05, + "loss": 0.6061254739761353, + "step": 3221 + }, + { + "epoch": 0.9422430179850856, + "grad_norm": 1.3732176542504997, + "learning_rate": 1.1779316515895511e-05, + "loss": 0.6805517077445984, + "step": 3222 + }, + { + "epoch": 0.9425354584003509, + "grad_norm": 1.3828844754339646, + "learning_rate": 1.1774558749213358e-05, + "loss": 0.5553466081619263, + "step": 3223 + }, + { + "epoch": 0.9428278988156163, + "grad_norm": 1.2173236944216692, + "learning_rate": 1.176980056773889e-05, + "loss": 0.6408798694610596, + "step": 3224 + }, + { + "epoch": 0.9431203392308817, + "grad_norm": 1.222815565053331, + "learning_rate": 1.1765041972584296e-05, + "loss": 0.5269505381584167, + "step": 3225 + }, + { + "epoch": 0.9434127796461471, + "grad_norm": 1.424391391794669, + "learning_rate": 1.1760282964861873e-05, + "loss": 0.682415246963501, + "step": 3226 + }, + { + "epoch": 0.9437052200614124, + "grad_norm": 1.4623421356805024, + "learning_rate": 1.1755523545684016e-05, + "loss": 0.507567286491394, + "step": 3227 + }, + { + "epoch": 0.9439976604766779, + "grad_norm": 1.4192334343942388, + "learning_rate": 1.1750763716163199e-05, + "loss": 0.6977763175964355, + "step": 3228 + }, + { + "epoch": 0.9442901008919433, + "grad_norm": 1.3754010773945908, + "learning_rate": 1.1746003477412007e-05, + "loss": 0.5626407861709595, + "step": 3229 + }, + { + "epoch": 0.9445825413072086, + "grad_norm": 1.537446067568307, + "learning_rate": 1.1741242830543118e-05, + "loss": 
0.5280323624610901, + "step": 3230 + }, + { + "epoch": 0.9448749817224741, + "grad_norm": 1.564549447099706, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.6236885190010071, + "step": 3231 + }, + { + "epoch": 0.9451674221377394, + "grad_norm": 1.2957140073878561, + "learning_rate": 1.1731720316903435e-05, + "loss": 0.5250823497772217, + "step": 3232 + }, + { + "epoch": 0.9454598625530048, + "grad_norm": 1.3562245135276858, + "learning_rate": 1.1726958452358472e-05, + "loss": 0.5885770320892334, + "step": 3233 + }, + { + "epoch": 0.9457523029682702, + "grad_norm": 1.5466392002562799, + "learning_rate": 1.1722196184147467e-05, + "loss": 0.7812498807907104, + "step": 3234 + }, + { + "epoch": 0.9460447433835356, + "grad_norm": 2.1182720670568678, + "learning_rate": 1.1717433513383575e-05, + "loss": 0.6763796210289001, + "step": 3235 + }, + { + "epoch": 0.9463371837988009, + "grad_norm": 1.4130641179603503, + "learning_rate": 1.1712670441180045e-05, + "loss": 0.5983982682228088, + "step": 3236 + }, + { + "epoch": 0.9466296242140664, + "grad_norm": 1.4075974845813908, + "learning_rate": 1.1707906968650214e-05, + "loss": 0.6665002107620239, + "step": 3237 + }, + { + "epoch": 0.9469220646293318, + "grad_norm": 1.3129047594602676, + "learning_rate": 1.1703143096907507e-05, + "loss": 0.7676652669906616, + "step": 3238 + }, + { + "epoch": 0.9472145050445971, + "grad_norm": 1.552106023331421, + "learning_rate": 1.1698378827065461e-05, + "loss": 0.710014820098877, + "step": 3239 + }, + { + "epoch": 0.9475069454598626, + "grad_norm": 1.3709978679968329, + "learning_rate": 1.169361416023769e-05, + "loss": 0.5800554752349854, + "step": 3240 + }, + { + "epoch": 0.9477993858751279, + "grad_norm": 1.2790925568283578, + "learning_rate": 1.1688849097537904e-05, + "loss": 0.602012574672699, + "step": 3241 + }, + { + "epoch": 0.9480918262903933, + "grad_norm": 1.4089569844293444, + "learning_rate": 1.1684083640079912e-05, + "loss": 0.4943910241127014, + "step": 3242 + }, + { + 
"epoch": 0.9483842667056587, + "grad_norm": 1.3173293444454082, + "learning_rate": 1.1679317788977609e-05, + "loss": 0.49094298481941223, + "step": 3243 + }, + { + "epoch": 0.9486767071209241, + "grad_norm": 1.1684708220820899, + "learning_rate": 1.1674551545344983e-05, + "loss": 0.46416157484054565, + "step": 3244 + }, + { + "epoch": 0.9489691475361896, + "grad_norm": 1.3422229221849986, + "learning_rate": 1.1669784910296114e-05, + "loss": 0.5170255899429321, + "step": 3245 + }, + { + "epoch": 0.9492615879514549, + "grad_norm": 1.3467691134757651, + "learning_rate": 1.1665017884945174e-05, + "loss": 0.7673200368881226, + "step": 3246 + }, + { + "epoch": 0.9495540283667203, + "grad_norm": 1.194998950326605, + "learning_rate": 1.1660250470406426e-05, + "loss": 0.49335333704948425, + "step": 3247 + }, + { + "epoch": 0.9498464687819856, + "grad_norm": 1.5055569823397887, + "learning_rate": 1.1655482667794228e-05, + "loss": 0.6620640754699707, + "step": 3248 + }, + { + "epoch": 0.9501389091972511, + "grad_norm": 1.5536985980342881, + "learning_rate": 1.1650714478223022e-05, + "loss": 0.600047767162323, + "step": 3249 + }, + { + "epoch": 0.9504313496125164, + "grad_norm": 1.449375702915225, + "learning_rate": 1.164594590280734e-05, + "loss": 0.668572187423706, + "step": 3250 + }, + { + "epoch": 0.9507237900277818, + "grad_norm": 1.28696773590094, + "learning_rate": 1.1641176942661812e-05, + "loss": 0.4460945725440979, + "step": 3251 + }, + { + "epoch": 0.9510162304430473, + "grad_norm": 1.553130185640807, + "learning_rate": 1.1636407598901154e-05, + "loss": 0.6650545597076416, + "step": 3252 + }, + { + "epoch": 0.9513086708583126, + "grad_norm": 1.4537452557116313, + "learning_rate": 1.1631637872640166e-05, + "loss": 0.5631237030029297, + "step": 3253 + }, + { + "epoch": 0.951601111273578, + "grad_norm": 1.2642307643713007, + "learning_rate": 1.162686776499375e-05, + "loss": 0.650580883026123, + "step": 3254 + }, + { + "epoch": 0.9518935516888434, + "grad_norm": 
1.2808622379645098, + "learning_rate": 1.1622097277076883e-05, + "loss": 0.5606606602668762, + "step": 3255 + }, + { + "epoch": 0.9521859921041088, + "grad_norm": 1.6059525544711786, + "learning_rate": 1.1617326410004639e-05, + "loss": 0.667366623878479, + "step": 3256 + }, + { + "epoch": 0.9524784325193741, + "grad_norm": 1.2848877829061671, + "learning_rate": 1.1612555164892181e-05, + "loss": 0.5895084738731384, + "step": 3257 + }, + { + "epoch": 0.9527708729346396, + "grad_norm": 1.3031742059601414, + "learning_rate": 1.1607783542854759e-05, + "loss": 0.6468119025230408, + "step": 3258 + }, + { + "epoch": 0.9530633133499049, + "grad_norm": 1.567653748749065, + "learning_rate": 1.1603011545007708e-05, + "loss": 0.7178056240081787, + "step": 3259 + }, + { + "epoch": 0.9533557537651703, + "grad_norm": 1.0796246328531958, + "learning_rate": 1.1598239172466457e-05, + "loss": 0.42994585633277893, + "step": 3260 + }, + { + "epoch": 0.9536481941804358, + "grad_norm": 1.3208710287997751, + "learning_rate": 1.1593466426346513e-05, + "loss": 0.4939822554588318, + "step": 3261 + }, + { + "epoch": 0.9539406345957011, + "grad_norm": 1.4828958620285886, + "learning_rate": 1.1588693307763483e-05, + "loss": 0.4252137839794159, + "step": 3262 + }, + { + "epoch": 0.9542330750109665, + "grad_norm": 1.4293991408504185, + "learning_rate": 1.1583919817833051e-05, + "loss": 0.5772995948791504, + "step": 3263 + }, + { + "epoch": 0.9545255154262319, + "grad_norm": 1.4892265763022432, + "learning_rate": 1.1579145957670992e-05, + "loss": 0.6784560680389404, + "step": 3264 + }, + { + "epoch": 0.9548179558414973, + "grad_norm": 1.4340903064465058, + "learning_rate": 1.1574371728393169e-05, + "loss": 0.5373483896255493, + "step": 3265 + }, + { + "epoch": 0.9551103962567626, + "grad_norm": 1.5590731671081544, + "learning_rate": 1.1569597131115523e-05, + "loss": 0.7517837285995483, + "step": 3266 + }, + { + "epoch": 0.9554028366720281, + "grad_norm": 1.2323534514024168, + "learning_rate": 
1.1564822166954092e-05, + "loss": 0.6715551614761353, + "step": 3267 + }, + { + "epoch": 0.9556952770872935, + "grad_norm": 1.5740418428519831, + "learning_rate": 1.1560046837024994e-05, + "loss": 0.6892265677452087, + "step": 3268 + }, + { + "epoch": 0.9559877175025588, + "grad_norm": 1.1845546480418727, + "learning_rate": 1.1555271142444433e-05, + "loss": 0.5564894676208496, + "step": 3269 + }, + { + "epoch": 0.9562801579178243, + "grad_norm": 1.4735106062071393, + "learning_rate": 1.15504950843287e-05, + "loss": 0.6211465001106262, + "step": 3270 + }, + { + "epoch": 0.9565725983330896, + "grad_norm": 1.360797371118281, + "learning_rate": 1.1545718663794165e-05, + "loss": 0.6189093589782715, + "step": 3271 + }, + { + "epoch": 0.956865038748355, + "grad_norm": 1.332461163898103, + "learning_rate": 1.1540941881957293e-05, + "loss": 0.6600508689880371, + "step": 3272 + }, + { + "epoch": 0.9571574791636204, + "grad_norm": 1.1722369932825303, + "learning_rate": 1.1536164739934626e-05, + "loss": 0.5891202688217163, + "step": 3273 + }, + { + "epoch": 0.9574499195788858, + "grad_norm": 1.450456789269031, + "learning_rate": 1.1531387238842788e-05, + "loss": 0.5996856093406677, + "step": 3274 + }, + { + "epoch": 0.9577423599941511, + "grad_norm": 1.3947581203143906, + "learning_rate": 1.15266093797985e-05, + "loss": 0.5645085573196411, + "step": 3275 + }, + { + "epoch": 0.9580348004094166, + "grad_norm": 1.3192013477387883, + "learning_rate": 1.1521831163918545e-05, + "loss": 0.5934250354766846, + "step": 3276 + }, + { + "epoch": 0.958327240824682, + "grad_norm": 1.3125475487560205, + "learning_rate": 1.151705259231981e-05, + "loss": 0.6659657955169678, + "step": 3277 + }, + { + "epoch": 0.9586196812399473, + "grad_norm": 1.4439329469838202, + "learning_rate": 1.1512273666119255e-05, + "loss": 0.518921434879303, + "step": 3278 + }, + { + "epoch": 0.9589121216552128, + "grad_norm": 1.5520324796179028, + "learning_rate": 1.1507494386433927e-05, + "loss": 0.6015551686286926, 
+ "step": 3279 + }, + { + "epoch": 0.9592045620704781, + "grad_norm": 1.3864839845404684, + "learning_rate": 1.150271475438095e-05, + "loss": 0.5590265393257141, + "step": 3280 + }, + { + "epoch": 0.9594970024857435, + "grad_norm": 2.135782810317134, + "learning_rate": 1.149793477107754e-05, + "loss": 0.5820340514183044, + "step": 3281 + }, + { + "epoch": 0.9597894429010089, + "grad_norm": 1.5263684685914536, + "learning_rate": 1.1493154437640981e-05, + "loss": 0.5356709957122803, + "step": 3282 + }, + { + "epoch": 0.9600818833162743, + "grad_norm": 1.6754028625571513, + "learning_rate": 1.1488373755188651e-05, + "loss": 0.7024146318435669, + "step": 3283 + }, + { + "epoch": 0.9603743237315397, + "grad_norm": 1.1672092433368113, + "learning_rate": 1.1483592724838007e-05, + "loss": 0.4929785132408142, + "step": 3284 + }, + { + "epoch": 0.9606667641468051, + "grad_norm": 1.288237919875972, + "learning_rate": 1.147881134770658e-05, + "loss": 0.6902902126312256, + "step": 3285 + }, + { + "epoch": 0.9609592045620705, + "grad_norm": 1.3348356135288268, + "learning_rate": 1.1474029624911997e-05, + "loss": 0.5339258313179016, + "step": 3286 + }, + { + "epoch": 0.9612516449773358, + "grad_norm": 1.4657145875756896, + "learning_rate": 1.146924755757195e-05, + "loss": 0.6998730897903442, + "step": 3287 + }, + { + "epoch": 0.9615440853926013, + "grad_norm": 1.257948537764273, + "learning_rate": 1.1464465146804218e-05, + "loss": 0.6174519062042236, + "step": 3288 + }, + { + "epoch": 0.9618365258078666, + "grad_norm": 1.812192547108516, + "learning_rate": 1.145968239372666e-05, + "loss": 0.5395258665084839, + "step": 3289 + }, + { + "epoch": 0.962128966223132, + "grad_norm": 1.4759469600623887, + "learning_rate": 1.1454899299457221e-05, + "loss": 0.6355341672897339, + "step": 3290 + }, + { + "epoch": 0.9624214066383975, + "grad_norm": 1.519697305957534, + "learning_rate": 1.1450115865113916e-05, + "loss": 0.5315179228782654, + "step": 3291 + }, + { + "epoch": 0.9627138470536628, 
+ "grad_norm": 1.468105168017502, + "learning_rate": 1.1445332091814844e-05, + "loss": 0.5595142841339111, + "step": 3292 + }, + { + "epoch": 0.9630062874689282, + "grad_norm": 1.2033736096293444, + "learning_rate": 1.1440547980678185e-05, + "loss": 0.5509291291236877, + "step": 3293 + }, + { + "epoch": 0.9632987278841936, + "grad_norm": 1.5381505996084959, + "learning_rate": 1.1435763532822191e-05, + "loss": 0.6831322908401489, + "step": 3294 + }, + { + "epoch": 0.963591168299459, + "grad_norm": 1.3733453232745707, + "learning_rate": 1.1430978749365203e-05, + "loss": 0.5494598150253296, + "step": 3295 + }, + { + "epoch": 0.9638836087147243, + "grad_norm": 1.498661160088125, + "learning_rate": 1.142619363142563e-05, + "loss": 0.5613550543785095, + "step": 3296 + }, + { + "epoch": 0.9641760491299898, + "grad_norm": 1.5212850266198317, + "learning_rate": 1.1421408180121972e-05, + "loss": 0.656089186668396, + "step": 3297 + }, + { + "epoch": 0.9644684895452551, + "grad_norm": 1.1510410875603876, + "learning_rate": 1.1416622396572791e-05, + "loss": 0.5913431644439697, + "step": 3298 + }, + { + "epoch": 0.9647609299605205, + "grad_norm": 1.3644056514467953, + "learning_rate": 1.1411836281896737e-05, + "loss": 0.6706565022468567, + "step": 3299 + }, + { + "epoch": 0.965053370375786, + "grad_norm": 1.3661421058655916, + "learning_rate": 1.1407049837212539e-05, + "loss": 0.6169217824935913, + "step": 3300 + }, + { + "epoch": 0.9653458107910513, + "grad_norm": 1.2988460072876178, + "learning_rate": 1.1402263063638994e-05, + "loss": 0.5516680479049683, + "step": 3301 + }, + { + "epoch": 0.9656382512063167, + "grad_norm": 1.2914486970247845, + "learning_rate": 1.1397475962294986e-05, + "loss": 0.7105098962783813, + "step": 3302 + }, + { + "epoch": 0.9659306916215821, + "grad_norm": 1.5297340917133426, + "learning_rate": 1.139268853429947e-05, + "loss": 0.6183327436447144, + "step": 3303 + }, + { + "epoch": 0.9662231320368475, + "grad_norm": 1.4183780196378124, + 
"learning_rate": 1.1387900780771472e-05, + "loss": 0.6160033941268921, + "step": 3304 + }, + { + "epoch": 0.9665155724521128, + "grad_norm": 1.4212044707464202, + "learning_rate": 1.1383112702830108e-05, + "loss": 0.5526994466781616, + "step": 3305 + }, + { + "epoch": 0.9668080128673783, + "grad_norm": 1.381901469460175, + "learning_rate": 1.137832430159456e-05, + "loss": 0.5476477742195129, + "step": 3306 + }, + { + "epoch": 0.9671004532826437, + "grad_norm": 1.3794404018811846, + "learning_rate": 1.1373535578184083e-05, + "loss": 0.558393657207489, + "step": 3307 + }, + { + "epoch": 0.967392893697909, + "grad_norm": 1.4577860579810487, + "learning_rate": 1.1368746533718017e-05, + "loss": 0.6302276849746704, + "step": 3308 + }, + { + "epoch": 0.9676853341131745, + "grad_norm": 1.2805956031485568, + "learning_rate": 1.1363957169315773e-05, + "loss": 0.619697630405426, + "step": 3309 + }, + { + "epoch": 0.9679777745284398, + "grad_norm": 1.4119075289775231, + "learning_rate": 1.135916748609683e-05, + "loss": 0.564563512802124, + "step": 3310 + }, + { + "epoch": 0.9682702149437052, + "grad_norm": 1.6014783450991135, + "learning_rate": 1.1354377485180756e-05, + "loss": 0.6238751411437988, + "step": 3311 + }, + { + "epoch": 0.9685626553589706, + "grad_norm": 1.4620948350058627, + "learning_rate": 1.1349587167687177e-05, + "loss": 0.8079221844673157, + "step": 3312 + }, + { + "epoch": 0.968855095774236, + "grad_norm": 1.4034979651528738, + "learning_rate": 1.1344796534735805e-05, + "loss": 0.5547629594802856, + "step": 3313 + }, + { + "epoch": 0.9691475361895013, + "grad_norm": 1.2187187942390127, + "learning_rate": 1.134000558744642e-05, + "loss": 0.630042552947998, + "step": 3314 + }, + { + "epoch": 0.9694399766047668, + "grad_norm": 1.284912675244452, + "learning_rate": 1.1335214326938872e-05, + "loss": 0.5283412337303162, + "step": 3315 + }, + { + "epoch": 0.9697324170200322, + "grad_norm": 1.3484514955842084, + "learning_rate": 1.1330422754333097e-05, + "loss": 
0.6356452703475952, + "step": 3316 + }, + { + "epoch": 0.9700248574352975, + "grad_norm": 1.265116321608699, + "learning_rate": 1.132563087074909e-05, + "loss": 0.6531886458396912, + "step": 3317 + }, + { + "epoch": 0.970317297850563, + "grad_norm": 1.6209665553722108, + "learning_rate": 1.1320838677306927e-05, + "loss": 0.5725178718566895, + "step": 3318 + }, + { + "epoch": 0.9706097382658283, + "grad_norm": 1.460783947968998, + "learning_rate": 1.1316046175126758e-05, + "loss": 0.6341495513916016, + "step": 3319 + }, + { + "epoch": 0.9709021786810937, + "grad_norm": 1.428850290510927, + "learning_rate": 1.1311253365328794e-05, + "loss": 0.5792768597602844, + "step": 3320 + }, + { + "epoch": 0.9711946190963591, + "grad_norm": 1.2539734431492524, + "learning_rate": 1.1306460249033326e-05, + "loss": 0.5495700836181641, + "step": 3321 + }, + { + "epoch": 0.9714870595116245, + "grad_norm": 1.3779597112573112, + "learning_rate": 1.1301666827360721e-05, + "loss": 0.7092291116714478, + "step": 3322 + }, + { + "epoch": 0.97177949992689, + "grad_norm": 1.210154083257435, + "learning_rate": 1.1296873101431409e-05, + "loss": 0.5368257761001587, + "step": 3323 + }, + { + "epoch": 0.9720719403421553, + "grad_norm": 1.2901315838159502, + "learning_rate": 1.1292079072365898e-05, + "loss": 0.6116393804550171, + "step": 3324 + }, + { + "epoch": 0.9723643807574207, + "grad_norm": 1.6375876584807947, + "learning_rate": 1.1287284741284757e-05, + "loss": 0.5654028654098511, + "step": 3325 + }, + { + "epoch": 0.972656821172686, + "grad_norm": 1.4007947938241085, + "learning_rate": 1.1282490109308633e-05, + "loss": 0.6436389684677124, + "step": 3326 + }, + { + "epoch": 0.9729492615879515, + "grad_norm": 1.6286174854172328, + "learning_rate": 1.1277695177558243e-05, + "loss": 0.7687330842018127, + "step": 3327 + }, + { + "epoch": 0.9732417020032168, + "grad_norm": 1.3338540478099405, + "learning_rate": 1.1272899947154377e-05, + "loss": 0.5350443124771118, + "step": 3328 + }, + { + 
"epoch": 0.9735341424184822, + "grad_norm": 1.5528633763871835, + "learning_rate": 1.1268104419217884e-05, + "loss": 0.6032785773277283, + "step": 3329 + }, + { + "epoch": 0.9738265828337477, + "grad_norm": 1.410347655987774, + "learning_rate": 1.1263308594869697e-05, + "loss": 0.5756093263626099, + "step": 3330 + }, + { + "epoch": 0.974119023249013, + "grad_norm": 1.5831169693775362, + "learning_rate": 1.1258512475230807e-05, + "loss": 0.6977418065071106, + "step": 3331 + }, + { + "epoch": 0.9744114636642784, + "grad_norm": 1.3726893652594243, + "learning_rate": 1.1253716061422275e-05, + "loss": 0.5409448146820068, + "step": 3332 + }, + { + "epoch": 0.9747039040795438, + "grad_norm": 1.3626349639764654, + "learning_rate": 1.1248919354565237e-05, + "loss": 0.5863862037658691, + "step": 3333 + }, + { + "epoch": 0.9749963444948092, + "grad_norm": 1.313934697737098, + "learning_rate": 1.1244122355780895e-05, + "loss": 0.6039433479309082, + "step": 3334 + }, + { + "epoch": 0.9752887849100745, + "grad_norm": 1.4813691831553626, + "learning_rate": 1.1239325066190513e-05, + "loss": 0.6696581840515137, + "step": 3335 + }, + { + "epoch": 0.97558122532534, + "grad_norm": 1.5159715106591773, + "learning_rate": 1.1234527486915439e-05, + "loss": 0.6308715343475342, + "step": 3336 + }, + { + "epoch": 0.9758736657406053, + "grad_norm": 1.4927391317525602, + "learning_rate": 1.1229729619077065e-05, + "loss": 0.580268383026123, + "step": 3337 + }, + { + "epoch": 0.9761661061558707, + "grad_norm": 1.775582999909584, + "learning_rate": 1.1224931463796871e-05, + "loss": 0.8080834746360779, + "step": 3338 + }, + { + "epoch": 0.9764585465711362, + "grad_norm": 1.3814988427954438, + "learning_rate": 1.1220133022196395e-05, + "loss": 0.4933619499206543, + "step": 3339 + }, + { + "epoch": 0.9767509869864015, + "grad_norm": 1.26412210808527, + "learning_rate": 1.1215334295397244e-05, + "loss": 0.5639102458953857, + "step": 3340 + }, + { + "epoch": 0.9770434274016669, + "grad_norm": 
1.3947001629341338, + "learning_rate": 1.1210535284521094e-05, + "loss": 0.6332741975784302, + "step": 3341 + }, + { + "epoch": 0.9773358678169323, + "grad_norm": 1.4234927806293247, + "learning_rate": 1.1205735990689677e-05, + "loss": 0.5425227880477905, + "step": 3342 + }, + { + "epoch": 0.9776283082321977, + "grad_norm": 1.2841671137073696, + "learning_rate": 1.1200936415024804e-05, + "loss": 0.48746997117996216, + "step": 3343 + }, + { + "epoch": 0.977920748647463, + "grad_norm": 1.3045240526527524, + "learning_rate": 1.1196136558648345e-05, + "loss": 0.5509577393531799, + "step": 3344 + }, + { + "epoch": 0.9782131890627285, + "grad_norm": 1.5306708658005588, + "learning_rate": 1.1191336422682237e-05, + "loss": 0.5939484238624573, + "step": 3345 + }, + { + "epoch": 0.9785056294779939, + "grad_norm": 1.4772741629174198, + "learning_rate": 1.1186536008248487e-05, + "loss": 0.6078917384147644, + "step": 3346 + }, + { + "epoch": 0.9787980698932592, + "grad_norm": 1.4449426772113496, + "learning_rate": 1.1181735316469157e-05, + "loss": 0.5578145980834961, + "step": 3347 + }, + { + "epoch": 0.9790905103085247, + "grad_norm": 1.5556898331182667, + "learning_rate": 1.1176934348466384e-05, + "loss": 0.6809493899345398, + "step": 3348 + }, + { + "epoch": 0.97938295072379, + "grad_norm": 1.3454886518258895, + "learning_rate": 1.117213310536236e-05, + "loss": 0.6057093143463135, + "step": 3349 + }, + { + "epoch": 0.9796753911390554, + "grad_norm": 1.2918762120947054, + "learning_rate": 1.1167331588279351e-05, + "loss": 0.6656113266944885, + "step": 3350 + }, + { + "epoch": 0.9799678315543208, + "grad_norm": 1.3588186351553628, + "learning_rate": 1.1162529798339682e-05, + "loss": 0.5260547399520874, + "step": 3351 + }, + { + "epoch": 0.9802602719695862, + "grad_norm": 1.4059510686804249, + "learning_rate": 1.115772773666574e-05, + "loss": 0.6918379068374634, + "step": 3352 + }, + { + "epoch": 0.9805527123848515, + "grad_norm": 1.4859264660633271, + "learning_rate": 
1.115292540437998e-05, + "loss": 0.7128825187683105, + "step": 3353 + }, + { + "epoch": 0.980845152800117, + "grad_norm": 1.7806281788252345, + "learning_rate": 1.1148122802604913e-05, + "loss": 0.6858257055282593, + "step": 3354 + }, + { + "epoch": 0.9811375932153824, + "grad_norm": 1.3250069966815017, + "learning_rate": 1.1143319932463124e-05, + "loss": 0.540290117263794, + "step": 3355 + }, + { + "epoch": 0.9814300336306477, + "grad_norm": 1.3692222106755043, + "learning_rate": 1.1138516795077251e-05, + "loss": 0.7293038368225098, + "step": 3356 + }, + { + "epoch": 0.9817224740459132, + "grad_norm": 1.2337952733643827, + "learning_rate": 1.1133713391570003e-05, + "loss": 0.5981270670890808, + "step": 3357 + }, + { + "epoch": 0.9820149144611785, + "grad_norm": 1.282642205016649, + "learning_rate": 1.1128909723064138e-05, + "loss": 0.6175673604011536, + "step": 3358 + }, + { + "epoch": 0.9823073548764439, + "grad_norm": 1.233452486411816, + "learning_rate": 1.112410579068249e-05, + "loss": 0.5385074615478516, + "step": 3359 + }, + { + "epoch": 0.9825997952917093, + "grad_norm": 1.372295513124522, + "learning_rate": 1.1119301595547952e-05, + "loss": 0.5754122734069824, + "step": 3360 + }, + { + "epoch": 0.9828922357069747, + "grad_norm": 1.4139982265628481, + "learning_rate": 1.1114497138783469e-05, + "loss": 0.5817348957061768, + "step": 3361 + }, + { + "epoch": 0.9831846761222401, + "grad_norm": 1.5953096945649214, + "learning_rate": 1.1109692421512058e-05, + "loss": 0.7561115026473999, + "step": 3362 + }, + { + "epoch": 0.9834771165375055, + "grad_norm": 1.4339527302516233, + "learning_rate": 1.1104887444856786e-05, + "loss": 0.5972003936767578, + "step": 3363 + }, + { + "epoch": 0.9837695569527709, + "grad_norm": 1.7933233288020083, + "learning_rate": 1.1100082209940795e-05, + "loss": 0.7569154500961304, + "step": 3364 + }, + { + "epoch": 0.9840619973680362, + "grad_norm": 1.6291951934588174, + "learning_rate": 1.1095276717887273e-05, + "loss": 
0.587831437587738, + "step": 3365 + }, + { + "epoch": 0.9843544377833017, + "grad_norm": 1.3893746663182953, + "learning_rate": 1.109047096981948e-05, + "loss": 0.5265868902206421, + "step": 3366 + }, + { + "epoch": 0.984646878198567, + "grad_norm": 1.5308570155926502, + "learning_rate": 1.1085664966860728e-05, + "loss": 0.6065980792045593, + "step": 3367 + }, + { + "epoch": 0.9849393186138324, + "grad_norm": 1.2582827679300745, + "learning_rate": 1.1080858710134392e-05, + "loss": 0.5859705209732056, + "step": 3368 + }, + { + "epoch": 0.9852317590290979, + "grad_norm": 1.2323676627113982, + "learning_rate": 1.1076052200763903e-05, + "loss": 0.508766770362854, + "step": 3369 + }, + { + "epoch": 0.9855241994443632, + "grad_norm": 1.36193145330846, + "learning_rate": 1.1071245439872752e-05, + "loss": 0.569848358631134, + "step": 3370 + }, + { + "epoch": 0.9858166398596286, + "grad_norm": 1.5268801014665052, + "learning_rate": 1.1066438428584496e-05, + "loss": 0.6665600538253784, + "step": 3371 + }, + { + "epoch": 0.986109080274894, + "grad_norm": 4.0352208239875536, + "learning_rate": 1.1061631168022742e-05, + "loss": 0.5942315459251404, + "step": 3372 + }, + { + "epoch": 0.9864015206901594, + "grad_norm": 1.3552035470831052, + "learning_rate": 1.1056823659311158e-05, + "loss": 0.5270178318023682, + "step": 3373 + }, + { + "epoch": 0.9866939611054247, + "grad_norm": 1.484191192307279, + "learning_rate": 1.1052015903573465e-05, + "loss": 0.6879183053970337, + "step": 3374 + }, + { + "epoch": 0.9869864015206902, + "grad_norm": 1.3455375539569006, + "learning_rate": 1.1047207901933453e-05, + "loss": 0.5980993509292603, + "step": 3375 + }, + { + "epoch": 0.9872788419359555, + "grad_norm": 1.3905728698834559, + "learning_rate": 1.1042399655514961e-05, + "loss": 0.5616245865821838, + "step": 3376 + }, + { + "epoch": 0.9875712823512209, + "grad_norm": 1.186489901347366, + "learning_rate": 1.1037591165441887e-05, + "loss": 0.6233900785446167, + "step": 3377 + }, + { + 
"epoch": 0.9878637227664864, + "grad_norm": 1.2146885941659273, + "learning_rate": 1.1032782432838188e-05, + "loss": 0.612476110458374, + "step": 3378 + }, + { + "epoch": 0.9881561631817517, + "grad_norm": 1.4001611534955285, + "learning_rate": 1.1027973458827874e-05, + "loss": 0.7109482288360596, + "step": 3379 + }, + { + "epoch": 0.9884486035970171, + "grad_norm": 1.4339596644962305, + "learning_rate": 1.1023164244535013e-05, + "loss": 0.7105005383491516, + "step": 3380 + }, + { + "epoch": 0.9887410440122825, + "grad_norm": 1.1897152470249062, + "learning_rate": 1.1018354791083731e-05, + "loss": 0.5401301383972168, + "step": 3381 + }, + { + "epoch": 0.9890334844275479, + "grad_norm": 1.2391450524860042, + "learning_rate": 1.101354509959821e-05, + "loss": 0.504487156867981, + "step": 3382 + }, + { + "epoch": 0.9893259248428132, + "grad_norm": 1.5778073649668172, + "learning_rate": 1.1008735171202685e-05, + "loss": 0.5634675025939941, + "step": 3383 + }, + { + "epoch": 0.9896183652580787, + "grad_norm": 1.2596231385186676, + "learning_rate": 1.1003925007021444e-05, + "loss": 0.4828820824623108, + "step": 3384 + }, + { + "epoch": 0.9899108056733441, + "grad_norm": 1.5274466661026922, + "learning_rate": 1.0999114608178837e-05, + "loss": 0.7154384851455688, + "step": 3385 + }, + { + "epoch": 0.9902032460886094, + "grad_norm": 1.4762279403432657, + "learning_rate": 1.0994303975799268e-05, + "loss": 0.626085638999939, + "step": 3386 + }, + { + "epoch": 0.9904956865038749, + "grad_norm": 1.2276097303271793, + "learning_rate": 1.0989493111007186e-05, + "loss": 0.5179756283760071, + "step": 3387 + }, + { + "epoch": 0.9907881269191402, + "grad_norm": 1.443725456432181, + "learning_rate": 1.0984682014927108e-05, + "loss": 0.6992131471633911, + "step": 3388 + }, + { + "epoch": 0.9910805673344056, + "grad_norm": 1.3252934977411588, + "learning_rate": 1.0979870688683598e-05, + "loss": 0.5791709423065186, + "step": 3389 + }, + { + "epoch": 0.991373007749671, + "grad_norm": 
1.2293406038140111, + "learning_rate": 1.097505913340127e-05, + "loss": 0.4703817367553711, + "step": 3390 + }, + { + "epoch": 0.9916654481649364, + "grad_norm": 1.7130975290215298, + "learning_rate": 1.0970247350204797e-05, + "loss": 0.6042051911354065, + "step": 3391 + }, + { + "epoch": 0.9919578885802017, + "grad_norm": 1.5075227997294136, + "learning_rate": 1.0965435340218905e-05, + "loss": 0.6806557178497314, + "step": 3392 + }, + { + "epoch": 0.9922503289954672, + "grad_norm": 1.4336313879655775, + "learning_rate": 1.0960623104568373e-05, + "loss": 0.6372751593589783, + "step": 3393 + }, + { + "epoch": 0.9925427694107326, + "grad_norm": 1.2403325317456615, + "learning_rate": 1.0955810644378031e-05, + "loss": 0.48651185631752014, + "step": 3394 + }, + { + "epoch": 0.9928352098259979, + "grad_norm": 1.5056465468012041, + "learning_rate": 1.0950997960772764e-05, + "loss": 0.5244222283363342, + "step": 3395 + }, + { + "epoch": 0.9931276502412634, + "grad_norm": 1.4445958557594307, + "learning_rate": 1.0946185054877505e-05, + "loss": 0.6194322109222412, + "step": 3396 + }, + { + "epoch": 0.9934200906565287, + "grad_norm": 1.4199918179889868, + "learning_rate": 1.0941371927817241e-05, + "loss": 0.690010666847229, + "step": 3397 + }, + { + "epoch": 0.9937125310717941, + "grad_norm": 1.9110036566867663, + "learning_rate": 1.0936558580717013e-05, + "loss": 0.7332549095153809, + "step": 3398 + }, + { + "epoch": 0.9940049714870595, + "grad_norm": 1.428619260140058, + "learning_rate": 1.093174501470191e-05, + "loss": 0.5264838337898254, + "step": 3399 + }, + { + "epoch": 0.9942974119023249, + "grad_norm": 1.1922668548863515, + "learning_rate": 1.092693123089708e-05, + "loss": 0.624382734298706, + "step": 3400 + }, + { + "epoch": 0.9945898523175903, + "grad_norm": 1.6559518933415514, + "learning_rate": 1.0922117230427705e-05, + "loss": 0.6340548992156982, + "step": 3401 + }, + { + "epoch": 0.9948822927328557, + "grad_norm": 1.194444639014181, + "learning_rate": 
1.0917303014419036e-05, + "loss": 0.4452754855155945, + "step": 3402 + }, + { + "epoch": 0.9951747331481211, + "grad_norm": 1.4241998861848877, + "learning_rate": 1.0912488583996364e-05, + "loss": 0.6180763244628906, + "step": 3403 + }, + { + "epoch": 0.9954671735633864, + "grad_norm": 1.7347993099568695, + "learning_rate": 1.0907673940285032e-05, + "loss": 0.7079293727874756, + "step": 3404 + }, + { + "epoch": 0.9957596139786519, + "grad_norm": 1.6216897448198107, + "learning_rate": 1.090285908441044e-05, + "loss": 0.6608254909515381, + "step": 3405 + }, + { + "epoch": 0.9960520543939172, + "grad_norm": 1.6873856420041173, + "learning_rate": 1.0898044017498024e-05, + "loss": 0.6450251340866089, + "step": 3406 + }, + { + "epoch": 0.9963444948091826, + "grad_norm": 1.4055094844579619, + "learning_rate": 1.089322874067328e-05, + "loss": 0.6267623901367188, + "step": 3407 + }, + { + "epoch": 0.9966369352244481, + "grad_norm": 1.6519553259967432, + "learning_rate": 1.0888413255061747e-05, + "loss": 0.6756424903869629, + "step": 3408 + }, + { + "epoch": 0.9969293756397134, + "grad_norm": 1.4122044676522614, + "learning_rate": 1.0883597561789017e-05, + "loss": 0.6578212976455688, + "step": 3409 + }, + { + "epoch": 0.9972218160549788, + "grad_norm": 1.600222297323414, + "learning_rate": 1.087878166198073e-05, + "loss": 0.8186248540878296, + "step": 3410 + }, + { + "epoch": 0.9975142564702442, + "grad_norm": 1.4575083835366422, + "learning_rate": 1.0873965556762573e-05, + "loss": 0.6689319610595703, + "step": 3411 + }, + { + "epoch": 0.9978066968855096, + "grad_norm": 1.5562694813418687, + "learning_rate": 1.0869149247260282e-05, + "loss": 0.5471278429031372, + "step": 3412 + }, + { + "epoch": 0.9980991373007749, + "grad_norm": 1.239131034827953, + "learning_rate": 1.0864332734599636e-05, + "loss": 0.4673747420310974, + "step": 3413 + }, + { + "epoch": 0.9983915777160404, + "grad_norm": 1.4054798008983762, + "learning_rate": 1.085951601990647e-05, + "loss": 
0.5777568221092224, + "step": 3414 + }, + { + "epoch": 0.9986840181313057, + "grad_norm": 1.6708797545900484, + "learning_rate": 1.0854699104306661e-05, + "loss": 0.6758528351783752, + "step": 3415 + }, + { + "epoch": 0.9989764585465711, + "grad_norm": 1.169154860422915, + "learning_rate": 1.0849881988926132e-05, + "loss": 0.5759919881820679, + "step": 3416 + }, + { + "epoch": 0.9992688989618366, + "grad_norm": 1.3291108456245637, + "learning_rate": 1.0845064674890857e-05, + "loss": 0.606694221496582, + "step": 3417 + }, + { + "epoch": 0.9995613393771019, + "grad_norm": 1.475290016916602, + "learning_rate": 1.0840247163326851e-05, + "loss": 0.627873957157135, + "step": 3418 + }, + { + "epoch": 0.9998537797923673, + "grad_norm": 1.4144594545282698, + "learning_rate": 1.083542945536018e-05, + "loss": 0.5560880303382874, + "step": 3419 + }, + { + "epoch": 1.0, + "grad_norm": 2.3650000488034633, + "learning_rate": 1.0830611552116952e-05, + "loss": 0.5983354449272156, + "step": 3420 + }, + { + "epoch": 1.0002924404152653, + "grad_norm": 1.1169918975180415, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.5012353658676147, + "step": 3421 + }, + { + "epoch": 1.0005848808305309, + "grad_norm": 1.6136465051179143, + "learning_rate": 1.0820975164305498e-05, + "loss": 0.4585106372833252, + "step": 3422 + }, + { + "epoch": 1.0008773212457962, + "grad_norm": 1.2831850675969656, + "learning_rate": 1.0816156681989717e-05, + "loss": 0.5790318846702576, + "step": 3423 + }, + { + "epoch": 1.0011697616610615, + "grad_norm": 1.5258008126885618, + "learning_rate": 1.0811338008902277e-05, + "loss": 0.6016381978988647, + "step": 3424 + }, + { + "epoch": 1.0014622020763269, + "grad_norm": 1.328199543518758, + "learning_rate": 1.0806519146169507e-05, + "loss": 0.5756744146347046, + "step": 3425 + }, + { + "epoch": 1.0017546424915924, + "grad_norm": 1.1865012964818713, + "learning_rate": 1.0801700094917792e-05, + "loss": 0.4776861369609833, + "step": 3426 + }, + { + "epoch": 
1.0020470829068577, + "grad_norm": 1.8629358545914494, + "learning_rate": 1.0796880856273557e-05, + "loss": 0.645842969417572, + "step": 3427 + }, + { + "epoch": 1.002339523322123, + "grad_norm": 1.1125775865964678, + "learning_rate": 1.0792061431363266e-05, + "loss": 0.5645815134048462, + "step": 3428 + }, + { + "epoch": 1.0026319637373886, + "grad_norm": 1.4821141209987578, + "learning_rate": 1.0787241821313428e-05, + "loss": 0.5477975606918335, + "step": 3429 + }, + { + "epoch": 1.002924404152654, + "grad_norm": 1.0992693186116131, + "learning_rate": 1.0782422027250604e-05, + "loss": 0.4064188599586487, + "step": 3430 + }, + { + "epoch": 1.0032168445679193, + "grad_norm": 1.3634803374266724, + "learning_rate": 1.0777602050301384e-05, + "loss": 0.5360208749771118, + "step": 3431 + }, + { + "epoch": 1.0035092849831846, + "grad_norm": 1.4203435807547533, + "learning_rate": 1.0772781891592419e-05, + "loss": 0.6189982891082764, + "step": 3432 + }, + { + "epoch": 1.0038017253984501, + "grad_norm": 1.4406563602891276, + "learning_rate": 1.0767961552250382e-05, + "loss": 0.4623541533946991, + "step": 3433 + }, + { + "epoch": 1.0040941658137155, + "grad_norm": 1.4714321386033957, + "learning_rate": 1.0763141033402e-05, + "loss": 0.6094095706939697, + "step": 3434 + }, + { + "epoch": 1.0043866062289808, + "grad_norm": 1.8852494834868845, + "learning_rate": 1.0758320336174042e-05, + "loss": 0.6997445821762085, + "step": 3435 + }, + { + "epoch": 1.0046790466442463, + "grad_norm": 1.3591852438815977, + "learning_rate": 1.0753499461693316e-05, + "loss": 0.5447323322296143, + "step": 3436 + }, + { + "epoch": 1.0049714870595117, + "grad_norm": 1.526403087538078, + "learning_rate": 1.0748678411086672e-05, + "loss": 0.5851927995681763, + "step": 3437 + }, + { + "epoch": 1.005263927474777, + "grad_norm": 1.2443699762001765, + "learning_rate": 1.0743857185481006e-05, + "loss": 0.5897810459136963, + "step": 3438 + }, + { + "epoch": 1.0055563678900423, + "grad_norm": 
1.277276792826896, + "learning_rate": 1.073903578600324e-05, + "loss": 0.47671592235565186, + "step": 3439 + }, + { + "epoch": 1.0058488083053079, + "grad_norm": 1.5091606917661848, + "learning_rate": 1.0734214213780355e-05, + "loss": 0.5586696863174438, + "step": 3440 + }, + { + "epoch": 1.0061412487205732, + "grad_norm": 1.7171075095449666, + "learning_rate": 1.0729392469939362e-05, + "loss": 0.6817598342895508, + "step": 3441 + }, + { + "epoch": 1.0064336891358385, + "grad_norm": 1.4899951597044825, + "learning_rate": 1.0724570555607311e-05, + "loss": 0.6503750085830688, + "step": 3442 + }, + { + "epoch": 1.0067261295511039, + "grad_norm": 1.516461978227071, + "learning_rate": 1.07197484719113e-05, + "loss": 0.7121564149856567, + "step": 3443 + }, + { + "epoch": 1.0070185699663694, + "grad_norm": 1.2899445236891802, + "learning_rate": 1.071492621997846e-05, + "loss": 0.5760178565979004, + "step": 3444 + }, + { + "epoch": 1.0073110103816347, + "grad_norm": 1.2567067936293974, + "learning_rate": 1.0710103800935965e-05, + "loss": 0.4555765390396118, + "step": 3445 + }, + { + "epoch": 1.0076034507969, + "grad_norm": 1.73824720674272, + "learning_rate": 1.0705281215911021e-05, + "loss": 0.6098523736000061, + "step": 3446 + }, + { + "epoch": 1.0078958912121656, + "grad_norm": 1.3529009112365886, + "learning_rate": 1.070045846603088e-05, + "loss": 0.49828749895095825, + "step": 3447 + }, + { + "epoch": 1.008188331627431, + "grad_norm": 1.6747165622943363, + "learning_rate": 1.0695635552422834e-05, + "loss": 0.5134999752044678, + "step": 3448 + }, + { + "epoch": 1.0084807720426963, + "grad_norm": 1.6379844761327287, + "learning_rate": 1.0690812476214209e-05, + "loss": 0.53546142578125, + "step": 3449 + }, + { + "epoch": 1.0087732124579616, + "grad_norm": 1.353591975524027, + "learning_rate": 1.0685989238532364e-05, + "loss": 0.4955276846885681, + "step": 3450 + }, + { + "epoch": 1.0090656528732271, + "grad_norm": 1.5308502126967132, + "learning_rate": 
1.0681165840504708e-05, + "loss": 0.5693827271461487, + "step": 3451 + }, + { + "epoch": 1.0093580932884925, + "grad_norm": 1.2544327118971752, + "learning_rate": 1.0676342283258676e-05, + "loss": 0.5023596286773682, + "step": 3452 + }, + { + "epoch": 1.0096505337037578, + "grad_norm": 1.4830383604575028, + "learning_rate": 1.0671518567921748e-05, + "loss": 0.5601100921630859, + "step": 3453 + }, + { + "epoch": 1.0099429741190233, + "grad_norm": 1.5483896672555095, + "learning_rate": 1.0666694695621438e-05, + "loss": 0.5744563341140747, + "step": 3454 + }, + { + "epoch": 1.0102354145342887, + "grad_norm": 1.2243241739970807, + "learning_rate": 1.0661870667485298e-05, + "loss": 0.531909704208374, + "step": 3455 + }, + { + "epoch": 1.010527854949554, + "grad_norm": 1.5063779223920848, + "learning_rate": 1.0657046484640911e-05, + "loss": 0.5737274885177612, + "step": 3456 + }, + { + "epoch": 1.0108202953648193, + "grad_norm": 1.3852723907754825, + "learning_rate": 1.0652222148215905e-05, + "loss": 0.5550329089164734, + "step": 3457 + }, + { + "epoch": 1.0111127357800849, + "grad_norm": 1.6139287553682227, + "learning_rate": 1.0647397659337936e-05, + "loss": 0.47795504331588745, + "step": 3458 + }, + { + "epoch": 1.0114051761953502, + "grad_norm": 1.4543285146976004, + "learning_rate": 1.0642573019134703e-05, + "loss": 0.6817550659179688, + "step": 3459 + }, + { + "epoch": 1.0116976166106155, + "grad_norm": 1.1722820118460164, + "learning_rate": 1.063774822873393e-05, + "loss": 0.45271044969558716, + "step": 3460 + }, + { + "epoch": 1.011990057025881, + "grad_norm": 1.537598582173988, + "learning_rate": 1.0632923289263389e-05, + "loss": 0.611709475517273, + "step": 3461 + }, + { + "epoch": 1.0122824974411464, + "grad_norm": 1.4188302760105698, + "learning_rate": 1.0628098201850876e-05, + "loss": 0.5101709961891174, + "step": 3462 + }, + { + "epoch": 1.0125749378564117, + "grad_norm": 1.433548611715836, + "learning_rate": 1.0623272967624227e-05, + "loss": 
0.6550514698028564, + "step": 3463 + }, + { + "epoch": 1.012867378271677, + "grad_norm": 1.2796248072280718, + "learning_rate": 1.0618447587711312e-05, + "loss": 0.479978084564209, + "step": 3464 + }, + { + "epoch": 1.0131598186869426, + "grad_norm": 1.5575466316491844, + "learning_rate": 1.0613622063240035e-05, + "loss": 0.5616719722747803, + "step": 3465 + }, + { + "epoch": 1.013452259102208, + "grad_norm": 1.5865800035698945, + "learning_rate": 1.060879639533833e-05, + "loss": 0.5160953998565674, + "step": 3466 + }, + { + "epoch": 1.0137446995174733, + "grad_norm": 1.5690447549246889, + "learning_rate": 1.0603970585134168e-05, + "loss": 0.6069898009300232, + "step": 3467 + }, + { + "epoch": 1.0140371399327388, + "grad_norm": 1.4806335128762829, + "learning_rate": 1.0599144633755555e-05, + "loss": 0.5800961256027222, + "step": 3468 + }, + { + "epoch": 1.0143295803480041, + "grad_norm": 1.2794607035027592, + "learning_rate": 1.0594318542330528e-05, + "loss": 0.5286555290222168, + "step": 3469 + }, + { + "epoch": 1.0146220207632695, + "grad_norm": 1.3098421389423984, + "learning_rate": 1.0589492311987157e-05, + "loss": 0.44960829615592957, + "step": 3470 + }, + { + "epoch": 1.0149144611785348, + "grad_norm": 1.787788159345536, + "learning_rate": 1.0584665943853538e-05, + "loss": 0.5799434781074524, + "step": 3471 + }, + { + "epoch": 1.0152069015938003, + "grad_norm": 1.3655057393381103, + "learning_rate": 1.057983943905781e-05, + "loss": 0.5142421126365662, + "step": 3472 + }, + { + "epoch": 1.0154993420090657, + "grad_norm": 1.3605211166498987, + "learning_rate": 1.0575012798728141e-05, + "loss": 0.5184981226921082, + "step": 3473 + }, + { + "epoch": 1.015791782424331, + "grad_norm": 1.6630390830837942, + "learning_rate": 1.0570186023992724e-05, + "loss": 0.5747173428535461, + "step": 3474 + }, + { + "epoch": 1.0160842228395965, + "grad_norm": 1.4307323575447104, + "learning_rate": 1.0565359115979792e-05, + "loss": 0.5994119644165039, + "step": 3475 + }, + { + 
"epoch": 1.0163766632548619, + "grad_norm": 1.4001969418816858, + "learning_rate": 1.0560532075817605e-05, + "loss": 0.5020599365234375, + "step": 3476 + }, + { + "epoch": 1.0166691036701272, + "grad_norm": 1.5266027572877992, + "learning_rate": 1.0555704904634451e-05, + "loss": 0.5023698806762695, + "step": 3477 + }, + { + "epoch": 1.0169615440853925, + "grad_norm": 1.3247610849347196, + "learning_rate": 1.0550877603558656e-05, + "loss": 0.3998676538467407, + "step": 3478 + }, + { + "epoch": 1.017253984500658, + "grad_norm": 1.2513443496343235, + "learning_rate": 1.0546050173718569e-05, + "loss": 0.5083760619163513, + "step": 3479 + }, + { + "epoch": 1.0175464249159234, + "grad_norm": 1.3684676716830397, + "learning_rate": 1.0541222616242575e-05, + "loss": 0.49840620160102844, + "step": 3480 + }, + { + "epoch": 1.0178388653311887, + "grad_norm": 1.3303553104888959, + "learning_rate": 1.0536394932259085e-05, + "loss": 0.5302960276603699, + "step": 3481 + }, + { + "epoch": 1.018131305746454, + "grad_norm": 1.338379797222235, + "learning_rate": 1.0531567122896543e-05, + "loss": 0.5694236755371094, + "step": 3482 + }, + { + "epoch": 1.0184237461617196, + "grad_norm": 1.4305833876226657, + "learning_rate": 1.0526739189283414e-05, + "loss": 0.5155326128005981, + "step": 3483 + }, + { + "epoch": 1.018716186576985, + "grad_norm": 1.3829306833852764, + "learning_rate": 1.0521911132548207e-05, + "loss": 0.6254806518554688, + "step": 3484 + }, + { + "epoch": 1.0190086269922503, + "grad_norm": 1.9177430357611984, + "learning_rate": 1.0517082953819442e-05, + "loss": 0.5623525977134705, + "step": 3485 + }, + { + "epoch": 1.0193010674075158, + "grad_norm": 1.67092732120196, + "learning_rate": 1.051225465422568e-05, + "loss": 0.6289865970611572, + "step": 3486 + }, + { + "epoch": 1.0195935078227811, + "grad_norm": 1.4045798370952283, + "learning_rate": 1.050742623489551e-05, + "loss": 0.5935345888137817, + "step": 3487 + }, + { + "epoch": 1.0198859482380465, + "grad_norm": 
1.696103524125264, + "learning_rate": 1.0502597696957542e-05, + "loss": 0.5223839282989502, + "step": 3488 + }, + { + "epoch": 1.0201783886533118, + "grad_norm": 1.9382869881093494, + "learning_rate": 1.0497769041540418e-05, + "loss": 0.6766373515129089, + "step": 3489 + }, + { + "epoch": 1.0204708290685773, + "grad_norm": 1.7017290392950901, + "learning_rate": 1.0492940269772806e-05, + "loss": 0.4934672713279724, + "step": 3490 + }, + { + "epoch": 1.0207632694838427, + "grad_norm": 1.345123127698455, + "learning_rate": 1.0488111382783403e-05, + "loss": 0.5207735300064087, + "step": 3491 + }, + { + "epoch": 1.021055709899108, + "grad_norm": 1.6293706929191067, + "learning_rate": 1.0483282381700933e-05, + "loss": 0.6090695261955261, + "step": 3492 + }, + { + "epoch": 1.0213481503143735, + "grad_norm": 1.2927953162345942, + "learning_rate": 1.0478453267654147e-05, + "loss": 0.5777665376663208, + "step": 3493 + }, + { + "epoch": 1.0216405907296389, + "grad_norm": 1.5951555841510592, + "learning_rate": 1.0473624041771814e-05, + "loss": 0.7241395711898804, + "step": 3494 + }, + { + "epoch": 1.0219330311449042, + "grad_norm": 1.4480767991556562, + "learning_rate": 1.0468794705182742e-05, + "loss": 0.45545506477355957, + "step": 3495 + }, + { + "epoch": 1.0222254715601695, + "grad_norm": 1.422698945534055, + "learning_rate": 1.0463965259015761e-05, + "loss": 0.5519885420799255, + "step": 3496 + }, + { + "epoch": 1.022517911975435, + "grad_norm": 1.509316262763282, + "learning_rate": 1.045913570439972e-05, + "loss": 0.558646559715271, + "step": 3497 + }, + { + "epoch": 1.0228103523907004, + "grad_norm": 1.4960690347564465, + "learning_rate": 1.0454306042463499e-05, + "loss": 0.5259999632835388, + "step": 3498 + }, + { + "epoch": 1.0231027928059657, + "grad_norm": 1.2679527875669403, + "learning_rate": 1.0449476274336004e-05, + "loss": 0.4711627960205078, + "step": 3499 + }, + { + "epoch": 1.0233952332212313, + "grad_norm": 1.5395810801486782, + "learning_rate": 
1.0444646401146161e-05, + "loss": 0.5893874168395996, + "step": 3500 + }, + { + "epoch": 1.0236876736364966, + "grad_norm": 1.498228532943397, + "learning_rate": 1.0439816424022926e-05, + "loss": 0.5596123933792114, + "step": 3501 + }, + { + "epoch": 1.023980114051762, + "grad_norm": 1.3706228388690522, + "learning_rate": 1.0434986344095276e-05, + "loss": 0.5228658318519592, + "step": 3502 + }, + { + "epoch": 1.0242725544670273, + "grad_norm": 1.3956010390337459, + "learning_rate": 1.0430156162492216e-05, + "loss": 0.5520567297935486, + "step": 3503 + }, + { + "epoch": 1.0245649948822928, + "grad_norm": 1.2988010194163804, + "learning_rate": 1.0425325880342762e-05, + "loss": 0.531911313533783, + "step": 3504 + }, + { + "epoch": 1.0248574352975581, + "grad_norm": 1.5296749459710133, + "learning_rate": 1.0420495498775974e-05, + "loss": 0.58717942237854, + "step": 3505 + }, + { + "epoch": 1.0251498757128235, + "grad_norm": 1.3937094974123596, + "learning_rate": 1.0415665018920919e-05, + "loss": 0.4972108006477356, + "step": 3506 + }, + { + "epoch": 1.025442316128089, + "grad_norm": 1.4653045497635373, + "learning_rate": 1.0410834441906692e-05, + "loss": 0.567977249622345, + "step": 3507 + }, + { + "epoch": 1.0257347565433543, + "grad_norm": 1.4984249963013099, + "learning_rate": 1.0406003768862416e-05, + "loss": 0.568755567073822, + "step": 3508 + }, + { + "epoch": 1.0260271969586197, + "grad_norm": 1.5140899451878516, + "learning_rate": 1.0401173000917224e-05, + "loss": 0.5668960809707642, + "step": 3509 + }, + { + "epoch": 1.026319637373885, + "grad_norm": 1.5737165138245863, + "learning_rate": 1.0396342139200282e-05, + "loss": 0.5956743955612183, + "step": 3510 + }, + { + "epoch": 1.0266120777891505, + "grad_norm": 1.3000472899601168, + "learning_rate": 1.0391511184840775e-05, + "loss": 0.5258834362030029, + "step": 3511 + }, + { + "epoch": 1.0269045182044159, + "grad_norm": 1.52676259543146, + "learning_rate": 1.038668013896791e-05, + "loss": 0.7358168363571167, + 
"step": 3512 + }, + { + "epoch": 1.0271969586196812, + "grad_norm": 1.6868440270891885, + "learning_rate": 1.0381849002710914e-05, + "loss": 0.5845209956169128, + "step": 3513 + }, + { + "epoch": 1.0274893990349467, + "grad_norm": 1.4837942506085555, + "learning_rate": 1.0377017777199034e-05, + "loss": 0.4475495219230652, + "step": 3514 + }, + { + "epoch": 1.027781839450212, + "grad_norm": 1.2830033919091985, + "learning_rate": 1.0372186463561542e-05, + "loss": 0.5555804371833801, + "step": 3515 + }, + { + "epoch": 1.0280742798654774, + "grad_norm": 1.65016913167245, + "learning_rate": 1.0367355062927726e-05, + "loss": 0.5927316546440125, + "step": 3516 + }, + { + "epoch": 1.0283667202807427, + "grad_norm": 1.3376999356667882, + "learning_rate": 1.0362523576426897e-05, + "loss": 0.47281715273857117, + "step": 3517 + }, + { + "epoch": 1.0286591606960083, + "grad_norm": 1.4195049172993812, + "learning_rate": 1.0357692005188387e-05, + "loss": 0.5275483727455139, + "step": 3518 + }, + { + "epoch": 1.0289516011112736, + "grad_norm": 1.6670234220228792, + "learning_rate": 1.0352860350341547e-05, + "loss": 0.5740839242935181, + "step": 3519 + }, + { + "epoch": 1.029244041526539, + "grad_norm": 1.3668449892598942, + "learning_rate": 1.0348028613015747e-05, + "loss": 0.6030054688453674, + "step": 3520 + }, + { + "epoch": 1.0295364819418042, + "grad_norm": 1.4423080423666719, + "learning_rate": 1.034319679434037e-05, + "loss": 0.5415347814559937, + "step": 3521 + }, + { + "epoch": 1.0298289223570698, + "grad_norm": 1.4756281264212951, + "learning_rate": 1.033836489544483e-05, + "loss": 0.5850083231925964, + "step": 3522 + }, + { + "epoch": 1.0301213627723351, + "grad_norm": 1.516707487989418, + "learning_rate": 1.0333532917458556e-05, + "loss": 0.47614163160324097, + "step": 3523 + }, + { + "epoch": 1.0304138031876005, + "grad_norm": 1.5357316287676814, + "learning_rate": 1.0328700861510987e-05, + "loss": 0.5645745992660522, + "step": 3524 + }, + { + "epoch": 
1.030706243602866, + "grad_norm": 1.3186548714848774, + "learning_rate": 1.0323868728731591e-05, + "loss": 0.5729008913040161, + "step": 3525 + }, + { + "epoch": 1.0309986840181313, + "grad_norm": 1.373781447264802, + "learning_rate": 1.031903652024985e-05, + "loss": 0.5177778005599976, + "step": 3526 + }, + { + "epoch": 1.0312911244333967, + "grad_norm": 1.390457184292636, + "learning_rate": 1.0314204237195263e-05, + "loss": 0.49413079023361206, + "step": 3527 + }, + { + "epoch": 1.031583564848662, + "grad_norm": 1.4789369230243037, + "learning_rate": 1.0309371880697342e-05, + "loss": 0.5074756145477295, + "step": 3528 + }, + { + "epoch": 1.0318760052639275, + "grad_norm": 1.590543948205407, + "learning_rate": 1.0304539451885629e-05, + "loss": 0.5601285696029663, + "step": 3529 + }, + { + "epoch": 1.0321684456791929, + "grad_norm": 1.3273904087281212, + "learning_rate": 1.029970695188967e-05, + "loss": 0.48358121514320374, + "step": 3530 + }, + { + "epoch": 1.0324608860944582, + "grad_norm": 1.4772927313727484, + "learning_rate": 1.0294874381839033e-05, + "loss": 0.4472161829471588, + "step": 3531 + }, + { + "epoch": 1.0327533265097237, + "grad_norm": 1.4129544794929634, + "learning_rate": 1.02900417428633e-05, + "loss": 0.6011627912521362, + "step": 3532 + }, + { + "epoch": 1.033045766924989, + "grad_norm": 1.354725840134447, + "learning_rate": 1.0285209036092076e-05, + "loss": 0.5212395191192627, + "step": 3533 + }, + { + "epoch": 1.0333382073402544, + "grad_norm": 1.844431950477259, + "learning_rate": 1.0280376262654971e-05, + "loss": 0.5433810949325562, + "step": 3534 + }, + { + "epoch": 1.0336306477555197, + "grad_norm": 1.4124385690995565, + "learning_rate": 1.0275543423681622e-05, + "loss": 0.5215464234352112, + "step": 3535 + }, + { + "epoch": 1.0339230881707853, + "grad_norm": 1.3386210311441036, + "learning_rate": 1.0270710520301672e-05, + "loss": 0.511099100112915, + "step": 3536 + }, + { + "epoch": 1.0342155285860506, + "grad_norm": 1.3822305233430652, 
+ "learning_rate": 1.0265877553644783e-05, + "loss": 0.4954407811164856, + "step": 3537 + }, + { + "epoch": 1.034507969001316, + "grad_norm": 1.5424734752588294, + "learning_rate": 1.0261044524840633e-05, + "loss": 0.5491081476211548, + "step": 3538 + }, + { + "epoch": 1.0348004094165815, + "grad_norm": 1.5108040554468096, + "learning_rate": 1.0256211435018912e-05, + "loss": 0.43202829360961914, + "step": 3539 + }, + { + "epoch": 1.0350928498318468, + "grad_norm": 1.5814180623509084, + "learning_rate": 1.0251378285309326e-05, + "loss": 0.4721212089061737, + "step": 3540 + }, + { + "epoch": 1.0353852902471121, + "grad_norm": 1.6070602892086314, + "learning_rate": 1.0246545076841596e-05, + "loss": 0.5621099472045898, + "step": 3541 + }, + { + "epoch": 1.0356777306623774, + "grad_norm": 1.5170284121136077, + "learning_rate": 1.0241711810745452e-05, + "loss": 0.5572346448898315, + "step": 3542 + }, + { + "epoch": 1.035970171077643, + "grad_norm": 1.3590672633285579, + "learning_rate": 1.023687848815064e-05, + "loss": 0.40916550159454346, + "step": 3543 + }, + { + "epoch": 1.0362626114929083, + "grad_norm": 1.5018716604616227, + "learning_rate": 1.0232045110186926e-05, + "loss": 0.5370572805404663, + "step": 3544 + }, + { + "epoch": 1.0365550519081737, + "grad_norm": 1.603253593979403, + "learning_rate": 1.0227211677984074e-05, + "loss": 0.5381634831428528, + "step": 3545 + }, + { + "epoch": 1.0368474923234392, + "grad_norm": 1.3795492267662186, + "learning_rate": 1.0222378192671878e-05, + "loss": 0.4807749092578888, + "step": 3546 + }, + { + "epoch": 1.0371399327387045, + "grad_norm": 1.4973562396665303, + "learning_rate": 1.0217544655380129e-05, + "loss": 0.5673447847366333, + "step": 3547 + }, + { + "epoch": 1.0374323731539699, + "grad_norm": 1.6360254172890698, + "learning_rate": 1.0212711067238639e-05, + "loss": 0.5259549021720886, + "step": 3548 + }, + { + "epoch": 1.0377248135692352, + "grad_norm": 1.4439961362376934, + "learning_rate": 1.0207877429377232e-05, + 
"loss": 0.48267534375190735, + "step": 3549 + }, + { + "epoch": 1.0380172539845007, + "grad_norm": 1.438603988067733, + "learning_rate": 1.0203043742925738e-05, + "loss": 0.44843387603759766, + "step": 3550 + }, + { + "epoch": 1.038309694399766, + "grad_norm": 1.5765887333733293, + "learning_rate": 1.0198210009014005e-05, + "loss": 0.8050575256347656, + "step": 3551 + }, + { + "epoch": 1.0386021348150314, + "grad_norm": 1.3559927051954717, + "learning_rate": 1.0193376228771887e-05, + "loss": 0.590203046798706, + "step": 3552 + }, + { + "epoch": 1.0388945752302967, + "grad_norm": 1.4420953878245995, + "learning_rate": 1.0188542403329252e-05, + "loss": 0.5974458456039429, + "step": 3553 + }, + { + "epoch": 1.0391870156455623, + "grad_norm": 1.4408311686918343, + "learning_rate": 1.0183708533815975e-05, + "loss": 0.4628743827342987, + "step": 3554 + }, + { + "epoch": 1.0394794560608276, + "grad_norm": 1.538902326182442, + "learning_rate": 1.0178874621361944e-05, + "loss": 0.6738137006759644, + "step": 3555 + }, + { + "epoch": 1.039771896476093, + "grad_norm": 1.2584091446339778, + "learning_rate": 1.0174040667097061e-05, + "loss": 0.48062413930892944, + "step": 3556 + }, + { + "epoch": 1.0400643368913585, + "grad_norm": 1.4180020858721523, + "learning_rate": 1.016920667215123e-05, + "loss": 0.564401388168335, + "step": 3557 + }, + { + "epoch": 1.0403567773066238, + "grad_norm": 1.5220611788966263, + "learning_rate": 1.0164372637654367e-05, + "loss": 0.4035246968269348, + "step": 3558 + }, + { + "epoch": 1.0406492177218891, + "grad_norm": 1.3759176374876299, + "learning_rate": 1.0159538564736399e-05, + "loss": 0.4484536051750183, + "step": 3559 + }, + { + "epoch": 1.0409416581371547, + "grad_norm": 1.5320485493087415, + "learning_rate": 1.0154704454527265e-05, + "loss": 0.6257200837135315, + "step": 3560 + }, + { + "epoch": 1.04123409855242, + "grad_norm": 1.7250809702027206, + "learning_rate": 1.0149870308156899e-05, + "loss": 0.5541477799415588, + "step": 3561 + }, + 
{ + "epoch": 1.0415265389676853, + "grad_norm": 1.5360272319586679, + "learning_rate": 1.0145036126755264e-05, + "loss": 0.6248821020126343, + "step": 3562 + }, + { + "epoch": 1.0418189793829506, + "grad_norm": 1.3930925306710389, + "learning_rate": 1.0140201911452318e-05, + "loss": 0.574689507484436, + "step": 3563 + }, + { + "epoch": 1.0421114197982162, + "grad_norm": 1.45907196010364, + "learning_rate": 1.0135367663378025e-05, + "loss": 0.5873313546180725, + "step": 3564 + }, + { + "epoch": 1.0424038602134815, + "grad_norm": 1.7911480245961826, + "learning_rate": 1.0130533383662361e-05, + "loss": 0.6662088632583618, + "step": 3565 + }, + { + "epoch": 1.0426963006287469, + "grad_norm": 1.688392121046196, + "learning_rate": 1.0125699073435316e-05, + "loss": 0.6517773866653442, + "step": 3566 + }, + { + "epoch": 1.0429887410440122, + "grad_norm": 1.8273298961737783, + "learning_rate": 1.0120864733826877e-05, + "loss": 0.6311444640159607, + "step": 3567 + }, + { + "epoch": 1.0432811814592777, + "grad_norm": 1.4367651958960501, + "learning_rate": 1.0116030365967037e-05, + "loss": 0.49060457944869995, + "step": 3568 + }, + { + "epoch": 1.043573621874543, + "grad_norm": 1.609897253932932, + "learning_rate": 1.0111195970985813e-05, + "loss": 0.5405893921852112, + "step": 3569 + }, + { + "epoch": 1.0438660622898084, + "grad_norm": 1.4830806836977097, + "learning_rate": 1.01063615500132e-05, + "loss": 0.482162743806839, + "step": 3570 + }, + { + "epoch": 1.044158502705074, + "grad_norm": 1.4107369824500982, + "learning_rate": 1.0101527104179224e-05, + "loss": 0.4542362093925476, + "step": 3571 + }, + { + "epoch": 1.0444509431203393, + "grad_norm": 1.5628480243599212, + "learning_rate": 1.00966926346139e-05, + "loss": 0.6157265305519104, + "step": 3572 + }, + { + "epoch": 1.0447433835356046, + "grad_norm": 1.6143915430154057, + "learning_rate": 1.0091858142447266e-05, + "loss": 0.6591875553131104, + "step": 3573 + }, + { + "epoch": 1.04503582395087, + "grad_norm": 
1.410506710976703, + "learning_rate": 1.0087023628809347e-05, + "loss": 0.5686256885528564, + "step": 3574 + }, + { + "epoch": 1.0453282643661355, + "grad_norm": 1.2971662039691743, + "learning_rate": 1.0082189094830183e-05, + "loss": 0.45131799578666687, + "step": 3575 + }, + { + "epoch": 1.0456207047814008, + "grad_norm": 1.6508365467694242, + "learning_rate": 1.0077354541639821e-05, + "loss": 0.5787829160690308, + "step": 3576 + }, + { + "epoch": 1.0459131451966661, + "grad_norm": 1.6915833775625508, + "learning_rate": 1.0072519970368303e-05, + "loss": 0.5755574107170105, + "step": 3577 + }, + { + "epoch": 1.0462055856119317, + "grad_norm": 1.4591194150184388, + "learning_rate": 1.0067685382145683e-05, + "loss": 0.5017693638801575, + "step": 3578 + }, + { + "epoch": 1.046498026027197, + "grad_norm": 1.508478769597254, + "learning_rate": 1.0062850778102017e-05, + "loss": 0.5096016526222229, + "step": 3579 + }, + { + "epoch": 1.0467904664424623, + "grad_norm": 1.443966956114079, + "learning_rate": 1.0058016159367365e-05, + "loss": 0.4988967180252075, + "step": 3580 + }, + { + "epoch": 1.0470829068577276, + "grad_norm": 1.5186890104543016, + "learning_rate": 1.0053181527071786e-05, + "loss": 0.5410172939300537, + "step": 3581 + }, + { + "epoch": 1.0473753472729932, + "grad_norm": 1.7546625585964495, + "learning_rate": 1.004834688234535e-05, + "loss": 0.5980710983276367, + "step": 3582 + }, + { + "epoch": 1.0476677876882585, + "grad_norm": 1.347751797857706, + "learning_rate": 1.0043512226318124e-05, + "loss": 0.4737449586391449, + "step": 3583 + }, + { + "epoch": 1.0479602281035238, + "grad_norm": 1.5493397390355739, + "learning_rate": 1.003867756012018e-05, + "loss": 0.6106469631195068, + "step": 3584 + }, + { + "epoch": 1.0482526685187894, + "grad_norm": 1.6077524420960543, + "learning_rate": 1.0033842884881593e-05, + "loss": 0.48002901673316956, + "step": 3585 + }, + { + "epoch": 1.0485451089340547, + "grad_norm": 1.4065529576638647, + "learning_rate": 
1.0029008201732433e-05, + "loss": 0.5101731419563293, + "step": 3586 + }, + { + "epoch": 1.04883754934932, + "grad_norm": 1.6961382740739117, + "learning_rate": 1.0024173511802786e-05, + "loss": 0.6350706219673157, + "step": 3587 + }, + { + "epoch": 1.0491299897645854, + "grad_norm": 1.4947432010936612, + "learning_rate": 1.0019338816222725e-05, + "loss": 0.5268979072570801, + "step": 3588 + }, + { + "epoch": 1.049422430179851, + "grad_norm": 1.4955724361545546, + "learning_rate": 1.0014504116122335e-05, + "loss": 0.5670457482337952, + "step": 3589 + }, + { + "epoch": 1.0497148705951163, + "grad_norm": 1.7472274991386971, + "learning_rate": 1.0009669412631697e-05, + "loss": 0.6200711727142334, + "step": 3590 + }, + { + "epoch": 1.0500073110103816, + "grad_norm": 1.5117580085419962, + "learning_rate": 1.0004834706880891e-05, + "loss": 0.44014686346054077, + "step": 3591 + }, + { + "epoch": 1.050299751425647, + "grad_norm": 1.4806608082423456, + "learning_rate": 1e-05, + "loss": 0.4690900146961212, + "step": 3592 + }, + { + "epoch": 1.0505921918409125, + "grad_norm": 1.5061085663062508, + "learning_rate": 9.995165293119112e-06, + "loss": 0.5791969299316406, + "step": 3593 + }, + { + "epoch": 1.0508846322561778, + "grad_norm": 1.403652610849375, + "learning_rate": 9.990330587368306e-06, + "loss": 0.5566244125366211, + "step": 3594 + }, + { + "epoch": 1.0511770726714431, + "grad_norm": 1.47068511144412, + "learning_rate": 9.985495883877668e-06, + "loss": 0.5201646685600281, + "step": 3595 + }, + { + "epoch": 1.0514695130867087, + "grad_norm": 1.3147681531847344, + "learning_rate": 9.980661183777277e-06, + "loss": 0.44774526357650757, + "step": 3596 + }, + { + "epoch": 1.051761953501974, + "grad_norm": 1.641682032458417, + "learning_rate": 9.975826488197217e-06, + "loss": 0.5346901416778564, + "step": 3597 + }, + { + "epoch": 1.0520543939172393, + "grad_norm": 1.516503297952313, + "learning_rate": 9.970991798267568e-06, + "loss": 0.4639764428138733, + "step": 3598 + }, 
+ { + "epoch": 1.0523468343325049, + "grad_norm": 1.5385061459553095, + "learning_rate": 9.966157115118412e-06, + "loss": 0.5505763292312622, + "step": 3599 + }, + { + "epoch": 1.0526392747477702, + "grad_norm": 1.5065604638146801, + "learning_rate": 9.961322439879821e-06, + "loss": 0.5187631845474243, + "step": 3600 + }, + { + "epoch": 1.0529317151630355, + "grad_norm": 1.5837365707911437, + "learning_rate": 9.95648777368188e-06, + "loss": 0.5990081429481506, + "step": 3601 + }, + { + "epoch": 1.0532241555783008, + "grad_norm": 1.5943954940503307, + "learning_rate": 9.951653117654653e-06, + "loss": 0.5926306843757629, + "step": 3602 + }, + { + "epoch": 1.0535165959935664, + "grad_norm": 1.5828616151591308, + "learning_rate": 9.946818472928215e-06, + "loss": 0.5294582843780518, + "step": 3603 + }, + { + "epoch": 1.0538090364088317, + "grad_norm": 1.4492789926079117, + "learning_rate": 9.941983840632637e-06, + "loss": 0.5442140102386475, + "step": 3604 + }, + { + "epoch": 1.054101476824097, + "grad_norm": 1.5960181258924353, + "learning_rate": 9.937149221897984e-06, + "loss": 0.5888028740882874, + "step": 3605 + }, + { + "epoch": 1.0543939172393624, + "grad_norm": 1.6823030520405429, + "learning_rate": 9.93231461785432e-06, + "loss": 0.7545796632766724, + "step": 3606 + }, + { + "epoch": 1.054686357654628, + "grad_norm": 1.4193397986001617, + "learning_rate": 9.9274800296317e-06, + "loss": 0.4850383996963501, + "step": 3607 + }, + { + "epoch": 1.0549787980698933, + "grad_norm": 1.7761903590602732, + "learning_rate": 9.922645458360182e-06, + "loss": 0.5658243894577026, + "step": 3608 + }, + { + "epoch": 1.0552712384851586, + "grad_norm": 1.913627443584159, + "learning_rate": 9.917810905169818e-06, + "loss": 0.6526712775230408, + "step": 3609 + }, + { + "epoch": 1.0555636789004241, + "grad_norm": 1.7132894383948376, + "learning_rate": 9.912976371190657e-06, + "loss": 0.6125987768173218, + "step": 3610 + }, + { + "epoch": 1.0558561193156895, + "grad_norm": 
1.3139938490016692, + "learning_rate": 9.908141857552737e-06, + "loss": 0.40159785747528076, + "step": 3611 + }, + { + "epoch": 1.0561485597309548, + "grad_norm": 1.7052081125083998, + "learning_rate": 9.903307365386103e-06, + "loss": 0.6628924608230591, + "step": 3612 + }, + { + "epoch": 1.05644100014622, + "grad_norm": 1.638888923278887, + "learning_rate": 9.898472895820783e-06, + "loss": 0.6083816289901733, + "step": 3613 + }, + { + "epoch": 1.0567334405614857, + "grad_norm": 1.564812875636552, + "learning_rate": 9.893638449986806e-06, + "loss": 0.5349488854408264, + "step": 3614 + }, + { + "epoch": 1.057025880976751, + "grad_norm": 1.5340813216184335, + "learning_rate": 9.888804029014194e-06, + "loss": 0.6119222044944763, + "step": 3615 + }, + { + "epoch": 1.0573183213920163, + "grad_norm": 1.367693459120948, + "learning_rate": 9.883969634032964e-06, + "loss": 0.531359851360321, + "step": 3616 + }, + { + "epoch": 1.0576107618072819, + "grad_norm": 1.6344237981695606, + "learning_rate": 9.879135266173127e-06, + "loss": 0.6604791879653931, + "step": 3617 + }, + { + "epoch": 1.0579032022225472, + "grad_norm": 1.4352324880813543, + "learning_rate": 9.874300926564689e-06, + "loss": 0.4691445231437683, + "step": 3618 + }, + { + "epoch": 1.0581956426378125, + "grad_norm": 1.2910646539258182, + "learning_rate": 9.869466616337642e-06, + "loss": 0.5690087080001831, + "step": 3619 + }, + { + "epoch": 1.0584880830530778, + "grad_norm": 1.403700057828388, + "learning_rate": 9.86463233662198e-06, + "loss": 0.5426729917526245, + "step": 3620 + }, + { + "epoch": 1.0587805234683434, + "grad_norm": 1.578075476325045, + "learning_rate": 9.859798088547687e-06, + "loss": 0.5640411376953125, + "step": 3621 + }, + { + "epoch": 1.0590729638836087, + "grad_norm": 1.4838032713556162, + "learning_rate": 9.854963873244738e-06, + "loss": 0.6724091172218323, + "step": 3622 + }, + { + "epoch": 1.059365404298874, + "grad_norm": 1.4145337335983883, + "learning_rate": 9.850129691843105e-06, + 
"loss": 0.5448887348175049, + "step": 3623 + }, + { + "epoch": 1.0596578447141396, + "grad_norm": 1.5190623574509117, + "learning_rate": 9.845295545472742e-06, + "loss": 0.5555344820022583, + "step": 3624 + }, + { + "epoch": 1.059950285129405, + "grad_norm": 1.6879154347320564, + "learning_rate": 9.840461435263604e-06, + "loss": 0.5053969621658325, + "step": 3625 + }, + { + "epoch": 1.0602427255446703, + "grad_norm": 1.5675488432589333, + "learning_rate": 9.835627362345636e-06, + "loss": 0.5866390466690063, + "step": 3626 + }, + { + "epoch": 1.0605351659599356, + "grad_norm": 1.81247497722172, + "learning_rate": 9.830793327848773e-06, + "loss": 0.5936717987060547, + "step": 3627 + }, + { + "epoch": 1.0608276063752011, + "grad_norm": 1.5536122437945554, + "learning_rate": 9.82595933290294e-06, + "loss": 0.6009070873260498, + "step": 3628 + }, + { + "epoch": 1.0611200467904665, + "grad_norm": 1.588445125911092, + "learning_rate": 9.821125378638059e-06, + "loss": 0.5361435413360596, + "step": 3629 + }, + { + "epoch": 1.0614124872057318, + "grad_norm": 1.4856331412797505, + "learning_rate": 9.816291466184025e-06, + "loss": 0.5763939619064331, + "step": 3630 + }, + { + "epoch": 1.061704927620997, + "grad_norm": 1.618308780160016, + "learning_rate": 9.81145759667075e-06, + "loss": 0.57512366771698, + "step": 3631 + }, + { + "epoch": 1.0619973680362627, + "grad_norm": 1.4990484363196022, + "learning_rate": 9.806623771228115e-06, + "loss": 0.6144367456436157, + "step": 3632 + }, + { + "epoch": 1.062289808451528, + "grad_norm": 1.5222649609215075, + "learning_rate": 9.801789990985997e-06, + "loss": 0.5715698003768921, + "step": 3633 + }, + { + "epoch": 1.0625822488667933, + "grad_norm": 1.3438421364889925, + "learning_rate": 9.796956257074263e-06, + "loss": 0.632681131362915, + "step": 3634 + }, + { + "epoch": 1.0628746892820589, + "grad_norm": 1.2996961363437054, + "learning_rate": 9.79212257062277e-06, + "loss": 0.5362547636032104, + "step": 3635 + }, + { + "epoch": 
1.0631671296973242, + "grad_norm": 1.2451948790215157, + "learning_rate": 9.787288932761361e-06, + "loss": 0.553846538066864, + "step": 3636 + }, + { + "epoch": 1.0634595701125895, + "grad_norm": 2.0033616068213456, + "learning_rate": 9.782455344619871e-06, + "loss": 0.7200362682342529, + "step": 3637 + }, + { + "epoch": 1.063752010527855, + "grad_norm": 1.5986858016901493, + "learning_rate": 9.777621807328126e-06, + "loss": 0.5544596910476685, + "step": 3638 + }, + { + "epoch": 1.0640444509431204, + "grad_norm": 1.9336329750915207, + "learning_rate": 9.772788322015926e-06, + "loss": 0.687321126461029, + "step": 3639 + }, + { + "epoch": 1.0643368913583857, + "grad_norm": 1.4658162923896687, + "learning_rate": 9.767954889813076e-06, + "loss": 0.4986167550086975, + "step": 3640 + }, + { + "epoch": 1.064629331773651, + "grad_norm": 1.6835767903522258, + "learning_rate": 9.763121511849358e-06, + "loss": 0.5021307468414307, + "step": 3641 + }, + { + "epoch": 1.0649217721889166, + "grad_norm": 1.6084332451713093, + "learning_rate": 9.758288189254548e-06, + "loss": 0.5542711019515991, + "step": 3642 + }, + { + "epoch": 1.065214212604182, + "grad_norm": 1.4567212868909125, + "learning_rate": 9.753454923158407e-06, + "loss": 0.5161126852035522, + "step": 3643 + }, + { + "epoch": 1.0655066530194472, + "grad_norm": 1.3588587385016027, + "learning_rate": 9.748621714690674e-06, + "loss": 0.6041361093521118, + "step": 3644 + }, + { + "epoch": 1.0657990934347126, + "grad_norm": 1.5312936542968558, + "learning_rate": 9.74378856498109e-06, + "loss": 0.5252672433853149, + "step": 3645 + }, + { + "epoch": 1.0660915338499781, + "grad_norm": 1.508976518247356, + "learning_rate": 9.738955475159369e-06, + "loss": 0.5198208093643188, + "step": 3646 + }, + { + "epoch": 1.0663839742652435, + "grad_norm": 1.617831688267231, + "learning_rate": 9.734122446355219e-06, + "loss": 0.5547968149185181, + "step": 3647 + }, + { + "epoch": 1.0666764146805088, + "grad_norm": 1.3192996989880752, + 
"learning_rate": 9.72928947969833e-06, + "loss": 0.5854370594024658, + "step": 3648 + }, + { + "epoch": 1.0669688550957743, + "grad_norm": 1.4612935433441103, + "learning_rate": 9.724456576318383e-06, + "loss": 0.5199173092842102, + "step": 3649 + }, + { + "epoch": 1.0672612955110397, + "grad_norm": 1.5597306303032106, + "learning_rate": 9.71962373734503e-06, + "loss": 0.49684566259384155, + "step": 3650 + }, + { + "epoch": 1.067553735926305, + "grad_norm": 1.5081407431370675, + "learning_rate": 9.714790963907927e-06, + "loss": 0.593805193901062, + "step": 3651 + }, + { + "epoch": 1.0678461763415703, + "grad_norm": 1.6501383657240702, + "learning_rate": 9.7099582571367e-06, + "loss": 0.5524622201919556, + "step": 3652 + }, + { + "epoch": 1.0681386167568359, + "grad_norm": 1.589706723326761, + "learning_rate": 9.70512561816097e-06, + "loss": 0.5796955227851868, + "step": 3653 + }, + { + "epoch": 1.0684310571721012, + "grad_norm": 1.6252059263075247, + "learning_rate": 9.700293048110335e-06, + "loss": 0.5470535159111023, + "step": 3654 + }, + { + "epoch": 1.0687234975873665, + "grad_norm": 1.180447413588476, + "learning_rate": 9.695460548114374e-06, + "loss": 0.5438790321350098, + "step": 3655 + }, + { + "epoch": 1.069015938002632, + "grad_norm": 1.5271792603913512, + "learning_rate": 9.69062811930266e-06, + "loss": 0.6324823498725891, + "step": 3656 + }, + { + "epoch": 1.0693083784178974, + "grad_norm": 1.5347219744388463, + "learning_rate": 9.68579576280474e-06, + "loss": 0.5261266231536865, + "step": 3657 + }, + { + "epoch": 1.0696008188331627, + "grad_norm": 1.408009396375569, + "learning_rate": 9.680963479750152e-06, + "loss": 0.49827292561531067, + "step": 3658 + }, + { + "epoch": 1.069893259248428, + "grad_norm": 1.8715423798930795, + "learning_rate": 9.67613127126841e-06, + "loss": 0.5273935794830322, + "step": 3659 + }, + { + "epoch": 1.0701856996636936, + "grad_norm": 1.5578682729768194, + "learning_rate": 9.671299138489017e-06, + "loss": 
0.5816709995269775, + "step": 3660 + }, + { + "epoch": 1.070478140078959, + "grad_norm": 1.7016426471813102, + "learning_rate": 9.66646708254145e-06, + "loss": 0.5591616630554199, + "step": 3661 + }, + { + "epoch": 1.0707705804942242, + "grad_norm": 1.5738449439513973, + "learning_rate": 9.661635104555172e-06, + "loss": 0.581566572189331, + "step": 3662 + }, + { + "epoch": 1.0710630209094898, + "grad_norm": 1.5518333497561696, + "learning_rate": 9.656803205659632e-06, + "loss": 0.5339047312736511, + "step": 3663 + }, + { + "epoch": 1.0713554613247551, + "grad_norm": 1.6271916881343873, + "learning_rate": 9.651971386984258e-06, + "loss": 0.5200103521347046, + "step": 3664 + }, + { + "epoch": 1.0716479017400204, + "grad_norm": 1.6521270716003156, + "learning_rate": 9.647139649658454e-06, + "loss": 0.7201805114746094, + "step": 3665 + }, + { + "epoch": 1.0719403421552858, + "grad_norm": 1.534541270100013, + "learning_rate": 9.642307994811614e-06, + "loss": 0.4801551103591919, + "step": 3666 + }, + { + "epoch": 1.0722327825705513, + "grad_norm": 1.5215862158184845, + "learning_rate": 9.637476423573106e-06, + "loss": 0.5809728503227234, + "step": 3667 + }, + { + "epoch": 1.0725252229858167, + "grad_norm": 1.6423129831570165, + "learning_rate": 9.632644937072277e-06, + "loss": 0.6493573188781738, + "step": 3668 + }, + { + "epoch": 1.072817663401082, + "grad_norm": 1.5984538738730298, + "learning_rate": 9.627813536438461e-06, + "loss": 0.5858349800109863, + "step": 3669 + }, + { + "epoch": 1.0731101038163473, + "grad_norm": 1.5154205099747375, + "learning_rate": 9.622982222800968e-06, + "loss": 0.604835033416748, + "step": 3670 + }, + { + "epoch": 1.0734025442316129, + "grad_norm": 1.6814842296922758, + "learning_rate": 9.618150997289091e-06, + "loss": 0.6168441772460938, + "step": 3671 + }, + { + "epoch": 1.0736949846468782, + "grad_norm": 1.4221905571438933, + "learning_rate": 9.613319861032093e-06, + "loss": 0.5297094583511353, + "step": 3672 + }, + { + "epoch": 
1.0739874250621435, + "grad_norm": 1.4440813284349416, + "learning_rate": 9.608488815159226e-06, + "loss": 0.513571560382843, + "step": 3673 + }, + { + "epoch": 1.074279865477409, + "grad_norm": 1.4202335692197015, + "learning_rate": 9.603657860799721e-06, + "loss": 0.4383837580680847, + "step": 3674 + }, + { + "epoch": 1.0745723058926744, + "grad_norm": 1.660966167075539, + "learning_rate": 9.59882699908278e-06, + "loss": 0.5428420305252075, + "step": 3675 + }, + { + "epoch": 1.0748647463079397, + "grad_norm": 1.331252403406651, + "learning_rate": 9.593996231137587e-06, + "loss": 0.5193662047386169, + "step": 3676 + }, + { + "epoch": 1.0751571867232053, + "grad_norm": 1.1890998376752542, + "learning_rate": 9.589165558093311e-06, + "loss": 0.47949904203414917, + "step": 3677 + }, + { + "epoch": 1.0754496271384706, + "grad_norm": 1.4440336102087743, + "learning_rate": 9.584334981079085e-06, + "loss": 0.5092326402664185, + "step": 3678 + }, + { + "epoch": 1.075742067553736, + "grad_norm": 1.642845621448486, + "learning_rate": 9.579504501224028e-06, + "loss": 0.6627280712127686, + "step": 3679 + }, + { + "epoch": 1.0760345079690012, + "grad_norm": 1.4633415466571795, + "learning_rate": 9.57467411965724e-06, + "loss": 0.45087775588035583, + "step": 3680 + }, + { + "epoch": 1.0763269483842668, + "grad_norm": 1.5441336288481917, + "learning_rate": 9.569843837507788e-06, + "loss": 0.5745380520820618, + "step": 3681 + }, + { + "epoch": 1.0766193887995321, + "grad_norm": 1.4663672637613454, + "learning_rate": 9.565013655904728e-06, + "loss": 0.4410436749458313, + "step": 3682 + }, + { + "epoch": 1.0769118292147974, + "grad_norm": 1.5197962338342057, + "learning_rate": 9.560183575977079e-06, + "loss": 0.4991244375705719, + "step": 3683 + }, + { + "epoch": 1.0772042696300628, + "grad_norm": 1.760205368894331, + "learning_rate": 9.555353598853842e-06, + "loss": 0.6316145658493042, + "step": 3684 + }, + { + "epoch": 1.0774967100453283, + "grad_norm": 1.7400994246729, + 
"learning_rate": 9.550523725664e-06, + "loss": 0.5593908429145813, + "step": 3685 + }, + { + "epoch": 1.0777891504605936, + "grad_norm": 1.360696277932948, + "learning_rate": 9.545693957536503e-06, + "loss": 0.5491319894790649, + "step": 3686 + }, + { + "epoch": 1.078081590875859, + "grad_norm": 1.6733496726210937, + "learning_rate": 9.540864295600282e-06, + "loss": 0.6299821138381958, + "step": 3687 + }, + { + "epoch": 1.0783740312911245, + "grad_norm": 1.584478567774571, + "learning_rate": 9.536034740984244e-06, + "loss": 0.5673841238021851, + "step": 3688 + }, + { + "epoch": 1.0786664717063899, + "grad_norm": 1.2029070866459273, + "learning_rate": 9.53120529481726e-06, + "loss": 0.45966464281082153, + "step": 3689 + }, + { + "epoch": 1.0789589121216552, + "grad_norm": 1.5763188044346095, + "learning_rate": 9.526375958228191e-06, + "loss": 0.5831631422042847, + "step": 3690 + }, + { + "epoch": 1.0792513525369205, + "grad_norm": 1.6299976133727174, + "learning_rate": 9.52154673234586e-06, + "loss": 0.5456256866455078, + "step": 3691 + }, + { + "epoch": 1.079543792952186, + "grad_norm": 1.4868906970264604, + "learning_rate": 9.516717618299069e-06, + "loss": 0.46428292989730835, + "step": 3692 + }, + { + "epoch": 1.0798362333674514, + "grad_norm": 1.4498481381133475, + "learning_rate": 9.511888617216602e-06, + "loss": 0.47320839762687683, + "step": 3693 + }, + { + "epoch": 1.0801286737827167, + "grad_norm": 1.4932376641022789, + "learning_rate": 9.507059730227199e-06, + "loss": 0.5205492973327637, + "step": 3694 + }, + { + "epoch": 1.0804211141979823, + "grad_norm": 1.631704411581211, + "learning_rate": 9.502230958459587e-06, + "loss": 0.42696553468704224, + "step": 3695 + }, + { + "epoch": 1.0807135546132476, + "grad_norm": 1.5001123816983175, + "learning_rate": 9.497402303042463e-06, + "loss": 0.5147116780281067, + "step": 3696 + }, + { + "epoch": 1.081005995028513, + "grad_norm": 1.38029323867701, + "learning_rate": 9.492573765104494e-06, + "loss": 
0.5080294609069824, + "step": 3697 + }, + { + "epoch": 1.0812984354437782, + "grad_norm": 1.6652094239637947, + "learning_rate": 9.487745345774323e-06, + "loss": 0.6228866577148438, + "step": 3698 + }, + { + "epoch": 1.0815908758590438, + "grad_norm": 1.5822778586922481, + "learning_rate": 9.482917046180563e-06, + "loss": 0.5560915470123291, + "step": 3699 + }, + { + "epoch": 1.0818833162743091, + "grad_norm": 1.5535091238731367, + "learning_rate": 9.4780888674518e-06, + "loss": 0.5245859622955322, + "step": 3700 + }, + { + "epoch": 1.0821757566895744, + "grad_norm": 1.5051094804368905, + "learning_rate": 9.47326081071659e-06, + "loss": 0.6462790966033936, + "step": 3701 + }, + { + "epoch": 1.08246819710484, + "grad_norm": 1.5924758840128848, + "learning_rate": 9.468432877103462e-06, + "loss": 0.5196692943572998, + "step": 3702 + }, + { + "epoch": 1.0827606375201053, + "grad_norm": 1.7568328506180717, + "learning_rate": 9.463605067740917e-06, + "loss": 0.5487779974937439, + "step": 3703 + }, + { + "epoch": 1.0830530779353706, + "grad_norm": 1.6433197945872438, + "learning_rate": 9.458777383757428e-06, + "loss": 0.5471592545509338, + "step": 3704 + }, + { + "epoch": 1.083345518350636, + "grad_norm": 1.7295248979937683, + "learning_rate": 9.453949826281436e-06, + "loss": 0.6927378177642822, + "step": 3705 + }, + { + "epoch": 1.0836379587659015, + "grad_norm": 1.645450906929874, + "learning_rate": 9.449122396441344e-06, + "loss": 0.569003164768219, + "step": 3706 + }, + { + "epoch": 1.0839303991811668, + "grad_norm": 1.5204128580175535, + "learning_rate": 9.444295095365549e-06, + "loss": 0.5655964612960815, + "step": 3707 + }, + { + "epoch": 1.0842228395964322, + "grad_norm": 1.5653417821245283, + "learning_rate": 9.439467924182397e-06, + "loss": 0.6223032474517822, + "step": 3708 + }, + { + "epoch": 1.0845152800116975, + "grad_norm": 1.8058201614843348, + "learning_rate": 9.43464088402021e-06, + "loss": 0.6553555727005005, + "step": 3709 + }, + { + "epoch": 
1.084807720426963, + "grad_norm": 1.7065419655088354, + "learning_rate": 9.429813976007277e-06, + "loss": 0.534509539604187, + "step": 3710 + }, + { + "epoch": 1.0851001608422284, + "grad_norm": 1.7341944929762452, + "learning_rate": 9.42498720127186e-06, + "loss": 0.5801417827606201, + "step": 3711 + }, + { + "epoch": 1.0853926012574937, + "grad_norm": 1.4311879630985456, + "learning_rate": 9.42016056094219e-06, + "loss": 0.47260361909866333, + "step": 3712 + }, + { + "epoch": 1.0856850416727593, + "grad_norm": 1.5640804855296242, + "learning_rate": 9.415334056146464e-06, + "loss": 0.5924841165542603, + "step": 3713 + }, + { + "epoch": 1.0859774820880246, + "grad_norm": 1.7346051575584198, + "learning_rate": 9.410507688012847e-06, + "loss": 0.6029725074768066, + "step": 3714 + }, + { + "epoch": 1.08626992250329, + "grad_norm": 1.6762909361099274, + "learning_rate": 9.405681457669472e-06, + "loss": 0.5838413834571838, + "step": 3715 + }, + { + "epoch": 1.0865623629185555, + "grad_norm": 1.277586165055191, + "learning_rate": 9.400855366244445e-06, + "loss": 0.4739546775817871, + "step": 3716 + }, + { + "epoch": 1.0868548033338208, + "grad_norm": 1.5391172094714582, + "learning_rate": 9.396029414865832e-06, + "loss": 0.4870055913925171, + "step": 3717 + }, + { + "epoch": 1.0871472437490861, + "grad_norm": 1.4254039758246118, + "learning_rate": 9.39120360466167e-06, + "loss": 0.5572132468223572, + "step": 3718 + }, + { + "epoch": 1.0874396841643514, + "grad_norm": 1.6824352313774058, + "learning_rate": 9.386377936759966e-06, + "loss": 0.5601439476013184, + "step": 3719 + }, + { + "epoch": 1.087732124579617, + "grad_norm": 1.4548205788512927, + "learning_rate": 9.38155241228869e-06, + "loss": 0.4551504850387573, + "step": 3720 + }, + { + "epoch": 1.0880245649948823, + "grad_norm": 1.447968175073075, + "learning_rate": 9.376727032375773e-06, + "loss": 0.5656375885009766, + "step": 3721 + }, + { + "epoch": 1.0883170054101476, + "grad_norm": 1.4767808933411752, + 
"learning_rate": 9.371901798149124e-06, + "loss": 0.5597153902053833, + "step": 3722 + }, + { + "epoch": 1.088609445825413, + "grad_norm": 1.5252235269095387, + "learning_rate": 9.367076710736613e-06, + "loss": 0.5946288108825684, + "step": 3723 + }, + { + "epoch": 1.0889018862406785, + "grad_norm": 1.9924638298376933, + "learning_rate": 9.36225177126607e-06, + "loss": 0.5951449871063232, + "step": 3724 + }, + { + "epoch": 1.0891943266559438, + "grad_norm": 1.7845167649533908, + "learning_rate": 9.3574269808653e-06, + "loss": 0.5755487680435181, + "step": 3725 + }, + { + "epoch": 1.0894867670712092, + "grad_norm": 1.5254834641419546, + "learning_rate": 9.352602340662065e-06, + "loss": 0.5118892788887024, + "step": 3726 + }, + { + "epoch": 1.0897792074864747, + "grad_norm": 1.596558008598135, + "learning_rate": 9.347777851784097e-06, + "loss": 0.5652351975440979, + "step": 3727 + }, + { + "epoch": 1.09007164790174, + "grad_norm": 1.5215560380827415, + "learning_rate": 9.34295351535909e-06, + "loss": 0.624887228012085, + "step": 3728 + }, + { + "epoch": 1.0903640883170054, + "grad_norm": 1.447383452488018, + "learning_rate": 9.338129332514705e-06, + "loss": 0.534363329410553, + "step": 3729 + }, + { + "epoch": 1.0906565287322707, + "grad_norm": 1.477841435635963, + "learning_rate": 9.333305304378565e-06, + "loss": 0.6203521490097046, + "step": 3730 + }, + { + "epoch": 1.0909489691475363, + "grad_norm": 1.7401174715864398, + "learning_rate": 9.328481432078254e-06, + "loss": 0.64560866355896, + "step": 3731 + }, + { + "epoch": 1.0912414095628016, + "grad_norm": 1.5841972191853104, + "learning_rate": 9.323657716741327e-06, + "loss": 0.5389514565467834, + "step": 3732 + }, + { + "epoch": 1.091533849978067, + "grad_norm": 1.4621625707128454, + "learning_rate": 9.318834159495295e-06, + "loss": 0.5245277881622314, + "step": 3733 + }, + { + "epoch": 1.0918262903933325, + "grad_norm": 1.6486990138865423, + "learning_rate": 9.314010761467637e-06, + "loss": 0.603967010974884, + 
"step": 3734 + }, + { + "epoch": 1.0921187308085978, + "grad_norm": 1.7983997195133608, + "learning_rate": 9.309187523785794e-06, + "loss": 0.5426995754241943, + "step": 3735 + }, + { + "epoch": 1.092411171223863, + "grad_norm": 1.6248514181798874, + "learning_rate": 9.30436444757717e-06, + "loss": 0.5400352478027344, + "step": 3736 + }, + { + "epoch": 1.0927036116391284, + "grad_norm": 1.5009984854869718, + "learning_rate": 9.299541533969121e-06, + "loss": 0.5016524195671082, + "step": 3737 + }, + { + "epoch": 1.092996052054394, + "grad_norm": 1.7929437285814107, + "learning_rate": 9.294718784088982e-06, + "loss": 0.526217520236969, + "step": 3738 + }, + { + "epoch": 1.0932884924696593, + "grad_norm": 1.7293517567202035, + "learning_rate": 9.289896199064038e-06, + "loss": 0.525063157081604, + "step": 3739 + }, + { + "epoch": 1.0935809328849246, + "grad_norm": 1.269101628653969, + "learning_rate": 9.285073780021541e-06, + "loss": 0.3792048692703247, + "step": 3740 + }, + { + "epoch": 1.0938733733001902, + "grad_norm": 1.4416380651624152, + "learning_rate": 9.280251528088702e-06, + "loss": 0.5326308012008667, + "step": 3741 + }, + { + "epoch": 1.0941658137154555, + "grad_norm": 1.3946561055322027, + "learning_rate": 9.275429444392692e-06, + "loss": 0.5675199627876282, + "step": 3742 + }, + { + "epoch": 1.0944582541307208, + "grad_norm": 1.640552639536372, + "learning_rate": 9.270607530060643e-06, + "loss": 0.6525516510009766, + "step": 3743 + }, + { + "epoch": 1.0947506945459862, + "grad_norm": 1.563647681973335, + "learning_rate": 9.265785786219647e-06, + "loss": 0.6376343369483948, + "step": 3744 + }, + { + "epoch": 1.0950431349612517, + "grad_norm": 1.7701418719133022, + "learning_rate": 9.260964213996763e-06, + "loss": 0.6440377235412598, + "step": 3745 + }, + { + "epoch": 1.095335575376517, + "grad_norm": 1.886853414823259, + "learning_rate": 9.256142814518997e-06, + "loss": 0.5971434116363525, + "step": 3746 + }, + { + "epoch": 1.0956280157917824, + 
"grad_norm": 1.3797760891901851, + "learning_rate": 9.251321588913331e-06, + "loss": 0.5096890330314636, + "step": 3747 + }, + { + "epoch": 1.0959204562070477, + "grad_norm": 1.7099901744739332, + "learning_rate": 9.246500538306686e-06, + "loss": 0.4303498864173889, + "step": 3748 + }, + { + "epoch": 1.0962128966223132, + "grad_norm": 1.5934571510718554, + "learning_rate": 9.241679663825961e-06, + "loss": 0.5484192371368408, + "step": 3749 + }, + { + "epoch": 1.0965053370375786, + "grad_norm": 1.6268147624989107, + "learning_rate": 9.236858966598004e-06, + "loss": 0.6057884693145752, + "step": 3750 + }, + { + "epoch": 1.096797777452844, + "grad_norm": 1.565840426411154, + "learning_rate": 9.232038447749623e-06, + "loss": 0.5261536836624146, + "step": 3751 + }, + { + "epoch": 1.0970902178681095, + "grad_norm": 1.3242416099520606, + "learning_rate": 9.227218108407586e-06, + "loss": 0.470365047454834, + "step": 3752 + }, + { + "epoch": 1.0973826582833748, + "grad_norm": 1.8694075496184692, + "learning_rate": 9.222397949698618e-06, + "loss": 0.6158323287963867, + "step": 3753 + }, + { + "epoch": 1.09767509869864, + "grad_norm": 1.4353847976975904, + "learning_rate": 9.217577972749401e-06, + "loss": 0.582190990447998, + "step": 3754 + }, + { + "epoch": 1.0979675391139057, + "grad_norm": 1.5377732823861585, + "learning_rate": 9.212758178686575e-06, + "loss": 0.4939305782318115, + "step": 3755 + }, + { + "epoch": 1.098259979529171, + "grad_norm": 1.501946006392042, + "learning_rate": 9.207938568636739e-06, + "loss": 0.576829731464386, + "step": 3756 + }, + { + "epoch": 1.0985524199444363, + "grad_norm": 1.582387804664269, + "learning_rate": 9.203119143726445e-06, + "loss": 0.581257164478302, + "step": 3757 + }, + { + "epoch": 1.0988448603597016, + "grad_norm": 1.4501950316688965, + "learning_rate": 9.19829990508221e-06, + "loss": 0.6105127334594727, + "step": 3758 + }, + { + "epoch": 1.0991373007749672, + "grad_norm": 1.7379854400774775, + "learning_rate": 
9.193480853830495e-06, + "loss": 0.5311432480812073, + "step": 3759 + }, + { + "epoch": 1.0994297411902325, + "grad_norm": 1.3707297007944412, + "learning_rate": 9.188661991097726e-06, + "loss": 0.44334596395492554, + "step": 3760 + }, + { + "epoch": 1.0997221816054978, + "grad_norm": 1.6175971035022318, + "learning_rate": 9.183843318010285e-06, + "loss": 0.5795773267745972, + "step": 3761 + }, + { + "epoch": 1.1000146220207632, + "grad_norm": 1.4465404341375856, + "learning_rate": 9.179024835694504e-06, + "loss": 0.619825541973114, + "step": 3762 + }, + { + "epoch": 1.1003070624360287, + "grad_norm": 1.754450237430447, + "learning_rate": 9.174206545276678e-06, + "loss": 0.633934497833252, + "step": 3763 + }, + { + "epoch": 1.100599502851294, + "grad_norm": 1.34560762533496, + "learning_rate": 9.169388447883053e-06, + "loss": 0.48922473192214966, + "step": 3764 + }, + { + "epoch": 1.1008919432665594, + "grad_norm": 1.7340747304342141, + "learning_rate": 9.164570544639825e-06, + "loss": 0.6125025153160095, + "step": 3765 + }, + { + "epoch": 1.101184383681825, + "grad_norm": 1.4327034643571392, + "learning_rate": 9.159752836673154e-06, + "loss": 0.5428078174591064, + "step": 3766 + }, + { + "epoch": 1.1014768240970902, + "grad_norm": 1.4335551572069505, + "learning_rate": 9.154935325109148e-06, + "loss": 0.5848157405853271, + "step": 3767 + }, + { + "epoch": 1.1017692645123556, + "grad_norm": 1.5053579548838565, + "learning_rate": 9.150118011073872e-06, + "loss": 0.5150102376937866, + "step": 3768 + }, + { + "epoch": 1.102061704927621, + "grad_norm": 1.429407171536289, + "learning_rate": 9.145300895693344e-06, + "loss": 0.6106699705123901, + "step": 3769 + }, + { + "epoch": 1.1023541453428864, + "grad_norm": 1.4079938603953852, + "learning_rate": 9.140483980093534e-06, + "loss": 0.5819482803344727, + "step": 3770 + }, + { + "epoch": 1.1026465857581518, + "grad_norm": 1.7060315490040079, + "learning_rate": 9.135667265400369e-06, + "loss": 0.6499812602996826, + "step": 
3771 + }, + { + "epoch": 1.102939026173417, + "grad_norm": 1.520551323323022, + "learning_rate": 9.130850752739724e-06, + "loss": 0.5375189781188965, + "step": 3772 + }, + { + "epoch": 1.1032314665886827, + "grad_norm": 1.5200340564855783, + "learning_rate": 9.12603444323743e-06, + "loss": 0.5582318902015686, + "step": 3773 + }, + { + "epoch": 1.103523907003948, + "grad_norm": 1.6010357553720616, + "learning_rate": 9.121218338019273e-06, + "loss": 0.5549799203872681, + "step": 3774 + }, + { + "epoch": 1.1038163474192133, + "grad_norm": 1.672600820514396, + "learning_rate": 9.116402438210988e-06, + "loss": 0.4942197799682617, + "step": 3775 + }, + { + "epoch": 1.1041087878344786, + "grad_norm": 1.294858704528479, + "learning_rate": 9.11158674493826e-06, + "loss": 0.5039837956428528, + "step": 3776 + }, + { + "epoch": 1.1044012282497442, + "grad_norm": 1.4904483423531274, + "learning_rate": 9.106771259326726e-06, + "loss": 0.49781280755996704, + "step": 3777 + }, + { + "epoch": 1.1046936686650095, + "grad_norm": 1.5058975394537781, + "learning_rate": 9.101955982501981e-06, + "loss": 0.41755813360214233, + "step": 3778 + }, + { + "epoch": 1.1049861090802748, + "grad_norm": 1.3247888444316807, + "learning_rate": 9.097140915589564e-06, + "loss": 0.5605067014694214, + "step": 3779 + }, + { + "epoch": 1.1052785494955404, + "grad_norm": 1.6960736504408462, + "learning_rate": 9.092326059714971e-06, + "loss": 0.6291122436523438, + "step": 3780 + }, + { + "epoch": 1.1055709899108057, + "grad_norm": 1.5309788529424204, + "learning_rate": 9.087511416003636e-06, + "loss": 0.5164260864257812, + "step": 3781 + }, + { + "epoch": 1.105863430326071, + "grad_norm": 1.481065256446166, + "learning_rate": 9.082696985580964e-06, + "loss": 0.5002986192703247, + "step": 3782 + }, + { + "epoch": 1.1061558707413364, + "grad_norm": 1.8553995759252653, + "learning_rate": 9.077882769572295e-06, + "loss": 0.5149055123329163, + "step": 3783 + }, + { + "epoch": 1.106448311156602, + "grad_norm": 
1.4637547819206846, + "learning_rate": 9.073068769102925e-06, + "loss": 0.5375808477401733, + "step": 3784 + }, + { + "epoch": 1.1067407515718672, + "grad_norm": 1.4438276838658128, + "learning_rate": 9.06825498529809e-06, + "loss": 0.5574408173561096, + "step": 3785 + }, + { + "epoch": 1.1070331919871326, + "grad_norm": 1.8566533611842586, + "learning_rate": 9.063441419282989e-06, + "loss": 0.7410034537315369, + "step": 3786 + }, + { + "epoch": 1.107325632402398, + "grad_norm": 1.4780218137550694, + "learning_rate": 9.058628072182759e-06, + "loss": 0.4890757203102112, + "step": 3787 + }, + { + "epoch": 1.1076180728176634, + "grad_norm": 1.449027088222319, + "learning_rate": 9.053814945122496e-06, + "loss": 0.5012304782867432, + "step": 3788 + }, + { + "epoch": 1.1079105132329288, + "grad_norm": 1.6277147220392454, + "learning_rate": 9.049002039227239e-06, + "loss": 0.5235648155212402, + "step": 3789 + }, + { + "epoch": 1.108202953648194, + "grad_norm": 1.5099212526378973, + "learning_rate": 9.044189355621969e-06, + "loss": 0.44732457399368286, + "step": 3790 + }, + { + "epoch": 1.1084953940634596, + "grad_norm": 1.6131396298332503, + "learning_rate": 9.039376895431627e-06, + "loss": 0.5771712064743042, + "step": 3791 + }, + { + "epoch": 1.108787834478725, + "grad_norm": 2.537465666899194, + "learning_rate": 9.034564659781096e-06, + "loss": 0.5361784100532532, + "step": 3792 + }, + { + "epoch": 1.1090802748939903, + "grad_norm": 1.3520934517992165, + "learning_rate": 9.029752649795203e-06, + "loss": 0.5305893421173096, + "step": 3793 + }, + { + "epoch": 1.1093727153092559, + "grad_norm": 1.3303918593615456, + "learning_rate": 9.02494086659873e-06, + "loss": 0.5094715356826782, + "step": 3794 + }, + { + "epoch": 1.1096651557245212, + "grad_norm": 1.4162243148383913, + "learning_rate": 9.020129311316405e-06, + "loss": 0.5406676530838013, + "step": 3795 + }, + { + "epoch": 1.1099575961397865, + "grad_norm": 1.5431545303983976, + "learning_rate": 9.015317985072893e-06, 
+ "loss": 0.5170687437057495, + "step": 3796 + }, + { + "epoch": 1.1102500365550518, + "grad_norm": 1.454438976249235, + "learning_rate": 9.010506888992814e-06, + "loss": 0.4632429778575897, + "step": 3797 + }, + { + "epoch": 1.1105424769703174, + "grad_norm": 1.8257270837662332, + "learning_rate": 9.005696024200734e-06, + "loss": 0.5614180564880371, + "step": 3798 + }, + { + "epoch": 1.1108349173855827, + "grad_norm": 1.5187438448472135, + "learning_rate": 9.000885391821164e-06, + "loss": 0.5660920143127441, + "step": 3799 + }, + { + "epoch": 1.111127357800848, + "grad_norm": 1.616333702810617, + "learning_rate": 8.996074992978558e-06, + "loss": 0.6346436142921448, + "step": 3800 + }, + { + "epoch": 1.1114197982161134, + "grad_norm": 1.2613316779938173, + "learning_rate": 8.991264828797319e-06, + "loss": 0.4295850396156311, + "step": 3801 + }, + { + "epoch": 1.111712238631379, + "grad_norm": 1.4545086499056976, + "learning_rate": 8.986454900401791e-06, + "loss": 0.4797070622444153, + "step": 3802 + }, + { + "epoch": 1.1120046790466442, + "grad_norm": 1.3353593055033692, + "learning_rate": 8.98164520891627e-06, + "loss": 0.4912114143371582, + "step": 3803 + }, + { + "epoch": 1.1122971194619096, + "grad_norm": 1.6135433736276805, + "learning_rate": 8.976835755464988e-06, + "loss": 0.4156647026538849, + "step": 3804 + }, + { + "epoch": 1.1125895598771751, + "grad_norm": 1.6120031027815822, + "learning_rate": 8.97202654117213e-06, + "loss": 0.4527992010116577, + "step": 3805 + }, + { + "epoch": 1.1128820002924404, + "grad_norm": 1.6881758541294942, + "learning_rate": 8.967217567161817e-06, + "loss": 0.5969425439834595, + "step": 3806 + }, + { + "epoch": 1.1131744407077058, + "grad_norm": 1.5313114259080804, + "learning_rate": 8.962408834558116e-06, + "loss": 0.5867633819580078, + "step": 3807 + }, + { + "epoch": 1.113466881122971, + "grad_norm": 1.4924056676350326, + "learning_rate": 8.957600344485042e-06, + "loss": 0.549109697341919, + "step": 3808 + }, + { + 
"epoch": 1.1137593215382366, + "grad_norm": 1.6602567019426782, + "learning_rate": 8.952792098066549e-06, + "loss": 0.6336593627929688, + "step": 3809 + }, + { + "epoch": 1.114051761953502, + "grad_norm": 1.4867429859275132, + "learning_rate": 8.947984096426537e-06, + "loss": 0.5403220653533936, + "step": 3810 + }, + { + "epoch": 1.1143442023687673, + "grad_norm": 1.3422567204959701, + "learning_rate": 8.943176340688846e-06, + "loss": 0.37941914796829224, + "step": 3811 + }, + { + "epoch": 1.1146366427840328, + "grad_norm": 1.7322077540170269, + "learning_rate": 8.938368831977262e-06, + "loss": 0.5509335994720459, + "step": 3812 + }, + { + "epoch": 1.1149290831992982, + "grad_norm": 1.7077554301344111, + "learning_rate": 8.933561571415506e-06, + "loss": 0.5798860788345337, + "step": 3813 + }, + { + "epoch": 1.1152215236145635, + "grad_norm": 1.8693354922278385, + "learning_rate": 8.92875456012725e-06, + "loss": 0.5549412965774536, + "step": 3814 + }, + { + "epoch": 1.1155139640298288, + "grad_norm": 1.5992402094758784, + "learning_rate": 8.9239477992361e-06, + "loss": 0.4707058072090149, + "step": 3815 + }, + { + "epoch": 1.1158064044450944, + "grad_norm": 1.5838333385974708, + "learning_rate": 8.919141289865611e-06, + "loss": 0.4717002511024475, + "step": 3816 + }, + { + "epoch": 1.1160988448603597, + "grad_norm": 1.288572308356885, + "learning_rate": 8.914335033139274e-06, + "loss": 0.48403650522232056, + "step": 3817 + }, + { + "epoch": 1.116391285275625, + "grad_norm": 1.6715157915340426, + "learning_rate": 8.909529030180522e-06, + "loss": 0.48592090606689453, + "step": 3818 + }, + { + "epoch": 1.1166837256908906, + "grad_norm": 1.566157541574177, + "learning_rate": 8.904723282112728e-06, + "loss": 0.5052220225334167, + "step": 3819 + }, + { + "epoch": 1.116976166106156, + "grad_norm": 1.6151321192825796, + "learning_rate": 8.899917790059208e-06, + "loss": 0.7858535051345825, + "step": 3820 + }, + { + "epoch": 1.1172686065214212, + "grad_norm": 
1.8369545909174703, + "learning_rate": 8.895112555143217e-06, + "loss": 0.6768159866333008, + "step": 3821 + }, + { + "epoch": 1.1175610469366866, + "grad_norm": 1.8079763728482598, + "learning_rate": 8.890307578487947e-06, + "loss": 0.5661243200302124, + "step": 3822 + }, + { + "epoch": 1.1178534873519521, + "grad_norm": 1.7067515294047517, + "learning_rate": 8.885502861216535e-06, + "loss": 0.5129438638687134, + "step": 3823 + }, + { + "epoch": 1.1181459277672174, + "grad_norm": 1.5735393429941704, + "learning_rate": 8.880698404452051e-06, + "loss": 0.4813467264175415, + "step": 3824 + }, + { + "epoch": 1.1184383681824828, + "grad_norm": 1.5840908667031388, + "learning_rate": 8.87589420931751e-06, + "loss": 0.5165577530860901, + "step": 3825 + }, + { + "epoch": 1.118730808597748, + "grad_norm": 1.4425390765128903, + "learning_rate": 8.871090276935863e-06, + "loss": 0.47335073351860046, + "step": 3826 + }, + { + "epoch": 1.1190232490130136, + "grad_norm": 1.6934955516318184, + "learning_rate": 8.86628660843e-06, + "loss": 0.4902348518371582, + "step": 3827 + }, + { + "epoch": 1.119315689428279, + "grad_norm": 1.7245920316429901, + "learning_rate": 8.861483204922752e-06, + "loss": 0.5933388471603394, + "step": 3828 + }, + { + "epoch": 1.1196081298435443, + "grad_norm": 1.5505961542425288, + "learning_rate": 8.85668006753688e-06, + "loss": 0.4898201823234558, + "step": 3829 + }, + { + "epoch": 1.1199005702588098, + "grad_norm": 1.3287782031202422, + "learning_rate": 8.851877197395088e-06, + "loss": 0.4745003879070282, + "step": 3830 + }, + { + "epoch": 1.1201930106740752, + "grad_norm": 1.5081067046883336, + "learning_rate": 8.847074595620024e-06, + "loss": 0.5246972441673279, + "step": 3831 + }, + { + "epoch": 1.1204854510893405, + "grad_norm": 1.498399687409688, + "learning_rate": 8.842272263334263e-06, + "loss": 0.5196787714958191, + "step": 3832 + }, + { + "epoch": 1.120777891504606, + "grad_norm": 2.0301798075149446, + "learning_rate": 8.83747020166032e-06, + 
"loss": 0.6721034049987793, + "step": 3833 + }, + { + "epoch": 1.1210703319198714, + "grad_norm": 1.4930580521199184, + "learning_rate": 8.832668411720652e-06, + "loss": 0.5654234886169434, + "step": 3834 + }, + { + "epoch": 1.1213627723351367, + "grad_norm": 1.91001506609742, + "learning_rate": 8.827866894637642e-06, + "loss": 0.7520767450332642, + "step": 3835 + }, + { + "epoch": 1.121655212750402, + "grad_norm": 1.3753523987373926, + "learning_rate": 8.82306565153362e-06, + "loss": 0.43645960092544556, + "step": 3836 + }, + { + "epoch": 1.1219476531656676, + "grad_norm": 1.688542605024225, + "learning_rate": 8.818264683530845e-06, + "loss": 0.5802274942398071, + "step": 3837 + }, + { + "epoch": 1.122240093580933, + "grad_norm": 1.5706370149670577, + "learning_rate": 8.813463991751516e-06, + "loss": 0.5593410134315491, + "step": 3838 + }, + { + "epoch": 1.1225325339961982, + "grad_norm": 1.56949134961986, + "learning_rate": 8.808663577317765e-06, + "loss": 0.6126681566238403, + "step": 3839 + }, + { + "epoch": 1.1228249744114636, + "grad_norm": 1.6396156905409707, + "learning_rate": 8.80386344135166e-06, + "loss": 0.6245180368423462, + "step": 3840 + }, + { + "epoch": 1.123117414826729, + "grad_norm": 1.3541654958690765, + "learning_rate": 8.799063584975201e-06, + "loss": 0.6611473560333252, + "step": 3841 + }, + { + "epoch": 1.1234098552419944, + "grad_norm": 1.2310988629927149, + "learning_rate": 8.79426400931033e-06, + "loss": 0.40020978450775146, + "step": 3842 + }, + { + "epoch": 1.1237022956572598, + "grad_norm": 1.3644507366239775, + "learning_rate": 8.789464715478913e-06, + "loss": 0.4965318441390991, + "step": 3843 + }, + { + "epoch": 1.1239947360725253, + "grad_norm": 1.4326851806590044, + "learning_rate": 8.784665704602758e-06, + "loss": 0.4838374853134155, + "step": 3844 + }, + { + "epoch": 1.1242871764877906, + "grad_norm": 1.389039662475551, + "learning_rate": 8.77986697780361e-06, + "loss": 0.5756508708000183, + "step": 3845 + }, + { + "epoch": 
1.124579616903056, + "grad_norm": 1.484286888056792, + "learning_rate": 8.775068536203132e-06, + "loss": 0.5341511964797974, + "step": 3846 + }, + { + "epoch": 1.1248720573183213, + "grad_norm": 1.5007549282773276, + "learning_rate": 8.77027038092294e-06, + "loss": 0.6239134073257446, + "step": 3847 + }, + { + "epoch": 1.1251644977335868, + "grad_norm": 1.818555508500906, + "learning_rate": 8.765472513084566e-06, + "loss": 0.5642406940460205, + "step": 3848 + }, + { + "epoch": 1.1254569381488522, + "grad_norm": 1.5841401225303304, + "learning_rate": 8.760674933809488e-06, + "loss": 0.5242771506309509, + "step": 3849 + }, + { + "epoch": 1.1257493785641175, + "grad_norm": 1.5608207104848433, + "learning_rate": 8.755877644219108e-06, + "loss": 0.5205737352371216, + "step": 3850 + }, + { + "epoch": 1.126041818979383, + "grad_norm": 1.6760248633979633, + "learning_rate": 8.751080645434768e-06, + "loss": 0.5005168318748474, + "step": 3851 + }, + { + "epoch": 1.1263342593946484, + "grad_norm": 1.539730717074913, + "learning_rate": 8.74628393857773e-06, + "loss": 0.44978275895118713, + "step": 3852 + }, + { + "epoch": 1.1266266998099137, + "grad_norm": 1.3558571119447433, + "learning_rate": 8.741487524769198e-06, + "loss": 0.43631571531295776, + "step": 3853 + }, + { + "epoch": 1.126919140225179, + "grad_norm": 1.220093214706796, + "learning_rate": 8.736691405130306e-06, + "loss": 0.4196016788482666, + "step": 3854 + }, + { + "epoch": 1.1272115806404446, + "grad_norm": 1.6296314839875645, + "learning_rate": 8.731895580782118e-06, + "loss": 0.6389856338500977, + "step": 3855 + }, + { + "epoch": 1.12750402105571, + "grad_norm": 1.726359030533187, + "learning_rate": 8.72710005284563e-06, + "loss": 0.5465584993362427, + "step": 3856 + }, + { + "epoch": 1.1277964614709752, + "grad_norm": 1.469192647678069, + "learning_rate": 8.722304822441757e-06, + "loss": 0.5513765811920166, + "step": 3857 + }, + { + "epoch": 1.1280889018862408, + "grad_norm": 1.5516390698184288, + 
"learning_rate": 8.717509890691369e-06, + "loss": 0.6984349489212036, + "step": 3858 + }, + { + "epoch": 1.128381342301506, + "grad_norm": 1.6096511723205336, + "learning_rate": 8.712715258715248e-06, + "loss": 0.5311027765274048, + "step": 3859 + }, + { + "epoch": 1.1286737827167714, + "grad_norm": 1.5113126886002746, + "learning_rate": 8.707920927634105e-06, + "loss": 0.4598672091960907, + "step": 3860 + }, + { + "epoch": 1.1289662231320368, + "grad_norm": 1.8202302284240548, + "learning_rate": 8.703126898568591e-06, + "loss": 0.6177612543106079, + "step": 3861 + }, + { + "epoch": 1.1292586635473023, + "grad_norm": 1.8043964275332298, + "learning_rate": 8.69833317263928e-06, + "loss": 0.6442389488220215, + "step": 3862 + }, + { + "epoch": 1.1295511039625676, + "grad_norm": 1.4793698971631246, + "learning_rate": 8.693539750966672e-06, + "loss": 0.5925737023353577, + "step": 3863 + }, + { + "epoch": 1.129843544377833, + "grad_norm": 1.3730688779887357, + "learning_rate": 8.688746634671207e-06, + "loss": 0.46009114384651184, + "step": 3864 + }, + { + "epoch": 1.1301359847930983, + "grad_norm": 1.6065358861472605, + "learning_rate": 8.683953824873246e-06, + "loss": 0.5438460111618042, + "step": 3865 + }, + { + "epoch": 1.1304284252083638, + "grad_norm": 1.6436751318662282, + "learning_rate": 8.679161322693073e-06, + "loss": 0.5355101823806763, + "step": 3866 + }, + { + "epoch": 1.1307208656236292, + "grad_norm": 1.5636124606467166, + "learning_rate": 8.67436912925091e-06, + "loss": 0.4494459629058838, + "step": 3867 + }, + { + "epoch": 1.1310133060388945, + "grad_norm": 1.5118698872161136, + "learning_rate": 8.669577245666905e-06, + "loss": 0.5828550457954407, + "step": 3868 + }, + { + "epoch": 1.13130574645416, + "grad_norm": 1.43455699505813, + "learning_rate": 8.664785673061127e-06, + "loss": 0.4956590235233307, + "step": 3869 + }, + { + "epoch": 1.1315981868694254, + "grad_norm": 1.5146504272638424, + "learning_rate": 8.659994412553582e-06, + "loss": 
0.5447779893875122, + "step": 3870 + }, + { + "epoch": 1.1318906272846907, + "grad_norm": 1.6512585184867246, + "learning_rate": 8.655203465264196e-06, + "loss": 0.6275361776351929, + "step": 3871 + }, + { + "epoch": 1.1321830676999562, + "grad_norm": 1.564521343459816, + "learning_rate": 8.650412832312823e-06, + "loss": 0.47899991273880005, + "step": 3872 + }, + { + "epoch": 1.1324755081152216, + "grad_norm": 1.1875547206815094, + "learning_rate": 8.645622514819243e-06, + "loss": 0.3356127142906189, + "step": 3873 + }, + { + "epoch": 1.132767948530487, + "grad_norm": 1.4442401622701144, + "learning_rate": 8.640832513903168e-06, + "loss": 0.48855727910995483, + "step": 3874 + }, + { + "epoch": 1.1330603889457522, + "grad_norm": 1.4528018972795056, + "learning_rate": 8.636042830684227e-06, + "loss": 0.46642380952835083, + "step": 3875 + }, + { + "epoch": 1.1333528293610178, + "grad_norm": 1.8421536572224761, + "learning_rate": 8.631253466281984e-06, + "loss": 0.6179598569869995, + "step": 3876 + }, + { + "epoch": 1.133645269776283, + "grad_norm": 1.6762180368596016, + "learning_rate": 8.626464421815919e-06, + "loss": 0.6361704468727112, + "step": 3877 + }, + { + "epoch": 1.1339377101915484, + "grad_norm": 1.574443230288469, + "learning_rate": 8.621675698405446e-06, + "loss": 0.6243701577186584, + "step": 3878 + }, + { + "epoch": 1.1342301506068138, + "grad_norm": 1.6113304231540622, + "learning_rate": 8.616887297169895e-06, + "loss": 0.5402215123176575, + "step": 3879 + }, + { + "epoch": 1.1345225910220793, + "grad_norm": 1.6390191276422172, + "learning_rate": 8.61209921922853e-06, + "loss": 0.6050009727478027, + "step": 3880 + }, + { + "epoch": 1.1348150314373446, + "grad_norm": 1.6106875040973343, + "learning_rate": 8.607311465700534e-06, + "loss": 0.5705801248550415, + "step": 3881 + }, + { + "epoch": 1.13510747185261, + "grad_norm": 1.5380461037587805, + "learning_rate": 8.602524037705018e-06, + "loss": 0.5467248558998108, + "step": 3882 + }, + { + "epoch": 
1.1353999122678755, + "grad_norm": 1.7121108266736746, + "learning_rate": 8.597736936361007e-06, + "loss": 0.5903012752532959, + "step": 3883 + }, + { + "epoch": 1.1356923526831408, + "grad_norm": 1.6218348221942134, + "learning_rate": 8.592950162787463e-06, + "loss": 0.6034090518951416, + "step": 3884 + }, + { + "epoch": 1.1359847930984062, + "grad_norm": 1.3056254339924755, + "learning_rate": 8.588163718103264e-06, + "loss": 0.4282987117767334, + "step": 3885 + }, + { + "epoch": 1.1362772335136717, + "grad_norm": 1.5127630417626896, + "learning_rate": 8.583377603427212e-06, + "loss": 0.47374194860458374, + "step": 3886 + }, + { + "epoch": 1.136569673928937, + "grad_norm": 1.5841934947134406, + "learning_rate": 8.578591819878033e-06, + "loss": 0.43954724073410034, + "step": 3887 + }, + { + "epoch": 1.1368621143442024, + "grad_norm": 1.4278799477191386, + "learning_rate": 8.573806368574372e-06, + "loss": 0.4731065034866333, + "step": 3888 + }, + { + "epoch": 1.1371545547594677, + "grad_norm": 1.4184800646863156, + "learning_rate": 8.5690212506348e-06, + "loss": 0.5241256356239319, + "step": 3889 + }, + { + "epoch": 1.1374469951747332, + "grad_norm": 1.631021419370316, + "learning_rate": 8.56423646717781e-06, + "loss": 0.5823307037353516, + "step": 3890 + }, + { + "epoch": 1.1377394355899986, + "grad_norm": 1.654201038343883, + "learning_rate": 8.55945201932182e-06, + "loss": 0.5360631346702576, + "step": 3891 + }, + { + "epoch": 1.138031876005264, + "grad_norm": 1.5773624073994579, + "learning_rate": 8.554667908185158e-06, + "loss": 0.5227797627449036, + "step": 3892 + }, + { + "epoch": 1.1383243164205292, + "grad_norm": 1.7414634806893152, + "learning_rate": 8.549884134886089e-06, + "loss": 0.6232806444168091, + "step": 3893 + }, + { + "epoch": 1.1386167568357948, + "grad_norm": 1.567438316916472, + "learning_rate": 8.545100700542782e-06, + "loss": 0.6697877049446106, + "step": 3894 + }, + { + "epoch": 1.13890919725106, + "grad_norm": 1.5115348655280192, + 
"learning_rate": 8.540317606273343e-06, + "loss": 0.6348206400871277, + "step": 3895 + }, + { + "epoch": 1.1392016376663254, + "grad_norm": 1.5453537409734852, + "learning_rate": 8.535534853195786e-06, + "loss": 0.5578476190567017, + "step": 3896 + }, + { + "epoch": 1.139494078081591, + "grad_norm": 1.6674507953444782, + "learning_rate": 8.530752442428055e-06, + "loss": 0.6439946889877319, + "step": 3897 + }, + { + "epoch": 1.1397865184968563, + "grad_norm": 1.5233786551580588, + "learning_rate": 8.525970375088006e-06, + "loss": 0.5292261242866516, + "step": 3898 + }, + { + "epoch": 1.1400789589121216, + "grad_norm": 1.6870433422022266, + "learning_rate": 8.521188652293421e-06, + "loss": 0.5836480855941772, + "step": 3899 + }, + { + "epoch": 1.140371399327387, + "grad_norm": 1.731988866581243, + "learning_rate": 8.516407275161998e-06, + "loss": 0.5166354775428772, + "step": 3900 + }, + { + "epoch": 1.1406638397426525, + "grad_norm": 1.61853635946673, + "learning_rate": 8.511626244811352e-06, + "loss": 0.5236127972602844, + "step": 3901 + }, + { + "epoch": 1.1409562801579178, + "grad_norm": 1.3903461786321225, + "learning_rate": 8.506845562359022e-06, + "loss": 0.4900703430175781, + "step": 3902 + }, + { + "epoch": 1.1412487205731832, + "grad_norm": 1.484704749479714, + "learning_rate": 8.502065228922464e-06, + "loss": 0.5200212001800537, + "step": 3903 + }, + { + "epoch": 1.1415411609884485, + "grad_norm": 1.588712114908106, + "learning_rate": 8.497285245619053e-06, + "loss": 0.5553300976753235, + "step": 3904 + }, + { + "epoch": 1.141833601403714, + "grad_norm": 1.5947362241383982, + "learning_rate": 8.492505613566075e-06, + "loss": 0.5650131702423096, + "step": 3905 + }, + { + "epoch": 1.1421260418189794, + "grad_norm": 1.7370414648582224, + "learning_rate": 8.487726333880746e-06, + "loss": 0.4732077121734619, + "step": 3906 + }, + { + "epoch": 1.1424184822342447, + "grad_norm": 1.4560698890341355, + "learning_rate": 8.482947407680193e-06, + "loss": 
0.46741920709609985, + "step": 3907 + }, + { + "epoch": 1.1427109226495102, + "grad_norm": 1.717154367813477, + "learning_rate": 8.478168836081457e-06, + "loss": 0.606191873550415, + "step": 3908 + }, + { + "epoch": 1.1430033630647756, + "grad_norm": 1.4018605845855592, + "learning_rate": 8.473390620201505e-06, + "loss": 0.4373897314071655, + "step": 3909 + }, + { + "epoch": 1.143295803480041, + "grad_norm": 1.4705540951964773, + "learning_rate": 8.468612761157215e-06, + "loss": 0.5460623502731323, + "step": 3910 + }, + { + "epoch": 1.1435882438953064, + "grad_norm": 1.311062743935516, + "learning_rate": 8.463835260065379e-06, + "loss": 0.4939531087875366, + "step": 3911 + }, + { + "epoch": 1.1438806843105718, + "grad_norm": 1.4297382144675803, + "learning_rate": 8.459058118042708e-06, + "loss": 0.544964611530304, + "step": 3912 + }, + { + "epoch": 1.144173124725837, + "grad_norm": 1.600083847682917, + "learning_rate": 8.454281336205836e-06, + "loss": 0.6118921041488647, + "step": 3913 + }, + { + "epoch": 1.1444655651411024, + "grad_norm": 1.4018893231050742, + "learning_rate": 8.449504915671304e-06, + "loss": 0.561060905456543, + "step": 3914 + }, + { + "epoch": 1.144758005556368, + "grad_norm": 1.3967184209578762, + "learning_rate": 8.444728857555572e-06, + "loss": 0.430827796459198, + "step": 3915 + }, + { + "epoch": 1.1450504459716333, + "grad_norm": 1.7776445971156332, + "learning_rate": 8.439953162975011e-06, + "loss": 0.5482884645462036, + "step": 3916 + }, + { + "epoch": 1.1453428863868986, + "grad_norm": 1.8487697311002218, + "learning_rate": 8.435177833045911e-06, + "loss": 0.6614879965782166, + "step": 3917 + }, + { + "epoch": 1.145635326802164, + "grad_norm": 1.686073678884194, + "learning_rate": 8.430402868884482e-06, + "loss": 0.6290509104728699, + "step": 3918 + }, + { + "epoch": 1.1459277672174295, + "grad_norm": 1.479686951025449, + "learning_rate": 8.425628271606836e-06, + "loss": 0.404970645904541, + "step": 3919 + }, + { + "epoch": 
1.1462202076326948, + "grad_norm": 1.3978968237521616, + "learning_rate": 8.420854042329011e-06, + "loss": 0.4902762174606323, + "step": 3920 + }, + { + "epoch": 1.1465126480479602, + "grad_norm": 1.6869046118960203, + "learning_rate": 8.416080182166955e-06, + "loss": 0.5757346153259277, + "step": 3921 + }, + { + "epoch": 1.1468050884632257, + "grad_norm": 1.5541954452670608, + "learning_rate": 8.41130669223652e-06, + "loss": 0.5453485250473022, + "step": 3922 + }, + { + "epoch": 1.147097528878491, + "grad_norm": 1.7189844130617113, + "learning_rate": 8.40653357365349e-06, + "loss": 0.5660290122032166, + "step": 3923 + }, + { + "epoch": 1.1473899692937564, + "grad_norm": 1.576466831282747, + "learning_rate": 8.40176082753355e-06, + "loss": 0.46013498306274414, + "step": 3924 + }, + { + "epoch": 1.147682409709022, + "grad_norm": 1.4364824711460213, + "learning_rate": 8.396988454992296e-06, + "loss": 0.5183000564575195, + "step": 3925 + }, + { + "epoch": 1.1479748501242872, + "grad_norm": 1.7258149850246205, + "learning_rate": 8.392216457145246e-06, + "loss": 0.5407284498214722, + "step": 3926 + }, + { + "epoch": 1.1482672905395526, + "grad_norm": 1.735168999167248, + "learning_rate": 8.387444835107824e-06, + "loss": 0.5960655808448792, + "step": 3927 + }, + { + "epoch": 1.148559730954818, + "grad_norm": 1.2891916158500891, + "learning_rate": 8.382673589995365e-06, + "loss": 0.4363316297531128, + "step": 3928 + }, + { + "epoch": 1.1488521713700834, + "grad_norm": 1.4413045514377891, + "learning_rate": 8.377902722923122e-06, + "loss": 0.5143908262252808, + "step": 3929 + }, + { + "epoch": 1.1491446117853488, + "grad_norm": 1.696244956095385, + "learning_rate": 8.373132235006254e-06, + "loss": 0.6016460657119751, + "step": 3930 + }, + { + "epoch": 1.149437052200614, + "grad_norm": 1.4319561508465357, + "learning_rate": 8.368362127359835e-06, + "loss": 0.5120511651039124, + "step": 3931 + }, + { + "epoch": 1.1497294926158794, + "grad_norm": 1.5254857110351325, + 
"learning_rate": 8.363592401098853e-06, + "loss": 0.49658435583114624, + "step": 3932 + }, + { + "epoch": 1.150021933031145, + "grad_norm": 1.7705521617533395, + "learning_rate": 8.358823057338188e-06, + "loss": 0.584032416343689, + "step": 3933 + }, + { + "epoch": 1.1503143734464103, + "grad_norm": 1.5012587623360505, + "learning_rate": 8.35405409719266e-06, + "loss": 0.4673706293106079, + "step": 3934 + }, + { + "epoch": 1.1506068138616756, + "grad_norm": 1.618555555366979, + "learning_rate": 8.349285521776982e-06, + "loss": 0.633565366268158, + "step": 3935 + }, + { + "epoch": 1.1508992542769412, + "grad_norm": 1.6576478038135816, + "learning_rate": 8.344517332205774e-06, + "loss": 0.6029015779495239, + "step": 3936 + }, + { + "epoch": 1.1511916946922065, + "grad_norm": 1.519081286345544, + "learning_rate": 8.339749529593574e-06, + "loss": 0.45594489574432373, + "step": 3937 + }, + { + "epoch": 1.1514841351074718, + "grad_norm": 1.5262842564669963, + "learning_rate": 8.334982115054828e-06, + "loss": 0.4413541257381439, + "step": 3938 + }, + { + "epoch": 1.1517765755227372, + "grad_norm": 1.6373893488771099, + "learning_rate": 8.330215089703887e-06, + "loss": 0.5674389004707336, + "step": 3939 + }, + { + "epoch": 1.1520690159380027, + "grad_norm": 1.386401958621656, + "learning_rate": 8.325448454655019e-06, + "loss": 0.43449294567108154, + "step": 3940 + }, + { + "epoch": 1.152361456353268, + "grad_norm": 1.6283403091444353, + "learning_rate": 8.320682211022393e-06, + "loss": 0.5190714597702026, + "step": 3941 + }, + { + "epoch": 1.1526538967685334, + "grad_norm": 1.5774508757028434, + "learning_rate": 8.31591635992009e-06, + "loss": 0.56162428855896, + "step": 3942 + }, + { + "epoch": 1.1529463371837987, + "grad_norm": 1.4891934876919055, + "learning_rate": 8.311150902462096e-06, + "loss": 0.5588958263397217, + "step": 3943 + }, + { + "epoch": 1.1532387775990642, + "grad_norm": 1.5108312938903155, + "learning_rate": 8.306385839762312e-06, + "loss": 
0.5438264608383179, + "step": 3944 + }, + { + "epoch": 1.1535312180143296, + "grad_norm": 1.575513080138648, + "learning_rate": 8.30162117293454e-06, + "loss": 0.5860258340835571, + "step": 3945 + }, + { + "epoch": 1.153823658429595, + "grad_norm": 1.552005958726473, + "learning_rate": 8.296856903092494e-06, + "loss": 0.4742947220802307, + "step": 3946 + }, + { + "epoch": 1.1541160988448604, + "grad_norm": 1.44195573685015, + "learning_rate": 8.292093031349791e-06, + "loss": 0.47963109612464905, + "step": 3947 + }, + { + "epoch": 1.1544085392601258, + "grad_norm": 1.5340226225614597, + "learning_rate": 8.287329558819957e-06, + "loss": 0.5404704213142395, + "step": 3948 + }, + { + "epoch": 1.154700979675391, + "grad_norm": 1.8054477659796657, + "learning_rate": 8.282566486616425e-06, + "loss": 0.6559766530990601, + "step": 3949 + }, + { + "epoch": 1.1549934200906566, + "grad_norm": 1.507763379787764, + "learning_rate": 8.277803815852535e-06, + "loss": 0.4462929368019104, + "step": 3950 + }, + { + "epoch": 1.155285860505922, + "grad_norm": 1.6398920335039024, + "learning_rate": 8.273041547641531e-06, + "loss": 0.5672504901885986, + "step": 3951 + }, + { + "epoch": 1.1555783009211873, + "grad_norm": 1.5384582587859306, + "learning_rate": 8.268279683096567e-06, + "loss": 0.4040188193321228, + "step": 3952 + }, + { + "epoch": 1.1558707413364526, + "grad_norm": 1.4954603260099153, + "learning_rate": 8.263518223330698e-06, + "loss": 0.4639814794063568, + "step": 3953 + }, + { + "epoch": 1.1561631817517182, + "grad_norm": 1.3560290444841174, + "learning_rate": 8.258757169456885e-06, + "loss": 0.384866327047348, + "step": 3954 + }, + { + "epoch": 1.1564556221669835, + "grad_norm": 1.5360587849114566, + "learning_rate": 8.253996522587997e-06, + "loss": 0.452106773853302, + "step": 3955 + }, + { + "epoch": 1.1567480625822488, + "grad_norm": 1.5044138285106523, + "learning_rate": 8.249236283836806e-06, + "loss": 0.487504780292511, + "step": 3956 + }, + { + "epoch": 
1.1570405029975142, + "grad_norm": 1.6199121483000312, + "learning_rate": 8.244476454315989e-06, + "loss": 0.6225916147232056, + "step": 3957 + }, + { + "epoch": 1.1573329434127797, + "grad_norm": 1.7421167385988239, + "learning_rate": 8.239717035138128e-06, + "loss": 0.5254271030426025, + "step": 3958 + }, + { + "epoch": 1.157625383828045, + "grad_norm": 1.6240162719096014, + "learning_rate": 8.234958027415707e-06, + "loss": 0.5759135484695435, + "step": 3959 + }, + { + "epoch": 1.1579178242433104, + "grad_norm": 1.6959935899735565, + "learning_rate": 8.230199432261115e-06, + "loss": 0.5720966458320618, + "step": 3960 + }, + { + "epoch": 1.158210264658576, + "grad_norm": 1.5797174163929866, + "learning_rate": 8.225441250786643e-06, + "loss": 0.4807323217391968, + "step": 3961 + }, + { + "epoch": 1.1585027050738412, + "grad_norm": 1.6197693861653146, + "learning_rate": 8.22068348410449e-06, + "loss": 0.5049746036529541, + "step": 3962 + }, + { + "epoch": 1.1587951454891066, + "grad_norm": 1.673364031578337, + "learning_rate": 8.215926133326758e-06, + "loss": 0.5321973562240601, + "step": 3963 + }, + { + "epoch": 1.159087585904372, + "grad_norm": 1.3992709586079797, + "learning_rate": 8.211169199565444e-06, + "loss": 0.5176634788513184, + "step": 3964 + }, + { + "epoch": 1.1593800263196374, + "grad_norm": 1.5661593234971032, + "learning_rate": 8.20641268393245e-06, + "loss": 0.5345112681388855, + "step": 3965 + }, + { + "epoch": 1.1596724667349028, + "grad_norm": 1.8309312482061675, + "learning_rate": 8.201656587539589e-06, + "loss": 0.47578325867652893, + "step": 3966 + }, + { + "epoch": 1.159964907150168, + "grad_norm": 1.5996140092470157, + "learning_rate": 8.196900911498563e-06, + "loss": 0.5018264651298523, + "step": 3967 + }, + { + "epoch": 1.1602573475654336, + "grad_norm": 1.530612277867195, + "learning_rate": 8.192145656920989e-06, + "loss": 0.4643394351005554, + "step": 3968 + }, + { + "epoch": 1.160549787980699, + "grad_norm": 1.6066179328722245, + 
"learning_rate": 8.187390824918375e-06, + "loss": 0.5391045808792114, + "step": 3969 + }, + { + "epoch": 1.1608422283959643, + "grad_norm": 1.4691594768883462, + "learning_rate": 8.182636416602136e-06, + "loss": 0.5168124437332153, + "step": 3970 + }, + { + "epoch": 1.1611346688112296, + "grad_norm": 1.4702658109064293, + "learning_rate": 8.177882433083583e-06, + "loss": 0.5821055173873901, + "step": 3971 + }, + { + "epoch": 1.1614271092264952, + "grad_norm": 1.597748811964364, + "learning_rate": 8.173128875473933e-06, + "loss": 0.6031824946403503, + "step": 3972 + }, + { + "epoch": 1.1617195496417605, + "grad_norm": 1.598311083454874, + "learning_rate": 8.1683757448843e-06, + "loss": 0.5085259675979614, + "step": 3973 + }, + { + "epoch": 1.1620119900570258, + "grad_norm": 1.6218562380492636, + "learning_rate": 8.163623042425702e-06, + "loss": 0.5654903650283813, + "step": 3974 + }, + { + "epoch": 1.1623044304722914, + "grad_norm": 1.6279393236171642, + "learning_rate": 8.158870769209051e-06, + "loss": 0.3920902609825134, + "step": 3975 + }, + { + "epoch": 1.1625968708875567, + "grad_norm": 1.6100798425685794, + "learning_rate": 8.154118926345165e-06, + "loss": 0.5334979891777039, + "step": 3976 + }, + { + "epoch": 1.162889311302822, + "grad_norm": 1.7332980039574648, + "learning_rate": 8.149367514944754e-06, + "loss": 0.6212184429168701, + "step": 3977 + }, + { + "epoch": 1.1631817517180874, + "grad_norm": 1.847204612085083, + "learning_rate": 8.144616536118437e-06, + "loss": 0.71863853931427, + "step": 3978 + }, + { + "epoch": 1.163474192133353, + "grad_norm": 1.7297963031597574, + "learning_rate": 8.139865990976722e-06, + "loss": 0.5263794660568237, + "step": 3979 + }, + { + "epoch": 1.1637666325486182, + "grad_norm": 1.5706968019905152, + "learning_rate": 8.135115880630025e-06, + "loss": 0.5035576224327087, + "step": 3980 + }, + { + "epoch": 1.1640590729638836, + "grad_norm": 1.4183002447341373, + "learning_rate": 8.130366206188651e-06, + "loss": 
0.5695084929466248, + "step": 3981 + }, + { + "epoch": 1.1643515133791489, + "grad_norm": 1.51980370598088, + "learning_rate": 8.125616968762806e-06, + "loss": 0.5826396942138672, + "step": 3982 + }, + { + "epoch": 1.1646439537944144, + "grad_norm": 1.5991682342910063, + "learning_rate": 8.1208681694626e-06, + "loss": 0.5132841467857361, + "step": 3983 + }, + { + "epoch": 1.1649363942096798, + "grad_norm": 1.7073185800473716, + "learning_rate": 8.116119809398034e-06, + "loss": 0.6572669744491577, + "step": 3984 + }, + { + "epoch": 1.165228834624945, + "grad_norm": 1.8729301131644296, + "learning_rate": 8.111371889679007e-06, + "loss": 0.5365801453590393, + "step": 3985 + }, + { + "epoch": 1.1655212750402106, + "grad_norm": 1.4561472169130645, + "learning_rate": 8.10662441141532e-06, + "loss": 0.44511687755584717, + "step": 3986 + }, + { + "epoch": 1.165813715455476, + "grad_norm": 1.596383666869324, + "learning_rate": 8.101877375716666e-06, + "loss": 0.47212404012680054, + "step": 3987 + }, + { + "epoch": 1.1661061558707413, + "grad_norm": 1.5859450593798408, + "learning_rate": 8.097130783692631e-06, + "loss": 0.5942205786705017, + "step": 3988 + }, + { + "epoch": 1.1663985962860068, + "grad_norm": 1.6678058947227146, + "learning_rate": 8.092384636452708e-06, + "loss": 0.49162304401397705, + "step": 3989 + }, + { + "epoch": 1.1666910367012722, + "grad_norm": 2.635849062548634, + "learning_rate": 8.087638935106277e-06, + "loss": 0.6544803380966187, + "step": 3990 + }, + { + "epoch": 1.1669834771165375, + "grad_norm": 1.677008396527972, + "learning_rate": 8.082893680762619e-06, + "loss": 0.5572186708450317, + "step": 3991 + }, + { + "epoch": 1.1672759175318028, + "grad_norm": 1.6443546400872178, + "learning_rate": 8.078148874530906e-06, + "loss": 0.5836775898933411, + "step": 3992 + }, + { + "epoch": 1.1675683579470684, + "grad_norm": 1.3079140035223278, + "learning_rate": 8.073404517520208e-06, + "loss": 0.5507068634033203, + "step": 3993 + }, + { + "epoch": 
1.1678607983623337, + "grad_norm": 1.9861505555993526, + "learning_rate": 8.068660610839489e-06, + "loss": 0.5312684178352356, + "step": 3994 + }, + { + "epoch": 1.168153238777599, + "grad_norm": 1.6552821635427635, + "learning_rate": 8.06391715559761e-06, + "loss": 0.5555688142776489, + "step": 3995 + }, + { + "epoch": 1.1684456791928644, + "grad_norm": 1.6871575092969338, + "learning_rate": 8.059174152903324e-06, + "loss": 0.5724596977233887, + "step": 3996 + }, + { + "epoch": 1.16873811960813, + "grad_norm": 1.6937240718062052, + "learning_rate": 8.054431603865282e-06, + "loss": 0.6212218999862671, + "step": 3997 + }, + { + "epoch": 1.1690305600233952, + "grad_norm": 1.4230492945656301, + "learning_rate": 8.049689509592023e-06, + "loss": 0.5061509609222412, + "step": 3998 + }, + { + "epoch": 1.1693230004386606, + "grad_norm": 1.695677070671476, + "learning_rate": 8.044947871191982e-06, + "loss": 0.6143001914024353, + "step": 3999 + }, + { + "epoch": 1.169615440853926, + "grad_norm": 1.32203821023621, + "learning_rate": 8.040206689773487e-06, + "loss": 0.5079911351203918, + "step": 4000 + }, + { + "epoch": 1.1699078812691914, + "grad_norm": 1.6638666755727167, + "learning_rate": 8.035465966444764e-06, + "loss": 0.6104908585548401, + "step": 4001 + }, + { + "epoch": 1.1702003216844568, + "grad_norm": 1.680147004679776, + "learning_rate": 8.03072570231393e-06, + "loss": 0.5953013896942139, + "step": 4002 + }, + { + "epoch": 1.1704927620997223, + "grad_norm": 1.3075175590117196, + "learning_rate": 8.025985898488986e-06, + "loss": 0.4541323781013489, + "step": 4003 + }, + { + "epoch": 1.1707852025149876, + "grad_norm": 1.5142685350846732, + "learning_rate": 8.021246556077838e-06, + "loss": 0.5708850026130676, + "step": 4004 + }, + { + "epoch": 1.171077642930253, + "grad_norm": 1.6574340180310174, + "learning_rate": 8.016507676188275e-06, + "loss": 0.5430601835250854, + "step": 4005 + }, + { + "epoch": 1.1713700833455183, + "grad_norm": 1.504278683910439, + 
"learning_rate": 8.011769259927981e-06, + "loss": 0.5621174573898315, + "step": 4006 + }, + { + "epoch": 1.1716625237607838, + "grad_norm": 1.5473800123062453, + "learning_rate": 8.007031308404536e-06, + "loss": 0.48092782497406006, + "step": 4007 + }, + { + "epoch": 1.1719549641760492, + "grad_norm": 1.9401766125340165, + "learning_rate": 8.002293822725404e-06, + "loss": 0.5770663022994995, + "step": 4008 + }, + { + "epoch": 1.1722474045913145, + "grad_norm": 1.7123399188942874, + "learning_rate": 7.997556803997945e-06, + "loss": 0.5692728757858276, + "step": 4009 + }, + { + "epoch": 1.1725398450065798, + "grad_norm": 1.6496884851556144, + "learning_rate": 7.99282025332941e-06, + "loss": 0.6256895065307617, + "step": 4010 + }, + { + "epoch": 1.1728322854218454, + "grad_norm": 1.4529405935008253, + "learning_rate": 7.988084171826937e-06, + "loss": 0.4272884130477905, + "step": 4011 + }, + { + "epoch": 1.1731247258371107, + "grad_norm": 1.548325850009333, + "learning_rate": 7.983348560597557e-06, + "loss": 0.5113184452056885, + "step": 4012 + }, + { + "epoch": 1.173417166252376, + "grad_norm": 1.4489276426544837, + "learning_rate": 7.978613420748186e-06, + "loss": 0.45635539293289185, + "step": 4013 + }, + { + "epoch": 1.1737096066676416, + "grad_norm": 1.6347983233600756, + "learning_rate": 7.973878753385638e-06, + "loss": 0.5539636611938477, + "step": 4014 + }, + { + "epoch": 1.174002047082907, + "grad_norm": 1.4140693021111321, + "learning_rate": 7.969144559616615e-06, + "loss": 0.6083431243896484, + "step": 4015 + }, + { + "epoch": 1.1742944874981722, + "grad_norm": 1.372102806580561, + "learning_rate": 7.9644108405477e-06, + "loss": 0.5268326997756958, + "step": 4016 + }, + { + "epoch": 1.1745869279134376, + "grad_norm": 1.5989300144328094, + "learning_rate": 7.95967759728538e-06, + "loss": 0.5680301189422607, + "step": 4017 + }, + { + "epoch": 1.174879368328703, + "grad_norm": 1.62105042514946, + "learning_rate": 7.954944830936012e-06, + "loss": 
0.5457121133804321, + "step": 4018 + }, + { + "epoch": 1.1751718087439684, + "grad_norm": 1.59579517284719, + "learning_rate": 7.950212542605857e-06, + "loss": 0.5358338356018066, + "step": 4019 + }, + { + "epoch": 1.1754642491592338, + "grad_norm": 1.5630110417390142, + "learning_rate": 7.945480733401056e-06, + "loss": 0.6094579696655273, + "step": 4020 + }, + { + "epoch": 1.175756689574499, + "grad_norm": 1.6732097827507912, + "learning_rate": 7.940749404427642e-06, + "loss": 0.5108463764190674, + "step": 4021 + }, + { + "epoch": 1.1760491299897646, + "grad_norm": 1.6495017651653137, + "learning_rate": 7.936018556791537e-06, + "loss": 0.4946494698524475, + "step": 4022 + }, + { + "epoch": 1.17634157040503, + "grad_norm": 1.7757854212337651, + "learning_rate": 7.931288191598543e-06, + "loss": 0.5056017637252808, + "step": 4023 + }, + { + "epoch": 1.1766340108202953, + "grad_norm": 1.5276820294687934, + "learning_rate": 7.926558309954354e-06, + "loss": 0.5242294073104858, + "step": 4024 + }, + { + "epoch": 1.1769264512355608, + "grad_norm": 1.682705103807737, + "learning_rate": 7.921828912964556e-06, + "loss": 0.5667276382446289, + "step": 4025 + }, + { + "epoch": 1.1772188916508262, + "grad_norm": 1.6370912674167624, + "learning_rate": 7.917100001734614e-06, + "loss": 0.5282422304153442, + "step": 4026 + }, + { + "epoch": 1.1775113320660915, + "grad_norm": 1.6026370834828365, + "learning_rate": 7.912371577369881e-06, + "loss": 0.4887520670890808, + "step": 4027 + }, + { + "epoch": 1.177803772481357, + "grad_norm": 1.6287890532743194, + "learning_rate": 7.907643640975603e-06, + "loss": 0.5082155466079712, + "step": 4028 + }, + { + "epoch": 1.1780962128966224, + "grad_norm": 1.649507152949628, + "learning_rate": 7.902916193656898e-06, + "loss": 0.5432984828948975, + "step": 4029 + }, + { + "epoch": 1.1783886533118877, + "grad_norm": 1.7249606112651144, + "learning_rate": 7.898189236518783e-06, + "loss": 0.4313681721687317, + "step": 4030 + }, + { + "epoch": 
1.178681093727153, + "grad_norm": 1.494399406404666, + "learning_rate": 7.893462770666155e-06, + "loss": 0.6051831245422363, + "step": 4031 + }, + { + "epoch": 1.1789735341424186, + "grad_norm": 1.5057449817059945, + "learning_rate": 7.888736797203796e-06, + "loss": 0.45805442333221436, + "step": 4032 + }, + { + "epoch": 1.179265974557684, + "grad_norm": 1.7917233044229635, + "learning_rate": 7.884011317236376e-06, + "loss": 0.4998340606689453, + "step": 4033 + }, + { + "epoch": 1.1795584149729492, + "grad_norm": 1.651259706746187, + "learning_rate": 7.879286331868443e-06, + "loss": 0.5298212170600891, + "step": 4034 + }, + { + "epoch": 1.1798508553882145, + "grad_norm": 1.7028183419777814, + "learning_rate": 7.874561842204437e-06, + "loss": 0.5104682445526123, + "step": 4035 + }, + { + "epoch": 1.18014329580348, + "grad_norm": 1.6393724776910414, + "learning_rate": 7.869837849348676e-06, + "loss": 0.5793051719665527, + "step": 4036 + }, + { + "epoch": 1.1804357362187454, + "grad_norm": 1.4839435154715734, + "learning_rate": 7.865114354405367e-06, + "loss": 0.42913323640823364, + "step": 4037 + }, + { + "epoch": 1.1807281766340108, + "grad_norm": 2.104724599006863, + "learning_rate": 7.860391358478596e-06, + "loss": 0.5183675289154053, + "step": 4038 + }, + { + "epoch": 1.1810206170492763, + "grad_norm": 1.5685744104736703, + "learning_rate": 7.855668862672339e-06, + "loss": 0.444034218788147, + "step": 4039 + }, + { + "epoch": 1.1813130574645416, + "grad_norm": 1.486556561749613, + "learning_rate": 7.850946868090446e-06, + "loss": 0.4357207417488098, + "step": 4040 + }, + { + "epoch": 1.181605497879807, + "grad_norm": 1.6923285770365775, + "learning_rate": 7.846225375836657e-06, + "loss": 0.4517707824707031, + "step": 4041 + }, + { + "epoch": 1.1818979382950725, + "grad_norm": 1.5865011864132745, + "learning_rate": 7.841504387014589e-06, + "loss": 0.4437381625175476, + "step": 4042 + }, + { + "epoch": 1.1821903787103378, + "grad_norm": 1.4744521314451464, + 
"learning_rate": 7.836783902727746e-06, + "loss": 0.5364828109741211, + "step": 4043 + }, + { + "epoch": 1.1824828191256032, + "grad_norm": 1.650227369991675, + "learning_rate": 7.832063924079516e-06, + "loss": 0.4814251661300659, + "step": 4044 + }, + { + "epoch": 1.1827752595408685, + "grad_norm": 1.9016693432010778, + "learning_rate": 7.827344452173163e-06, + "loss": 0.5376232862472534, + "step": 4045 + }, + { + "epoch": 1.183067699956134, + "grad_norm": 1.7007887018924743, + "learning_rate": 7.822625488111833e-06, + "loss": 0.6005147695541382, + "step": 4046 + }, + { + "epoch": 1.1833601403713994, + "grad_norm": 1.5696670197669271, + "learning_rate": 7.817907032998556e-06, + "loss": 0.5276827216148376, + "step": 4047 + }, + { + "epoch": 1.1836525807866647, + "grad_norm": 2.1978111734105994, + "learning_rate": 7.813189087936243e-06, + "loss": 0.6425626277923584, + "step": 4048 + }, + { + "epoch": 1.18394502120193, + "grad_norm": 1.272646490936496, + "learning_rate": 7.808471654027685e-06, + "loss": 0.44388407468795776, + "step": 4049 + }, + { + "epoch": 1.1842374616171956, + "grad_norm": 1.743245771156321, + "learning_rate": 7.803754732375554e-06, + "loss": 0.5044336318969727, + "step": 4050 + }, + { + "epoch": 1.184529902032461, + "grad_norm": 1.9415496480441554, + "learning_rate": 7.7990383240824e-06, + "loss": 0.6964906454086304, + "step": 4051 + }, + { + "epoch": 1.1848223424477262, + "grad_norm": 1.9029191440552455, + "learning_rate": 7.794322430250654e-06, + "loss": 0.6093637943267822, + "step": 4052 + }, + { + "epoch": 1.1851147828629918, + "grad_norm": 1.8079016024144563, + "learning_rate": 7.78960705198263e-06, + "loss": 0.5264803171157837, + "step": 4053 + }, + { + "epoch": 1.185407223278257, + "grad_norm": 1.444425047773482, + "learning_rate": 7.78489219038052e-06, + "loss": 0.5336456298828125, + "step": 4054 + }, + { + "epoch": 1.1856996636935224, + "grad_norm": 1.7563642817078289, + "learning_rate": 7.78017784654639e-06, + "loss": 
0.5266311168670654, + "step": 4055 + }, + { + "epoch": 1.1859921041087877, + "grad_norm": 1.6538609406479838, + "learning_rate": 7.775464021582195e-06, + "loss": 0.6281685829162598, + "step": 4056 + }, + { + "epoch": 1.1862845445240533, + "grad_norm": 1.6081255371588656, + "learning_rate": 7.770750716589758e-06, + "loss": 0.560591995716095, + "step": 4057 + }, + { + "epoch": 1.1865769849393186, + "grad_norm": 1.7912692279763305, + "learning_rate": 7.766037932670786e-06, + "loss": 0.5751859545707703, + "step": 4058 + }, + { + "epoch": 1.186869425354584, + "grad_norm": 1.629657999448518, + "learning_rate": 7.761325670926864e-06, + "loss": 0.5404624938964844, + "step": 4059 + }, + { + "epoch": 1.1871618657698493, + "grad_norm": 1.6997280704374504, + "learning_rate": 7.756613932459456e-06, + "loss": 0.4714626669883728, + "step": 4060 + }, + { + "epoch": 1.1874543061851148, + "grad_norm": 1.4471766418666208, + "learning_rate": 7.751902718369903e-06, + "loss": 0.5449519157409668, + "step": 4061 + }, + { + "epoch": 1.1877467466003802, + "grad_norm": 1.6279611933236646, + "learning_rate": 7.747192029759419e-06, + "loss": 0.6518754959106445, + "step": 4062 + }, + { + "epoch": 1.1880391870156455, + "grad_norm": 1.4655931664348079, + "learning_rate": 7.7424818677291e-06, + "loss": 0.47224369645118713, + "step": 4063 + }, + { + "epoch": 1.188331627430911, + "grad_norm": 1.6924798895194766, + "learning_rate": 7.737772233379919e-06, + "loss": 0.5482417345046997, + "step": 4064 + }, + { + "epoch": 1.1886240678461764, + "grad_norm": 1.3910277085667344, + "learning_rate": 7.733063127812724e-06, + "loss": 0.5401996374130249, + "step": 4065 + }, + { + "epoch": 1.1889165082614417, + "grad_norm": 2.4517381628425547, + "learning_rate": 7.72835455212824e-06, + "loss": 0.4678424596786499, + "step": 4066 + }, + { + "epoch": 1.1892089486767072, + "grad_norm": 1.6156459518768798, + "learning_rate": 7.72364650742707e-06, + "loss": 0.5191294550895691, + "step": 4067 + }, + { + "epoch": 
1.1895013890919726, + "grad_norm": 1.4433917939096517, + "learning_rate": 7.718938994809685e-06, + "loss": 0.44018834829330444, + "step": 4068 + }, + { + "epoch": 1.1897938295072379, + "grad_norm": 1.3955169745603861, + "learning_rate": 7.714232015376442e-06, + "loss": 0.47852614521980286, + "step": 4069 + }, + { + "epoch": 1.1900862699225032, + "grad_norm": 1.523334975304476, + "learning_rate": 7.709525570227567e-06, + "loss": 0.5748994946479797, + "step": 4070 + }, + { + "epoch": 1.1903787103377688, + "grad_norm": 1.7714529908638612, + "learning_rate": 7.704819660463164e-06, + "loss": 0.5015645027160645, + "step": 4071 + }, + { + "epoch": 1.190671150753034, + "grad_norm": 1.8100962592275294, + "learning_rate": 7.70011428718321e-06, + "loss": 0.6200511455535889, + "step": 4072 + }, + { + "epoch": 1.1909635911682994, + "grad_norm": 1.531990990921369, + "learning_rate": 7.69540945148756e-06, + "loss": 0.6311289668083191, + "step": 4073 + }, + { + "epoch": 1.1912560315835647, + "grad_norm": 1.5403717728586237, + "learning_rate": 7.690705154475937e-06, + "loss": 0.5707247257232666, + "step": 4074 + }, + { + "epoch": 1.1915484719988303, + "grad_norm": 2.0693191702072107, + "learning_rate": 7.686001397247944e-06, + "loss": 0.5616360902786255, + "step": 4075 + }, + { + "epoch": 1.1918409124140956, + "grad_norm": 1.7144278887449431, + "learning_rate": 7.681298180903054e-06, + "loss": 0.5955555438995361, + "step": 4076 + }, + { + "epoch": 1.192133352829361, + "grad_norm": 1.6252826516162207, + "learning_rate": 7.676595506540615e-06, + "loss": 0.5057257413864136, + "step": 4077 + }, + { + "epoch": 1.1924257932446265, + "grad_norm": 1.8445544744897249, + "learning_rate": 7.671893375259854e-06, + "loss": 0.5795278549194336, + "step": 4078 + }, + { + "epoch": 1.1927182336598918, + "grad_norm": 1.5541021220011975, + "learning_rate": 7.66719178815986e-06, + "loss": 0.5213087797164917, + "step": 4079 + }, + { + "epoch": 1.1930106740751572, + "grad_norm": 1.5183242886274189, + 
"learning_rate": 7.662490746339601e-06, + "loss": 0.5333693027496338, + "step": 4080 + }, + { + "epoch": 1.1933031144904227, + "grad_norm": 1.5253876680230323, + "learning_rate": 7.657790250897916e-06, + "loss": 0.4705297648906708, + "step": 4081 + }, + { + "epoch": 1.193595554905688, + "grad_norm": 1.5875026444946445, + "learning_rate": 7.65309030293352e-06, + "loss": 0.5376054644584656, + "step": 4082 + }, + { + "epoch": 1.1938879953209534, + "grad_norm": 1.4103452849520708, + "learning_rate": 7.648390903544997e-06, + "loss": 0.47457355260849, + "step": 4083 + }, + { + "epoch": 1.1941804357362187, + "grad_norm": 1.5068528532277095, + "learning_rate": 7.6436920538308e-06, + "loss": 0.48752763867378235, + "step": 4084 + }, + { + "epoch": 1.1944728761514842, + "grad_norm": 1.5226531730849548, + "learning_rate": 7.63899375488926e-06, + "loss": 0.48227858543395996, + "step": 4085 + }, + { + "epoch": 1.1947653165667496, + "grad_norm": 1.4101996785965327, + "learning_rate": 7.634296007818576e-06, + "loss": 0.4294116497039795, + "step": 4086 + }, + { + "epoch": 1.1950577569820149, + "grad_norm": 1.24669252589954, + "learning_rate": 7.629598813716817e-06, + "loss": 0.5562552809715271, + "step": 4087 + }, + { + "epoch": 1.1953501973972802, + "grad_norm": 1.6628602240304204, + "learning_rate": 7.624902173681923e-06, + "loss": 0.6466431617736816, + "step": 4088 + }, + { + "epoch": 1.1956426378125458, + "grad_norm": 1.7596122427030323, + "learning_rate": 7.620206088811704e-06, + "loss": 0.7183903455734253, + "step": 4089 + }, + { + "epoch": 1.195935078227811, + "grad_norm": 1.666055880141139, + "learning_rate": 7.615510560203841e-06, + "loss": 0.5667496919631958, + "step": 4090 + }, + { + "epoch": 1.1962275186430764, + "grad_norm": 1.5232682591562918, + "learning_rate": 7.610815588955888e-06, + "loss": 0.5603050589561462, + "step": 4091 + }, + { + "epoch": 1.196519959058342, + "grad_norm": 1.6556784363331365, + "learning_rate": 7.606121176165267e-06, + "loss": 
0.5305474996566772, + "step": 4092 + }, + { + "epoch": 1.1968123994736073, + "grad_norm": 2.0140258709167163, + "learning_rate": 7.6014273229292625e-06, + "loss": 0.7321374416351318, + "step": 4093 + }, + { + "epoch": 1.1971048398888726, + "grad_norm": 1.5891169030075603, + "learning_rate": 7.5967340303450385e-06, + "loss": 0.44885972142219543, + "step": 4094 + }, + { + "epoch": 1.197397280304138, + "grad_norm": 1.7955283190373275, + "learning_rate": 7.592041299509624e-06, + "loss": 0.593859076499939, + "step": 4095 + }, + { + "epoch": 1.1976897207194035, + "grad_norm": 1.4553428657338656, + "learning_rate": 7.587349131519913e-06, + "loss": 0.6701182723045349, + "step": 4096 + }, + { + "epoch": 1.1979821611346688, + "grad_norm": 1.9268795339399152, + "learning_rate": 7.582657527472674e-06, + "loss": 0.6456711292266846, + "step": 4097 + }, + { + "epoch": 1.1982746015499341, + "grad_norm": 1.6602700214896833, + "learning_rate": 7.577966488464543e-06, + "loss": 0.5933864116668701, + "step": 4098 + }, + { + "epoch": 1.1985670419651995, + "grad_norm": 1.398305705152583, + "learning_rate": 7.5732760155920175e-06, + "loss": 0.4609876275062561, + "step": 4099 + }, + { + "epoch": 1.198859482380465, + "grad_norm": 1.4827488477589208, + "learning_rate": 7.568586109951468e-06, + "loss": 0.540961503982544, + "step": 4100 + }, + { + "epoch": 1.1991519227957304, + "grad_norm": 1.629963355664156, + "learning_rate": 7.563896772639132e-06, + "loss": 0.5522942543029785, + "step": 4101 + }, + { + "epoch": 1.1994443632109957, + "grad_norm": 1.739784480619601, + "learning_rate": 7.559208004751114e-06, + "loss": 0.483737587928772, + "step": 4102 + }, + { + "epoch": 1.1997368036262612, + "grad_norm": 1.2743684109876499, + "learning_rate": 7.554519807383384e-06, + "loss": 0.3760339915752411, + "step": 4103 + }, + { + "epoch": 1.2000292440415266, + "grad_norm": 1.494486709964621, + "learning_rate": 7.549832181631782e-06, + "loss": 0.5034801959991455, + "step": 4104 + }, + { + "epoch": 
1.2003216844567919, + "grad_norm": 1.6403057961263519, + "learning_rate": 7.545145128592009e-06, + "loss": 0.5605261325836182, + "step": 4105 + }, + { + "epoch": 1.2006141248720574, + "grad_norm": 1.4179033673825343, + "learning_rate": 7.540458649359637e-06, + "loss": 0.4724245071411133, + "step": 4106 + }, + { + "epoch": 1.2009065652873228, + "grad_norm": 1.609040907971216, + "learning_rate": 7.535772745030101e-06, + "loss": 0.564873218536377, + "step": 4107 + }, + { + "epoch": 1.201199005702588, + "grad_norm": 1.9431151220409157, + "learning_rate": 7.531087416698702e-06, + "loss": 0.699596643447876, + "step": 4108 + }, + { + "epoch": 1.2014914461178534, + "grad_norm": 1.5180492689699372, + "learning_rate": 7.526402665460612e-06, + "loss": 0.47448351979255676, + "step": 4109 + }, + { + "epoch": 1.201783886533119, + "grad_norm": 1.4606225624905942, + "learning_rate": 7.521718492410855e-06, + "loss": 0.4681323766708374, + "step": 4110 + }, + { + "epoch": 1.2020763269483843, + "grad_norm": 1.8550718864551587, + "learning_rate": 7.517034898644333e-06, + "loss": 0.6361842155456543, + "step": 4111 + }, + { + "epoch": 1.2023687673636496, + "grad_norm": 1.5211596606564617, + "learning_rate": 7.5123518852558075e-06, + "loss": 0.4732646942138672, + "step": 4112 + }, + { + "epoch": 1.202661207778915, + "grad_norm": 1.6512929892036816, + "learning_rate": 7.507669453339903e-06, + "loss": 0.57124263048172, + "step": 4113 + }, + { + "epoch": 1.2029536481941805, + "grad_norm": 1.758611342292707, + "learning_rate": 7.502987603991111e-06, + "loss": 0.5228173732757568, + "step": 4114 + }, + { + "epoch": 1.2032460886094458, + "grad_norm": 1.7352024129193708, + "learning_rate": 7.4983063383037864e-06, + "loss": 0.5501765012741089, + "step": 4115 + }, + { + "epoch": 1.2035385290247111, + "grad_norm": 1.6782467710972089, + "learning_rate": 7.493625657372141e-06, + "loss": 0.5062840580940247, + "step": 4116 + }, + { + "epoch": 1.2038309694399767, + "grad_norm": 1.6960273401585455, + 
"learning_rate": 7.4889455622902616e-06, + "loss": 0.7060763835906982, + "step": 4117 + }, + { + "epoch": 1.204123409855242, + "grad_norm": 1.478061987478783, + "learning_rate": 7.484266054152088e-06, + "loss": 0.42127668857574463, + "step": 4118 + }, + { + "epoch": 1.2044158502705073, + "grad_norm": 1.3574946815299211, + "learning_rate": 7.479587134051429e-06, + "loss": 0.490860253572464, + "step": 4119 + }, + { + "epoch": 1.204708290685773, + "grad_norm": 1.443033575116078, + "learning_rate": 7.474908803081955e-06, + "loss": 0.45786625146865845, + "step": 4120 + }, + { + "epoch": 1.2050007311010382, + "grad_norm": 1.810733388901398, + "learning_rate": 7.470231062337192e-06, + "loss": 0.5267277359962463, + "step": 4121 + }, + { + "epoch": 1.2052931715163036, + "grad_norm": 1.670838162040588, + "learning_rate": 7.465553912910539e-06, + "loss": 0.47834646701812744, + "step": 4122 + }, + { + "epoch": 1.2055856119315689, + "grad_norm": 1.4366745635956868, + "learning_rate": 7.460877355895249e-06, + "loss": 0.5348576903343201, + "step": 4123 + }, + { + "epoch": 1.2058780523468344, + "grad_norm": 1.7186674622129299, + "learning_rate": 7.456201392384437e-06, + "loss": 0.47992441058158875, + "step": 4124 + }, + { + "epoch": 1.2061704927620998, + "grad_norm": 1.6108537844876905, + "learning_rate": 7.451526023471085e-06, + "loss": 0.5693913698196411, + "step": 4125 + }, + { + "epoch": 1.206462933177365, + "grad_norm": 1.8995573488864546, + "learning_rate": 7.4468512502480305e-06, + "loss": 0.5165153741836548, + "step": 4126 + }, + { + "epoch": 1.2067553735926304, + "grad_norm": 1.4833110616884417, + "learning_rate": 7.442177073807973e-06, + "loss": 0.522534966468811, + "step": 4127 + }, + { + "epoch": 1.207047814007896, + "grad_norm": 2.0276890753098464, + "learning_rate": 7.43750349524347e-06, + "loss": 0.6298432946205139, + "step": 4128 + }, + { + "epoch": 1.2073402544231613, + "grad_norm": 1.5840348747117112, + "learning_rate": 7.432830515646947e-06, + "loss": 
0.5077394247055054, + "step": 4129 + }, + { + "epoch": 1.2076326948384266, + "grad_norm": 2.0187303897146682, + "learning_rate": 7.428158136110681e-06, + "loss": 0.6492841839790344, + "step": 4130 + }, + { + "epoch": 1.2079251352536922, + "grad_norm": 1.8651226738731277, + "learning_rate": 7.423486357726813e-06, + "loss": 0.5204535126686096, + "step": 4131 + }, + { + "epoch": 1.2082175756689575, + "grad_norm": 1.7208004693147547, + "learning_rate": 7.418815181587347e-06, + "loss": 0.56598961353302, + "step": 4132 + }, + { + "epoch": 1.2085100160842228, + "grad_norm": 1.7632065676998485, + "learning_rate": 7.4141446087841364e-06, + "loss": 0.486950159072876, + "step": 4133 + }, + { + "epoch": 1.2088024564994881, + "grad_norm": 1.8229002651567825, + "learning_rate": 7.4094746404089e-06, + "loss": 0.7218466997146606, + "step": 4134 + }, + { + "epoch": 1.2090948969147537, + "grad_norm": 1.715700034058204, + "learning_rate": 7.404805277553218e-06, + "loss": 0.6486172676086426, + "step": 4135 + }, + { + "epoch": 1.209387337330019, + "grad_norm": 1.8385918373460561, + "learning_rate": 7.400136521308521e-06, + "loss": 0.6160574555397034, + "step": 4136 + }, + { + "epoch": 1.2096797777452843, + "grad_norm": 1.6766631954981184, + "learning_rate": 7.395468372766107e-06, + "loss": 0.6184699535369873, + "step": 4137 + }, + { + "epoch": 1.2099722181605497, + "grad_norm": 1.6881704887676476, + "learning_rate": 7.390800833017124e-06, + "loss": 0.5795263051986694, + "step": 4138 + }, + { + "epoch": 1.2102646585758152, + "grad_norm": 1.6841718896097397, + "learning_rate": 7.386133903152581e-06, + "loss": 0.5409367084503174, + "step": 4139 + }, + { + "epoch": 1.2105570989910805, + "grad_norm": 1.678948206873695, + "learning_rate": 7.3814675842633465e-06, + "loss": 0.47924935817718506, + "step": 4140 + }, + { + "epoch": 1.2108495394063459, + "grad_norm": 1.6532202064740131, + "learning_rate": 7.376801877440143e-06, + "loss": 0.5737412571907043, + "step": 4141 + }, + { + "epoch": 
1.2111419798216114, + "grad_norm": 1.5307761286613382, + "learning_rate": 7.372136783773551e-06, + "loss": 0.538013219833374, + "step": 4142 + }, + { + "epoch": 1.2114344202368768, + "grad_norm": 1.4940902719253717, + "learning_rate": 7.367472304354011e-06, + "loss": 0.4523904323577881, + "step": 4143 + }, + { + "epoch": 1.211726860652142, + "grad_norm": 1.4793905716399964, + "learning_rate": 7.362808440271811e-06, + "loss": 0.5057293176651001, + "step": 4144 + }, + { + "epoch": 1.2120193010674076, + "grad_norm": 1.4373562566302274, + "learning_rate": 7.358145192617103e-06, + "loss": 0.4653171896934509, + "step": 4145 + }, + { + "epoch": 1.212311741482673, + "grad_norm": 1.6048946971271119, + "learning_rate": 7.353482562479896e-06, + "loss": 0.607070803642273, + "step": 4146 + }, + { + "epoch": 1.2126041818979383, + "grad_norm": 1.51939699208445, + "learning_rate": 7.348820550950047e-06, + "loss": 0.4721861481666565, + "step": 4147 + }, + { + "epoch": 1.2128966223132036, + "grad_norm": 1.8802239228266517, + "learning_rate": 7.3441591591172765e-06, + "loss": 0.6656746864318848, + "step": 4148 + }, + { + "epoch": 1.2131890627284692, + "grad_norm": 1.504596663567376, + "learning_rate": 7.339498388071154e-06, + "loss": 0.5231848359107971, + "step": 4149 + }, + { + "epoch": 1.2134815031437345, + "grad_norm": 1.4250712810936565, + "learning_rate": 7.334838238901106e-06, + "loss": 0.42241257429122925, + "step": 4150 + }, + { + "epoch": 1.2137739435589998, + "grad_norm": 1.4721862632309721, + "learning_rate": 7.3301787126964165e-06, + "loss": 0.427111953496933, + "step": 4151 + }, + { + "epoch": 1.2140663839742651, + "grad_norm": 2.038283523639075, + "learning_rate": 7.325519810546219e-06, + "loss": 0.6208339929580688, + "step": 4152 + }, + { + "epoch": 1.2143588243895307, + "grad_norm": 1.3970516014119925, + "learning_rate": 7.320861533539505e-06, + "loss": 0.5031273365020752, + "step": 4153 + }, + { + "epoch": 1.214651264804796, + "grad_norm": 1.5153681425347725, + 
"learning_rate": 7.3162038827651205e-06, + "loss": 0.5617444515228271, + "step": 4154 + }, + { + "epoch": 1.2149437052200613, + "grad_norm": 1.4855483785732004, + "learning_rate": 7.311546859311758e-06, + "loss": 0.4616255462169647, + "step": 4155 + }, + { + "epoch": 1.215236145635327, + "grad_norm": 1.5704453976932513, + "learning_rate": 7.306890464267972e-06, + "loss": 0.5799977779388428, + "step": 4156 + }, + { + "epoch": 1.2155285860505922, + "grad_norm": 1.521477491941422, + "learning_rate": 7.302234698722165e-06, + "loss": 0.5669786930084229, + "step": 4157 + }, + { + "epoch": 1.2158210264658575, + "grad_norm": 1.5325381791627977, + "learning_rate": 7.297579563762595e-06, + "loss": 0.5622642040252686, + "step": 4158 + }, + { + "epoch": 1.216113466881123, + "grad_norm": 1.8789411887268221, + "learning_rate": 7.292925060477367e-06, + "loss": 0.6896791458129883, + "step": 4159 + }, + { + "epoch": 1.2164059072963884, + "grad_norm": 1.5263918361022677, + "learning_rate": 7.288271189954451e-06, + "loss": 0.6704437136650085, + "step": 4160 + }, + { + "epoch": 1.2166983477116537, + "grad_norm": 1.6192057061391554, + "learning_rate": 7.2836179532816565e-06, + "loss": 0.6340646743774414, + "step": 4161 + }, + { + "epoch": 1.216990788126919, + "grad_norm": 1.4283430296516553, + "learning_rate": 7.278965351546648e-06, + "loss": 0.528992772102356, + "step": 4162 + }, + { + "epoch": 1.2172832285421846, + "grad_norm": 1.4842100691170903, + "learning_rate": 7.274313385836949e-06, + "loss": 0.45160621404647827, + "step": 4163 + }, + { + "epoch": 1.21757566895745, + "grad_norm": 1.3859373993268853, + "learning_rate": 7.269662057239919e-06, + "loss": 0.5398670434951782, + "step": 4164 + }, + { + "epoch": 1.2178681093727153, + "grad_norm": 1.7598892874276293, + "learning_rate": 7.265011366842785e-06, + "loss": 0.5174476504325867, + "step": 4165 + }, + { + "epoch": 1.2181605497879806, + "grad_norm": 1.663231631427072, + "learning_rate": 7.260361315732613e-06, + "loss": 
0.4830206632614136, + "step": 4166 + }, + { + "epoch": 1.2184529902032462, + "grad_norm": 1.4149457900973579, + "learning_rate": 7.2557119049963266e-06, + "loss": 0.42422181367874146, + "step": 4167 + }, + { + "epoch": 1.2187454306185115, + "grad_norm": 1.363467777836694, + "learning_rate": 7.251063135720699e-06, + "loss": 0.43544018268585205, + "step": 4168 + }, + { + "epoch": 1.2190378710337768, + "grad_norm": 1.4776092804767433, + "learning_rate": 7.2464150089923465e-06, + "loss": 0.5352005362510681, + "step": 4169 + }, + { + "epoch": 1.2193303114490424, + "grad_norm": 1.5459436268475357, + "learning_rate": 7.241767525897746e-06, + "loss": 0.4718678891658783, + "step": 4170 + }, + { + "epoch": 1.2196227518643077, + "grad_norm": 1.4994134423194976, + "learning_rate": 7.237120687523214e-06, + "loss": 0.618084192276001, + "step": 4171 + }, + { + "epoch": 1.219915192279573, + "grad_norm": 1.8137589794234399, + "learning_rate": 7.232474494954924e-06, + "loss": 0.625995397567749, + "step": 4172 + }, + { + "epoch": 1.2202076326948383, + "grad_norm": 1.4989590312422592, + "learning_rate": 7.227828949278894e-06, + "loss": 0.5382465124130249, + "step": 4173 + }, + { + "epoch": 1.220500073110104, + "grad_norm": 1.702878462884744, + "learning_rate": 7.223184051580992e-06, + "loss": 0.5299465656280518, + "step": 4174 + }, + { + "epoch": 1.2207925135253692, + "grad_norm": 1.7776293184889576, + "learning_rate": 7.218539802946934e-06, + "loss": 0.5899940729141235, + "step": 4175 + }, + { + "epoch": 1.2210849539406345, + "grad_norm": 1.9763552708522982, + "learning_rate": 7.213896204462286e-06, + "loss": 0.6126594543457031, + "step": 4176 + }, + { + "epoch": 1.2213773943558999, + "grad_norm": 1.6580044033592523, + "learning_rate": 7.20925325721246e-06, + "loss": 0.5576338768005371, + "step": 4177 + }, + { + "epoch": 1.2216698347711654, + "grad_norm": 1.5044012673537284, + "learning_rate": 7.204610962282717e-06, + "loss": 0.540515661239624, + "step": 4178 + }, + { + "epoch": 
1.2219622751864307, + "grad_norm": 1.5281012838641301, + "learning_rate": 7.1999693207581675e-06, + "loss": 0.5306440591812134, + "step": 4179 + }, + { + "epoch": 1.222254715601696, + "grad_norm": 2.02113466617051, + "learning_rate": 7.195328333723763e-06, + "loss": 0.6274853944778442, + "step": 4180 + }, + { + "epoch": 1.2225471560169616, + "grad_norm": 1.6954554706562375, + "learning_rate": 7.190688002264308e-06, + "loss": 0.5626333951950073, + "step": 4181 + }, + { + "epoch": 1.222839596432227, + "grad_norm": 1.6364457786315536, + "learning_rate": 7.18604832746445e-06, + "loss": 0.5938719511032104, + "step": 4182 + }, + { + "epoch": 1.2231320368474923, + "grad_norm": 1.4010331016668016, + "learning_rate": 7.181409310408688e-06, + "loss": 0.4599727988243103, + "step": 4183 + }, + { + "epoch": 1.2234244772627578, + "grad_norm": 1.516823379099723, + "learning_rate": 7.176770952181363e-06, + "loss": 0.5912302732467651, + "step": 4184 + }, + { + "epoch": 1.2237169176780232, + "grad_norm": 1.24563200951521, + "learning_rate": 7.172133253866662e-06, + "loss": 0.534631073474884, + "step": 4185 + }, + { + "epoch": 1.2240093580932885, + "grad_norm": 1.3825393422514298, + "learning_rate": 7.167496216548618e-06, + "loss": 0.5084418058395386, + "step": 4186 + }, + { + "epoch": 1.2243017985085538, + "grad_norm": 1.6343841724383257, + "learning_rate": 7.162859841311112e-06, + "loss": 0.6906956434249878, + "step": 4187 + }, + { + "epoch": 1.2245942389238194, + "grad_norm": 1.6583835426138527, + "learning_rate": 7.158224129237867e-06, + "loss": 0.5578658580780029, + "step": 4188 + }, + { + "epoch": 1.2248866793390847, + "grad_norm": 1.4116232043960963, + "learning_rate": 7.153589081412455e-06, + "loss": 0.4438907206058502, + "step": 4189 + }, + { + "epoch": 1.22517911975435, + "grad_norm": 1.9189119615156511, + "learning_rate": 7.148954698918289e-06, + "loss": 0.6366580724716187, + "step": 4190 + }, + { + "epoch": 1.2254715601696153, + "grad_norm": 1.674796821883658, + 
"learning_rate": 7.144320982838628e-06, + "loss": 0.5532524585723877, + "step": 4191 + }, + { + "epoch": 1.2257640005848809, + "grad_norm": 1.3678471530217577, + "learning_rate": 7.139687934256574e-06, + "loss": 0.4847594201564789, + "step": 4192 + }, + { + "epoch": 1.2260564410001462, + "grad_norm": 1.651582950772816, + "learning_rate": 7.135055554255073e-06, + "loss": 0.6273454427719116, + "step": 4193 + }, + { + "epoch": 1.2263488814154115, + "grad_norm": 1.826963047999446, + "learning_rate": 7.130423843916917e-06, + "loss": 0.6320512294769287, + "step": 4194 + }, + { + "epoch": 1.226641321830677, + "grad_norm": 1.9938752870068028, + "learning_rate": 7.125792804324741e-06, + "loss": 0.5499723553657532, + "step": 4195 + }, + { + "epoch": 1.2269337622459424, + "grad_norm": 1.531512294163018, + "learning_rate": 7.121162436561023e-06, + "loss": 0.5855484008789062, + "step": 4196 + }, + { + "epoch": 1.2272262026612077, + "grad_norm": 1.5092619418718032, + "learning_rate": 7.11653274170808e-06, + "loss": 0.5998305678367615, + "step": 4197 + }, + { + "epoch": 1.2275186430764733, + "grad_norm": 1.6613439290789596, + "learning_rate": 7.111903720848077e-06, + "loss": 0.6963703632354736, + "step": 4198 + }, + { + "epoch": 1.2278110834917386, + "grad_norm": 1.7273945695579416, + "learning_rate": 7.10727537506302e-06, + "loss": 0.5664974451065063, + "step": 4199 + }, + { + "epoch": 1.228103523907004, + "grad_norm": 1.8611907189119672, + "learning_rate": 7.102647705434755e-06, + "loss": 0.6502630710601807, + "step": 4200 + }, + { + "epoch": 1.2283959643222693, + "grad_norm": 1.5674599206950446, + "learning_rate": 7.098020713044973e-06, + "loss": 0.5727233290672302, + "step": 4201 + }, + { + "epoch": 1.2286884047375348, + "grad_norm": 1.4105219463780128, + "learning_rate": 7.093394398975206e-06, + "loss": 0.47885602712631226, + "step": 4202 + }, + { + "epoch": 1.2289808451528001, + "grad_norm": 1.62325320016664, + "learning_rate": 7.088768764306826e-06, + "loss": 
0.46089547872543335, + "step": 4203 + }, + { + "epoch": 1.2292732855680655, + "grad_norm": 1.3853973501267451, + "learning_rate": 7.084143810121044e-06, + "loss": 0.48920977115631104, + "step": 4204 + }, + { + "epoch": 1.2295657259833308, + "grad_norm": 1.4371671531095065, + "learning_rate": 7.07951953749892e-06, + "loss": 0.5320104956626892, + "step": 4205 + }, + { + "epoch": 1.2298581663985964, + "grad_norm": 1.693565977205871, + "learning_rate": 7.074895947521347e-06, + "loss": 0.6403206586837769, + "step": 4206 + }, + { + "epoch": 1.2301506068138617, + "grad_norm": 1.3774390509755927, + "learning_rate": 7.070273041269062e-06, + "loss": 0.5522217750549316, + "step": 4207 + }, + { + "epoch": 1.230443047229127, + "grad_norm": 1.644407790392686, + "learning_rate": 7.0656508198226405e-06, + "loss": 0.5235073566436768, + "step": 4208 + }, + { + "epoch": 1.2307354876443926, + "grad_norm": 1.9076552987416457, + "learning_rate": 7.061029284262497e-06, + "loss": 0.5972521305084229, + "step": 4209 + }, + { + "epoch": 1.2310279280596579, + "grad_norm": 1.7443828706372393, + "learning_rate": 7.0564084356688885e-06, + "loss": 0.5989280343055725, + "step": 4210 + }, + { + "epoch": 1.2313203684749232, + "grad_norm": 1.4723000244161777, + "learning_rate": 7.051788275121913e-06, + "loss": 0.5714213848114014, + "step": 4211 + }, + { + "epoch": 1.2316128088901885, + "grad_norm": 1.715005842824084, + "learning_rate": 7.047168803701502e-06, + "loss": 0.5588504076004028, + "step": 4212 + }, + { + "epoch": 1.231905249305454, + "grad_norm": 1.3648320017744335, + "learning_rate": 7.042550022487431e-06, + "loss": 0.47527533769607544, + "step": 4213 + }, + { + "epoch": 1.2321976897207194, + "grad_norm": 1.4838404108317171, + "learning_rate": 7.03793193255931e-06, + "loss": 0.5281137228012085, + "step": 4214 + }, + { + "epoch": 1.2324901301359847, + "grad_norm": 1.7839268972332825, + "learning_rate": 7.033314534996589e-06, + "loss": 0.5509631037712097, + "step": 4215 + }, + { + "epoch": 
1.23278257055125, + "grad_norm": 1.5991883103171023, + "learning_rate": 7.028697830878557e-06, + "loss": 0.5291438698768616, + "step": 4216 + }, + { + "epoch": 1.2330750109665156, + "grad_norm": 1.9253124571991533, + "learning_rate": 7.024081821284343e-06, + "loss": 0.5931780934333801, + "step": 4217 + }, + { + "epoch": 1.233367451381781, + "grad_norm": 1.5978832259158926, + "learning_rate": 7.019466507292908e-06, + "loss": 0.4883537292480469, + "step": 4218 + }, + { + "epoch": 1.2336598917970463, + "grad_norm": 1.5441369085427046, + "learning_rate": 7.014851889983058e-06, + "loss": 0.45155030488967896, + "step": 4219 + }, + { + "epoch": 1.2339523322123118, + "grad_norm": 1.7603110515675113, + "learning_rate": 7.010237970433426e-06, + "loss": 0.6107507944107056, + "step": 4220 + }, + { + "epoch": 1.2342447726275771, + "grad_norm": 1.4005214588133317, + "learning_rate": 7.0056247497224905e-06, + "loss": 0.41764840483665466, + "step": 4221 + }, + { + "epoch": 1.2345372130428425, + "grad_norm": 1.4727432689856292, + "learning_rate": 7.0010122289285635e-06, + "loss": 0.6786199808120728, + "step": 4222 + }, + { + "epoch": 1.234829653458108, + "grad_norm": 1.6328773458986388, + "learning_rate": 6.996400409129793e-06, + "loss": 0.5378292798995972, + "step": 4223 + }, + { + "epoch": 1.2351220938733734, + "grad_norm": 1.5470680329093456, + "learning_rate": 6.9917892914041685e-06, + "loss": 0.47646570205688477, + "step": 4224 + }, + { + "epoch": 1.2354145342886387, + "grad_norm": 1.6302332764801317, + "learning_rate": 6.987178876829503e-06, + "loss": 0.554225504398346, + "step": 4225 + }, + { + "epoch": 1.235706974703904, + "grad_norm": 1.8888599643549215, + "learning_rate": 6.982569166483459e-06, + "loss": 0.42614030838012695, + "step": 4226 + }, + { + "epoch": 1.2359994151191696, + "grad_norm": 1.523118498051214, + "learning_rate": 6.977960161443524e-06, + "loss": 0.5043676495552063, + "step": 4227 + }, + { + "epoch": 1.2362918555344349, + "grad_norm": 1.454372819437309, + 
"learning_rate": 6.973351862787029e-06, + "loss": 0.4905642569065094, + "step": 4228 + }, + { + "epoch": 1.2365842959497002, + "grad_norm": 1.6152329822736995, + "learning_rate": 6.9687442715911325e-06, + "loss": 0.5860332250595093, + "step": 4229 + }, + { + "epoch": 1.2368767363649655, + "grad_norm": 1.3841079659340747, + "learning_rate": 6.9641373889328345e-06, + "loss": 0.4900137782096863, + "step": 4230 + }, + { + "epoch": 1.237169176780231, + "grad_norm": 1.7249957815195471, + "learning_rate": 6.959531215888961e-06, + "loss": 0.5736855268478394, + "step": 4231 + }, + { + "epoch": 1.2374616171954964, + "grad_norm": 1.6635333389812996, + "learning_rate": 6.95492575353618e-06, + "loss": 0.6390400528907776, + "step": 4232 + }, + { + "epoch": 1.2377540576107617, + "grad_norm": 1.6623693676348965, + "learning_rate": 6.95032100295099e-06, + "loss": 0.6553822159767151, + "step": 4233 + }, + { + "epoch": 1.2380464980260273, + "grad_norm": 1.871056647578711, + "learning_rate": 6.945716965209723e-06, + "loss": 0.6685863733291626, + "step": 4234 + }, + { + "epoch": 1.2383389384412926, + "grad_norm": 1.7090289188063175, + "learning_rate": 6.941113641388542e-06, + "loss": 0.5172277688980103, + "step": 4235 + }, + { + "epoch": 1.238631378856558, + "grad_norm": 1.9648968097135298, + "learning_rate": 6.936511032563451e-06, + "loss": 0.6578007936477661, + "step": 4236 + }, + { + "epoch": 1.2389238192718235, + "grad_norm": 1.5304274814539944, + "learning_rate": 6.931909139810283e-06, + "loss": 0.5679500699043274, + "step": 4237 + }, + { + "epoch": 1.2392162596870888, + "grad_norm": 1.6592749019605815, + "learning_rate": 6.927307964204695e-06, + "loss": 0.49142318964004517, + "step": 4238 + }, + { + "epoch": 1.2395087001023541, + "grad_norm": 1.497996058585022, + "learning_rate": 6.9227075068221926e-06, + "loss": 0.5339487195014954, + "step": 4239 + }, + { + "epoch": 1.2398011405176195, + "grad_norm": 1.9993237065248757, + "learning_rate": 6.918107768738097e-06, + "loss": 
0.5845860242843628, + "step": 4240 + }, + { + "epoch": 1.240093580932885, + "grad_norm": 2.5543699126297823, + "learning_rate": 6.9135087510275735e-06, + "loss": 0.6767281889915466, + "step": 4241 + }, + { + "epoch": 1.2403860213481503, + "grad_norm": 1.850547226886836, + "learning_rate": 6.908910454765612e-06, + "loss": 0.6119472980499268, + "step": 4242 + }, + { + "epoch": 1.2406784617634157, + "grad_norm": 1.6013723709723773, + "learning_rate": 6.904312881027038e-06, + "loss": 0.6375409364700317, + "step": 4243 + }, + { + "epoch": 1.240970902178681, + "grad_norm": 1.9482571730059268, + "learning_rate": 6.899716030886508e-06, + "loss": 0.7059881687164307, + "step": 4244 + }, + { + "epoch": 1.2412633425939466, + "grad_norm": 1.9206862231453385, + "learning_rate": 6.895119905418504e-06, + "loss": 0.6463328003883362, + "step": 4245 + }, + { + "epoch": 1.2415557830092119, + "grad_norm": 1.5219372029025222, + "learning_rate": 6.890524505697345e-06, + "loss": 0.5374869108200073, + "step": 4246 + }, + { + "epoch": 1.2418482234244772, + "grad_norm": 1.625313205404651, + "learning_rate": 6.885929832797176e-06, + "loss": 0.5219276547431946, + "step": 4247 + }, + { + "epoch": 1.2421406638397428, + "grad_norm": 1.4315105659194174, + "learning_rate": 6.881335887791973e-06, + "loss": 0.4815624952316284, + "step": 4248 + }, + { + "epoch": 1.242433104255008, + "grad_norm": 1.318059168550072, + "learning_rate": 6.8767426717555475e-06, + "loss": 0.5111992955207825, + "step": 4249 + }, + { + "epoch": 1.2427255446702734, + "grad_norm": 1.6870166439076426, + "learning_rate": 6.872150185761533e-06, + "loss": 0.5331606268882751, + "step": 4250 + }, + { + "epoch": 1.2430179850855387, + "grad_norm": 1.5572023614320247, + "learning_rate": 6.867558430883393e-06, + "loss": 0.5375202894210815, + "step": 4251 + }, + { + "epoch": 1.2433104255008043, + "grad_norm": 1.495445158871636, + "learning_rate": 6.862967408194425e-06, + "loss": 0.5667152404785156, + "step": 4252 + }, + { + "epoch": 
1.2436028659160696, + "grad_norm": 2.036302557289267, + "learning_rate": 6.858377118767752e-06, + "loss": 0.5679255723953247, + "step": 4253 + }, + { + "epoch": 1.243895306331335, + "grad_norm": 1.7798647531094058, + "learning_rate": 6.853787563676324e-06, + "loss": 0.6097947359085083, + "step": 4254 + }, + { + "epoch": 1.2441877467466003, + "grad_norm": 1.458407608257313, + "learning_rate": 6.849198743992927e-06, + "loss": 0.41869044303894043, + "step": 4255 + }, + { + "epoch": 1.2444801871618658, + "grad_norm": 1.595586166137391, + "learning_rate": 6.8446106607901655e-06, + "loss": 0.6414821147918701, + "step": 4256 + }, + { + "epoch": 1.2447726275771311, + "grad_norm": 1.9180058965370612, + "learning_rate": 6.840023315140476e-06, + "loss": 0.5985021591186523, + "step": 4257 + }, + { + "epoch": 1.2450650679923965, + "grad_norm": 1.429348085027092, + "learning_rate": 6.8354367081161235e-06, + "loss": 0.4718092381954193, + "step": 4258 + }, + { + "epoch": 1.245357508407662, + "grad_norm": 1.374927912317877, + "learning_rate": 6.8308508407892e-06, + "loss": 0.46431800723075867, + "step": 4259 + }, + { + "epoch": 1.2456499488229273, + "grad_norm": 1.4906925043469428, + "learning_rate": 6.826265714231624e-06, + "loss": 0.5499997735023499, + "step": 4260 + }, + { + "epoch": 1.2459423892381927, + "grad_norm": 1.605653884930273, + "learning_rate": 6.8216813295151415e-06, + "loss": 0.6078206300735474, + "step": 4261 + }, + { + "epoch": 1.2462348296534582, + "grad_norm": 1.6116067904051048, + "learning_rate": 6.817097687711322e-06, + "loss": 0.5706520080566406, + "step": 4262 + }, + { + "epoch": 1.2465272700687235, + "grad_norm": 1.4579793726336556, + "learning_rate": 6.812514789891566e-06, + "loss": 0.5210137367248535, + "step": 4263 + }, + { + "epoch": 1.2468197104839889, + "grad_norm": 1.5969341972097826, + "learning_rate": 6.807932637127097e-06, + "loss": 0.42632028460502625, + "step": 4264 + }, + { + "epoch": 1.2471121508992542, + "grad_norm": 1.3281470644259092, + 
"learning_rate": 6.803351230488967e-06, + "loss": 0.49990004301071167, + "step": 4265 + }, + { + "epoch": 1.2474045913145198, + "grad_norm": 1.6439327542913937, + "learning_rate": 6.798770571048052e-06, + "loss": 0.557829737663269, + "step": 4266 + }, + { + "epoch": 1.247697031729785, + "grad_norm": 1.6838717466364301, + "learning_rate": 6.794190659875052e-06, + "loss": 0.4784187078475952, + "step": 4267 + }, + { + "epoch": 1.2479894721450504, + "grad_norm": 1.6243877795123443, + "learning_rate": 6.789611498040492e-06, + "loss": 0.4795057773590088, + "step": 4268 + }, + { + "epoch": 1.2482819125603157, + "grad_norm": 1.4149752899303223, + "learning_rate": 6.785033086614725e-06, + "loss": 0.415715754032135, + "step": 4269 + }, + { + "epoch": 1.2485743529755813, + "grad_norm": 1.4478921102692126, + "learning_rate": 6.7804554266679266e-06, + "loss": 0.49056607484817505, + "step": 4270 + }, + { + "epoch": 1.2488667933908466, + "grad_norm": 1.8227279880342706, + "learning_rate": 6.775878519270098e-06, + "loss": 0.5268200039863586, + "step": 4271 + }, + { + "epoch": 1.249159233806112, + "grad_norm": 1.5664194732567784, + "learning_rate": 6.771302365491064e-06, + "loss": 0.6250356435775757, + "step": 4272 + }, + { + "epoch": 1.2494516742213775, + "grad_norm": 1.5152208337758115, + "learning_rate": 6.76672696640047e-06, + "loss": 0.5403029918670654, + "step": 4273 + }, + { + "epoch": 1.2497441146366428, + "grad_norm": 1.6699524807174595, + "learning_rate": 6.762152323067787e-06, + "loss": 0.47006577253341675, + "step": 4274 + }, + { + "epoch": 1.2500365550519081, + "grad_norm": 1.7406248179582138, + "learning_rate": 6.7575784365623134e-06, + "loss": 0.5088232755661011, + "step": 4275 + }, + { + "epoch": 1.2503289954671737, + "grad_norm": 1.7598214720338152, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.5438642501831055, + "step": 4276 + }, + { + "epoch": 1.250621435882439, + "grad_norm": 1.4316922317447767, + "learning_rate": 6.748432938309286e-06, + "loss": 
0.45436567068099976, + "step": 4277 + }, + { + "epoch": 1.2509138762977043, + "grad_norm": 1.5793052704561465, + "learning_rate": 6.743861328699438e-06, + "loss": 0.5298944115638733, + "step": 4278 + }, + { + "epoch": 1.2512063167129697, + "grad_norm": 1.3504092629468785, + "learning_rate": 6.7392904801922055e-06, + "loss": 0.49393707513809204, + "step": 4279 + }, + { + "epoch": 1.251498757128235, + "grad_norm": 1.4852717426676887, + "learning_rate": 6.734720393855998e-06, + "loss": 0.5540947318077087, + "step": 4280 + }, + { + "epoch": 1.2517911975435005, + "grad_norm": 1.4330918355062934, + "learning_rate": 6.730151070759043e-06, + "loss": 0.47406166791915894, + "step": 4281 + }, + { + "epoch": 1.2520836379587659, + "grad_norm": 1.5653956712736337, + "learning_rate": 6.725582511969397e-06, + "loss": 0.46885907649993896, + "step": 4282 + }, + { + "epoch": 1.2523760783740312, + "grad_norm": 1.7710771095422673, + "learning_rate": 6.721014718554931e-06, + "loss": 0.537517786026001, + "step": 4283 + }, + { + "epoch": 1.2526685187892967, + "grad_norm": 1.5323701554592244, + "learning_rate": 6.716447691583336e-06, + "loss": 0.514340341091156, + "step": 4284 + }, + { + "epoch": 1.252960959204562, + "grad_norm": 1.6716715067641383, + "learning_rate": 6.711881432122129e-06, + "loss": 0.5696117281913757, + "step": 4285 + }, + { + "epoch": 1.2532533996198274, + "grad_norm": 1.632492076185155, + "learning_rate": 6.707315941238645e-06, + "loss": 0.5620799660682678, + "step": 4286 + }, + { + "epoch": 1.253545840035093, + "grad_norm": 1.7721487037647632, + "learning_rate": 6.702751220000039e-06, + "loss": 0.4832923412322998, + "step": 4287 + }, + { + "epoch": 1.2538382804503583, + "grad_norm": 1.7195688873272827, + "learning_rate": 6.698187269473289e-06, + "loss": 0.6608176231384277, + "step": 4288 + }, + { + "epoch": 1.2541307208656236, + "grad_norm": 1.67536250359078, + "learning_rate": 6.69362409072519e-06, + "loss": 0.6002779006958008, + "step": 4289 + }, + { + "epoch": 
1.2544231612808892, + "grad_norm": 1.5859756058231869, + "learning_rate": 6.689061684822357e-06, + "loss": 0.49898988008499146, + "step": 4290 + }, + { + "epoch": 1.2547156016961545, + "grad_norm": 1.908707186131175, + "learning_rate": 6.684500052831222e-06, + "loss": 0.5887055397033691, + "step": 4291 + }, + { + "epoch": 1.2550080421114198, + "grad_norm": 1.7680049519728702, + "learning_rate": 6.679939195818043e-06, + "loss": 0.6494714617729187, + "step": 4292 + }, + { + "epoch": 1.2553004825266851, + "grad_norm": 2.320887096811341, + "learning_rate": 6.67537911484889e-06, + "loss": 0.5708397626876831, + "step": 4293 + }, + { + "epoch": 1.2555929229419505, + "grad_norm": 1.4472817266256797, + "learning_rate": 6.670819810989656e-06, + "loss": 0.40412014722824097, + "step": 4294 + }, + { + "epoch": 1.255885363357216, + "grad_norm": 1.675200347061479, + "learning_rate": 6.666261285306048e-06, + "loss": 0.5141078233718872, + "step": 4295 + }, + { + "epoch": 1.2561778037724813, + "grad_norm": 1.8039877813287382, + "learning_rate": 6.661703538863595e-06, + "loss": 0.6463406085968018, + "step": 4296 + }, + { + "epoch": 1.2564702441877467, + "grad_norm": 1.5123528456732447, + "learning_rate": 6.657146572727643e-06, + "loss": 0.5809177160263062, + "step": 4297 + }, + { + "epoch": 1.2567626846030122, + "grad_norm": 1.6628802038143384, + "learning_rate": 6.652590387963354e-06, + "loss": 0.5124412775039673, + "step": 4298 + }, + { + "epoch": 1.2570551250182775, + "grad_norm": 1.8011842610745197, + "learning_rate": 6.64803498563571e-06, + "loss": 0.5399736762046814, + "step": 4299 + }, + { + "epoch": 1.2573475654335429, + "grad_norm": 1.4403786785249715, + "learning_rate": 6.6434803668095095e-06, + "loss": 0.548133373260498, + "step": 4300 + }, + { + "epoch": 1.2576400058488084, + "grad_norm": 1.7736401224051406, + "learning_rate": 6.638926532549364e-06, + "loss": 0.45056310296058655, + "step": 4301 + }, + { + "epoch": 1.2579324462640737, + "grad_norm": 2.434184879977136, + 
"learning_rate": 6.634373483919705e-06, + "loss": 0.5191814303398132, + "step": 4302 + }, + { + "epoch": 1.258224886679339, + "grad_norm": 1.4188278481806091, + "learning_rate": 6.62982122198478e-06, + "loss": 0.41939109563827515, + "step": 4303 + }, + { + "epoch": 1.2585173270946044, + "grad_norm": 1.6631261031278954, + "learning_rate": 6.625269747808655e-06, + "loss": 0.6535190939903259, + "step": 4304 + }, + { + "epoch": 1.2588097675098697, + "grad_norm": 1.7210614964326925, + "learning_rate": 6.620719062455207e-06, + "loss": 0.6282539367675781, + "step": 4305 + }, + { + "epoch": 1.2591022079251353, + "grad_norm": 1.5686327106153548, + "learning_rate": 6.616169166988133e-06, + "loss": 0.5378686189651489, + "step": 4306 + }, + { + "epoch": 1.2593946483404006, + "grad_norm": 1.80292094791683, + "learning_rate": 6.611620062470942e-06, + "loss": 0.5278643369674683, + "step": 4307 + }, + { + "epoch": 1.259687088755666, + "grad_norm": 1.5211478183195457, + "learning_rate": 6.607071749966958e-06, + "loss": 0.5578285455703735, + "step": 4308 + }, + { + "epoch": 1.2599795291709315, + "grad_norm": 1.7646090466366875, + "learning_rate": 6.602524230539324e-06, + "loss": 0.6452580094337463, + "step": 4309 + }, + { + "epoch": 1.2602719695861968, + "grad_norm": 1.7812547970338353, + "learning_rate": 6.597977505250992e-06, + "loss": 0.6133028268814087, + "step": 4310 + }, + { + "epoch": 1.2605644100014621, + "grad_norm": 1.552230597230507, + "learning_rate": 6.5934315751647345e-06, + "loss": 0.4930221140384674, + "step": 4311 + }, + { + "epoch": 1.2608568504167277, + "grad_norm": 2.197359143106273, + "learning_rate": 6.588886441343136e-06, + "loss": 0.48653531074523926, + "step": 4312 + }, + { + "epoch": 1.261149290831993, + "grad_norm": 1.851387133095935, + "learning_rate": 6.5843421048485915e-06, + "loss": 0.6594399213790894, + "step": 4313 + }, + { + "epoch": 1.2614417312472583, + "grad_norm": 1.66909694599425, + "learning_rate": 6.579798566743314e-06, + "loss": 
0.5164401531219482, + "step": 4314 + }, + { + "epoch": 1.2617341716625239, + "grad_norm": 1.7484363064869977, + "learning_rate": 6.5752558280893245e-06, + "loss": 0.6338971853256226, + "step": 4315 + }, + { + "epoch": 1.2620266120777892, + "grad_norm": 1.7526913055276123, + "learning_rate": 6.570713889948461e-06, + "loss": 0.5301859974861145, + "step": 4316 + }, + { + "epoch": 1.2623190524930545, + "grad_norm": 1.5016995868339762, + "learning_rate": 6.566172753382376e-06, + "loss": 0.4572887420654297, + "step": 4317 + }, + { + "epoch": 1.2626114929083199, + "grad_norm": 1.5874066468532555, + "learning_rate": 6.561632419452532e-06, + "loss": 0.5235984325408936, + "step": 4318 + }, + { + "epoch": 1.2629039333235852, + "grad_norm": 1.5456604836068861, + "learning_rate": 6.557092889220206e-06, + "loss": 0.586036205291748, + "step": 4319 + }, + { + "epoch": 1.2631963737388507, + "grad_norm": 1.6865403223453492, + "learning_rate": 6.5525541637464855e-06, + "loss": 0.4728356599807739, + "step": 4320 + }, + { + "epoch": 1.263488814154116, + "grad_norm": 1.5435862254535146, + "learning_rate": 6.548016244092265e-06, + "loss": 0.4932190179824829, + "step": 4321 + }, + { + "epoch": 1.2637812545693814, + "grad_norm": 1.6817765339416926, + "learning_rate": 6.543479131318259e-06, + "loss": 0.525676429271698, + "step": 4322 + }, + { + "epoch": 1.264073694984647, + "grad_norm": 1.4602981048339732, + "learning_rate": 6.538942826484991e-06, + "loss": 0.5462610721588135, + "step": 4323 + }, + { + "epoch": 1.2643661353999123, + "grad_norm": 1.6170865165049584, + "learning_rate": 6.534407330652792e-06, + "loss": 0.5391229391098022, + "step": 4324 + }, + { + "epoch": 1.2646585758151776, + "grad_norm": 1.7047610503615187, + "learning_rate": 6.529872644881811e-06, + "loss": 0.5361309051513672, + "step": 4325 + }, + { + "epoch": 1.2649510162304431, + "grad_norm": 1.7296167923882715, + "learning_rate": 6.525338770232001e-06, + "loss": 0.5692390203475952, + "step": 4326 + }, + { + "epoch": 
1.2652434566457085, + "grad_norm": 1.7314833561159049, + "learning_rate": 6.520805707763125e-06, + "loss": 0.5337555408477783, + "step": 4327 + }, + { + "epoch": 1.2655358970609738, + "grad_norm": 1.5538338127930955, + "learning_rate": 6.5162734585347605e-06, + "loss": 0.604168176651001, + "step": 4328 + }, + { + "epoch": 1.2658283374762394, + "grad_norm": 1.621069176676038, + "learning_rate": 6.5117420236062955e-06, + "loss": 0.5404821038246155, + "step": 4329 + }, + { + "epoch": 1.2661207778915047, + "grad_norm": 1.8779165644410452, + "learning_rate": 6.507211404036922e-06, + "loss": 0.6097038388252258, + "step": 4330 + }, + { + "epoch": 1.26641321830677, + "grad_norm": 1.41106750899854, + "learning_rate": 6.50268160088565e-06, + "loss": 0.44309180974960327, + "step": 4331 + }, + { + "epoch": 1.2667056587220353, + "grad_norm": 1.633689199912191, + "learning_rate": 6.498152615211286e-06, + "loss": 0.5703015923500061, + "step": 4332 + }, + { + "epoch": 1.2669980991373007, + "grad_norm": 1.9239494523704173, + "learning_rate": 6.4936244480724575e-06, + "loss": 0.5745347738265991, + "step": 4333 + }, + { + "epoch": 1.2672905395525662, + "grad_norm": 1.7558467932702122, + "learning_rate": 6.489097100527595e-06, + "loss": 0.6611922979354858, + "step": 4334 + }, + { + "epoch": 1.2675829799678315, + "grad_norm": 1.373367301388142, + "learning_rate": 6.484570573634939e-06, + "loss": 0.4560534358024597, + "step": 4335 + }, + { + "epoch": 1.2678754203830969, + "grad_norm": 1.3735982195225196, + "learning_rate": 6.480044868452535e-06, + "loss": 0.3765673041343689, + "step": 4336 + }, + { + "epoch": 1.2681678607983624, + "grad_norm": 1.631255659187599, + "learning_rate": 6.475519986038246e-06, + "loss": 0.6471004486083984, + "step": 4337 + }, + { + "epoch": 1.2684603012136277, + "grad_norm": 1.6199016829966775, + "learning_rate": 6.4709959274497284e-06, + "loss": 0.5639084577560425, + "step": 4338 + }, + { + "epoch": 1.268752741628893, + "grad_norm": 1.6880087227037737, + 
"learning_rate": 6.4664726937444545e-06, + "loss": 0.6367507576942444, + "step": 4339 + }, + { + "epoch": 1.2690451820441586, + "grad_norm": 2.0302420653268958, + "learning_rate": 6.4619502859797055e-06, + "loss": 0.6803586483001709, + "step": 4340 + }, + { + "epoch": 1.269337622459424, + "grad_norm": 1.7398101139995543, + "learning_rate": 6.457428705212565e-06, + "loss": 0.49068397283554077, + "step": 4341 + }, + { + "epoch": 1.2696300628746893, + "grad_norm": 1.8759736386903334, + "learning_rate": 6.4529079524999296e-06, + "loss": 0.616880476474762, + "step": 4342 + }, + { + "epoch": 1.2699225032899546, + "grad_norm": 1.3483643409763457, + "learning_rate": 6.448388028898489e-06, + "loss": 0.45614945888519287, + "step": 4343 + }, + { + "epoch": 1.27021494370522, + "grad_norm": 1.4554785032074153, + "learning_rate": 6.443868935464754e-06, + "loss": 0.49267178773880005, + "step": 4344 + }, + { + "epoch": 1.2705073841204855, + "grad_norm": 1.6269409722468795, + "learning_rate": 6.439350673255033e-06, + "loss": 0.5169225335121155, + "step": 4345 + }, + { + "epoch": 1.2707998245357508, + "grad_norm": 1.4955295461512919, + "learning_rate": 6.434833243325442e-06, + "loss": 0.4999169111251831, + "step": 4346 + }, + { + "epoch": 1.2710922649510161, + "grad_norm": 1.6243334237328435, + "learning_rate": 6.430316646731906e-06, + "loss": 0.6282567977905273, + "step": 4347 + }, + { + "epoch": 1.2713847053662817, + "grad_norm": 1.6085299245102849, + "learning_rate": 6.425800884530151e-06, + "loss": 0.5007494688034058, + "step": 4348 + }, + { + "epoch": 1.271677145781547, + "grad_norm": 1.656568917278449, + "learning_rate": 6.421285957775705e-06, + "loss": 0.5178118944168091, + "step": 4349 + }, + { + "epoch": 1.2719695861968123, + "grad_norm": 1.560370266514351, + "learning_rate": 6.4167718675239075e-06, + "loss": 0.5473636388778687, + "step": 4350 + }, + { + "epoch": 1.2722620266120779, + "grad_norm": 1.6953423126666767, + "learning_rate": 6.4122586148299004e-06, + "loss": 
0.5863620042800903, + "step": 4351 + }, + { + "epoch": 1.2725544670273432, + "grad_norm": 1.8607908969719156, + "learning_rate": 6.407746200748628e-06, + "loss": 0.5301654934883118, + "step": 4352 + }, + { + "epoch": 1.2728469074426085, + "grad_norm": 1.6932378497792755, + "learning_rate": 6.403234626334842e-06, + "loss": 0.5856075286865234, + "step": 4353 + }, + { + "epoch": 1.273139347857874, + "grad_norm": 1.678003179838639, + "learning_rate": 6.39872389264309e-06, + "loss": 0.49686455726623535, + "step": 4354 + }, + { + "epoch": 1.2734317882731394, + "grad_norm": 1.4854139308295418, + "learning_rate": 6.394214000727734e-06, + "loss": 0.5032684803009033, + "step": 4355 + }, + { + "epoch": 1.2737242286884047, + "grad_norm": 1.8801294667488437, + "learning_rate": 6.389704951642931e-06, + "loss": 0.6855330467224121, + "step": 4356 + }, + { + "epoch": 1.27401666910367, + "grad_norm": 1.479367610859775, + "learning_rate": 6.385196746442644e-06, + "loss": 0.5333864688873291, + "step": 4357 + }, + { + "epoch": 1.2743091095189354, + "grad_norm": 1.5944305875728124, + "learning_rate": 6.380689386180641e-06, + "loss": 0.5597629547119141, + "step": 4358 + }, + { + "epoch": 1.274601549934201, + "grad_norm": 1.467403558865203, + "learning_rate": 6.376182871910488e-06, + "loss": 0.4576488137245178, + "step": 4359 + }, + { + "epoch": 1.2748939903494663, + "grad_norm": 1.7247772731373485, + "learning_rate": 6.371677204685555e-06, + "loss": 0.45165061950683594, + "step": 4360 + }, + { + "epoch": 1.2751864307647316, + "grad_norm": 1.5415632861050979, + "learning_rate": 6.367172385559014e-06, + "loss": 0.5451514720916748, + "step": 4361 + }, + { + "epoch": 1.2754788711799971, + "grad_norm": 1.874618224476165, + "learning_rate": 6.362668415583841e-06, + "loss": 0.6141163110733032, + "step": 4362 + }, + { + "epoch": 1.2757713115952625, + "grad_norm": 1.6869879622469415, + "learning_rate": 6.358165295812809e-06, + "loss": 0.5156669020652771, + "step": 4363 + }, + { + "epoch": 
1.2760637520105278, + "grad_norm": 1.8328178355603366, + "learning_rate": 6.3536630272984974e-06, + "loss": 0.41485118865966797, + "step": 4364 + }, + { + "epoch": 1.2763561924257933, + "grad_norm": 1.546563271256682, + "learning_rate": 6.3491616110932845e-06, + "loss": 0.386514276266098, + "step": 4365 + }, + { + "epoch": 1.2766486328410587, + "grad_norm": 1.472426766767245, + "learning_rate": 6.344661048249345e-06, + "loss": 0.5620483160018921, + "step": 4366 + }, + { + "epoch": 1.276941073256324, + "grad_norm": 1.6328857080628636, + "learning_rate": 6.340161339818662e-06, + "loss": 0.4910007119178772, + "step": 4367 + }, + { + "epoch": 1.2772335136715895, + "grad_norm": 1.3312787841228058, + "learning_rate": 6.335662486853014e-06, + "loss": 0.4628123939037323, + "step": 4368 + }, + { + "epoch": 1.2775259540868549, + "grad_norm": 1.7576669653081538, + "learning_rate": 6.331164490403978e-06, + "loss": 0.5129125118255615, + "step": 4369 + }, + { + "epoch": 1.2778183945021202, + "grad_norm": 1.3282548492081792, + "learning_rate": 6.326667351522939e-06, + "loss": 0.45091521739959717, + "step": 4370 + }, + { + "epoch": 1.2781108349173855, + "grad_norm": 1.4312089210542207, + "learning_rate": 6.322171071261071e-06, + "loss": 0.4914324879646301, + "step": 4371 + }, + { + "epoch": 1.2784032753326509, + "grad_norm": 1.7409991660962885, + "learning_rate": 6.317675650669353e-06, + "loss": 0.6361461877822876, + "step": 4372 + }, + { + "epoch": 1.2786957157479164, + "grad_norm": 1.6196651007639755, + "learning_rate": 6.313181090798561e-06, + "loss": 0.4251636564731598, + "step": 4373 + }, + { + "epoch": 1.2789881561631817, + "grad_norm": 1.7204832108380748, + "learning_rate": 6.308687392699275e-06, + "loss": 0.5605714321136475, + "step": 4374 + }, + { + "epoch": 1.279280596578447, + "grad_norm": 1.5898129202606366, + "learning_rate": 6.304194557421867e-06, + "loss": 0.5366392731666565, + "step": 4375 + }, + { + "epoch": 1.2795730369937126, + "grad_norm": 1.9084263306328586, + 
"learning_rate": 6.299702586016512e-06, + "loss": 0.5501587986946106, + "step": 4376 + }, + { + "epoch": 1.279865477408978, + "grad_norm": 1.856477952130892, + "learning_rate": 6.295211479533177e-06, + "loss": 0.6145694851875305, + "step": 4377 + }, + { + "epoch": 1.2801579178242433, + "grad_norm": 1.9271512769721166, + "learning_rate": 6.2907212390216335e-06, + "loss": 0.5921984910964966, + "step": 4378 + }, + { + "epoch": 1.2804503582395088, + "grad_norm": 1.5061577707687395, + "learning_rate": 6.286231865531447e-06, + "loss": 0.4376833140850067, + "step": 4379 + }, + { + "epoch": 1.2807427986547741, + "grad_norm": 1.5348932565255202, + "learning_rate": 6.281743360111983e-06, + "loss": 0.5141662955284119, + "step": 4380 + }, + { + "epoch": 1.2810352390700395, + "grad_norm": 1.700541758244486, + "learning_rate": 6.2772557238124025e-06, + "loss": 0.7065848112106323, + "step": 4381 + }, + { + "epoch": 1.2813276794853048, + "grad_norm": 1.500203661604044, + "learning_rate": 6.272768957681659e-06, + "loss": 0.5662813186645508, + "step": 4382 + }, + { + "epoch": 1.2816201199005701, + "grad_norm": 1.5006210101215816, + "learning_rate": 6.268283062768512e-06, + "loss": 0.46340662240982056, + "step": 4383 + }, + { + "epoch": 1.2819125603158357, + "grad_norm": 1.5406586553103667, + "learning_rate": 6.263798040121508e-06, + "loss": 0.5258422493934631, + "step": 4384 + }, + { + "epoch": 1.282205000731101, + "grad_norm": 1.8313859097442655, + "learning_rate": 6.2593138907889965e-06, + "loss": 0.5586943030357361, + "step": 4385 + }, + { + "epoch": 1.2824974411463663, + "grad_norm": 1.707661958872181, + "learning_rate": 6.254830615819116e-06, + "loss": 0.5224723815917969, + "step": 4386 + }, + { + "epoch": 1.2827898815616319, + "grad_norm": 1.8755820352841006, + "learning_rate": 6.250348216259812e-06, + "loss": 0.6092125177383423, + "step": 4387 + }, + { + "epoch": 1.2830823219768972, + "grad_norm": 1.6601692047393128, + "learning_rate": 6.245866693158813e-06, + "loss": 
0.5582839250564575, + "step": 4388 + }, + { + "epoch": 1.2833747623921625, + "grad_norm": 1.529218817283274, + "learning_rate": 6.241386047563649e-06, + "loss": 0.6074620485305786, + "step": 4389 + }, + { + "epoch": 1.283667202807428, + "grad_norm": 1.3747332990929297, + "learning_rate": 6.236906280521646e-06, + "loss": 0.6247550845146179, + "step": 4390 + }, + { + "epoch": 1.2839596432226934, + "grad_norm": 1.6645308511195784, + "learning_rate": 6.232427393079919e-06, + "loss": 0.5325940847396851, + "step": 4391 + }, + { + "epoch": 1.2842520836379587, + "grad_norm": 1.5279900789464966, + "learning_rate": 6.227949386285379e-06, + "loss": 0.5082288980484009, + "step": 4392 + }, + { + "epoch": 1.2845445240532243, + "grad_norm": 1.587332587045442, + "learning_rate": 6.223472261184738e-06, + "loss": 0.5704036355018616, + "step": 4393 + }, + { + "epoch": 1.2848369644684896, + "grad_norm": 1.7646477307813349, + "learning_rate": 6.218996018824492e-06, + "loss": 0.5301543474197388, + "step": 4394 + }, + { + "epoch": 1.285129404883755, + "grad_norm": 1.6829663682000435, + "learning_rate": 6.21452066025094e-06, + "loss": 0.48660725355148315, + "step": 4395 + }, + { + "epoch": 1.2854218452990203, + "grad_norm": 1.7324467857194032, + "learning_rate": 6.210046186510168e-06, + "loss": 0.5744560956954956, + "step": 4396 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.6645302463411007, + "learning_rate": 6.205572598648055e-06, + "loss": 0.5714898109436035, + "step": 4397 + }, + { + "epoch": 1.2860067261295511, + "grad_norm": 1.8166911532739076, + "learning_rate": 6.201099897710277e-06, + "loss": 0.6616571545600891, + "step": 4398 + }, + { + "epoch": 1.2862991665448165, + "grad_norm": 1.75450880953695, + "learning_rate": 6.1966280847423e-06, + "loss": 0.5552959442138672, + "step": 4399 + }, + { + "epoch": 1.2865916069600818, + "grad_norm": 1.6738534376194054, + "learning_rate": 6.192157160789382e-06, + "loss": 0.5544919967651367, + "step": 4400 + }, + { + "epoch": 
1.2868840473753473, + "grad_norm": 1.6448049553355306, + "learning_rate": 6.18768712689658e-06, + "loss": 0.5914726853370667, + "step": 4401 + }, + { + "epoch": 1.2871764877906127, + "grad_norm": 1.76025336575331, + "learning_rate": 6.183217984108729e-06, + "loss": 0.47191259264945984, + "step": 4402 + }, + { + "epoch": 1.287468928205878, + "grad_norm": 1.690038062727397, + "learning_rate": 6.178749733470468e-06, + "loss": 0.6479181051254272, + "step": 4403 + }, + { + "epoch": 1.2877613686211435, + "grad_norm": 1.5093061541159978, + "learning_rate": 6.174282376026225e-06, + "loss": 0.42491137981414795, + "step": 4404 + }, + { + "epoch": 1.2880538090364089, + "grad_norm": 1.5952968160469727, + "learning_rate": 6.169815912820214e-06, + "loss": 0.6037728786468506, + "step": 4405 + }, + { + "epoch": 1.2883462494516742, + "grad_norm": 1.6035701682484467, + "learning_rate": 6.165350344896446e-06, + "loss": 0.4979787766933441, + "step": 4406 + }, + { + "epoch": 1.2886386898669397, + "grad_norm": 1.800062229580063, + "learning_rate": 6.160885673298722e-06, + "loss": 0.5863564014434814, + "step": 4407 + }, + { + "epoch": 1.288931130282205, + "grad_norm": 1.735193401842224, + "learning_rate": 6.156421899070628e-06, + "loss": 0.6516878008842468, + "step": 4408 + }, + { + "epoch": 1.2892235706974704, + "grad_norm": 1.3644068122534347, + "learning_rate": 6.151959023255545e-06, + "loss": 0.45655903220176697, + "step": 4409 + }, + { + "epoch": 1.2895160111127357, + "grad_norm": 1.5401566996811273, + "learning_rate": 6.147497046896644e-06, + "loss": 0.4751289486885071, + "step": 4410 + }, + { + "epoch": 1.289808451528001, + "grad_norm": 1.6902527178920421, + "learning_rate": 6.1430359710368845e-06, + "loss": 0.48472684621810913, + "step": 4411 + }, + { + "epoch": 1.2901008919432666, + "grad_norm": 1.5473669029252384, + "learning_rate": 6.138575796719017e-06, + "loss": 0.5014214515686035, + "step": 4412 + }, + { + "epoch": 1.290393332358532, + "grad_norm": 1.7827106404845192, + 
"learning_rate": 6.134116524985581e-06, + "loss": 0.5979991555213928, + "step": 4413 + }, + { + "epoch": 1.2906857727737973, + "grad_norm": 1.325839826079579, + "learning_rate": 6.129658156878899e-06, + "loss": 0.4651130437850952, + "step": 4414 + }, + { + "epoch": 1.2909782131890628, + "grad_norm": 1.7806648175874917, + "learning_rate": 6.125200693441092e-06, + "loss": 0.5938215255737305, + "step": 4415 + }, + { + "epoch": 1.2912706536043281, + "grad_norm": 1.5490961027602033, + "learning_rate": 6.1207441357140626e-06, + "loss": 0.4893927574157715, + "step": 4416 + }, + { + "epoch": 1.2915630940195935, + "grad_norm": 1.7524993955466766, + "learning_rate": 6.116288484739507e-06, + "loss": 0.5546435713768005, + "step": 4417 + }, + { + "epoch": 1.291855534434859, + "grad_norm": 1.8413981048239587, + "learning_rate": 6.111833741558905e-06, + "loss": 0.545367419719696, + "step": 4418 + }, + { + "epoch": 1.2921479748501243, + "grad_norm": 1.4120684443774227, + "learning_rate": 6.1073799072135245e-06, + "loss": 0.47479283809661865, + "step": 4419 + }, + { + "epoch": 1.2924404152653897, + "grad_norm": 1.6721044710471762, + "learning_rate": 6.102926982744423e-06, + "loss": 0.5109270215034485, + "step": 4420 + }, + { + "epoch": 1.292732855680655, + "grad_norm": 1.774842272860347, + "learning_rate": 6.098474969192445e-06, + "loss": 0.5862404108047485, + "step": 4421 + }, + { + "epoch": 1.2930252960959203, + "grad_norm": 1.5821200459355214, + "learning_rate": 6.09402386759822e-06, + "loss": 0.5031660795211792, + "step": 4422 + }, + { + "epoch": 1.2933177365111859, + "grad_norm": 1.7397846198854208, + "learning_rate": 6.089573679002168e-06, + "loss": 0.47179776430130005, + "step": 4423 + }, + { + "epoch": 1.2936101769264512, + "grad_norm": 1.5340233803824985, + "learning_rate": 6.085124404444495e-06, + "loss": 0.45889902114868164, + "step": 4424 + }, + { + "epoch": 1.2939026173417165, + "grad_norm": 1.5550814946749143, + "learning_rate": 6.080676044965188e-06, + "loss": 
0.49759042263031006, + "step": 4425 + }, + { + "epoch": 1.294195057756982, + "grad_norm": 1.9841525065569887, + "learning_rate": 6.076228601604024e-06, + "loss": 0.5980732440948486, + "step": 4426 + }, + { + "epoch": 1.2944874981722474, + "grad_norm": 1.6256180215634828, + "learning_rate": 6.07178207540057e-06, + "loss": 0.6167548894882202, + "step": 4427 + }, + { + "epoch": 1.2947799385875127, + "grad_norm": 1.7343822678821683, + "learning_rate": 6.067336467394169e-06, + "loss": 0.5632568597793579, + "step": 4428 + }, + { + "epoch": 1.2950723790027783, + "grad_norm": 1.713926568632917, + "learning_rate": 6.062891778623961e-06, + "loss": 0.5521456003189087, + "step": 4429 + }, + { + "epoch": 1.2953648194180436, + "grad_norm": 1.4514202434870498, + "learning_rate": 6.058448010128861e-06, + "loss": 0.5916576385498047, + "step": 4430 + }, + { + "epoch": 1.295657259833309, + "grad_norm": 1.4200773171635346, + "learning_rate": 6.054005162947571e-06, + "loss": 0.546825647354126, + "step": 4431 + }, + { + "epoch": 1.2959497002485745, + "grad_norm": 1.903586469303659, + "learning_rate": 6.049563238118584e-06, + "loss": 0.5704302787780762, + "step": 4432 + }, + { + "epoch": 1.2962421406638398, + "grad_norm": 1.6923235048512564, + "learning_rate": 6.0451222366801706e-06, + "loss": 0.5791710615158081, + "step": 4433 + }, + { + "epoch": 1.2965345810791051, + "grad_norm": 1.5242567102891653, + "learning_rate": 6.040682159670389e-06, + "loss": 0.41179752349853516, + "step": 4434 + }, + { + "epoch": 1.2968270214943705, + "grad_norm": 1.7120079687188825, + "learning_rate": 6.03624300812708e-06, + "loss": 0.5213680267333984, + "step": 4435 + }, + { + "epoch": 1.2971194619096358, + "grad_norm": 1.6198208396506975, + "learning_rate": 6.0318047830878675e-06, + "loss": 0.4917318522930145, + "step": 4436 + }, + { + "epoch": 1.2974119023249013, + "grad_norm": 1.9301576881874427, + "learning_rate": 6.027367485590159e-06, + "loss": 0.6347956657409668, + "step": 4437 + }, + { + "epoch": 
1.2977043427401667, + "grad_norm": 1.454096730257314, + "learning_rate": 6.022931116671147e-06, + "loss": 0.5263427495956421, + "step": 4438 + }, + { + "epoch": 1.297996783155432, + "grad_norm": 1.3982615348649814, + "learning_rate": 6.018495677367806e-06, + "loss": 0.5686784982681274, + "step": 4439 + }, + { + "epoch": 1.2982892235706975, + "grad_norm": 1.6986790860575087, + "learning_rate": 6.0140611687168934e-06, + "loss": 0.576974630355835, + "step": 4440 + }, + { + "epoch": 1.2985816639859629, + "grad_norm": 1.7183954732732796, + "learning_rate": 6.009627591754946e-06, + "loss": 0.5375877618789673, + "step": 4441 + }, + { + "epoch": 1.2988741044012282, + "grad_norm": 1.7026702794952187, + "learning_rate": 6.005194947518287e-06, + "loss": 0.6106576919555664, + "step": 4442 + }, + { + "epoch": 1.2991665448164937, + "grad_norm": 1.6076086367802058, + "learning_rate": 6.000763237043021e-06, + "loss": 0.475483238697052, + "step": 4443 + }, + { + "epoch": 1.299458985231759, + "grad_norm": 1.7568326021636087, + "learning_rate": 5.9963324613650335e-06, + "loss": 0.5819226503372192, + "step": 4444 + }, + { + "epoch": 1.2997514256470244, + "grad_norm": 1.6384408260054233, + "learning_rate": 5.991902621519988e-06, + "loss": 0.6394410133361816, + "step": 4445 + }, + { + "epoch": 1.30004386606229, + "grad_norm": 1.653615111391099, + "learning_rate": 5.987473718543338e-06, + "loss": 0.48502016067504883, + "step": 4446 + }, + { + "epoch": 1.3003363064775553, + "grad_norm": 1.5217151928427126, + "learning_rate": 5.983045753470308e-06, + "loss": 0.5782333612442017, + "step": 4447 + }, + { + "epoch": 1.3006287468928206, + "grad_norm": 1.8358895387455052, + "learning_rate": 5.97861872733591e-06, + "loss": 0.5498893857002258, + "step": 4448 + }, + { + "epoch": 1.300921187308086, + "grad_norm": 1.5773905938706185, + "learning_rate": 5.974192641174934e-06, + "loss": 0.47757571935653687, + "step": 4449 + }, + { + "epoch": 1.3012136277233513, + "grad_norm": 1.751650457738534, + 
"learning_rate": 5.96976749602195e-06, + "loss": 0.5401994585990906, + "step": 4450 + }, + { + "epoch": 1.3015060681386168, + "grad_norm": 1.7445816604225337, + "learning_rate": 5.965343292911309e-06, + "loss": 0.5818814635276794, + "step": 4451 + }, + { + "epoch": 1.3017985085538821, + "grad_norm": 1.890298335476633, + "learning_rate": 5.9609200328771465e-06, + "loss": 0.524645984172821, + "step": 4452 + }, + { + "epoch": 1.3020909489691475, + "grad_norm": 1.6124004265504417, + "learning_rate": 5.956497716953365e-06, + "loss": 0.46523183584213257, + "step": 4453 + }, + { + "epoch": 1.302383389384413, + "grad_norm": 1.6328139064911342, + "learning_rate": 5.952076346173657e-06, + "loss": 0.6066159009933472, + "step": 4454 + }, + { + "epoch": 1.3026758297996783, + "grad_norm": 1.5743831575113747, + "learning_rate": 5.947655921571491e-06, + "loss": 0.48635774850845337, + "step": 4455 + }, + { + "epoch": 1.3029682702149437, + "grad_norm": 1.7296441740948125, + "learning_rate": 5.943236444180116e-06, + "loss": 0.5159435868263245, + "step": 4456 + }, + { + "epoch": 1.3032607106302092, + "grad_norm": 1.43545214825073, + "learning_rate": 5.938817915032558e-06, + "loss": 0.5566878914833069, + "step": 4457 + }, + { + "epoch": 1.3035531510454745, + "grad_norm": 1.5736652583628634, + "learning_rate": 5.934400335161618e-06, + "loss": 0.46998029947280884, + "step": 4458 + }, + { + "epoch": 1.3038455914607399, + "grad_norm": 1.7808256717613173, + "learning_rate": 5.92998370559988e-06, + "loss": 0.5554553270339966, + "step": 4459 + }, + { + "epoch": 1.3041380318760052, + "grad_norm": 1.7335497855414168, + "learning_rate": 5.925568027379704e-06, + "loss": 0.5659651756286621, + "step": 4460 + }, + { + "epoch": 1.3044304722912705, + "grad_norm": 1.4784849199972236, + "learning_rate": 5.921153301533229e-06, + "loss": 0.5105445981025696, + "step": 4461 + }, + { + "epoch": 1.304722912706536, + "grad_norm": 1.6833489269681376, + "learning_rate": 5.91673952909237e-06, + "loss": 
0.5255740284919739, + "step": 4462 + }, + { + "epoch": 1.3050153531218014, + "grad_norm": 1.6388447853221406, + "learning_rate": 5.912326711088821e-06, + "loss": 0.5691270232200623, + "step": 4463 + }, + { + "epoch": 1.3053077935370667, + "grad_norm": 1.64945916767282, + "learning_rate": 5.907914848554048e-06, + "loss": 0.5783474445343018, + "step": 4464 + }, + { + "epoch": 1.3056002339523323, + "grad_norm": 1.631334603802349, + "learning_rate": 5.903503942519299e-06, + "loss": 0.6305002570152283, + "step": 4465 + }, + { + "epoch": 1.3058926743675976, + "grad_norm": 1.9357776829199835, + "learning_rate": 5.8990939940156e-06, + "loss": 0.6465631723403931, + "step": 4466 + }, + { + "epoch": 1.306185114782863, + "grad_norm": 1.8264406193491898, + "learning_rate": 5.8946850040737434e-06, + "loss": 0.4883456230163574, + "step": 4467 + }, + { + "epoch": 1.3064775551981285, + "grad_norm": 1.3902013367704193, + "learning_rate": 5.890276973724305e-06, + "loss": 0.4896056056022644, + "step": 4468 + }, + { + "epoch": 1.3067699956133938, + "grad_norm": 1.6292986861573446, + "learning_rate": 5.885869903997638e-06, + "loss": 0.603757917881012, + "step": 4469 + }, + { + "epoch": 1.3070624360286591, + "grad_norm": 1.6368879465310389, + "learning_rate": 5.881463795923866e-06, + "loss": 0.5412129163742065, + "step": 4470 + }, + { + "epoch": 1.3073548764439247, + "grad_norm": 1.576979548849775, + "learning_rate": 5.877058650532891e-06, + "loss": 0.5255335569381714, + "step": 4471 + }, + { + "epoch": 1.30764731685919, + "grad_norm": 1.861250264495057, + "learning_rate": 5.87265446885439e-06, + "loss": 0.5855039358139038, + "step": 4472 + }, + { + "epoch": 1.3079397572744553, + "grad_norm": 1.7387082626664492, + "learning_rate": 5.868251251917811e-06, + "loss": 0.5763603448867798, + "step": 4473 + }, + { + "epoch": 1.3082321976897207, + "grad_norm": 1.7494976398773932, + "learning_rate": 5.86384900075238e-06, + "loss": 0.5148910880088806, + "step": 4474 + }, + { + "epoch": 
1.308524638104986, + "grad_norm": 1.726220320494232, + "learning_rate": 5.859447716387097e-06, + "loss": 0.6387143135070801, + "step": 4475 + }, + { + "epoch": 1.3088170785202515, + "grad_norm": 1.6421362434800872, + "learning_rate": 5.855047399850735e-06, + "loss": 0.5492211580276489, + "step": 4476 + }, + { + "epoch": 1.3091095189355169, + "grad_norm": 1.748321310864673, + "learning_rate": 5.850648052171843e-06, + "loss": 0.5715115070343018, + "step": 4477 + }, + { + "epoch": 1.3094019593507822, + "grad_norm": 1.8948603499593957, + "learning_rate": 5.8462496743787385e-06, + "loss": 0.6295989155769348, + "step": 4478 + }, + { + "epoch": 1.3096943997660477, + "grad_norm": 1.6169983680834699, + "learning_rate": 5.841852267499518e-06, + "loss": 0.5843105316162109, + "step": 4479 + }, + { + "epoch": 1.309986840181313, + "grad_norm": 1.443044009123256, + "learning_rate": 5.837455832562049e-06, + "loss": 0.43283605575561523, + "step": 4480 + }, + { + "epoch": 1.3102792805965784, + "grad_norm": 1.6217104179487012, + "learning_rate": 5.8330603705939684e-06, + "loss": 0.6115404367446899, + "step": 4481 + }, + { + "epoch": 1.310571721011844, + "grad_norm": 1.2325386929467517, + "learning_rate": 5.828665882622692e-06, + "loss": 0.4274179935455322, + "step": 4482 + }, + { + "epoch": 1.3108641614271093, + "grad_norm": 1.3722363792161896, + "learning_rate": 5.824272369675403e-06, + "loss": 0.4385778307914734, + "step": 4483 + }, + { + "epoch": 1.3111566018423746, + "grad_norm": 1.939305382555819, + "learning_rate": 5.819879832779058e-06, + "loss": 0.6310205459594727, + "step": 4484 + }, + { + "epoch": 1.3114490422576401, + "grad_norm": 1.5511013635003787, + "learning_rate": 5.815488272960388e-06, + "loss": 0.6309192180633545, + "step": 4485 + }, + { + "epoch": 1.3117414826729055, + "grad_norm": 1.8051032087296774, + "learning_rate": 5.811097691245895e-06, + "loss": 0.4751497507095337, + "step": 4486 + }, + { + "epoch": 1.3120339230881708, + "grad_norm": 1.5897893613027336, + 
"learning_rate": 5.806708088661846e-06, + "loss": 0.5540175437927246, + "step": 4487 + }, + { + "epoch": 1.3123263635034361, + "grad_norm": 1.924801228279098, + "learning_rate": 5.802319466234283e-06, + "loss": 0.5533273816108704, + "step": 4488 + }, + { + "epoch": 1.3126188039187014, + "grad_norm": 1.5486991099512135, + "learning_rate": 5.797931824989023e-06, + "loss": 0.463643878698349, + "step": 4489 + }, + { + "epoch": 1.312911244333967, + "grad_norm": 1.9073169839874196, + "learning_rate": 5.79354516595165e-06, + "loss": 0.5990232229232788, + "step": 4490 + }, + { + "epoch": 1.3132036847492323, + "grad_norm": 1.7681103257151853, + "learning_rate": 5.789159490147518e-06, + "loss": 0.5569760799407959, + "step": 4491 + }, + { + "epoch": 1.3134961251644977, + "grad_norm": 1.598897244778613, + "learning_rate": 5.784774798601755e-06, + "loss": 0.5016749501228333, + "step": 4492 + }, + { + "epoch": 1.3137885655797632, + "grad_norm": 1.8830720070455038, + "learning_rate": 5.780391092339253e-06, + "loss": 0.5624934434890747, + "step": 4493 + }, + { + "epoch": 1.3140810059950285, + "grad_norm": 2.146444811832683, + "learning_rate": 5.776008372384676e-06, + "loss": 0.7445797920227051, + "step": 4494 + }, + { + "epoch": 1.3143734464102939, + "grad_norm": 1.9276650555591395, + "learning_rate": 5.771626639762461e-06, + "loss": 0.5849495530128479, + "step": 4495 + }, + { + "epoch": 1.3146658868255594, + "grad_norm": 1.6679644602081254, + "learning_rate": 5.767245895496809e-06, + "loss": 0.5672163367271423, + "step": 4496 + }, + { + "epoch": 1.3149583272408247, + "grad_norm": 1.4482015307125622, + "learning_rate": 5.762866140611698e-06, + "loss": 0.5278276801109314, + "step": 4497 + }, + { + "epoch": 1.31525076765609, + "grad_norm": 1.8273800354421317, + "learning_rate": 5.7584873761308615e-06, + "loss": 0.54908686876297, + "step": 4498 + }, + { + "epoch": 1.3155432080713554, + "grad_norm": 1.7592605115208164, + "learning_rate": 5.754109603077811e-06, + "loss": 
0.5257589817047119, + "step": 4499 + }, + { + "epoch": 1.3158356484866207, + "grad_norm": 1.4910358958486878, + "learning_rate": 5.749732822475825e-06, + "loss": 0.5744988918304443, + "step": 4500 + }, + { + "epoch": 1.3161280889018863, + "grad_norm": 1.4827754689170145, + "learning_rate": 5.74535703534795e-06, + "loss": 0.5186365246772766, + "step": 4501 + }, + { + "epoch": 1.3164205293171516, + "grad_norm": 1.6539527720112557, + "learning_rate": 5.740982242716999e-06, + "loss": 0.53574538230896, + "step": 4502 + }, + { + "epoch": 1.316712969732417, + "grad_norm": 1.5347054109635063, + "learning_rate": 5.736608445605555e-06, + "loss": 0.6087717413902283, + "step": 4503 + }, + { + "epoch": 1.3170054101476825, + "grad_norm": 1.5413257189374059, + "learning_rate": 5.732235645035964e-06, + "loss": 0.5132769346237183, + "step": 4504 + }, + { + "epoch": 1.3172978505629478, + "grad_norm": 1.6361856291197476, + "learning_rate": 5.727863842030342e-06, + "loss": 0.588458776473999, + "step": 4505 + }, + { + "epoch": 1.3175902909782131, + "grad_norm": 1.6129388653597692, + "learning_rate": 5.723493037610572e-06, + "loss": 0.5154894590377808, + "step": 4506 + }, + { + "epoch": 1.3178827313934787, + "grad_norm": 1.5507002889867831, + "learning_rate": 5.719123232798304e-06, + "loss": 0.586688220500946, + "step": 4507 + }, + { + "epoch": 1.318175171808744, + "grad_norm": 1.8125403251714918, + "learning_rate": 5.714754428614956e-06, + "loss": 0.4948856830596924, + "step": 4508 + }, + { + "epoch": 1.3184676122240093, + "grad_norm": 1.5128350944665496, + "learning_rate": 5.7103866260817005e-06, + "loss": 0.6179821491241455, + "step": 4509 + }, + { + "epoch": 1.3187600526392749, + "grad_norm": 1.876290206668384, + "learning_rate": 5.7060198262194914e-06, + "loss": 0.5865011811256409, + "step": 4510 + }, + { + "epoch": 1.3190524930545402, + "grad_norm": 1.660419141577327, + "learning_rate": 5.701654030049038e-06, + "loss": 0.519783079624176, + "step": 4511 + }, + { + "epoch": 
1.3193449334698055, + "grad_norm": 1.5035780556155738, + "learning_rate": 5.697289238590822e-06, + "loss": 0.4238147437572479, + "step": 4512 + }, + { + "epoch": 1.3196373738850709, + "grad_norm": 1.6350345014151721, + "learning_rate": 5.6929254528650855e-06, + "loss": 0.5931107997894287, + "step": 4513 + }, + { + "epoch": 1.3199298143003362, + "grad_norm": 1.7485415603348589, + "learning_rate": 5.688562673891837e-06, + "loss": 0.7454524040222168, + "step": 4514 + }, + { + "epoch": 1.3202222547156017, + "grad_norm": 1.6756127294636487, + "learning_rate": 5.684200902690848e-06, + "loss": 0.5909554362297058, + "step": 4515 + }, + { + "epoch": 1.320514695130867, + "grad_norm": 1.449068353866628, + "learning_rate": 5.67984014028166e-06, + "loss": 0.5059943199157715, + "step": 4516 + }, + { + "epoch": 1.3208071355461324, + "grad_norm": 1.3855018310443914, + "learning_rate": 5.675480387683572e-06, + "loss": 0.4387373924255371, + "step": 4517 + }, + { + "epoch": 1.321099575961398, + "grad_norm": 1.6368288915875209, + "learning_rate": 5.671121645915648e-06, + "loss": 0.6452310681343079, + "step": 4518 + }, + { + "epoch": 1.3213920163766633, + "grad_norm": 1.4569471180570228, + "learning_rate": 5.666763915996725e-06, + "loss": 0.5629088282585144, + "step": 4519 + }, + { + "epoch": 1.3216844567919286, + "grad_norm": 1.6108062624448902, + "learning_rate": 5.662407198945386e-06, + "loss": 0.6442849636077881, + "step": 4520 + }, + { + "epoch": 1.3219768972071941, + "grad_norm": 1.4707356833436183, + "learning_rate": 5.6580514957799894e-06, + "loss": 0.5330031514167786, + "step": 4521 + }, + { + "epoch": 1.3222693376224595, + "grad_norm": 1.4396348923376052, + "learning_rate": 5.6536968075186575e-06, + "loss": 0.471035361289978, + "step": 4522 + }, + { + "epoch": 1.3225617780377248, + "grad_norm": 1.5589169874424196, + "learning_rate": 5.649343135179271e-06, + "loss": 0.5675650835037231, + "step": 4523 + }, + { + "epoch": 1.3228542184529903, + "grad_norm": 1.6961906881686575, + 
"learning_rate": 5.644990479779473e-06, + "loss": 0.5458093881607056, + "step": 4524 + }, + { + "epoch": 1.3231466588682557, + "grad_norm": 1.5690712646364733, + "learning_rate": 5.640638842336672e-06, + "loss": 0.5625189542770386, + "step": 4525 + }, + { + "epoch": 1.323439099283521, + "grad_norm": 1.778677748743509, + "learning_rate": 5.636288223868038e-06, + "loss": 0.5868214964866638, + "step": 4526 + }, + { + "epoch": 1.3237315396987863, + "grad_norm": 1.6502123203157841, + "learning_rate": 5.631938625390498e-06, + "loss": 0.5340765714645386, + "step": 4527 + }, + { + "epoch": 1.3240239801140516, + "grad_norm": 1.4463169385647288, + "learning_rate": 5.627590047920747e-06, + "loss": 0.4487069845199585, + "step": 4528 + }, + { + "epoch": 1.3243164205293172, + "grad_norm": 1.5750183859940412, + "learning_rate": 5.623242492475237e-06, + "loss": 0.4246913194656372, + "step": 4529 + }, + { + "epoch": 1.3246088609445825, + "grad_norm": 1.6537085849345186, + "learning_rate": 5.618895960070188e-06, + "loss": 0.49904564023017883, + "step": 4530 + }, + { + "epoch": 1.3249013013598478, + "grad_norm": 1.6201874773916152, + "learning_rate": 5.614550451721566e-06, + "loss": 0.5506085157394409, + "step": 4531 + }, + { + "epoch": 1.3251937417751134, + "grad_norm": 1.6929750939693964, + "learning_rate": 5.610205968445111e-06, + "loss": 0.4861884117126465, + "step": 4532 + }, + { + "epoch": 1.3254861821903787, + "grad_norm": 1.5616728357477914, + "learning_rate": 5.605862511256322e-06, + "loss": 0.5639146566390991, + "step": 4533 + }, + { + "epoch": 1.325778622605644, + "grad_norm": 1.3747626231277423, + "learning_rate": 5.601520081170455e-06, + "loss": 0.43305879831314087, + "step": 4534 + }, + { + "epoch": 1.3260710630209096, + "grad_norm": 1.4728588464752952, + "learning_rate": 5.597178679202524e-06, + "loss": 0.4820408821105957, + "step": 4535 + }, + { + "epoch": 1.326363503436175, + "grad_norm": 2.092875019342334, + "learning_rate": 5.592838306367307e-06, + "loss": 
0.5601707100868225, + "step": 4536 + }, + { + "epoch": 1.3266559438514403, + "grad_norm": 1.6269012393440097, + "learning_rate": 5.588498963679339e-06, + "loss": 0.5655055046081543, + "step": 4537 + }, + { + "epoch": 1.3269483842667056, + "grad_norm": 1.871556737283143, + "learning_rate": 5.584160652152917e-06, + "loss": 0.5425975322723389, + "step": 4538 + }, + { + "epoch": 1.327240824681971, + "grad_norm": 1.5388263554547548, + "learning_rate": 5.579823372802098e-06, + "loss": 0.607103168964386, + "step": 4539 + }, + { + "epoch": 1.3275332650972365, + "grad_norm": 1.6396827179367406, + "learning_rate": 5.575487126640686e-06, + "loss": 0.6011538505554199, + "step": 4540 + }, + { + "epoch": 1.3278257055125018, + "grad_norm": 1.6364470669862505, + "learning_rate": 5.571151914682258e-06, + "loss": 0.5333601236343384, + "step": 4541 + }, + { + "epoch": 1.3281181459277671, + "grad_norm": 1.7756177203838306, + "learning_rate": 5.566817737940142e-06, + "loss": 0.576410174369812, + "step": 4542 + }, + { + "epoch": 1.3284105863430327, + "grad_norm": 1.8060302167235907, + "learning_rate": 5.562484597427425e-06, + "loss": 0.506458044052124, + "step": 4543 + }, + { + "epoch": 1.328703026758298, + "grad_norm": 2.0174061298696975, + "learning_rate": 5.558152494156955e-06, + "loss": 0.5893718004226685, + "step": 4544 + }, + { + "epoch": 1.3289954671735633, + "grad_norm": 1.6979483029237916, + "learning_rate": 5.55382142914133e-06, + "loss": 0.508120059967041, + "step": 4545 + }, + { + "epoch": 1.3292879075888289, + "grad_norm": 1.5737735987577735, + "learning_rate": 5.5494914033929126e-06, + "loss": 0.6103616952896118, + "step": 4546 + }, + { + "epoch": 1.3295803480040942, + "grad_norm": 1.7304904972315491, + "learning_rate": 5.545162417923822e-06, + "loss": 0.5290235280990601, + "step": 4547 + }, + { + "epoch": 1.3298727884193595, + "grad_norm": 1.5350904839753017, + "learning_rate": 5.540834473745929e-06, + "loss": 0.5729631185531616, + "step": 4548 + }, + { + "epoch": 
1.330165228834625, + "grad_norm": 1.5574358916011883, + "learning_rate": 5.536507571870866e-06, + "loss": 0.48720547556877136, + "step": 4549 + }, + { + "epoch": 1.3304576692498904, + "grad_norm": 1.5393587740053045, + "learning_rate": 5.532181713310023e-06, + "loss": 0.4987955689430237, + "step": 4550 + }, + { + "epoch": 1.3307501096651557, + "grad_norm": 1.3126988702980638, + "learning_rate": 5.527856899074536e-06, + "loss": 0.4002467393875122, + "step": 4551 + }, + { + "epoch": 1.331042550080421, + "grad_norm": 2.0947575410388866, + "learning_rate": 5.523533130175308e-06, + "loss": 0.7435724139213562, + "step": 4552 + }, + { + "epoch": 1.3313349904956864, + "grad_norm": 1.541726198150986, + "learning_rate": 5.519210407622993e-06, + "loss": 0.34711340069770813, + "step": 4553 + }, + { + "epoch": 1.331627430910952, + "grad_norm": 1.6396721749099359, + "learning_rate": 5.514888732428003e-06, + "loss": 0.4749720096588135, + "step": 4554 + }, + { + "epoch": 1.3319198713262173, + "grad_norm": 1.7586628740577253, + "learning_rate": 5.5105681056005e-06, + "loss": 0.5818741321563721, + "step": 4555 + }, + { + "epoch": 1.3322123117414826, + "grad_norm": 1.7095504305078453, + "learning_rate": 5.506248528150407e-06, + "loss": 0.5715004801750183, + "step": 4556 + }, + { + "epoch": 1.3325047521567481, + "grad_norm": 1.7722621684818736, + "learning_rate": 5.501930001087399e-06, + "loss": 0.5465661287307739, + "step": 4557 + }, + { + "epoch": 1.3327971925720135, + "grad_norm": 1.7620411170921917, + "learning_rate": 5.4976125254209035e-06, + "loss": 0.6324847936630249, + "step": 4558 + }, + { + "epoch": 1.3330896329872788, + "grad_norm": 1.4165701736936904, + "learning_rate": 5.493296102160105e-06, + "loss": 0.4616294503211975, + "step": 4559 + }, + { + "epoch": 1.3333820734025443, + "grad_norm": 1.6922106714814378, + "learning_rate": 5.488980732313942e-06, + "loss": 0.5187079310417175, + "step": 4560 + }, + { + "epoch": 1.3336745138178097, + "grad_norm": 1.8396067182286635, + 
"learning_rate": 5.484666416891109e-06, + "loss": 0.6120654344558716, + "step": 4561 + }, + { + "epoch": 1.333966954233075, + "grad_norm": 1.6878860661661148, + "learning_rate": 5.480353156900044e-06, + "loss": 0.6171379685401917, + "step": 4562 + }, + { + "epoch": 1.3342593946483405, + "grad_norm": 1.510636167770684, + "learning_rate": 5.4760409533489475e-06, + "loss": 0.4690072536468506, + "step": 4563 + }, + { + "epoch": 1.3345518350636059, + "grad_norm": 1.5961764389633983, + "learning_rate": 5.471729807245773e-06, + "loss": 0.511309802532196, + "step": 4564 + }, + { + "epoch": 1.3348442754788712, + "grad_norm": 1.6355911684199975, + "learning_rate": 5.467419719598223e-06, + "loss": 0.5657862424850464, + "step": 4565 + }, + { + "epoch": 1.3351367158941365, + "grad_norm": 1.7641189489668823, + "learning_rate": 5.4631106914137555e-06, + "loss": 0.4263400733470917, + "step": 4566 + }, + { + "epoch": 1.3354291563094018, + "grad_norm": 1.8179548841156754, + "learning_rate": 5.458802723699579e-06, + "loss": 0.6275177001953125, + "step": 4567 + }, + { + "epoch": 1.3357215967246674, + "grad_norm": 1.6668120373290058, + "learning_rate": 5.454495817462655e-06, + "loss": 0.3857421278953552, + "step": 4568 + }, + { + "epoch": 1.3360140371399327, + "grad_norm": 1.7165178528012586, + "learning_rate": 5.450189973709697e-06, + "loss": 0.5834560394287109, + "step": 4569 + }, + { + "epoch": 1.336306477555198, + "grad_norm": 1.6632572235317495, + "learning_rate": 5.445885193447169e-06, + "loss": 0.6165010929107666, + "step": 4570 + }, + { + "epoch": 1.3365989179704636, + "grad_norm": 1.7470412065212853, + "learning_rate": 5.441581477681288e-06, + "loss": 0.6034595966339111, + "step": 4571 + }, + { + "epoch": 1.336891358385729, + "grad_norm": 1.740024112758077, + "learning_rate": 5.43727882741802e-06, + "loss": 0.570164144039154, + "step": 4572 + }, + { + "epoch": 1.3371837988009942, + "grad_norm": 1.4917354928366209, + "learning_rate": 5.432977243663089e-06, + "loss": 
0.5369169116020203, + "step": 4573 + }, + { + "epoch": 1.3374762392162598, + "grad_norm": 1.7875464183853407, + "learning_rate": 5.428676727421954e-06, + "loss": 0.5624364614486694, + "step": 4574 + }, + { + "epoch": 1.3377686796315251, + "grad_norm": 1.517348885410251, + "learning_rate": 5.424377279699842e-06, + "loss": 0.5002127885818481, + "step": 4575 + }, + { + "epoch": 1.3380611200467905, + "grad_norm": 1.7071888960959534, + "learning_rate": 5.42007890150172e-06, + "loss": 0.5998499393463135, + "step": 4576 + }, + { + "epoch": 1.3383535604620558, + "grad_norm": 1.7074905497433162, + "learning_rate": 5.415781593832307e-06, + "loss": 0.5988572835922241, + "step": 4577 + }, + { + "epoch": 1.338646000877321, + "grad_norm": 1.6551550553396004, + "learning_rate": 5.411485357696075e-06, + "loss": 0.5202064514160156, + "step": 4578 + }, + { + "epoch": 1.3389384412925867, + "grad_norm": 2.519364812628366, + "learning_rate": 5.407190194097241e-06, + "loss": 0.5246714949607849, + "step": 4579 + }, + { + "epoch": 1.339230881707852, + "grad_norm": 1.5907571805696734, + "learning_rate": 5.4028961040397765e-06, + "loss": 0.5998588800430298, + "step": 4580 + }, + { + "epoch": 1.3395233221231173, + "grad_norm": 1.7851321190756844, + "learning_rate": 5.3986030885273945e-06, + "loss": 0.5971418023109436, + "step": 4581 + }, + { + "epoch": 1.3398157625383829, + "grad_norm": 1.5857061971181772, + "learning_rate": 5.3943111485635644e-06, + "loss": 0.4638952910900116, + "step": 4582 + }, + { + "epoch": 1.3401082029536482, + "grad_norm": 1.5981773831835344, + "learning_rate": 5.390020285151502e-06, + "loss": 0.5007182955741882, + "step": 4583 + }, + { + "epoch": 1.3404006433689135, + "grad_norm": 1.610643010141743, + "learning_rate": 5.385730499294171e-06, + "loss": 0.5013964772224426, + "step": 4584 + }, + { + "epoch": 1.340693083784179, + "grad_norm": 1.6360724667305655, + "learning_rate": 5.381441791994276e-06, + "loss": 0.5699980854988098, + "step": 4585 + }, + { + "epoch": 
1.3409855241994444, + "grad_norm": 1.6423818252193456, + "learning_rate": 5.377154164254283e-06, + "loss": 0.5326210260391235, + "step": 4586 + }, + { + "epoch": 1.3412779646147097, + "grad_norm": 1.5111806674915849, + "learning_rate": 5.372867617076395e-06, + "loss": 0.6065158843994141, + "step": 4587 + }, + { + "epoch": 1.3415704050299753, + "grad_norm": 1.356022290658006, + "learning_rate": 5.368582151462569e-06, + "loss": 0.48427143692970276, + "step": 4588 + }, + { + "epoch": 1.3418628454452406, + "grad_norm": 1.4868111001385538, + "learning_rate": 5.364297768414505e-06, + "loss": 0.5755994915962219, + "step": 4589 + }, + { + "epoch": 1.342155285860506, + "grad_norm": 1.4690268021295017, + "learning_rate": 5.360014468933652e-06, + "loss": 0.4959644377231598, + "step": 4590 + }, + { + "epoch": 1.3424477262757712, + "grad_norm": 1.5383458553689457, + "learning_rate": 5.355732254021205e-06, + "loss": 0.5374274253845215, + "step": 4591 + }, + { + "epoch": 1.3427401666910366, + "grad_norm": 1.6286753609495908, + "learning_rate": 5.351451124678106e-06, + "loss": 0.5875111818313599, + "step": 4592 + }, + { + "epoch": 1.3430326071063021, + "grad_norm": 1.7964496178319949, + "learning_rate": 5.347171081905045e-06, + "loss": 0.5230692028999329, + "step": 4593 + }, + { + "epoch": 1.3433250475215675, + "grad_norm": 1.424672908012482, + "learning_rate": 5.342892126702453e-06, + "loss": 0.4624518156051636, + "step": 4594 + }, + { + "epoch": 1.3436174879368328, + "grad_norm": 1.9140370650793175, + "learning_rate": 5.3386142600705134e-06, + "loss": 0.5141074061393738, + "step": 4595 + }, + { + "epoch": 1.3439099283520983, + "grad_norm": 1.6249918744835086, + "learning_rate": 5.334337483009147e-06, + "loss": 0.4655565023422241, + "step": 4596 + }, + { + "epoch": 1.3442023687673637, + "grad_norm": 1.6516547156710706, + "learning_rate": 5.330061796518025e-06, + "loss": 0.6135094165802002, + "step": 4597 + }, + { + "epoch": 1.344494809182629, + "grad_norm": 1.595543646054287, + 
"learning_rate": 5.325787201596563e-06, + "loss": 0.5865254402160645, + "step": 4598 + }, + { + "epoch": 1.3447872495978945, + "grad_norm": 1.8032344885262006, + "learning_rate": 5.321513699243924e-06, + "loss": 0.5290840268135071, + "step": 4599 + }, + { + "epoch": 1.3450796900131599, + "grad_norm": 1.5294052976370318, + "learning_rate": 5.317241290459012e-06, + "loss": 0.554675817489624, + "step": 4600 + }, + { + "epoch": 1.3453721304284252, + "grad_norm": 1.499219614332531, + "learning_rate": 5.312969976240479e-06, + "loss": 0.5033853650093079, + "step": 4601 + }, + { + "epoch": 1.3456645708436907, + "grad_norm": 1.8108264508032192, + "learning_rate": 5.308699757586713e-06, + "loss": 0.44666093587875366, + "step": 4602 + }, + { + "epoch": 1.345957011258956, + "grad_norm": 1.5332559280539126, + "learning_rate": 5.304430635495856e-06, + "loss": 0.5447900891304016, + "step": 4603 + }, + { + "epoch": 1.3462494516742214, + "grad_norm": 1.507503116151542, + "learning_rate": 5.30016261096579e-06, + "loss": 0.4425917863845825, + "step": 4604 + }, + { + "epoch": 1.3465418920894867, + "grad_norm": 1.508411296889156, + "learning_rate": 5.295895684994137e-06, + "loss": 0.4411497712135315, + "step": 4605 + }, + { + "epoch": 1.346834332504752, + "grad_norm": 1.537668383754579, + "learning_rate": 5.291629858578271e-06, + "loss": 0.5577414631843567, + "step": 4606 + }, + { + "epoch": 1.3471267729200176, + "grad_norm": 1.7128549715372505, + "learning_rate": 5.287365132715293e-06, + "loss": 0.4754186272621155, + "step": 4607 + }, + { + "epoch": 1.347419213335283, + "grad_norm": 1.6521724702121328, + "learning_rate": 5.283101508402063e-06, + "loss": 0.5582431554794312, + "step": 4608 + }, + { + "epoch": 1.3477116537505482, + "grad_norm": 1.7476811492664892, + "learning_rate": 5.2788389866351755e-06, + "loss": 0.5552654266357422, + "step": 4609 + }, + { + "epoch": 1.3480040941658138, + "grad_norm": 1.8662632335270106, + "learning_rate": 5.2745775684109705e-06, + "loss": 
0.5776556730270386, + "step": 4610 + }, + { + "epoch": 1.3482965345810791, + "grad_norm": 1.7735552141557176, + "learning_rate": 5.270317254725528e-06, + "loss": 0.5859286785125732, + "step": 4611 + }, + { + "epoch": 1.3485889749963444, + "grad_norm": 1.5182169678473143, + "learning_rate": 5.2660580465746694e-06, + "loss": 0.5914887189865112, + "step": 4612 + }, + { + "epoch": 1.34888141541161, + "grad_norm": 1.6371325039607922, + "learning_rate": 5.261799944953956e-06, + "loss": 0.43669426441192627, + "step": 4613 + }, + { + "epoch": 1.3491738558268753, + "grad_norm": 1.718792113074269, + "learning_rate": 5.2575429508587e-06, + "loss": 0.473773717880249, + "step": 4614 + }, + { + "epoch": 1.3494662962421407, + "grad_norm": 1.7451807781202082, + "learning_rate": 5.253287065283949e-06, + "loss": 0.5011228919029236, + "step": 4615 + }, + { + "epoch": 1.349758736657406, + "grad_norm": 1.6598931266775088, + "learning_rate": 5.249032289224483e-06, + "loss": 0.5839254856109619, + "step": 4616 + }, + { + "epoch": 1.3500511770726713, + "grad_norm": 1.7262514320572941, + "learning_rate": 5.244778623674831e-06, + "loss": 0.5375077128410339, + "step": 4617 + }, + { + "epoch": 1.3503436174879369, + "grad_norm": 1.4572654878782452, + "learning_rate": 5.240526069629265e-06, + "loss": 0.49445679783821106, + "step": 4618 + }, + { + "epoch": 1.3506360579032022, + "grad_norm": 1.5263979209526246, + "learning_rate": 5.236274628081792e-06, + "loss": 0.5369694828987122, + "step": 4619 + }, + { + "epoch": 1.3509284983184675, + "grad_norm": 1.8018674546255473, + "learning_rate": 5.23202430002616e-06, + "loss": 0.6017554402351379, + "step": 4620 + }, + { + "epoch": 1.351220938733733, + "grad_norm": 1.9428924144840352, + "learning_rate": 5.227775086455859e-06, + "loss": 0.5380403995513916, + "step": 4621 + }, + { + "epoch": 1.3515133791489984, + "grad_norm": 1.6665289001084298, + "learning_rate": 5.223526988364116e-06, + "loss": 0.5650593042373657, + "step": 4622 + }, + { + "epoch": 
1.3518058195642637, + "grad_norm": 1.5672489406384107, + "learning_rate": 5.219280006743897e-06, + "loss": 0.5572884678840637, + "step": 4623 + }, + { + "epoch": 1.3520982599795293, + "grad_norm": 1.839257774768153, + "learning_rate": 5.21503414258791e-06, + "loss": 0.5304458141326904, + "step": 4624 + }, + { + "epoch": 1.3523907003947946, + "grad_norm": 1.8264084905380675, + "learning_rate": 5.2107893968886005e-06, + "loss": 0.6702588796615601, + "step": 4625 + }, + { + "epoch": 1.35268314081006, + "grad_norm": 1.5301776431109881, + "learning_rate": 5.206545770638152e-06, + "loss": 0.4607279300689697, + "step": 4626 + }, + { + "epoch": 1.3529755812253255, + "grad_norm": 1.4702386368708713, + "learning_rate": 5.202303264828482e-06, + "loss": 0.5759040713310242, + "step": 4627 + }, + { + "epoch": 1.3532680216405908, + "grad_norm": 1.6340224609334149, + "learning_rate": 5.198061880451253e-06, + "loss": 0.446469783782959, + "step": 4628 + }, + { + "epoch": 1.3535604620558561, + "grad_norm": 1.6416831158378962, + "learning_rate": 5.193821618497864e-06, + "loss": 0.4869040846824646, + "step": 4629 + }, + { + "epoch": 1.3538529024711214, + "grad_norm": 1.59588454548975, + "learning_rate": 5.189582479959449e-06, + "loss": 0.5153477191925049, + "step": 4630 + }, + { + "epoch": 1.3541453428863868, + "grad_norm": 1.6964185114911852, + "learning_rate": 5.185344465826883e-06, + "loss": 0.4958652853965759, + "step": 4631 + }, + { + "epoch": 1.3544377833016523, + "grad_norm": 1.544404184800908, + "learning_rate": 5.1811075770907715e-06, + "loss": 0.5314347743988037, + "step": 4632 + }, + { + "epoch": 1.3547302237169176, + "grad_norm": 1.6488125019330604, + "learning_rate": 5.176871814741466e-06, + "loss": 0.5366088151931763, + "step": 4633 + }, + { + "epoch": 1.355022664132183, + "grad_norm": 1.7011582339400138, + "learning_rate": 5.172637179769049e-06, + "loss": 0.6239185929298401, + "step": 4634 + }, + { + "epoch": 1.3553151045474485, + "grad_norm": 1.8789833552926098, + 
"learning_rate": 5.168403673163341e-06, + "loss": 0.5516507625579834, + "step": 4635 + }, + { + "epoch": 1.3556075449627139, + "grad_norm": 1.6420696506744512, + "learning_rate": 5.164171295913898e-06, + "loss": 0.5859683156013489, + "step": 4636 + }, + { + "epoch": 1.3558999853779792, + "grad_norm": 1.6138084463921514, + "learning_rate": 5.159940049010015e-06, + "loss": 0.5913225412368774, + "step": 4637 + }, + { + "epoch": 1.3561924257932447, + "grad_norm": 1.690951404825549, + "learning_rate": 5.155709933440714e-06, + "loss": 0.650983989238739, + "step": 4638 + }, + { + "epoch": 1.35648486620851, + "grad_norm": 1.7360324268029201, + "learning_rate": 5.151480950194762e-06, + "loss": 0.5631625652313232, + "step": 4639 + }, + { + "epoch": 1.3567773066237754, + "grad_norm": 1.9305214623229574, + "learning_rate": 5.147253100260659e-06, + "loss": 0.48153650760650635, + "step": 4640 + }, + { + "epoch": 1.357069747039041, + "grad_norm": 1.382159174171422, + "learning_rate": 5.143026384626637e-06, + "loss": 0.43598422408103943, + "step": 4641 + }, + { + "epoch": 1.3573621874543063, + "grad_norm": 1.5586949144187017, + "learning_rate": 5.138800804280668e-06, + "loss": 0.5323987007141113, + "step": 4642 + }, + { + "epoch": 1.3576546278695716, + "grad_norm": 1.739858834969472, + "learning_rate": 5.134576360210454e-06, + "loss": 0.5386587977409363, + "step": 4643 + }, + { + "epoch": 1.357947068284837, + "grad_norm": 1.7229356194902612, + "learning_rate": 5.130353053403434e-06, + "loss": 0.4913867115974426, + "step": 4644 + }, + { + "epoch": 1.3582395087001022, + "grad_norm": 2.681042611993396, + "learning_rate": 5.12613088484678e-06, + "loss": 0.6516048908233643, + "step": 4645 + }, + { + "epoch": 1.3585319491153678, + "grad_norm": 1.7863407962771196, + "learning_rate": 5.121909855527398e-06, + "loss": 0.5290599465370178, + "step": 4646 + }, + { + "epoch": 1.3588243895306331, + "grad_norm": 1.992281323100596, + "learning_rate": 5.117689966431927e-06, + "loss": 
0.7909928560256958, + "step": 4647 + }, + { + "epoch": 1.3591168299458984, + "grad_norm": 1.7798386890797042, + "learning_rate": 5.113471218546746e-06, + "loss": 0.4751276969909668, + "step": 4648 + }, + { + "epoch": 1.359409270361164, + "grad_norm": 1.3934486662021524, + "learning_rate": 5.109253612857954e-06, + "loss": 0.4542301893234253, + "step": 4649 + }, + { + "epoch": 1.3597017107764293, + "grad_norm": 1.6724566490890436, + "learning_rate": 5.105037150351393e-06, + "loss": 0.5355349779129028, + "step": 4650 + }, + { + "epoch": 1.3599941511916946, + "grad_norm": 1.7131391763754547, + "learning_rate": 5.100821832012637e-06, + "loss": 0.4994719326496124, + "step": 4651 + }, + { + "epoch": 1.3602865916069602, + "grad_norm": 1.7061763475820229, + "learning_rate": 5.096607658826989e-06, + "loss": 0.6171674728393555, + "step": 4652 + }, + { + "epoch": 1.3605790320222255, + "grad_norm": 1.6851325839422124, + "learning_rate": 5.092394631779487e-06, + "loss": 0.5386878252029419, + "step": 4653 + }, + { + "epoch": 1.3608714724374908, + "grad_norm": 1.4863597978488459, + "learning_rate": 5.088182751854903e-06, + "loss": 0.4495810270309448, + "step": 4654 + }, + { + "epoch": 1.3611639128527562, + "grad_norm": 1.560829764762291, + "learning_rate": 5.083972020037735e-06, + "loss": 0.5540642142295837, + "step": 4655 + }, + { + "epoch": 1.3614563532680215, + "grad_norm": 1.7743988570673719, + "learning_rate": 5.079762437312219e-06, + "loss": 0.6020554900169373, + "step": 4656 + }, + { + "epoch": 1.361748793683287, + "grad_norm": 1.5410143370370128, + "learning_rate": 5.075554004662316e-06, + "loss": 0.47981250286102295, + "step": 4657 + }, + { + "epoch": 1.3620412340985524, + "grad_norm": 1.6809006565320033, + "learning_rate": 5.071346723071724e-06, + "loss": 0.6206443905830383, + "step": 4658 + }, + { + "epoch": 1.3623336745138177, + "grad_norm": 1.2946163710464256, + "learning_rate": 5.067140593523869e-06, + "loss": 0.46899446845054626, + "step": 4659 + }, + { + "epoch": 
1.3626261149290833, + "grad_norm": 1.3692435027739418, + "learning_rate": 5.062935617001912e-06, + "loss": 0.5695985555648804, + "step": 4660 + }, + { + "epoch": 1.3629185553443486, + "grad_norm": 1.5567765237338644, + "learning_rate": 5.058731794488732e-06, + "loss": 0.5524671077728271, + "step": 4661 + }, + { + "epoch": 1.363210995759614, + "grad_norm": 1.5953543121744755, + "learning_rate": 5.054529126966953e-06, + "loss": 0.4655245244503021, + "step": 4662 + }, + { + "epoch": 1.3635034361748795, + "grad_norm": 1.6197588686677031, + "learning_rate": 5.050327615418921e-06, + "loss": 0.5617693662643433, + "step": 4663 + }, + { + "epoch": 1.3637958765901448, + "grad_norm": 1.515126796303483, + "learning_rate": 5.046127260826714e-06, + "loss": 0.52044677734375, + "step": 4664 + }, + { + "epoch": 1.3640883170054101, + "grad_norm": 1.6797173356320934, + "learning_rate": 5.041928064172139e-06, + "loss": 0.4567520022392273, + "step": 4665 + }, + { + "epoch": 1.3643807574206757, + "grad_norm": 1.5794296901996336, + "learning_rate": 5.037730026436736e-06, + "loss": 0.5942729711532593, + "step": 4666 + }, + { + "epoch": 1.364673197835941, + "grad_norm": 1.6501244665537385, + "learning_rate": 5.033533148601766e-06, + "loss": 0.3824811279773712, + "step": 4667 + }, + { + "epoch": 1.3649656382512063, + "grad_norm": 1.4770402468740385, + "learning_rate": 5.029337431648227e-06, + "loss": 0.4710771441459656, + "step": 4668 + }, + { + "epoch": 1.3652580786664716, + "grad_norm": 1.5059979846835174, + "learning_rate": 5.02514287655684e-06, + "loss": 0.6617978811264038, + "step": 4669 + }, + { + "epoch": 1.365550519081737, + "grad_norm": 1.5829629132621983, + "learning_rate": 5.020949484308058e-06, + "loss": 0.5237355828285217, + "step": 4670 + }, + { + "epoch": 1.3658429594970025, + "grad_norm": 1.4158253094169178, + "learning_rate": 5.016757255882065e-06, + "loss": 0.4544803500175476, + "step": 4671 + }, + { + "epoch": 1.3661353999122678, + "grad_norm": 1.8761810485620272, + 
"learning_rate": 5.012566192258763e-06, + "loss": 0.5854490399360657, + "step": 4672 + }, + { + "epoch": 1.3664278403275332, + "grad_norm": 1.902502544434852, + "learning_rate": 5.008376294417787e-06, + "loss": 0.6275635361671448, + "step": 4673 + }, + { + "epoch": 1.3667202807427987, + "grad_norm": 1.6133596882151136, + "learning_rate": 5.004187563338504e-06, + "loss": 0.5160082578659058, + "step": 4674 + }, + { + "epoch": 1.367012721158064, + "grad_norm": 1.439845673979846, + "learning_rate": 5.000000000000003e-06, + "loss": 0.5203640460968018, + "step": 4675 + }, + { + "epoch": 1.3673051615733294, + "grad_norm": 2.025079516078861, + "learning_rate": 4.9958136053811e-06, + "loss": 0.6836066246032715, + "step": 4676 + }, + { + "epoch": 1.367597601988595, + "grad_norm": 1.5727820508513324, + "learning_rate": 4.991628380460343e-06, + "loss": 0.5566641092300415, + "step": 4677 + }, + { + "epoch": 1.3678900424038603, + "grad_norm": 1.643119627925769, + "learning_rate": 4.9874443262159984e-06, + "loss": 0.5618000030517578, + "step": 4678 + }, + { + "epoch": 1.3681824828191256, + "grad_norm": 1.4054605482949574, + "learning_rate": 4.983261443626068e-06, + "loss": 0.4605063796043396, + "step": 4679 + }, + { + "epoch": 1.3684749232343911, + "grad_norm": 1.7557732951775291, + "learning_rate": 4.97907973366827e-06, + "loss": 0.48282021284103394, + "step": 4680 + }, + { + "epoch": 1.3687673636496565, + "grad_norm": 1.467194830130128, + "learning_rate": 4.974899197320059e-06, + "loss": 0.42356133460998535, + "step": 4681 + }, + { + "epoch": 1.3690598040649218, + "grad_norm": 1.3266470239270218, + "learning_rate": 4.97071983555861e-06, + "loss": 0.459377646446228, + "step": 4682 + }, + { + "epoch": 1.369352244480187, + "grad_norm": 1.9278413810039654, + "learning_rate": 4.966541649360819e-06, + "loss": 0.5539775490760803, + "step": 4683 + }, + { + "epoch": 1.3696446848954524, + "grad_norm": 1.7014699336581571, + "learning_rate": 4.962364639703311e-06, + "loss": 
0.5593239068984985, + "step": 4684 + }, + { + "epoch": 1.369937125310718, + "grad_norm": 1.8333805174527635, + "learning_rate": 4.958188807562441e-06, + "loss": 0.5425251722335815, + "step": 4685 + }, + { + "epoch": 1.3702295657259833, + "grad_norm": 1.564182289934299, + "learning_rate": 4.954014153914282e-06, + "loss": 0.5183289051055908, + "step": 4686 + }, + { + "epoch": 1.3705220061412486, + "grad_norm": 1.6834251116472225, + "learning_rate": 4.9498406797346345e-06, + "loss": 0.5278980731964111, + "step": 4687 + }, + { + "epoch": 1.3708144465565142, + "grad_norm": 1.6861784833580373, + "learning_rate": 4.9456683859990185e-06, + "loss": 0.4857858419418335, + "step": 4688 + }, + { + "epoch": 1.3711068869717795, + "grad_norm": 1.4955733852507764, + "learning_rate": 4.94149727368269e-06, + "loss": 0.4889591336250305, + "step": 4689 + }, + { + "epoch": 1.3713993273870448, + "grad_norm": 2.1119376280699105, + "learning_rate": 4.937327343760617e-06, + "loss": 0.5475220680236816, + "step": 4690 + }, + { + "epoch": 1.3716917678023104, + "grad_norm": 1.8065068083746048, + "learning_rate": 4.933158597207501e-06, + "loss": 0.5794380903244019, + "step": 4691 + }, + { + "epoch": 1.3719842082175757, + "grad_norm": 1.5916906211687458, + "learning_rate": 4.928991034997752e-06, + "loss": 0.42212024331092834, + "step": 4692 + }, + { + "epoch": 1.372276648632841, + "grad_norm": 1.8447627986814241, + "learning_rate": 4.924824658105516e-06, + "loss": 0.6091631054878235, + "step": 4693 + }, + { + "epoch": 1.3725690890481064, + "grad_norm": 1.8839419484958528, + "learning_rate": 4.9206594675046595e-06, + "loss": 0.544279158115387, + "step": 4694 + }, + { + "epoch": 1.3728615294633717, + "grad_norm": 1.4361678658463186, + "learning_rate": 4.916495464168768e-06, + "loss": 0.46237099170684814, + "step": 4695 + }, + { + "epoch": 1.3731539698786372, + "grad_norm": 1.5990237040506552, + "learning_rate": 4.912332649071154e-06, + "loss": 0.5615352392196655, + "step": 4696 + }, + { + "epoch": 
1.3734464102939026, + "grad_norm": 1.7554295249178744, + "learning_rate": 4.90817102318485e-06, + "loss": 0.5552200078964233, + "step": 4697 + }, + { + "epoch": 1.373738850709168, + "grad_norm": 1.798510214490848, + "learning_rate": 4.904010587482612e-06, + "loss": 0.5466557741165161, + "step": 4698 + }, + { + "epoch": 1.3740312911244335, + "grad_norm": 1.8536275815794498, + "learning_rate": 4.8998513429369135e-06, + "loss": 0.6131544709205627, + "step": 4699 + }, + { + "epoch": 1.3743237315396988, + "grad_norm": 1.7671899353023186, + "learning_rate": 4.895693290519954e-06, + "loss": 0.5264796018600464, + "step": 4700 + }, + { + "epoch": 1.374616171954964, + "grad_norm": 1.6582809024037055, + "learning_rate": 4.891536431203653e-06, + "loss": 0.5179097652435303, + "step": 4701 + }, + { + "epoch": 1.3749086123702297, + "grad_norm": 1.7203915102871608, + "learning_rate": 4.887380765959655e-06, + "loss": 0.46007782220840454, + "step": 4702 + }, + { + "epoch": 1.375201052785495, + "grad_norm": 1.3949646851760964, + "learning_rate": 4.8832262957593145e-06, + "loss": 0.48182815313339233, + "step": 4703 + }, + { + "epoch": 1.3754934932007603, + "grad_norm": 1.6488295590740498, + "learning_rate": 4.879073021573717e-06, + "loss": 0.5334529280662537, + "step": 4704 + }, + { + "epoch": 1.3757859336160259, + "grad_norm": 1.824410831192183, + "learning_rate": 4.874920944373665e-06, + "loss": 0.5984899997711182, + "step": 4705 + }, + { + "epoch": 1.3760783740312912, + "grad_norm": 1.633539262172952, + "learning_rate": 4.870770065129681e-06, + "loss": 0.46676474809646606, + "step": 4706 + }, + { + "epoch": 1.3763708144465565, + "grad_norm": 1.6766360321424407, + "learning_rate": 4.866620384812008e-06, + "loss": 0.4608241617679596, + "step": 4707 + }, + { + "epoch": 1.3766632548618218, + "grad_norm": 1.6783484732888503, + "learning_rate": 4.862471904390609e-06, + "loss": 0.5877207517623901, + "step": 4708 + }, + { + "epoch": 1.3769556952770872, + "grad_norm": 1.9194747868225221, + 
"learning_rate": 4.858324624835164e-06, + "loss": 0.5243252515792847, + "step": 4709 + }, + { + "epoch": 1.3772481356923527, + "grad_norm": 1.7326979192308607, + "learning_rate": 4.854178547115078e-06, + "loss": 0.528606653213501, + "step": 4710 + }, + { + "epoch": 1.377540576107618, + "grad_norm": 1.761919042167513, + "learning_rate": 4.850033672199469e-06, + "loss": 0.46468549966812134, + "step": 4711 + }, + { + "epoch": 1.3778330165228834, + "grad_norm": 1.5919653348557072, + "learning_rate": 4.8458900010571765e-06, + "loss": 0.5368300676345825, + "step": 4712 + }, + { + "epoch": 1.378125456938149, + "grad_norm": 1.6462148743894651, + "learning_rate": 4.8417475346567635e-06, + "loss": 0.5156906843185425, + "step": 4713 + }, + { + "epoch": 1.3784178973534142, + "grad_norm": 1.718628393460986, + "learning_rate": 4.837606273966496e-06, + "loss": 0.5899196863174438, + "step": 4714 + }, + { + "epoch": 1.3787103377686796, + "grad_norm": 1.6725614455419595, + "learning_rate": 4.833466219954376e-06, + "loss": 0.5820844769477844, + "step": 4715 + }, + { + "epoch": 1.3790027781839451, + "grad_norm": 1.5883271974734077, + "learning_rate": 4.829327373588113e-06, + "loss": 0.4926246404647827, + "step": 4716 + }, + { + "epoch": 1.3792952185992104, + "grad_norm": 1.5404696535835014, + "learning_rate": 4.825189735835138e-06, + "loss": 0.5417006611824036, + "step": 4717 + }, + { + "epoch": 1.3795876590144758, + "grad_norm": 1.5296186550545692, + "learning_rate": 4.821053307662599e-06, + "loss": 0.4130229949951172, + "step": 4718 + }, + { + "epoch": 1.3798800994297413, + "grad_norm": 1.279729123751172, + "learning_rate": 4.8169180900373615e-06, + "loss": 0.4553627371788025, + "step": 4719 + }, + { + "epoch": 1.3801725398450067, + "grad_norm": 1.3535233614920503, + "learning_rate": 4.812784083926005e-06, + "loss": 0.523567259311676, + "step": 4720 + }, + { + "epoch": 1.380464980260272, + "grad_norm": 1.585136917164004, + "learning_rate": 4.808651290294832e-06, + "loss": 
0.4643239378929138, + "step": 4721 + }, + { + "epoch": 1.3807574206755373, + "grad_norm": 1.4443352165881056, + "learning_rate": 4.804519710109856e-06, + "loss": 0.4631537199020386, + "step": 4722 + }, + { + "epoch": 1.3810498610908026, + "grad_norm": 1.9168786498716517, + "learning_rate": 4.8003893443368075e-06, + "loss": 0.5304736495018005, + "step": 4723 + }, + { + "epoch": 1.3813423015060682, + "grad_norm": 1.7679231174871453, + "learning_rate": 4.79626019394114e-06, + "loss": 0.4357796907424927, + "step": 4724 + }, + { + "epoch": 1.3816347419213335, + "grad_norm": 1.9313439900637919, + "learning_rate": 4.7921322598880095e-06, + "loss": 0.6693407297134399, + "step": 4725 + }, + { + "epoch": 1.3819271823365988, + "grad_norm": 1.614277655310262, + "learning_rate": 4.788005543142299e-06, + "loss": 0.5333320498466492, + "step": 4726 + }, + { + "epoch": 1.3822196227518644, + "grad_norm": 1.900002017358812, + "learning_rate": 4.783880044668603e-06, + "loss": 0.5782167911529541, + "step": 4727 + }, + { + "epoch": 1.3825120631671297, + "grad_norm": 1.8216810622231216, + "learning_rate": 4.779755765431231e-06, + "loss": 0.581318199634552, + "step": 4728 + }, + { + "epoch": 1.382804503582395, + "grad_norm": 1.6899321824779212, + "learning_rate": 4.775632706394211e-06, + "loss": 0.5812945365905762, + "step": 4729 + }, + { + "epoch": 1.3830969439976606, + "grad_norm": 1.7981132988330288, + "learning_rate": 4.771510868521279e-06, + "loss": 0.460615873336792, + "step": 4730 + }, + { + "epoch": 1.383389384412926, + "grad_norm": 1.8316112888726737, + "learning_rate": 4.767390252775894e-06, + "loss": 0.5934186577796936, + "step": 4731 + }, + { + "epoch": 1.3836818248281912, + "grad_norm": 1.6355522234245776, + "learning_rate": 4.763270860121222e-06, + "loss": 0.4928584098815918, + "step": 4732 + }, + { + "epoch": 1.3839742652434566, + "grad_norm": 1.6231538800234695, + "learning_rate": 4.759152691520146e-06, + "loss": 0.505489706993103, + "step": 4733 + }, + { + "epoch": 
1.3842667056587221, + "grad_norm": 1.5771553081820557, + "learning_rate": 4.755035747935264e-06, + "loss": 0.5679354667663574, + "step": 4734 + }, + { + "epoch": 1.3845591460739874, + "grad_norm": 1.7096467723863036, + "learning_rate": 4.750920030328889e-06, + "loss": 0.5744746923446655, + "step": 4735 + }, + { + "epoch": 1.3848515864892528, + "grad_norm": 1.6483531613381477, + "learning_rate": 4.7468055396630395e-06, + "loss": 0.4953685402870178, + "step": 4736 + }, + { + "epoch": 1.385144026904518, + "grad_norm": 1.8803927120396235, + "learning_rate": 4.742692276899454e-06, + "loss": 0.6083461046218872, + "step": 4737 + }, + { + "epoch": 1.3854364673197836, + "grad_norm": 1.5633925902592396, + "learning_rate": 4.738580242999584e-06, + "loss": 0.4980735778808594, + "step": 4738 + }, + { + "epoch": 1.385728907735049, + "grad_norm": 1.4499409145464446, + "learning_rate": 4.734469438924594e-06, + "loss": 0.46363019943237305, + "step": 4739 + }, + { + "epoch": 1.3860213481503143, + "grad_norm": 1.818813219831182, + "learning_rate": 4.730359865635355e-06, + "loss": 0.5946298837661743, + "step": 4740 + }, + { + "epoch": 1.3863137885655799, + "grad_norm": 1.6327330611392554, + "learning_rate": 4.726251524092459e-06, + "loss": 0.5630123615264893, + "step": 4741 + }, + { + "epoch": 1.3866062289808452, + "grad_norm": 1.5382056004014089, + "learning_rate": 4.7221444152562045e-06, + "loss": 0.5353481769561768, + "step": 4742 + }, + { + "epoch": 1.3868986693961105, + "grad_norm": 1.7585652476725264, + "learning_rate": 4.718038540086602e-06, + "loss": 0.5170711874961853, + "step": 4743 + }, + { + "epoch": 1.387191109811376, + "grad_norm": 1.8043747351160766, + "learning_rate": 4.713933899543377e-06, + "loss": 0.600492000579834, + "step": 4744 + }, + { + "epoch": 1.3874835502266414, + "grad_norm": 1.5446435468278237, + "learning_rate": 4.709830494585962e-06, + "loss": 0.5291938781738281, + "step": 4745 + }, + { + "epoch": 1.3877759906419067, + "grad_norm": 1.658022225410227, + 
"learning_rate": 4.7057283261735055e-06, + "loss": 0.5664317011833191, + "step": 4746 + }, + { + "epoch": 1.388068431057172, + "grad_norm": 1.8477945736694077, + "learning_rate": 4.701627395264866e-06, + "loss": 0.606655478477478, + "step": 4747 + }, + { + "epoch": 1.3883608714724374, + "grad_norm": 1.5930247770190467, + "learning_rate": 4.697527702818604e-06, + "loss": 0.6160893440246582, + "step": 4748 + }, + { + "epoch": 1.388653311887703, + "grad_norm": 1.510283707012234, + "learning_rate": 4.693429249793002e-06, + "loss": 0.45944249629974365, + "step": 4749 + }, + { + "epoch": 1.3889457523029682, + "grad_norm": 1.7369442621234958, + "learning_rate": 4.689332037146049e-06, + "loss": 0.5737302303314209, + "step": 4750 + }, + { + "epoch": 1.3892381927182336, + "grad_norm": 1.7885159565933124, + "learning_rate": 4.685236065835443e-06, + "loss": 0.4075150787830353, + "step": 4751 + }, + { + "epoch": 1.3895306331334991, + "grad_norm": 1.7699683741602097, + "learning_rate": 4.681141336818592e-06, + "loss": 0.5832744836807251, + "step": 4752 + }, + { + "epoch": 1.3898230735487644, + "grad_norm": 1.6617741591328279, + "learning_rate": 4.6770478510526155e-06, + "loss": 0.5444560647010803, + "step": 4753 + }, + { + "epoch": 1.3901155139640298, + "grad_norm": 1.5343212819990357, + "learning_rate": 4.672955609494339e-06, + "loss": 0.6087433695793152, + "step": 4754 + }, + { + "epoch": 1.3904079543792953, + "grad_norm": 1.3783003966189016, + "learning_rate": 4.6688646131002995e-06, + "loss": 0.3781468868255615, + "step": 4755 + }, + { + "epoch": 1.3907003947945606, + "grad_norm": 2.0008130334792953, + "learning_rate": 4.664774862826742e-06, + "loss": 0.43719804286956787, + "step": 4756 + }, + { + "epoch": 1.390992835209826, + "grad_norm": 1.7926138812382992, + "learning_rate": 4.660686359629623e-06, + "loss": 0.550011932849884, + "step": 4757 + }, + { + "epoch": 1.3912852756250915, + "grad_norm": 1.670816081047031, + "learning_rate": 4.656599104464607e-06, + "loss": 
0.6060909032821655, + "step": 4758 + }, + { + "epoch": 1.3915777160403568, + "grad_norm": 1.727898538684726, + "learning_rate": 4.652513098287058e-06, + "loss": 0.5169791579246521, + "step": 4759 + }, + { + "epoch": 1.3918701564556222, + "grad_norm": 1.667801698839589, + "learning_rate": 4.6484283420520594e-06, + "loss": 0.43063026666641235, + "step": 4760 + }, + { + "epoch": 1.3921625968708875, + "grad_norm": 1.6770983664766483, + "learning_rate": 4.644344836714397e-06, + "loss": 0.5426993370056152, + "step": 4761 + }, + { + "epoch": 1.3924550372861528, + "grad_norm": 1.7220159777866155, + "learning_rate": 4.6402625832285665e-06, + "loss": 0.5260995030403137, + "step": 4762 + }, + { + "epoch": 1.3927474777014184, + "grad_norm": 1.791130103339175, + "learning_rate": 4.63618158254877e-06, + "loss": 0.5206680297851562, + "step": 4763 + }, + { + "epoch": 1.3930399181166837, + "grad_norm": 1.8800757395074672, + "learning_rate": 4.632101835628912e-06, + "loss": 0.5250430703163147, + "step": 4764 + }, + { + "epoch": 1.393332358531949, + "grad_norm": 1.5663601185417966, + "learning_rate": 4.628023343422616e-06, + "loss": 0.5409445762634277, + "step": 4765 + }, + { + "epoch": 1.3936247989472146, + "grad_norm": 1.6199099812994435, + "learning_rate": 4.6239461068832056e-06, + "loss": 0.4676284193992615, + "step": 4766 + }, + { + "epoch": 1.39391723936248, + "grad_norm": 1.6644750420264167, + "learning_rate": 4.6198701269637014e-06, + "loss": 0.6019079089164734, + "step": 4767 + }, + { + "epoch": 1.3942096797777452, + "grad_norm": 1.6721679687151758, + "learning_rate": 4.615795404616844e-06, + "loss": 0.5434615612030029, + "step": 4768 + }, + { + "epoch": 1.3945021201930108, + "grad_norm": 1.8615818009836036, + "learning_rate": 4.611721940795074e-06, + "loss": 0.5817157030105591, + "step": 4769 + }, + { + "epoch": 1.3947945606082761, + "grad_norm": 1.7318982025014367, + "learning_rate": 4.607649736450539e-06, + "loss": 0.5601100921630859, + "step": 4770 + }, + { + "epoch": 
1.3950870010235414, + "grad_norm": 1.8105361405271991, + "learning_rate": 4.6035787925350915e-06, + "loss": 0.5955039262771606, + "step": 4771 + }, + { + "epoch": 1.3953794414388068, + "grad_norm": 1.735716832820506, + "learning_rate": 4.5995091100002905e-06, + "loss": 0.47491732239723206, + "step": 4772 + }, + { + "epoch": 1.3956718818540723, + "grad_norm": 1.7916635810918338, + "learning_rate": 4.595440689797402e-06, + "loss": 0.5451281070709229, + "step": 4773 + }, + { + "epoch": 1.3959643222693376, + "grad_norm": 1.5652511418689858, + "learning_rate": 4.591373532877389e-06, + "loss": 0.3973035514354706, + "step": 4774 + }, + { + "epoch": 1.396256762684603, + "grad_norm": 1.6712606601404056, + "learning_rate": 4.587307640190929e-06, + "loss": 0.604694128036499, + "step": 4775 + }, + { + "epoch": 1.3965492030998683, + "grad_norm": 1.3684363761943823, + "learning_rate": 4.583243012688397e-06, + "loss": 0.4120032489299774, + "step": 4776 + }, + { + "epoch": 1.3968416435151338, + "grad_norm": 1.5200379644064634, + "learning_rate": 4.579179651319878e-06, + "loss": 0.4864089787006378, + "step": 4777 + }, + { + "epoch": 1.3971340839303992, + "grad_norm": 1.7660999886821023, + "learning_rate": 4.57511755703516e-06, + "loss": 0.5774982571601868, + "step": 4778 + }, + { + "epoch": 1.3974265243456645, + "grad_norm": 1.7243096372475708, + "learning_rate": 4.571056730783725e-06, + "loss": 0.48220688104629517, + "step": 4779 + }, + { + "epoch": 1.39771896476093, + "grad_norm": 1.4235878512993427, + "learning_rate": 4.566997173514771e-06, + "loss": 0.4636304974555969, + "step": 4780 + }, + { + "epoch": 1.3980114051761954, + "grad_norm": 1.3469561341500977, + "learning_rate": 4.562938886177194e-06, + "loss": 0.500522792339325, + "step": 4781 + }, + { + "epoch": 1.3983038455914607, + "grad_norm": 1.8391525606302594, + "learning_rate": 4.558881869719595e-06, + "loss": 0.5322657823562622, + "step": 4782 + }, + { + "epoch": 1.3985962860067263, + "grad_norm": 1.8673725266705359, + 
"learning_rate": 4.554826125090276e-06, + "loss": 0.5013759136199951, + "step": 4783 + }, + { + "epoch": 1.3988887264219916, + "grad_norm": 1.5888002392216285, + "learning_rate": 4.550771653237242e-06, + "loss": 0.4261836111545563, + "step": 4784 + }, + { + "epoch": 1.399181166837257, + "grad_norm": 1.6811392186782483, + "learning_rate": 4.546718455108205e-06, + "loss": 0.6181522607803345, + "step": 4785 + }, + { + "epoch": 1.3994736072525222, + "grad_norm": 1.7420663714537028, + "learning_rate": 4.54266653165057e-06, + "loss": 0.6267478466033936, + "step": 4786 + }, + { + "epoch": 1.3997660476677876, + "grad_norm": 1.841391700351839, + "learning_rate": 4.5386158838114535e-06, + "loss": 0.5382452607154846, + "step": 4787 + }, + { + "epoch": 1.400058488083053, + "grad_norm": 1.5361116059310378, + "learning_rate": 4.534566512537668e-06, + "loss": 0.5973625183105469, + "step": 4788 + }, + { + "epoch": 1.4003509284983184, + "grad_norm": 1.7115299901221885, + "learning_rate": 4.530518418775734e-06, + "loss": 0.57401442527771, + "step": 4789 + }, + { + "epoch": 1.4006433689135838, + "grad_norm": 1.7539136213830773, + "learning_rate": 4.52647160347186e-06, + "loss": 0.5712965726852417, + "step": 4790 + }, + { + "epoch": 1.4009358093288493, + "grad_norm": 1.7324506482257287, + "learning_rate": 4.52242606757197e-06, + "loss": 0.5678268671035767, + "step": 4791 + }, + { + "epoch": 1.4012282497441146, + "grad_norm": 1.8696367540913243, + "learning_rate": 4.518381812021682e-06, + "loss": 0.4798399806022644, + "step": 4792 + }, + { + "epoch": 1.40152069015938, + "grad_norm": 1.570253187142898, + "learning_rate": 4.514338837766317e-06, + "loss": 0.48918360471725464, + "step": 4793 + }, + { + "epoch": 1.4018131305746455, + "grad_norm": 1.4711408699123494, + "learning_rate": 4.510297145750894e-06, + "loss": 0.47836846113204956, + "step": 4794 + }, + { + "epoch": 1.4021055709899108, + "grad_norm": 1.6409652265079098, + "learning_rate": 4.506256736920136e-06, + "loss": 
0.4956067204475403, + "step": 4795 + }, + { + "epoch": 1.4023980114051762, + "grad_norm": 1.6571409914414528, + "learning_rate": 4.502217612218463e-06, + "loss": 0.39146924018859863, + "step": 4796 + }, + { + "epoch": 1.4026904518204417, + "grad_norm": 1.6190957574837974, + "learning_rate": 4.498179772589998e-06, + "loss": 0.46657800674438477, + "step": 4797 + }, + { + "epoch": 1.402982892235707, + "grad_norm": 1.5760103505209448, + "learning_rate": 4.4941432189785574e-06, + "loss": 0.4949738383293152, + "step": 4798 + }, + { + "epoch": 1.4032753326509724, + "grad_norm": 1.882895838026707, + "learning_rate": 4.490107952327663e-06, + "loss": 0.5256912708282471, + "step": 4799 + }, + { + "epoch": 1.4035677730662377, + "grad_norm": 1.7128737744359326, + "learning_rate": 4.486073973580539e-06, + "loss": 0.38139551877975464, + "step": 4800 + }, + { + "epoch": 1.403860213481503, + "grad_norm": 1.8140605273544137, + "learning_rate": 4.482041283680095e-06, + "loss": 0.5014597177505493, + "step": 4801 + }, + { + "epoch": 1.4041526538967686, + "grad_norm": 1.8595922924331247, + "learning_rate": 4.478009883568951e-06, + "loss": 0.5497276186943054, + "step": 4802 + }, + { + "epoch": 1.404445094312034, + "grad_norm": 2.0532585085438524, + "learning_rate": 4.473979774189422e-06, + "loss": 0.6098340749740601, + "step": 4803 + }, + { + "epoch": 1.4047375347272992, + "grad_norm": 1.7416135071315817, + "learning_rate": 4.469950956483522e-06, + "loss": 0.40206801891326904, + "step": 4804 + }, + { + "epoch": 1.4050299751425648, + "grad_norm": 1.5567497019384768, + "learning_rate": 4.465923431392962e-06, + "loss": 0.5362050533294678, + "step": 4805 + }, + { + "epoch": 1.40532241555783, + "grad_norm": 1.6896555289921489, + "learning_rate": 4.461897199859153e-06, + "loss": 0.5688962936401367, + "step": 4806 + }, + { + "epoch": 1.4056148559730954, + "grad_norm": 2.0519988466480723, + "learning_rate": 4.457872262823202e-06, + "loss": 0.5270779132843018, + "step": 4807 + }, + { + "epoch": 
1.405907296388361, + "grad_norm": 1.9613398978608871, + "learning_rate": 4.453848621225913e-06, + "loss": 0.5656974911689758, + "step": 4808 + }, + { + "epoch": 1.4061997368036263, + "grad_norm": 1.517853308784437, + "learning_rate": 4.449826276007786e-06, + "loss": 0.44072896242141724, + "step": 4809 + }, + { + "epoch": 1.4064921772188916, + "grad_norm": 1.642033723460973, + "learning_rate": 4.445805228109022e-06, + "loss": 0.5851765871047974, + "step": 4810 + }, + { + "epoch": 1.406784617634157, + "grad_norm": 1.71031586004946, + "learning_rate": 4.441785478469519e-06, + "loss": 0.6174030303955078, + "step": 4811 + }, + { + "epoch": 1.4070770580494225, + "grad_norm": 1.5609662983326855, + "learning_rate": 4.437767028028863e-06, + "loss": 0.542346715927124, + "step": 4812 + }, + { + "epoch": 1.4073694984646878, + "grad_norm": 1.855237193625426, + "learning_rate": 4.433749877726345e-06, + "loss": 0.4964073598384857, + "step": 4813 + }, + { + "epoch": 1.4076619388799532, + "grad_norm": 1.798693836443108, + "learning_rate": 4.429734028500951e-06, + "loss": 0.5309566259384155, + "step": 4814 + }, + { + "epoch": 1.4079543792952185, + "grad_norm": 1.7569401782763947, + "learning_rate": 4.425719481291359e-06, + "loss": 0.5799233913421631, + "step": 4815 + }, + { + "epoch": 1.408246819710484, + "grad_norm": 1.6640340310451727, + "learning_rate": 4.4217062370359456e-06, + "loss": 0.37344229221343994, + "step": 4816 + }, + { + "epoch": 1.4085392601257494, + "grad_norm": 1.9633336456325348, + "learning_rate": 4.417694296672783e-06, + "loss": 0.5752555727958679, + "step": 4817 + }, + { + "epoch": 1.4088317005410147, + "grad_norm": 1.8625982582112681, + "learning_rate": 4.413683661139638e-06, + "loss": 0.61701500415802, + "step": 4818 + }, + { + "epoch": 1.4091241409562802, + "grad_norm": 1.6641617857653193, + "learning_rate": 4.409674331373972e-06, + "loss": 0.4163259267807007, + "step": 4819 + }, + { + "epoch": 1.4094165813715456, + "grad_norm": 1.4025408210631873, + 
"learning_rate": 4.40566630831294e-06, + "loss": 0.46583253145217896, + "step": 4820 + }, + { + "epoch": 1.409709021786811, + "grad_norm": 1.739036857290848, + "learning_rate": 4.401659592893396e-06, + "loss": 0.5230617523193359, + "step": 4821 + }, + { + "epoch": 1.4100014622020764, + "grad_norm": 1.7435910389535008, + "learning_rate": 4.397654186051887e-06, + "loss": 0.6351375579833984, + "step": 4822 + }, + { + "epoch": 1.4102939026173418, + "grad_norm": 1.6526547277716674, + "learning_rate": 4.3936500887246445e-06, + "loss": 0.5895766615867615, + "step": 4823 + }, + { + "epoch": 1.410586343032607, + "grad_norm": 1.7357556256264726, + "learning_rate": 4.389647301847607e-06, + "loss": 0.49772539734840393, + "step": 4824 + }, + { + "epoch": 1.4108787834478724, + "grad_norm": 1.6867136550948763, + "learning_rate": 4.385645826356402e-06, + "loss": 0.593197226524353, + "step": 4825 + }, + { + "epoch": 1.4111712238631378, + "grad_norm": 1.497358571958903, + "learning_rate": 4.381645663186348e-06, + "loss": 0.4971385598182678, + "step": 4826 + }, + { + "epoch": 1.4114636642784033, + "grad_norm": 1.772016135609381, + "learning_rate": 4.3776468132724605e-06, + "loss": 0.5452263951301575, + "step": 4827 + }, + { + "epoch": 1.4117561046936686, + "grad_norm": 1.9896815505139207, + "learning_rate": 4.373649277549446e-06, + "loss": 0.6085976362228394, + "step": 4828 + }, + { + "epoch": 1.412048545108934, + "grad_norm": 1.4346670326917912, + "learning_rate": 4.369653056951705e-06, + "loss": 0.5594700574874878, + "step": 4829 + }, + { + "epoch": 1.4123409855241995, + "grad_norm": 1.6570477364640872, + "learning_rate": 4.365658152413328e-06, + "loss": 0.5099719166755676, + "step": 4830 + }, + { + "epoch": 1.4126334259394648, + "grad_norm": 1.557110878077197, + "learning_rate": 4.3616645648681e-06, + "loss": 0.5683532953262329, + "step": 4831 + }, + { + "epoch": 1.4129258663547302, + "grad_norm": 1.9307182018155977, + "learning_rate": 4.3576722952495e-06, + "loss": 
0.5311406850814819, + "step": 4832 + }, + { + "epoch": 1.4132183067699957, + "grad_norm": 1.6214149336480879, + "learning_rate": 4.353681344490693e-06, + "loss": 0.5299100875854492, + "step": 4833 + }, + { + "epoch": 1.413510747185261, + "grad_norm": 1.6883675181677418, + "learning_rate": 4.349691713524546e-06, + "loss": 0.5531362891197205, + "step": 4834 + }, + { + "epoch": 1.4138031876005264, + "grad_norm": 1.7469666557337236, + "learning_rate": 4.345703403283603e-06, + "loss": 0.5315259099006653, + "step": 4835 + }, + { + "epoch": 1.414095628015792, + "grad_norm": 2.0019997249517645, + "learning_rate": 4.341716414700112e-06, + "loss": 0.583083987236023, + "step": 4836 + }, + { + "epoch": 1.4143880684310572, + "grad_norm": 1.680867008867613, + "learning_rate": 4.337730748706005e-06, + "loss": 0.5273857116699219, + "step": 4837 + }, + { + "epoch": 1.4146805088463226, + "grad_norm": 1.6688598484210682, + "learning_rate": 4.333746406232908e-06, + "loss": 0.4903373718261719, + "step": 4838 + }, + { + "epoch": 1.414972949261588, + "grad_norm": 1.4926269811940354, + "learning_rate": 4.329763388212134e-06, + "loss": 0.5807479619979858, + "step": 4839 + }, + { + "epoch": 1.4152653896768532, + "grad_norm": 1.6552276273685866, + "learning_rate": 4.325781695574695e-06, + "loss": 0.5613743662834167, + "step": 4840 + }, + { + "epoch": 1.4155578300921188, + "grad_norm": 1.6028157865716284, + "learning_rate": 4.321801329251286e-06, + "loss": 0.5801016092300415, + "step": 4841 + }, + { + "epoch": 1.415850270507384, + "grad_norm": 1.6267997915866552, + "learning_rate": 4.3178222901722956e-06, + "loss": 0.6412584781646729, + "step": 4842 + }, + { + "epoch": 1.4161427109226494, + "grad_norm": 1.7251596479619187, + "learning_rate": 4.313844579267793e-06, + "loss": 0.5687737464904785, + "step": 4843 + }, + { + "epoch": 1.416435151337915, + "grad_norm": 1.6343964176323358, + "learning_rate": 4.309868197467548e-06, + "loss": 0.5668497085571289, + "step": 4844 + }, + { + "epoch": 
1.4167275917531803, + "grad_norm": 1.811368112437045, + "learning_rate": 4.305893145701015e-06, + "loss": 0.5814717411994934, + "step": 4845 + }, + { + "epoch": 1.4170200321684456, + "grad_norm": 1.9246707148702022, + "learning_rate": 4.301919424897339e-06, + "loss": 0.5974467992782593, + "step": 4846 + }, + { + "epoch": 1.4173124725837112, + "grad_norm": 1.5643373795961777, + "learning_rate": 4.297947035985351e-06, + "loss": 0.48333030939102173, + "step": 4847 + }, + { + "epoch": 1.4176049129989765, + "grad_norm": 1.7102352976297683, + "learning_rate": 4.293975979893576e-06, + "loss": 0.5851039886474609, + "step": 4848 + }, + { + "epoch": 1.4178973534142418, + "grad_norm": 1.4778659468844006, + "learning_rate": 4.290006257550221e-06, + "loss": 0.5510480403900146, + "step": 4849 + }, + { + "epoch": 1.4181897938295072, + "grad_norm": 1.6670833236483533, + "learning_rate": 4.286037869883187e-06, + "loss": 0.6053529977798462, + "step": 4850 + }, + { + "epoch": 1.4184822342447727, + "grad_norm": 1.5745047113214952, + "learning_rate": 4.282070817820059e-06, + "loss": 0.471671462059021, + "step": 4851 + }, + { + "epoch": 1.418774674660038, + "grad_norm": 1.6834167266574704, + "learning_rate": 4.278105102288113e-06, + "loss": 0.4864043593406677, + "step": 4852 + }, + { + "epoch": 1.4190671150753034, + "grad_norm": 1.7275065448049989, + "learning_rate": 4.274140724214311e-06, + "loss": 0.6283255815505981, + "step": 4853 + }, + { + "epoch": 1.4193595554905687, + "grad_norm": 1.7634272907173199, + "learning_rate": 4.270177684525299e-06, + "loss": 0.4990651607513428, + "step": 4854 + }, + { + "epoch": 1.4196519959058342, + "grad_norm": 1.6718595783894241, + "learning_rate": 4.2662159841474145e-06, + "loss": 0.6053239703178406, + "step": 4855 + }, + { + "epoch": 1.4199444363210996, + "grad_norm": 1.541217587678611, + "learning_rate": 4.262255624006683e-06, + "loss": 0.45790988206863403, + "step": 4856 + }, + { + "epoch": 1.420236876736365, + "grad_norm": 1.5408074963828202, + 
"learning_rate": 4.2582966050288125e-06, + "loss": 0.49944519996643066, + "step": 4857 + }, + { + "epoch": 1.4205293171516304, + "grad_norm": 1.7145691587216874, + "learning_rate": 4.2543389281392e-06, + "loss": 0.5365482568740845, + "step": 4858 + }, + { + "epoch": 1.4208217575668958, + "grad_norm": 1.709871732141181, + "learning_rate": 4.2503825942629285e-06, + "loss": 0.7763599157333374, + "step": 4859 + }, + { + "epoch": 1.421114197982161, + "grad_norm": 1.6376653647841246, + "learning_rate": 4.246427604324768e-06, + "loss": 0.6125203371047974, + "step": 4860 + }, + { + "epoch": 1.4214066383974266, + "grad_norm": 1.8190946758346407, + "learning_rate": 4.242473959249172e-06, + "loss": 0.6634939312934875, + "step": 4861 + }, + { + "epoch": 1.421699078812692, + "grad_norm": 1.607723662080485, + "learning_rate": 4.238521659960283e-06, + "loss": 0.5117735862731934, + "step": 4862 + }, + { + "epoch": 1.4219915192279573, + "grad_norm": 1.6860730867984624, + "learning_rate": 4.234570707381925e-06, + "loss": 0.5700962543487549, + "step": 4863 + }, + { + "epoch": 1.4222839596432226, + "grad_norm": 1.5634193566609638, + "learning_rate": 4.23062110243761e-06, + "loss": 0.5443791151046753, + "step": 4864 + }, + { + "epoch": 1.422576400058488, + "grad_norm": 1.4504951290152908, + "learning_rate": 4.226672846050538e-06, + "loss": 0.5474614500999451, + "step": 4865 + }, + { + "epoch": 1.4228688404737535, + "grad_norm": 1.9578528314343135, + "learning_rate": 4.222725939143582e-06, + "loss": 0.5938940048217773, + "step": 4866 + }, + { + "epoch": 1.4231612808890188, + "grad_norm": 1.720980371359197, + "learning_rate": 4.21878038263931e-06, + "loss": 0.5010229349136353, + "step": 4867 + }, + { + "epoch": 1.4234537213042842, + "grad_norm": 1.8142108741121714, + "learning_rate": 4.214836177459975e-06, + "loss": 0.5186876058578491, + "step": 4868 + }, + { + "epoch": 1.4237461617195497, + "grad_norm": 1.6608706852165134, + "learning_rate": 4.210893324527507e-06, + "loss": 
0.5998060703277588, + "step": 4869 + }, + { + "epoch": 1.424038602134815, + "grad_norm": 1.9807145100005583, + "learning_rate": 4.206951824763528e-06, + "loss": 0.5127147436141968, + "step": 4870 + }, + { + "epoch": 1.4243310425500804, + "grad_norm": 1.4194980170815183, + "learning_rate": 4.203011679089336e-06, + "loss": 0.5134439468383789, + "step": 4871 + }, + { + "epoch": 1.424623482965346, + "grad_norm": 1.728900083762804, + "learning_rate": 4.199072888425919e-06, + "loss": 0.6244111657142639, + "step": 4872 + }, + { + "epoch": 1.4249159233806112, + "grad_norm": 1.6442803911967188, + "learning_rate": 4.195135453693944e-06, + "loss": 0.4431127905845642, + "step": 4873 + }, + { + "epoch": 1.4252083637958766, + "grad_norm": 1.7030697753848931, + "learning_rate": 4.191199375813761e-06, + "loss": 0.6479794979095459, + "step": 4874 + }, + { + "epoch": 1.4255008042111421, + "grad_norm": 2.04011086867295, + "learning_rate": 4.187264655705407e-06, + "loss": 0.6386070847511292, + "step": 4875 + }, + { + "epoch": 1.4257932446264074, + "grad_norm": 1.6039579455905961, + "learning_rate": 4.183331294288603e-06, + "loss": 0.5201597213745117, + "step": 4876 + }, + { + "epoch": 1.4260856850416728, + "grad_norm": 1.7232164566002766, + "learning_rate": 4.179399292482737e-06, + "loss": 0.46355581283569336, + "step": 4877 + }, + { + "epoch": 1.426378125456938, + "grad_norm": 2.2615584884797975, + "learning_rate": 4.175468651206898e-06, + "loss": 0.5360985398292542, + "step": 4878 + }, + { + "epoch": 1.4266705658722034, + "grad_norm": 1.552480099700309, + "learning_rate": 4.171539371379847e-06, + "loss": 0.5545670390129089, + "step": 4879 + }, + { + "epoch": 1.426963006287469, + "grad_norm": 1.4276797255790008, + "learning_rate": 4.167611453920031e-06, + "loss": 0.445978581905365, + "step": 4880 + }, + { + "epoch": 1.4272554467027343, + "grad_norm": 1.7199888948749738, + "learning_rate": 4.163684899745576e-06, + "loss": 0.5242947340011597, + "step": 4881 + }, + { + "epoch": 
1.4275478871179996, + "grad_norm": 1.7383193525416518, + "learning_rate": 4.15975970977429e-06, + "loss": 0.5544728636741638, + "step": 4882 + }, + { + "epoch": 1.4278403275332652, + "grad_norm": 2.073499174067984, + "learning_rate": 4.1558358849236626e-06, + "loss": 0.5400837063789368, + "step": 4883 + }, + { + "epoch": 1.4281327679485305, + "grad_norm": 1.6385411261569034, + "learning_rate": 4.151913426110864e-06, + "loss": 0.5201395153999329, + "step": 4884 + }, + { + "epoch": 1.4284252083637958, + "grad_norm": 1.7888379069815619, + "learning_rate": 4.147992334252745e-06, + "loss": 0.4414210319519043, + "step": 4885 + }, + { + "epoch": 1.4287176487790614, + "grad_norm": 1.7818076981346203, + "learning_rate": 4.144072610265838e-06, + "loss": 0.6590272188186646, + "step": 4886 + }, + { + "epoch": 1.4290100891943267, + "grad_norm": 1.4800084296243576, + "learning_rate": 4.140154255066356e-06, + "loss": 0.4734429717063904, + "step": 4887 + }, + { + "epoch": 1.429302529609592, + "grad_norm": 1.5398179955798732, + "learning_rate": 4.136237269570186e-06, + "loss": 0.45204073190689087, + "step": 4888 + }, + { + "epoch": 1.4295949700248574, + "grad_norm": 1.6199970278575915, + "learning_rate": 4.132321654692901e-06, + "loss": 0.6570174694061279, + "step": 4889 + }, + { + "epoch": 1.429887410440123, + "grad_norm": 1.7926483421459931, + "learning_rate": 4.128407411349754e-06, + "loss": 0.5159077644348145, + "step": 4890 + }, + { + "epoch": 1.4301798508553882, + "grad_norm": 1.603963849008659, + "learning_rate": 4.124494540455674e-06, + "loss": 0.5778994560241699, + "step": 4891 + }, + { + "epoch": 1.4304722912706536, + "grad_norm": 1.4954754441376699, + "learning_rate": 4.120583042925273e-06, + "loss": 0.4740722179412842, + "step": 4892 + }, + { + "epoch": 1.430764731685919, + "grad_norm": 1.4416066465695618, + "learning_rate": 4.116672919672837e-06, + "loss": 0.5561014413833618, + "step": 4893 + }, + { + "epoch": 1.4310571721011844, + "grad_norm": 1.5040800316270475, + 
"learning_rate": 4.112764171612335e-06, + "loss": 0.4834856688976288, + "step": 4894 + }, + { + "epoch": 1.4313496125164498, + "grad_norm": 1.691313354112802, + "learning_rate": 4.108856799657412e-06, + "loss": 0.5565547943115234, + "step": 4895 + }, + { + "epoch": 1.431642052931715, + "grad_norm": 1.8883359305911547, + "learning_rate": 4.104950804721395e-06, + "loss": 0.5401065349578857, + "step": 4896 + }, + { + "epoch": 1.4319344933469806, + "grad_norm": 1.3793655379788223, + "learning_rate": 4.101046187717284e-06, + "loss": 0.4792686700820923, + "step": 4897 + }, + { + "epoch": 1.432226933762246, + "grad_norm": 1.5922549032476903, + "learning_rate": 4.097142949557764e-06, + "loss": 0.5255981683731079, + "step": 4898 + }, + { + "epoch": 1.4325193741775113, + "grad_norm": 1.614736024187036, + "learning_rate": 4.093241091155187e-06, + "loss": 0.5535293817520142, + "step": 4899 + }, + { + "epoch": 1.4328118145927768, + "grad_norm": 1.8976199736566215, + "learning_rate": 4.089340613421589e-06, + "loss": 0.5235373973846436, + "step": 4900 + }, + { + "epoch": 1.4331042550080422, + "grad_norm": 1.8120415147677507, + "learning_rate": 4.085441517268687e-06, + "loss": 0.5538134574890137, + "step": 4901 + }, + { + "epoch": 1.4333966954233075, + "grad_norm": 1.5442149105119904, + "learning_rate": 4.081543803607869e-06, + "loss": 0.5394395589828491, + "step": 4902 + }, + { + "epoch": 1.4336891358385728, + "grad_norm": 1.6068663887611208, + "learning_rate": 4.077647473350201e-06, + "loss": 0.522742509841919, + "step": 4903 + }, + { + "epoch": 1.4339815762538382, + "grad_norm": 1.6377229499845016, + "learning_rate": 4.073752527406429e-06, + "loss": 0.559830367565155, + "step": 4904 + }, + { + "epoch": 1.4342740166691037, + "grad_norm": 1.7578675965544384, + "learning_rate": 4.069858966686971e-06, + "loss": 0.42535799741744995, + "step": 4905 + }, + { + "epoch": 1.434566457084369, + "grad_norm": 1.7745987719575682, + "learning_rate": 4.065966792101924e-06, + "loss": 
0.6075177192687988, + "step": 4906 + }, + { + "epoch": 1.4348588974996344, + "grad_norm": 1.7444570198074862, + "learning_rate": 4.06207600456106e-06, + "loss": 0.5010570883750916, + "step": 4907 + }, + { + "epoch": 1.4351513379149, + "grad_norm": 1.621587467371749, + "learning_rate": 4.058186604973826e-06, + "loss": 0.571307897567749, + "step": 4908 + }, + { + "epoch": 1.4354437783301652, + "grad_norm": 1.643170818508206, + "learning_rate": 4.0542985942493505e-06, + "loss": 0.4918866455554962, + "step": 4909 + }, + { + "epoch": 1.4357362187454306, + "grad_norm": 1.8933520643034856, + "learning_rate": 4.050411973296425e-06, + "loss": 0.6588176488876343, + "step": 4910 + }, + { + "epoch": 1.436028659160696, + "grad_norm": 1.9180926902562168, + "learning_rate": 4.046526743023526e-06, + "loss": 0.7341527938842773, + "step": 4911 + }, + { + "epoch": 1.4363210995759614, + "grad_norm": 1.7782521784505012, + "learning_rate": 4.042642904338801e-06, + "loss": 0.5233849287033081, + "step": 4912 + }, + { + "epoch": 1.4366135399912268, + "grad_norm": 1.6182742405882007, + "learning_rate": 4.038760458150079e-06, + "loss": 0.5144373178482056, + "step": 4913 + }, + { + "epoch": 1.4369059804064923, + "grad_norm": 1.55901993468911, + "learning_rate": 4.034879405364853e-06, + "loss": 0.4520954489707947, + "step": 4914 + }, + { + "epoch": 1.4371984208217576, + "grad_norm": 1.6208081934978835, + "learning_rate": 4.030999746890295e-06, + "loss": 0.5632743835449219, + "step": 4915 + }, + { + "epoch": 1.437490861237023, + "grad_norm": 1.5950473237167822, + "learning_rate": 4.027121483633257e-06, + "loss": 0.49681180715560913, + "step": 4916 + }, + { + "epoch": 1.4377833016522883, + "grad_norm": 1.684721295445507, + "learning_rate": 4.023244616500257e-06, + "loss": 0.5182398557662964, + "step": 4917 + }, + { + "epoch": 1.4380757420675536, + "grad_norm": 1.6044294787301046, + "learning_rate": 4.019369146397493e-06, + "loss": 0.5686701536178589, + "step": 4918 + }, + { + "epoch": 
1.4383681824828192, + "grad_norm": 1.682926006912085, + "learning_rate": 4.015495074230823e-06, + "loss": 0.5668520927429199, + "step": 4919 + }, + { + "epoch": 1.4386606228980845, + "grad_norm": 1.556828511748538, + "learning_rate": 4.011622400905794e-06, + "loss": 0.4511116147041321, + "step": 4920 + }, + { + "epoch": 1.4389530633133498, + "grad_norm": 1.677757503686359, + "learning_rate": 4.007751127327618e-06, + "loss": 0.4736326336860657, + "step": 4921 + }, + { + "epoch": 1.4392455037286154, + "grad_norm": 1.68287466179835, + "learning_rate": 4.003881254401183e-06, + "loss": 0.5705248117446899, + "step": 4922 + }, + { + "epoch": 1.4395379441438807, + "grad_norm": 1.4732853876066263, + "learning_rate": 4.000012783031047e-06, + "loss": 0.45527490973472595, + "step": 4923 + }, + { + "epoch": 1.439830384559146, + "grad_norm": 1.5504418192282816, + "learning_rate": 3.996145714121444e-06, + "loss": 0.4926735758781433, + "step": 4924 + }, + { + "epoch": 1.4401228249744116, + "grad_norm": 1.523617382800049, + "learning_rate": 3.992280048576276e-06, + "loss": 0.42700374126434326, + "step": 4925 + }, + { + "epoch": 1.440415265389677, + "grad_norm": 1.6783270187790582, + "learning_rate": 3.988415787299118e-06, + "loss": 0.5833145976066589, + "step": 4926 + }, + { + "epoch": 1.4407077058049422, + "grad_norm": 1.70461399954195, + "learning_rate": 3.98455293119322e-06, + "loss": 0.5290282964706421, + "step": 4927 + }, + { + "epoch": 1.4410001462202076, + "grad_norm": 1.9146871710495363, + "learning_rate": 3.9806914811614984e-06, + "loss": 0.4489266872406006, + "step": 4928 + }, + { + "epoch": 1.441292586635473, + "grad_norm": 1.9109717939773812, + "learning_rate": 3.97683143810655e-06, + "loss": 0.5630865097045898, + "step": 4929 + }, + { + "epoch": 1.4415850270507384, + "grad_norm": 1.6030492821452516, + "learning_rate": 3.972972802930627e-06, + "loss": 0.5962105989456177, + "step": 4930 + }, + { + "epoch": 1.4418774674660038, + "grad_norm": 1.789368844700869, + 
"learning_rate": 3.9691155765356674e-06, + "loss": 0.6059410572052002, + "step": 4931 + }, + { + "epoch": 1.442169907881269, + "grad_norm": 1.6894490985884645, + "learning_rate": 3.965259759823272e-06, + "loss": 0.5476605296134949, + "step": 4932 + }, + { + "epoch": 1.4424623482965346, + "grad_norm": 1.7561171676767597, + "learning_rate": 3.961405353694716e-06, + "loss": 0.70278000831604, + "step": 4933 + }, + { + "epoch": 1.4427547887118, + "grad_norm": 1.6884311650773163, + "learning_rate": 3.9575523590509445e-06, + "loss": 0.5838963389396667, + "step": 4934 + }, + { + "epoch": 1.4430472291270653, + "grad_norm": 1.536536052995308, + "learning_rate": 3.95370077679257e-06, + "loss": 0.508273720741272, + "step": 4935 + }, + { + "epoch": 1.4433396695423308, + "grad_norm": 1.4692622152510404, + "learning_rate": 3.949850607819876e-06, + "loss": 0.5053583383560181, + "step": 4936 + }, + { + "epoch": 1.4436321099575962, + "grad_norm": 1.5754477318406401, + "learning_rate": 3.946001853032818e-06, + "loss": 0.5729954242706299, + "step": 4937 + }, + { + "epoch": 1.4439245503728615, + "grad_norm": 1.833619886253515, + "learning_rate": 3.942154513331018e-06, + "loss": 0.5261870622634888, + "step": 4938 + }, + { + "epoch": 1.444216990788127, + "grad_norm": 1.3956467871190747, + "learning_rate": 3.9383085896137675e-06, + "loss": 0.34802311658859253, + "step": 4939 + }, + { + "epoch": 1.4445094312033924, + "grad_norm": 1.8896307306874633, + "learning_rate": 3.934464082780032e-06, + "loss": 0.48302024602890015, + "step": 4940 + }, + { + "epoch": 1.4448018716186577, + "grad_norm": 1.8507631130251807, + "learning_rate": 3.930620993728434e-06, + "loss": 0.6649061441421509, + "step": 4941 + }, + { + "epoch": 1.445094312033923, + "grad_norm": 1.705526500334542, + "learning_rate": 3.926779323357278e-06, + "loss": 0.5945848822593689, + "step": 4942 + }, + { + "epoch": 1.4453867524491884, + "grad_norm": 1.5476382055190478, + "learning_rate": 3.922939072564528e-06, + "loss": 
0.4783032536506653, + "step": 4943 + }, + { + "epoch": 1.445679192864454, + "grad_norm": 1.6453487782833462, + "learning_rate": 3.919100242247821e-06, + "loss": 0.4619516134262085, + "step": 4944 + }, + { + "epoch": 1.4459716332797192, + "grad_norm": 1.5327149597771257, + "learning_rate": 3.915262833304461e-06, + "loss": 0.5652358531951904, + "step": 4945 + }, + { + "epoch": 1.4462640736949846, + "grad_norm": 1.4734419470243802, + "learning_rate": 3.911426846631416e-06, + "loss": 0.4523610472679138, + "step": 4946 + }, + { + "epoch": 1.44655651411025, + "grad_norm": 1.5670101583017915, + "learning_rate": 3.9075922831253276e-06, + "loss": 0.4914482831954956, + "step": 4947 + }, + { + "epoch": 1.4468489545255154, + "grad_norm": 1.7113071980283088, + "learning_rate": 3.9037591436825005e-06, + "loss": 0.4060005247592926, + "step": 4948 + }, + { + "epoch": 1.4471413949407808, + "grad_norm": 1.9320743237560347, + "learning_rate": 3.899927429198908e-06, + "loss": 0.49987125396728516, + "step": 4949 + }, + { + "epoch": 1.4474338353560463, + "grad_norm": 2.0596677045202036, + "learning_rate": 3.896097140570189e-06, + "loss": 0.6205358505249023, + "step": 4950 + }, + { + "epoch": 1.4477262757713116, + "grad_norm": 1.7670476784744638, + "learning_rate": 3.892268278691651e-06, + "loss": 0.5302955508232117, + "step": 4951 + }, + { + "epoch": 1.448018716186577, + "grad_norm": 1.7962585212488547, + "learning_rate": 3.888440844458272e-06, + "loss": 0.5225962400436401, + "step": 4952 + }, + { + "epoch": 1.4483111566018425, + "grad_norm": 1.8247561425410785, + "learning_rate": 3.884614838764682e-06, + "loss": 0.5030089616775513, + "step": 4953 + }, + { + "epoch": 1.4486035970171078, + "grad_norm": 1.8999355010605985, + "learning_rate": 3.880790262505192e-06, + "loss": 0.6060030460357666, + "step": 4954 + }, + { + "epoch": 1.4488960374323732, + "grad_norm": 1.8229751812699673, + "learning_rate": 3.8769671165737725e-06, + "loss": 0.5244846343994141, + "step": 4955 + }, + { + "epoch": 
1.4491884778476385, + "grad_norm": 1.4616444667042836, + "learning_rate": 3.873145401864061e-06, + "loss": 0.46979671716690063, + "step": 4956 + }, + { + "epoch": 1.4494809182629038, + "grad_norm": 1.8452052569073554, + "learning_rate": 3.8693251192693596e-06, + "loss": 0.5201131105422974, + "step": 4957 + }, + { + "epoch": 1.4497733586781694, + "grad_norm": 1.679443447217904, + "learning_rate": 3.865506269682638e-06, + "loss": 0.5124838352203369, + "step": 4958 + }, + { + "epoch": 1.4500657990934347, + "grad_norm": 1.830132365627518, + "learning_rate": 3.861688853996525e-06, + "loss": 0.5613473653793335, + "step": 4959 + }, + { + "epoch": 1.4503582395087, + "grad_norm": 1.5976816836472583, + "learning_rate": 3.857872873103322e-06, + "loss": 0.46196621656417847, + "step": 4960 + }, + { + "epoch": 1.4506506799239656, + "grad_norm": 1.9393165963504067, + "learning_rate": 3.8540583278949905e-06, + "loss": 0.6427509784698486, + "step": 4961 + }, + { + "epoch": 1.450943120339231, + "grad_norm": 1.7485862700938968, + "learning_rate": 3.850245219263157e-06, + "loss": 0.6306381821632385, + "step": 4962 + }, + { + "epoch": 1.4512355607544962, + "grad_norm": 1.5645194602237047, + "learning_rate": 3.846433548099114e-06, + "loss": 0.46638673543930054, + "step": 4963 + }, + { + "epoch": 1.4515280011697618, + "grad_norm": 1.5360842567610604, + "learning_rate": 3.842623315293814e-06, + "loss": 0.4950143098831177, + "step": 4964 + }, + { + "epoch": 1.451820441585027, + "grad_norm": 1.5810107141405056, + "learning_rate": 3.838814521737875e-06, + "loss": 0.45698249340057373, + "step": 4965 + }, + { + "epoch": 1.4521128820002924, + "grad_norm": 1.6457012436395508, + "learning_rate": 3.8350071683215814e-06, + "loss": 0.6068260669708252, + "step": 4966 + }, + { + "epoch": 1.4524053224155578, + "grad_norm": 1.8188775401166803, + "learning_rate": 3.831201255934879e-06, + "loss": 0.5264104008674622, + "step": 4967 + }, + { + "epoch": 1.4526977628308233, + "grad_norm": 1.6372667669239498, 
+ "learning_rate": 3.827396785467375e-06, + "loss": 0.5198315978050232, + "step": 4968 + }, + { + "epoch": 1.4529902032460886, + "grad_norm": 1.6294906688066837, + "learning_rate": 3.823593757808342e-06, + "loss": 0.504194438457489, + "step": 4969 + }, + { + "epoch": 1.453282643661354, + "grad_norm": 1.6016674444230832, + "learning_rate": 3.819792173846717e-06, + "loss": 0.5018986463546753, + "step": 4970 + }, + { + "epoch": 1.4535750840766193, + "grad_norm": 1.6893120935929504, + "learning_rate": 3.8159920344710936e-06, + "loss": 0.4847358465194702, + "step": 4971 + }, + { + "epoch": 1.4538675244918848, + "grad_norm": 1.6703483014148515, + "learning_rate": 3.812193340569733e-06, + "loss": 0.547623872756958, + "step": 4972 + }, + { + "epoch": 1.4541599649071502, + "grad_norm": 2.1389235560975615, + "learning_rate": 3.8083960930305562e-06, + "loss": 0.534354031085968, + "step": 4973 + }, + { + "epoch": 1.4544524053224155, + "grad_norm": 1.788418032061747, + "learning_rate": 3.8046002927411506e-06, + "loss": 0.6123033165931702, + "step": 4974 + }, + { + "epoch": 1.454744845737681, + "grad_norm": 1.6087574153138633, + "learning_rate": 3.8008059405887553e-06, + "loss": 0.5222622752189636, + "step": 4975 + }, + { + "epoch": 1.4550372861529464, + "grad_norm": 1.684901707974216, + "learning_rate": 3.7970130374602785e-06, + "loss": 0.5568759441375732, + "step": 4976 + }, + { + "epoch": 1.4553297265682117, + "grad_norm": 1.7459991230210548, + "learning_rate": 3.7932215842422903e-06, + "loss": 0.5458661317825317, + "step": 4977 + }, + { + "epoch": 1.4556221669834772, + "grad_norm": 1.6216302867008319, + "learning_rate": 3.789431581821019e-06, + "loss": 0.48293566703796387, + "step": 4978 + }, + { + "epoch": 1.4559146073987426, + "grad_norm": 1.893470262052562, + "learning_rate": 3.7856430310823546e-06, + "loss": 0.647431492805481, + "step": 4979 + }, + { + "epoch": 1.456207047814008, + "grad_norm": 1.6735249045743477, + "learning_rate": 3.7818559329118475e-06, + "loss": 
0.48039543628692627, + "step": 4980 + }, + { + "epoch": 1.4564994882292732, + "grad_norm": 1.6704036620696165, + "learning_rate": 3.7780702881947084e-06, + "loss": 0.6705803871154785, + "step": 4981 + }, + { + "epoch": 1.4567919286445385, + "grad_norm": 1.7404901320645014, + "learning_rate": 3.7742860978158103e-06, + "loss": 0.564405083656311, + "step": 4982 + }, + { + "epoch": 1.457084369059804, + "grad_norm": 1.7081222209997355, + "learning_rate": 3.7705033626596844e-06, + "loss": 0.5208612084388733, + "step": 4983 + }, + { + "epoch": 1.4573768094750694, + "grad_norm": 1.909829427679328, + "learning_rate": 3.766722083610521e-06, + "loss": 0.6230732202529907, + "step": 4984 + }, + { + "epoch": 1.4576692498903348, + "grad_norm": 1.6601663066885601, + "learning_rate": 3.7629422615521747e-06, + "loss": 0.5741504430770874, + "step": 4985 + }, + { + "epoch": 1.4579616903056003, + "grad_norm": 1.584208244849031, + "learning_rate": 3.75916389736815e-06, + "loss": 0.5321571826934814, + "step": 4986 + }, + { + "epoch": 1.4582541307208656, + "grad_norm": 1.95685306597155, + "learning_rate": 3.7553869919416186e-06, + "loss": 0.6367009878158569, + "step": 4987 + }, + { + "epoch": 1.458546571136131, + "grad_norm": 1.5904913997392975, + "learning_rate": 3.75161154615541e-06, + "loss": 0.5736235976219177, + "step": 4988 + }, + { + "epoch": 1.4588390115513965, + "grad_norm": 2.0157501917439866, + "learning_rate": 3.7478375608920127e-06, + "loss": 0.5799358487129211, + "step": 4989 + }, + { + "epoch": 1.4591314519666618, + "grad_norm": 1.7515991790236536, + "learning_rate": 3.7440650370335675e-06, + "loss": 0.6065561771392822, + "step": 4990 + }, + { + "epoch": 1.4594238923819272, + "grad_norm": 1.4583944256149548, + "learning_rate": 3.740293975461886e-06, + "loss": 0.5182442665100098, + "step": 4991 + }, + { + "epoch": 1.4597163327971927, + "grad_norm": 1.6877116508095484, + "learning_rate": 3.736524377058429e-06, + "loss": 0.5065605640411377, + "step": 4992 + }, + { + "epoch": 
1.460008773212458, + "grad_norm": 1.5024812411134352, + "learning_rate": 3.7327562427043163e-06, + "loss": 0.44326460361480713, + "step": 4993 + }, + { + "epoch": 1.4603012136277234, + "grad_norm": 1.9166701258714811, + "learning_rate": 3.7289895732803306e-06, + "loss": 0.6192547082901001, + "step": 4994 + }, + { + "epoch": 1.4605936540429887, + "grad_norm": 1.794387571688338, + "learning_rate": 3.725224369666899e-06, + "loss": 0.5487738847732544, + "step": 4995 + }, + { + "epoch": 1.460886094458254, + "grad_norm": 1.922772286834415, + "learning_rate": 3.7214606327441203e-06, + "loss": 0.558982253074646, + "step": 4996 + }, + { + "epoch": 1.4611785348735196, + "grad_norm": 1.770836311904495, + "learning_rate": 3.717698363391744e-06, + "loss": 0.5277853012084961, + "step": 4997 + }, + { + "epoch": 1.461470975288785, + "grad_norm": 1.7748123557502546, + "learning_rate": 3.7139375624891795e-06, + "loss": 0.6561184525489807, + "step": 4998 + }, + { + "epoch": 1.4617634157040502, + "grad_norm": 1.5647900159041126, + "learning_rate": 3.710178230915489e-06, + "loss": 0.46555888652801514, + "step": 4999 + }, + { + "epoch": 1.4620558561193158, + "grad_norm": 1.7414970962586886, + "learning_rate": 3.706420369549394e-06, + "loss": 0.5808060765266418, + "step": 5000 + }, + { + "epoch": 1.462348296534581, + "grad_norm": 1.442227314234909, + "learning_rate": 3.7026639792692722e-06, + "loss": 0.5407893061637878, + "step": 5001 + }, + { + "epoch": 1.4626407369498464, + "grad_norm": 2.580423891920115, + "learning_rate": 3.6989090609531574e-06, + "loss": 0.538393497467041, + "step": 5002 + }, + { + "epoch": 1.462933177365112, + "grad_norm": 1.8751864874321293, + "learning_rate": 3.6951556154787373e-06, + "loss": 0.530704140663147, + "step": 5003 + }, + { + "epoch": 1.4632256177803773, + "grad_norm": 1.4470439364888814, + "learning_rate": 3.691403643723359e-06, + "loss": 0.43352627754211426, + "step": 5004 + }, + { + "epoch": 1.4635180581956426, + "grad_norm": 1.6573279039642985, + 
"learning_rate": 3.687653146564025e-06, + "loss": 0.6047205924987793, + "step": 5005 + }, + { + "epoch": 1.463810498610908, + "grad_norm": 1.6556697002732312, + "learning_rate": 3.6839041248773857e-06, + "loss": 0.44708865880966187, + "step": 5006 + }, + { + "epoch": 1.4641029390261735, + "grad_norm": 1.6445747944839355, + "learning_rate": 3.680156579539753e-06, + "loss": 0.5653451681137085, + "step": 5007 + }, + { + "epoch": 1.4643953794414388, + "grad_norm": 1.750839565103172, + "learning_rate": 3.6764105114270966e-06, + "loss": 0.49293750524520874, + "step": 5008 + }, + { + "epoch": 1.4646878198567042, + "grad_norm": 1.7691390827672615, + "learning_rate": 3.672665921415034e-06, + "loss": 0.5761851072311401, + "step": 5009 + }, + { + "epoch": 1.4649802602719695, + "grad_norm": 1.7025752756263197, + "learning_rate": 3.668922810378841e-06, + "loss": 0.5188437700271606, + "step": 5010 + }, + { + "epoch": 1.465272700687235, + "grad_norm": 1.7765263620108804, + "learning_rate": 3.6651811791934476e-06, + "loss": 0.5159400701522827, + "step": 5011 + }, + { + "epoch": 1.4655651411025004, + "grad_norm": 1.4463295265937102, + "learning_rate": 3.6614410287334377e-06, + "loss": 0.478866845369339, + "step": 5012 + }, + { + "epoch": 1.4658575815177657, + "grad_norm": 1.6006806590634375, + "learning_rate": 3.6577023598730486e-06, + "loss": 0.5509926080703735, + "step": 5013 + }, + { + "epoch": 1.4661500219330312, + "grad_norm": 1.5613591503777215, + "learning_rate": 3.6539651734861705e-06, + "loss": 0.4872981309890747, + "step": 5014 + }, + { + "epoch": 1.4664424623482966, + "grad_norm": 1.4569843282992687, + "learning_rate": 3.6502294704463493e-06, + "loss": 0.47478264570236206, + "step": 5015 + }, + { + "epoch": 1.4667349027635619, + "grad_norm": 1.765955621655722, + "learning_rate": 3.646495251626785e-06, + "loss": 0.5140335559844971, + "step": 5016 + }, + { + "epoch": 1.4670273431788274, + "grad_norm": 1.5785594027919339, + "learning_rate": 3.6427625179003223e-06, + "loss": 
0.41033172607421875, + "step": 5017 + }, + { + "epoch": 1.4673197835940928, + "grad_norm": 1.7731644033346952, + "learning_rate": 3.639031270139468e-06, + "loss": 0.4290558099746704, + "step": 5018 + }, + { + "epoch": 1.467612224009358, + "grad_norm": 1.8964888989060893, + "learning_rate": 3.635301509216379e-06, + "loss": 0.5903435349464417, + "step": 5019 + }, + { + "epoch": 1.4679046644246234, + "grad_norm": 1.7302589846174075, + "learning_rate": 3.6315732360028655e-06, + "loss": 0.6410748958587646, + "step": 5020 + }, + { + "epoch": 1.4681971048398887, + "grad_norm": 1.584781169707585, + "learning_rate": 3.6278464513703858e-06, + "loss": 0.5499910712242126, + "step": 5021 + }, + { + "epoch": 1.4684895452551543, + "grad_norm": 1.4876234400926511, + "learning_rate": 3.624121156190056e-06, + "loss": 0.4980154037475586, + "step": 5022 + }, + { + "epoch": 1.4687819856704196, + "grad_norm": 1.7622618315552074, + "learning_rate": 3.6203973513326395e-06, + "loss": 0.5910995006561279, + "step": 5023 + }, + { + "epoch": 1.469074426085685, + "grad_norm": 1.837302229581672, + "learning_rate": 3.6166750376685534e-06, + "loss": 0.6003058552742004, + "step": 5024 + }, + { + "epoch": 1.4693668665009505, + "grad_norm": 2.0086634437416215, + "learning_rate": 3.6129542160678655e-06, + "loss": 0.5655561685562134, + "step": 5025 + }, + { + "epoch": 1.4696593069162158, + "grad_norm": 1.6720399704395428, + "learning_rate": 3.609234887400297e-06, + "loss": 0.713152289390564, + "step": 5026 + }, + { + "epoch": 1.4699517473314812, + "grad_norm": 1.3619130802184511, + "learning_rate": 3.605517052535219e-06, + "loss": 0.41018784046173096, + "step": 5027 + }, + { + "epoch": 1.4702441877467467, + "grad_norm": 1.7429761856148576, + "learning_rate": 3.6018007123416486e-06, + "loss": 0.5852759480476379, + "step": 5028 + }, + { + "epoch": 1.470536628162012, + "grad_norm": 1.6763203292398523, + "learning_rate": 3.598085867688259e-06, + "loss": 0.5942279696464539, + "step": 5029 + }, + { + 
"epoch": 1.4708290685772774, + "grad_norm": 1.5957062749275768, + "learning_rate": 3.594372519443374e-06, + "loss": 0.6265639662742615, + "step": 5030 + }, + { + "epoch": 1.471121508992543, + "grad_norm": 1.6944518172910965, + "learning_rate": 3.5906606684749668e-06, + "loss": 0.4539163112640381, + "step": 5031 + }, + { + "epoch": 1.4714139494078082, + "grad_norm": 1.8810670575321342, + "learning_rate": 3.586950315650658e-06, + "loss": 0.5682815909385681, + "step": 5032 + }, + { + "epoch": 1.4717063898230736, + "grad_norm": 1.5382985580447415, + "learning_rate": 3.583241461837721e-06, + "loss": 0.5188582539558411, + "step": 5033 + }, + { + "epoch": 1.4719988302383389, + "grad_norm": 1.923705094705072, + "learning_rate": 3.5795341079030777e-06, + "loss": 0.501958966255188, + "step": 5034 + }, + { + "epoch": 1.4722912706536042, + "grad_norm": 1.769758245215022, + "learning_rate": 3.5758282547132995e-06, + "loss": 0.5748735666275024, + "step": 5035 + }, + { + "epoch": 1.4725837110688698, + "grad_norm": 1.720811530645175, + "learning_rate": 3.5721239031346067e-06, + "loss": 0.5796875357627869, + "step": 5036 + }, + { + "epoch": 1.472876151484135, + "grad_norm": 1.7760443740240528, + "learning_rate": 3.56842105403287e-06, + "loss": 0.457103431224823, + "step": 5037 + }, + { + "epoch": 1.4731685918994004, + "grad_norm": 1.607843165834991, + "learning_rate": 3.564719708273607e-06, + "loss": 0.5300487875938416, + "step": 5038 + }, + { + "epoch": 1.473461032314666, + "grad_norm": 1.7877129065541937, + "learning_rate": 3.5610198667219886e-06, + "loss": 0.48143619298934937, + "step": 5039 + }, + { + "epoch": 1.4737534727299313, + "grad_norm": 1.9171325817627416, + "learning_rate": 3.557321530242824e-06, + "loss": 0.5523685216903687, + "step": 5040 + }, + { + "epoch": 1.4740459131451966, + "grad_norm": 1.7367077785146405, + "learning_rate": 3.5536246997005785e-06, + "loss": 0.5820931196212769, + "step": 5041 + }, + { + "epoch": 1.4743383535604622, + "grad_norm": 
1.6717570524697325, + "learning_rate": 3.5499293759593656e-06, + "loss": 0.6287394762039185, + "step": 5042 + }, + { + "epoch": 1.4746307939757275, + "grad_norm": 1.737914835396703, + "learning_rate": 3.5462355598829433e-06, + "loss": 0.4621254801750183, + "step": 5043 + }, + { + "epoch": 1.4749232343909928, + "grad_norm": 1.687652415457897, + "learning_rate": 3.5425432523347205e-06, + "loss": 0.5571160316467285, + "step": 5044 + }, + { + "epoch": 1.4752156748062581, + "grad_norm": 1.716802557057107, + "learning_rate": 3.5388524541777492e-06, + "loss": 0.4135715365409851, + "step": 5045 + }, + { + "epoch": 1.4755081152215237, + "grad_norm": 1.868527213017395, + "learning_rate": 3.535163166274733e-06, + "loss": 0.524153470993042, + "step": 5046 + }, + { + "epoch": 1.475800555636789, + "grad_norm": 1.9441558365554423, + "learning_rate": 3.5314753894880205e-06, + "loss": 0.6330267786979675, + "step": 5047 + }, + { + "epoch": 1.4760929960520544, + "grad_norm": 1.7270524835767156, + "learning_rate": 3.527789124679605e-06, + "loss": 0.46210330724716187, + "step": 5048 + }, + { + "epoch": 1.4763854364673197, + "grad_norm": 1.8799684878196978, + "learning_rate": 3.524104372711131e-06, + "loss": 0.49293309450149536, + "step": 5049 + }, + { + "epoch": 1.4766778768825852, + "grad_norm": 1.7601042593478657, + "learning_rate": 3.520421134443889e-06, + "loss": 0.6196815967559814, + "step": 5050 + }, + { + "epoch": 1.4769703172978506, + "grad_norm": 1.568738566408146, + "learning_rate": 3.5167394107388064e-06, + "loss": 0.42622530460357666, + "step": 5051 + }, + { + "epoch": 1.4772627577131159, + "grad_norm": 1.6087834768838942, + "learning_rate": 3.513059202456468e-06, + "loss": 0.4475107491016388, + "step": 5052 + }, + { + "epoch": 1.4775551981283814, + "grad_norm": 1.549049360877832, + "learning_rate": 3.5093805104571e-06, + "loss": 0.4295683205127716, + "step": 5053 + }, + { + "epoch": 1.4778476385436468, + "grad_norm": 1.512499491264911, + "learning_rate": 
3.505703335600573e-06, + "loss": 0.5331642627716064, + "step": 5054 + }, + { + "epoch": 1.478140078958912, + "grad_norm": 1.7125050045051866, + "learning_rate": 3.5020276787464058e-06, + "loss": 0.5615599155426025, + "step": 5055 + }, + { + "epoch": 1.4784325193741776, + "grad_norm": 1.470462641632426, + "learning_rate": 3.4983535407537618e-06, + "loss": 0.5611366033554077, + "step": 5056 + }, + { + "epoch": 1.478724959789443, + "grad_norm": 2.0861134690908325, + "learning_rate": 3.494680922481445e-06, + "loss": 0.5891577005386353, + "step": 5057 + }, + { + "epoch": 1.4790174002047083, + "grad_norm": 1.981139638659905, + "learning_rate": 3.491009824787911e-06, + "loss": 0.5583761930465698, + "step": 5058 + }, + { + "epoch": 1.4793098406199736, + "grad_norm": 1.5020288470897978, + "learning_rate": 3.4873402485312548e-06, + "loss": 0.5001339912414551, + "step": 5059 + }, + { + "epoch": 1.479602281035239, + "grad_norm": 1.445341864944132, + "learning_rate": 3.4836721945692175e-06, + "loss": 0.5050641894340515, + "step": 5060 + }, + { + "epoch": 1.4798947214505045, + "grad_norm": 1.5825314066620513, + "learning_rate": 3.4800056637591885e-06, + "loss": 0.5377815365791321, + "step": 5061 + }, + { + "epoch": 1.4801871618657698, + "grad_norm": 1.6490614330323619, + "learning_rate": 3.4763406569581892e-06, + "loss": 0.5517662763595581, + "step": 5062 + }, + { + "epoch": 1.4804796022810351, + "grad_norm": 1.7535356829599726, + "learning_rate": 3.4726771750228984e-06, + "loss": 0.5908320546150208, + "step": 5063 + }, + { + "epoch": 1.4807720426963007, + "grad_norm": 1.640782634903257, + "learning_rate": 3.4690152188096293e-06, + "loss": 0.5169299840927124, + "step": 5064 + }, + { + "epoch": 1.481064483111566, + "grad_norm": 1.5566091974805318, + "learning_rate": 3.4653547891743457e-06, + "loss": 0.6198064088821411, + "step": 5065 + }, + { + "epoch": 1.4813569235268313, + "grad_norm": 1.7822104060368598, + "learning_rate": 3.4616958869726436e-06, + "loss": 0.4971558153629303, 
+ "step": 5066 + }, + { + "epoch": 1.481649363942097, + "grad_norm": 1.8117473020924466, + "learning_rate": 3.4580385130597794e-06, + "loss": 0.556640088558197, + "step": 5067 + }, + { + "epoch": 1.4819418043573622, + "grad_norm": 1.7297037385384992, + "learning_rate": 3.4543826682906358e-06, + "loss": 0.5336956977844238, + "step": 5068 + }, + { + "epoch": 1.4822342447726276, + "grad_norm": 1.8723627634024749, + "learning_rate": 3.4507283535197454e-06, + "loss": 0.5185145735740662, + "step": 5069 + }, + { + "epoch": 1.482526685187893, + "grad_norm": 1.5962927751585108, + "learning_rate": 3.447075569601287e-06, + "loss": 0.5460748672485352, + "step": 5070 + }, + { + "epoch": 1.4828191256031584, + "grad_norm": 1.7486536420516579, + "learning_rate": 3.4434243173890667e-06, + "loss": 0.5860699415206909, + "step": 5071 + }, + { + "epoch": 1.4831115660184238, + "grad_norm": 1.5377337582646984, + "learning_rate": 3.4397745977365482e-06, + "loss": 0.5818450450897217, + "step": 5072 + }, + { + "epoch": 1.483404006433689, + "grad_norm": 1.6591511763241749, + "learning_rate": 3.4361264114968316e-06, + "loss": 0.4205876588821411, + "step": 5073 + }, + { + "epoch": 1.4836964468489544, + "grad_norm": 1.6097740909701606, + "learning_rate": 3.4324797595226567e-06, + "loss": 0.5503501892089844, + "step": 5074 + }, + { + "epoch": 1.48398888726422, + "grad_norm": 1.7613851561474803, + "learning_rate": 3.4288346426664063e-06, + "loss": 0.5388503074645996, + "step": 5075 + }, + { + "epoch": 1.4842813276794853, + "grad_norm": 1.5726280695427581, + "learning_rate": 3.4251910617801054e-06, + "loss": 0.5866841673851013, + "step": 5076 + }, + { + "epoch": 1.4845737680947506, + "grad_norm": 1.7063663913828162, + "learning_rate": 3.4215490177154176e-06, + "loss": 0.5377970337867737, + "step": 5077 + }, + { + "epoch": 1.4848662085100162, + "grad_norm": 2.013961516297246, + "learning_rate": 3.41790851132365e-06, + "loss": 0.6311028003692627, + "step": 5078 + }, + { + "epoch": 
1.4851586489252815, + "grad_norm": 1.7100175604987324, + "learning_rate": 3.414269543455747e-06, + "loss": 0.5226441621780396, + "step": 5079 + }, + { + "epoch": 1.4854510893405468, + "grad_norm": 1.73285658375087, + "learning_rate": 3.410632114962298e-06, + "loss": 0.6306775212287903, + "step": 5080 + }, + { + "epoch": 1.4857435297558124, + "grad_norm": 1.8061194998201888, + "learning_rate": 3.406996226693531e-06, + "loss": 0.5432136058807373, + "step": 5081 + }, + { + "epoch": 1.4860359701710777, + "grad_norm": 1.564250952291821, + "learning_rate": 3.403361879499305e-06, + "loss": 0.4218754470348358, + "step": 5082 + }, + { + "epoch": 1.486328410586343, + "grad_norm": 1.7436245532279955, + "learning_rate": 3.3997290742291335e-06, + "loss": 0.5121650099754333, + "step": 5083 + }, + { + "epoch": 1.4866208510016083, + "grad_norm": 1.713174617853516, + "learning_rate": 3.39609781173216e-06, + "loss": 0.5489382743835449, + "step": 5084 + }, + { + "epoch": 1.486913291416874, + "grad_norm": 1.7492646537049668, + "learning_rate": 3.3924680928571694e-06, + "loss": 0.4190993309020996, + "step": 5085 + }, + { + "epoch": 1.4872057318321392, + "grad_norm": 2.012504952292692, + "learning_rate": 3.388839918452589e-06, + "loss": 0.5927796363830566, + "step": 5086 + }, + { + "epoch": 1.4874981722474045, + "grad_norm": 1.5385674447124333, + "learning_rate": 3.3852132893664803e-06, + "loss": 0.43746429681777954, + "step": 5087 + }, + { + "epoch": 1.4877906126626699, + "grad_norm": 1.592965785800762, + "learning_rate": 3.381588206446548e-06, + "loss": 0.41599413752555847, + "step": 5088 + }, + { + "epoch": 1.4880830530779354, + "grad_norm": 1.640030018717508, + "learning_rate": 3.3779646705401305e-06, + "loss": 0.5803484320640564, + "step": 5089 + }, + { + "epoch": 1.4883754934932008, + "grad_norm": 1.6162932555816476, + "learning_rate": 3.3743426824942082e-06, + "loss": 0.5277384519577026, + "step": 5090 + }, + { + "epoch": 1.488667933908466, + "grad_norm": 1.5149011711130314, + 
"learning_rate": 3.370722243155401e-06, + "loss": 0.5842317342758179, + "step": 5091 + }, + { + "epoch": 1.4889603743237316, + "grad_norm": 1.8602157485440332, + "learning_rate": 3.367103353369965e-06, + "loss": 0.5394416451454163, + "step": 5092 + }, + { + "epoch": 1.489252814738997, + "grad_norm": 1.6652727466684587, + "learning_rate": 3.3634860139837877e-06, + "loss": 0.5457144975662231, + "step": 5093 + }, + { + "epoch": 1.4895452551542623, + "grad_norm": 1.6270719194791377, + "learning_rate": 3.3598702258424044e-06, + "loss": 0.49552473425865173, + "step": 5094 + }, + { + "epoch": 1.4898376955695278, + "grad_norm": 1.8756044563450258, + "learning_rate": 3.3562559897909842e-06, + "loss": 0.5922214984893799, + "step": 5095 + }, + { + "epoch": 1.4901301359847932, + "grad_norm": 1.6902952443841357, + "learning_rate": 3.35264330667433e-06, + "loss": 0.5844507217407227, + "step": 5096 + }, + { + "epoch": 1.4904225764000585, + "grad_norm": 1.6441848915551236, + "learning_rate": 3.3490321773368872e-06, + "loss": 0.5096029043197632, + "step": 5097 + }, + { + "epoch": 1.4907150168153238, + "grad_norm": 1.8296617417124132, + "learning_rate": 3.345422602622734e-06, + "loss": 0.6343984603881836, + "step": 5098 + }, + { + "epoch": 1.4910074572305891, + "grad_norm": 1.7032992920741425, + "learning_rate": 3.3418145833755875e-06, + "loss": 0.5319832563400269, + "step": 5099 + }, + { + "epoch": 1.4912998976458547, + "grad_norm": 1.8127365107062148, + "learning_rate": 3.3382081204388006e-06, + "loss": 0.6453676819801331, + "step": 5100 + }, + { + "epoch": 1.49159233806112, + "grad_norm": 1.7068058578414038, + "learning_rate": 3.33460321465536e-06, + "loss": 0.5129305720329285, + "step": 5101 + }, + { + "epoch": 1.4918847784763853, + "grad_norm": 1.7103748262888143, + "learning_rate": 3.3309998668678912e-06, + "loss": 0.5680958032608032, + "step": 5102 + }, + { + "epoch": 1.492177218891651, + "grad_norm": 1.654140366409291, + "learning_rate": 3.32739807791866e-06, + "loss": 
0.5959445834159851, + "step": 5103 + }, + { + "epoch": 1.4924696593069162, + "grad_norm": 1.5546485584978795, + "learning_rate": 3.3237978486495536e-06, + "loss": 0.5549102425575256, + "step": 5104 + }, + { + "epoch": 1.4927620997221815, + "grad_norm": 1.5522771682213525, + "learning_rate": 3.3201991799021084e-06, + "loss": 0.4219816327095032, + "step": 5105 + }, + { + "epoch": 1.493054540137447, + "grad_norm": 1.8150814493123832, + "learning_rate": 3.3166020725174906e-06, + "loss": 0.46013522148132324, + "step": 5106 + }, + { + "epoch": 1.4933469805527124, + "grad_norm": 2.2057259724068885, + "learning_rate": 3.3130065273365033e-06, + "loss": 0.6013174057006836, + "step": 5107 + }, + { + "epoch": 1.4936394209679777, + "grad_norm": 1.9081850485789635, + "learning_rate": 3.3094125451995827e-06, + "loss": 0.7097996473312378, + "step": 5108 + }, + { + "epoch": 1.4939318613832433, + "grad_norm": 1.6725604100107134, + "learning_rate": 3.305820126946799e-06, + "loss": 0.6704884767532349, + "step": 5109 + }, + { + "epoch": 1.4942243017985086, + "grad_norm": 1.735486744932862, + "learning_rate": 3.3022292734178605e-06, + "loss": 0.5211119651794434, + "step": 5110 + }, + { + "epoch": 1.494516742213774, + "grad_norm": 1.7718418689676594, + "learning_rate": 3.2986399854521065e-06, + "loss": 0.5830427408218384, + "step": 5111 + }, + { + "epoch": 1.4948091826290393, + "grad_norm": 1.574048881929475, + "learning_rate": 3.2950522638885106e-06, + "loss": 0.5647883415222168, + "step": 5112 + }, + { + "epoch": 1.4951016230443046, + "grad_norm": 1.3783682279274316, + "learning_rate": 3.2914661095656807e-06, + "loss": 0.46678125858306885, + "step": 5113 + }, + { + "epoch": 1.4953940634595702, + "grad_norm": 1.768460226758459, + "learning_rate": 3.287881523321863e-06, + "loss": 0.5391934514045715, + "step": 5114 + }, + { + "epoch": 1.4956865038748355, + "grad_norm": 1.532723290545503, + "learning_rate": 3.284298505994926e-06, + "loss": 0.4039243459701538, + "step": 5115 + }, + { + 
"epoch": 1.4959789442901008, + "grad_norm": 1.8718379114919181, + "learning_rate": 3.2807170584223802e-06, + "loss": 0.6187412738800049, + "step": 5116 + }, + { + "epoch": 1.4962713847053664, + "grad_norm": 1.730072311160077, + "learning_rate": 3.277137181441369e-06, + "loss": 0.5165137648582458, + "step": 5117 + }, + { + "epoch": 1.4965638251206317, + "grad_norm": 1.7402216150888872, + "learning_rate": 3.273558875888665e-06, + "loss": 0.6315420866012573, + "step": 5118 + }, + { + "epoch": 1.496856265535897, + "grad_norm": 1.6811341442796868, + "learning_rate": 3.269982142600677e-06, + "loss": 0.5522993206977844, + "step": 5119 + }, + { + "epoch": 1.4971487059511626, + "grad_norm": 1.8103742244487522, + "learning_rate": 3.266406982413444e-06, + "loss": 0.5751636028289795, + "step": 5120 + }, + { + "epoch": 1.4974411463664279, + "grad_norm": 1.8346826868047423, + "learning_rate": 3.262833396162637e-06, + "loss": 0.5552358031272888, + "step": 5121 + }, + { + "epoch": 1.4977335867816932, + "grad_norm": 1.4553347230926987, + "learning_rate": 3.259261384683562e-06, + "loss": 0.4971257150173187, + "step": 5122 + }, + { + "epoch": 1.4980260271969585, + "grad_norm": 1.7328825599332134, + "learning_rate": 3.2556909488111533e-06, + "loss": 0.3803454637527466, + "step": 5123 + }, + { + "epoch": 1.498318467612224, + "grad_norm": 1.7448185442015292, + "learning_rate": 3.25212208937998e-06, + "loss": 0.45348531007766724, + "step": 5124 + }, + { + "epoch": 1.4986109080274894, + "grad_norm": 1.6593501166731528, + "learning_rate": 3.2485548072242403e-06, + "loss": 0.4839708209037781, + "step": 5125 + }, + { + "epoch": 1.4989033484427547, + "grad_norm": 1.7004886969570365, + "learning_rate": 3.244989103177768e-06, + "loss": 0.4743500351905823, + "step": 5126 + }, + { + "epoch": 1.49919578885802, + "grad_norm": 1.7042585723205583, + "learning_rate": 3.241424978074018e-06, + "loss": 0.558182954788208, + "step": 5127 + }, + { + "epoch": 1.4994882292732856, + "grad_norm": 
1.5886443982701122, + "learning_rate": 3.2378624327460874e-06, + "loss": 0.41309911012649536, + "step": 5128 + }, + { + "epoch": 1.499780669688551, + "grad_norm": 1.7452725700601364, + "learning_rate": 3.2343014680266984e-06, + "loss": 0.5627751350402832, + "step": 5129 + }, + { + "epoch": 1.5000731101038163, + "grad_norm": 1.8911076385977756, + "learning_rate": 3.230742084748204e-06, + "loss": 0.5374714732170105, + "step": 5130 + }, + { + "epoch": 1.5003655505190818, + "grad_norm": 1.7659792305895352, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.4264039993286133, + "step": 5131 + }, + { + "epoch": 1.5006579909343472, + "grad_norm": 1.8312136055327797, + "learning_rate": 3.223628065841472e-06, + "loss": 0.44204217195510864, + "step": 5132 + }, + { + "epoch": 1.5009504313496125, + "grad_norm": 1.6892686547824762, + "learning_rate": 3.220073431876092e-06, + "loss": 0.5322041511535645, + "step": 5133 + }, + { + "epoch": 1.501242871764878, + "grad_norm": 1.6801975106342348, + "learning_rate": 3.216520382677324e-06, + "loss": 0.4741417169570923, + "step": 5134 + }, + { + "epoch": 1.5015353121801431, + "grad_norm": 1.9712166683153383, + "learning_rate": 3.212968919075672e-06, + "loss": 0.7069851756095886, + "step": 5135 + }, + { + "epoch": 1.5018277525954087, + "grad_norm": 1.6644566597906936, + "learning_rate": 3.2094190419012694e-06, + "loss": 0.6049044132232666, + "step": 5136 + }, + { + "epoch": 1.5021201930106742, + "grad_norm": 1.6420500389509403, + "learning_rate": 3.2058707519838817e-06, + "loss": 0.556586503982544, + "step": 5137 + }, + { + "epoch": 1.5024126334259393, + "grad_norm": 1.4612168804015682, + "learning_rate": 3.202324050152894e-06, + "loss": 0.46489936113357544, + "step": 5138 + }, + { + "epoch": 1.5027050738412049, + "grad_norm": 1.6808104719845611, + "learning_rate": 3.1987789372373292e-06, + "loss": 0.5332333445549011, + "step": 5139 + }, + { + "epoch": 1.5029975142564702, + "grad_norm": 1.5897163584111842, + "learning_rate": 
3.1952354140658346e-06, + "loss": 0.5547586679458618, + "step": 5140 + }, + { + "epoch": 1.5032899546717355, + "grad_norm": 1.7343008366786887, + "learning_rate": 3.1916934814666858e-06, + "loss": 0.5500372648239136, + "step": 5141 + }, + { + "epoch": 1.503582395087001, + "grad_norm": 1.6657659858957796, + "learning_rate": 3.1881531402677934e-06, + "loss": 0.5065571069717407, + "step": 5142 + }, + { + "epoch": 1.5038748355022664, + "grad_norm": 2.106659003681642, + "learning_rate": 3.1846143912966887e-06, + "loss": 0.5942833423614502, + "step": 5143 + }, + { + "epoch": 1.5041672759175317, + "grad_norm": 1.5318136638727409, + "learning_rate": 3.181077235380531e-06, + "loss": 0.4089720547199249, + "step": 5144 + }, + { + "epoch": 1.5044597163327973, + "grad_norm": 1.959628279475518, + "learning_rate": 3.1775416733461107e-06, + "loss": 0.5360317230224609, + "step": 5145 + }, + { + "epoch": 1.5047521567480626, + "grad_norm": 1.8497642502339247, + "learning_rate": 3.174007706019845e-06, + "loss": 0.5403856635093689, + "step": 5146 + }, + { + "epoch": 1.505044597163328, + "grad_norm": 1.583723666722825, + "learning_rate": 3.1704753342277727e-06, + "loss": 0.5377147197723389, + "step": 5147 + }, + { + "epoch": 1.5053370375785935, + "grad_norm": 1.908833197627838, + "learning_rate": 3.166944558795567e-06, + "loss": 0.49888312816619873, + "step": 5148 + }, + { + "epoch": 1.5056294779938586, + "grad_norm": 1.604723023798687, + "learning_rate": 3.1634153805485245e-06, + "loss": 0.5105957984924316, + "step": 5149 + }, + { + "epoch": 1.5059219184091241, + "grad_norm": 1.530550544138999, + "learning_rate": 3.1598878003115694e-06, + "loss": 0.5653882026672363, + "step": 5150 + }, + { + "epoch": 1.5062143588243895, + "grad_norm": 1.7528922447010231, + "learning_rate": 3.1563618189092536e-06, + "loss": 0.5293145179748535, + "step": 5151 + }, + { + "epoch": 1.5065067992396548, + "grad_norm": 1.7049104339852403, + "learning_rate": 3.1528374371657524e-06, + "loss": 0.5852463841438293, 
+ "step": 5152 + }, + { + "epoch": 1.5067992396549204, + "grad_norm": 1.7074372465536334, + "learning_rate": 3.1493146559048683e-06, + "loss": 0.5986759662628174, + "step": 5153 + }, + { + "epoch": 1.5070916800701857, + "grad_norm": 1.8410699226798701, + "learning_rate": 3.1457934759500298e-06, + "loss": 0.6363133788108826, + "step": 5154 + }, + { + "epoch": 1.507384120485451, + "grad_norm": 1.7703119171725752, + "learning_rate": 3.1422738981242927e-06, + "loss": 0.4757901430130005, + "step": 5155 + }, + { + "epoch": 1.5076765609007166, + "grad_norm": 1.8042941675603332, + "learning_rate": 3.1387559232503374e-06, + "loss": 0.7614980936050415, + "step": 5156 + }, + { + "epoch": 1.5079690013159819, + "grad_norm": 1.8353916940267578, + "learning_rate": 3.13523955215047e-06, + "loss": 0.5739883184432983, + "step": 5157 + }, + { + "epoch": 1.5082614417312472, + "grad_norm": 1.6405466984899346, + "learning_rate": 3.131724785646616e-06, + "loss": 0.5893388390541077, + "step": 5158 + }, + { + "epoch": 1.5085538821465128, + "grad_norm": 1.4613031069188664, + "learning_rate": 3.1282116245603333e-06, + "loss": 0.5809957981109619, + "step": 5159 + }, + { + "epoch": 1.508846322561778, + "grad_norm": 1.558509757762028, + "learning_rate": 3.124700069712803e-06, + "loss": 0.5651090741157532, + "step": 5160 + }, + { + "epoch": 1.5091387629770434, + "grad_norm": 1.5870160926102073, + "learning_rate": 3.1211901219248273e-06, + "loss": 0.3736303448677063, + "step": 5161 + }, + { + "epoch": 1.509431203392309, + "grad_norm": 1.744264206007829, + "learning_rate": 3.117681782016838e-06, + "loss": 0.5501068234443665, + "step": 5162 + }, + { + "epoch": 1.509723643807574, + "grad_norm": 1.7377852819958348, + "learning_rate": 3.1141750508088865e-06, + "loss": 0.6210630536079407, + "step": 5163 + }, + { + "epoch": 1.5100160842228396, + "grad_norm": 1.5741938339988393, + "learning_rate": 3.110669929120651e-06, + "loss": 0.5722042322158813, + "step": 5164 + }, + { + "epoch": 1.510308524638105, + 
"grad_norm": 1.617906406413033, + "learning_rate": 3.107166417771431e-06, + "loss": 0.5813776254653931, + "step": 5165 + }, + { + "epoch": 1.5106009650533703, + "grad_norm": 1.5816945478856634, + "learning_rate": 3.1036645175801515e-06, + "loss": 0.4911368787288666, + "step": 5166 + }, + { + "epoch": 1.5108934054686358, + "grad_norm": 1.5812988749732655, + "learning_rate": 3.100164229365361e-06, + "loss": 0.5136172771453857, + "step": 5167 + }, + { + "epoch": 1.5111858458839011, + "grad_norm": 1.7202185949801794, + "learning_rate": 3.096665553945234e-06, + "loss": 0.5746543407440186, + "step": 5168 + }, + { + "epoch": 1.5114782862991665, + "grad_norm": 1.8577610332100818, + "learning_rate": 3.0931684921375572e-06, + "loss": 0.4949193000793457, + "step": 5169 + }, + { + "epoch": 1.511770726714432, + "grad_norm": 1.6744220879324234, + "learning_rate": 3.089673044759751e-06, + "loss": 0.5732932090759277, + "step": 5170 + }, + { + "epoch": 1.5120631671296973, + "grad_norm": 1.5865659073822531, + "learning_rate": 3.086179212628855e-06, + "loss": 0.5329696536064148, + "step": 5171 + }, + { + "epoch": 1.5123556075449627, + "grad_norm": 1.7970382860153173, + "learning_rate": 3.082686996561531e-06, + "loss": 0.631770670413971, + "step": 5172 + }, + { + "epoch": 1.5126480479602282, + "grad_norm": 1.5998021767601671, + "learning_rate": 3.0791963973740646e-06, + "loss": 0.5183405876159668, + "step": 5173 + }, + { + "epoch": 1.5129404883754933, + "grad_norm": 1.7133603210505308, + "learning_rate": 3.075707415882361e-06, + "loss": 0.5616034269332886, + "step": 5174 + }, + { + "epoch": 1.5132329287907589, + "grad_norm": 1.5912245556380846, + "learning_rate": 3.0722200529019477e-06, + "loss": 0.48513877391815186, + "step": 5175 + }, + { + "epoch": 1.5135253692060244, + "grad_norm": 1.574805808870548, + "learning_rate": 3.068734309247976e-06, + "loss": 0.5226399898529053, + "step": 5176 + }, + { + "epoch": 1.5138178096212895, + "grad_norm": 1.592402045128277, + "learning_rate": 
3.0652501857352167e-06, + "loss": 0.48817533254623413, + "step": 5177 + }, + { + "epoch": 1.514110250036555, + "grad_norm": 1.5523305292465257, + "learning_rate": 3.061767683178063e-06, + "loss": 0.4163327217102051, + "step": 5178 + }, + { + "epoch": 1.5144026904518204, + "grad_norm": 1.6254224030737643, + "learning_rate": 3.058286802390531e-06, + "loss": 0.5984256267547607, + "step": 5179 + }, + { + "epoch": 1.5146951308670857, + "grad_norm": 1.8006518354372911, + "learning_rate": 3.054807544186249e-06, + "loss": 0.47233515977859497, + "step": 5180 + }, + { + "epoch": 1.5149875712823513, + "grad_norm": 1.6896342506826862, + "learning_rate": 3.0513299093784766e-06, + "loss": 0.5545482635498047, + "step": 5181 + }, + { + "epoch": 1.5152800116976166, + "grad_norm": 1.5925171354605219, + "learning_rate": 3.047853898780089e-06, + "loss": 0.46200019121170044, + "step": 5182 + }, + { + "epoch": 1.515572452112882, + "grad_norm": 1.7986358499610187, + "learning_rate": 3.0443795132035824e-06, + "loss": 0.6146235466003418, + "step": 5183 + }, + { + "epoch": 1.5158648925281475, + "grad_norm": 1.6180210942837954, + "learning_rate": 3.040906753461075e-06, + "loss": 0.5653461217880249, + "step": 5184 + }, + { + "epoch": 1.5161573329434128, + "grad_norm": 1.7782122645526974, + "learning_rate": 3.0374356203643008e-06, + "loss": 0.6514929533004761, + "step": 5185 + }, + { + "epoch": 1.5164497733586781, + "grad_norm": 1.6488410817366923, + "learning_rate": 3.033966114724618e-06, + "loss": 0.48213401436805725, + "step": 5186 + }, + { + "epoch": 1.5167422137739437, + "grad_norm": 1.8810893536328739, + "learning_rate": 3.0304982373530013e-06, + "loss": 0.4935530424118042, + "step": 5187 + }, + { + "epoch": 1.5170346541892088, + "grad_norm": 1.9406636249591702, + "learning_rate": 3.0270319890600465e-06, + "loss": 0.6435343027114868, + "step": 5188 + }, + { + "epoch": 1.5173270946044743, + "grad_norm": 1.4722259236044228, + "learning_rate": 3.0235673706559675e-06, + "loss": 
0.49350717663764954, + "step": 5189 + }, + { + "epoch": 1.5176195350197397, + "grad_norm": 1.636152242750681, + "learning_rate": 3.0201043829506015e-06, + "loss": 0.4745938777923584, + "step": 5190 + }, + { + "epoch": 1.517911975435005, + "grad_norm": 1.747247707841839, + "learning_rate": 3.0166430267533944e-06, + "loss": 0.5867031812667847, + "step": 5191 + }, + { + "epoch": 1.5182044158502705, + "grad_norm": 2.0836038611604275, + "learning_rate": 3.01318330287342e-06, + "loss": 0.5477231740951538, + "step": 5192 + }, + { + "epoch": 1.5184968562655359, + "grad_norm": 1.5825293698408722, + "learning_rate": 3.0097252121193687e-06, + "loss": 0.5788818597793579, + "step": 5193 + }, + { + "epoch": 1.5187892966808012, + "grad_norm": 1.5819522244244852, + "learning_rate": 3.0062687552995475e-06, + "loss": 0.4967714548110962, + "step": 5194 + }, + { + "epoch": 1.5190817370960668, + "grad_norm": 1.810354148695448, + "learning_rate": 3.002813933221882e-06, + "loss": 0.6427319645881653, + "step": 5195 + }, + { + "epoch": 1.519374177511332, + "grad_norm": 1.7324283900525337, + "learning_rate": 2.999360746693916e-06, + "loss": 0.5615307688713074, + "step": 5196 + }, + { + "epoch": 1.5196666179265974, + "grad_norm": 1.8017068269121923, + "learning_rate": 2.9959091965228102e-06, + "loss": 0.6646313667297363, + "step": 5197 + }, + { + "epoch": 1.519959058341863, + "grad_norm": 1.4648905848591907, + "learning_rate": 2.9924592835153454e-06, + "loss": 0.47536247968673706, + "step": 5198 + }, + { + "epoch": 1.5202514987571283, + "grad_norm": 1.701001149097395, + "learning_rate": 2.9890110084779157e-06, + "loss": 0.5850256681442261, + "step": 5199 + }, + { + "epoch": 1.5205439391723936, + "grad_norm": 1.6650942638342863, + "learning_rate": 2.985564372216536e-06, + "loss": 0.5724887251853943, + "step": 5200 + }, + { + "epoch": 1.5208363795876592, + "grad_norm": 1.6379341688791944, + "learning_rate": 2.9821193755368383e-06, + "loss": 0.5052510499954224, + "step": 5201 + }, + { + 
"epoch": 1.5211288200029243, + "grad_norm": 1.5270508750040293, + "learning_rate": 2.9786760192440644e-06, + "loss": 0.439144492149353, + "step": 5202 + }, + { + "epoch": 1.5214212604181898, + "grad_norm": 1.624134940512823, + "learning_rate": 2.97523430414308e-06, + "loss": 0.4560511112213135, + "step": 5203 + }, + { + "epoch": 1.5217137008334551, + "grad_norm": 1.9447169329839864, + "learning_rate": 2.9717942310383664e-06, + "loss": 0.6848068237304688, + "step": 5204 + }, + { + "epoch": 1.5220061412487205, + "grad_norm": 1.5338251170475576, + "learning_rate": 2.9683558007340184e-06, + "loss": 0.5541313886642456, + "step": 5205 + }, + { + "epoch": 1.522298581663986, + "grad_norm": 1.4921475223936211, + "learning_rate": 2.964919014033749e-06, + "loss": 0.5117338299751282, + "step": 5206 + }, + { + "epoch": 1.5225910220792513, + "grad_norm": 1.8454970950489444, + "learning_rate": 2.9614838717408866e-06, + "loss": 0.5164151191711426, + "step": 5207 + }, + { + "epoch": 1.5228834624945167, + "grad_norm": 1.6612213438595136, + "learning_rate": 2.9580503746583744e-06, + "loss": 0.5461020469665527, + "step": 5208 + }, + { + "epoch": 1.5231759029097822, + "grad_norm": 1.580589085309813, + "learning_rate": 2.9546185235887705e-06, + "loss": 0.4265401065349579, + "step": 5209 + }, + { + "epoch": 1.5234683433250475, + "grad_norm": 1.822483254200033, + "learning_rate": 2.9511883193342505e-06, + "loss": 0.47372496128082275, + "step": 5210 + }, + { + "epoch": 1.5237607837403129, + "grad_norm": 1.5409548150660597, + "learning_rate": 2.9477597626966036e-06, + "loss": 0.43951019644737244, + "step": 5211 + }, + { + "epoch": 1.5240532241555784, + "grad_norm": 2.1038432849237862, + "learning_rate": 2.9443328544772343e-06, + "loss": 0.6514073610305786, + "step": 5212 + }, + { + "epoch": 1.5243456645708435, + "grad_norm": 1.6794879789857167, + "learning_rate": 2.940907595477164e-06, + "loss": 0.523013710975647, + "step": 5213 + }, + { + "epoch": 1.524638104986109, + "grad_norm": 
1.6399154124434079, + "learning_rate": 2.9374839864970194e-06, + "loss": 0.4945281744003296, + "step": 5214 + }, + { + "epoch": 1.5249305454013746, + "grad_norm": 1.83414324289986, + "learning_rate": 2.9340620283370525e-06, + "loss": 0.5768609046936035, + "step": 5215 + }, + { + "epoch": 1.5252229858166397, + "grad_norm": 1.7611799606025424, + "learning_rate": 2.930641721797125e-06, + "loss": 0.45644205808639526, + "step": 5216 + }, + { + "epoch": 1.5255154262319053, + "grad_norm": 1.5932175762441756, + "learning_rate": 2.92722306767671e-06, + "loss": 0.590227484703064, + "step": 5217 + }, + { + "epoch": 1.5258078666471706, + "grad_norm": 1.8078838529845034, + "learning_rate": 2.9238060667749014e-06, + "loss": 0.5618122816085815, + "step": 5218 + }, + { + "epoch": 1.526100307062436, + "grad_norm": 1.9135498575527394, + "learning_rate": 2.9203907198904027e-06, + "loss": 0.6431877613067627, + "step": 5219 + }, + { + "epoch": 1.5263927474777015, + "grad_norm": 1.5548470750003383, + "learning_rate": 2.916977027821527e-06, + "loss": 0.5019941329956055, + "step": 5220 + }, + { + "epoch": 1.5266851878929668, + "grad_norm": 1.9013308084843434, + "learning_rate": 2.913564991366209e-06, + "loss": 0.5413016080856323, + "step": 5221 + }, + { + "epoch": 1.5269776283082321, + "grad_norm": 1.6880920277336984, + "learning_rate": 2.9101546113219846e-06, + "loss": 0.6546905636787415, + "step": 5222 + }, + { + "epoch": 1.5272700687234977, + "grad_norm": 1.7013707157233615, + "learning_rate": 2.906745888486013e-06, + "loss": 0.5689815878868103, + "step": 5223 + }, + { + "epoch": 1.527562509138763, + "grad_norm": 1.8369848799419313, + "learning_rate": 2.9033388236550632e-06, + "loss": 0.5134810209274292, + "step": 5224 + }, + { + "epoch": 1.5278549495540283, + "grad_norm": 1.4280052174004847, + "learning_rate": 2.8999334176255143e-06, + "loss": 0.4880787134170532, + "step": 5225 + }, + { + "epoch": 1.528147389969294, + "grad_norm": 1.8292283637694566, + "learning_rate": 
2.89652967119336e-06, + "loss": 0.4345950782299042, + "step": 5226 + }, + { + "epoch": 1.528439830384559, + "grad_norm": 1.724451812949585, + "learning_rate": 2.893127585154205e-06, + "loss": 0.43327242136001587, + "step": 5227 + }, + { + "epoch": 1.5287322707998245, + "grad_norm": 1.780345207484487, + "learning_rate": 2.889727160303266e-06, + "loss": 0.6423674821853638, + "step": 5228 + }, + { + "epoch": 1.5290247112150899, + "grad_norm": 1.5540524492201802, + "learning_rate": 2.886328397435374e-06, + "loss": 0.5263554453849792, + "step": 5229 + }, + { + "epoch": 1.5293171516303552, + "grad_norm": 1.6433428703006638, + "learning_rate": 2.882931297344965e-06, + "loss": 0.4111948013305664, + "step": 5230 + }, + { + "epoch": 1.5296095920456207, + "grad_norm": 1.804627326985323, + "learning_rate": 2.8795358608260936e-06, + "loss": 0.43803131580352783, + "step": 5231 + }, + { + "epoch": 1.529902032460886, + "grad_norm": 1.5504311785369362, + "learning_rate": 2.8761420886724223e-06, + "loss": 0.4708956778049469, + "step": 5232 + }, + { + "epoch": 1.5301944728761514, + "grad_norm": 1.7185936460565197, + "learning_rate": 2.8727499816772265e-06, + "loss": 0.5268635749816895, + "step": 5233 + }, + { + "epoch": 1.530486913291417, + "grad_norm": 1.6977720322438927, + "learning_rate": 2.869359540633385e-06, + "loss": 0.5092788934707642, + "step": 5234 + }, + { + "epoch": 1.5307793537066823, + "grad_norm": 1.630735809850627, + "learning_rate": 2.8659707663333958e-06, + "loss": 0.4603293836116791, + "step": 5235 + }, + { + "epoch": 1.5310717941219476, + "grad_norm": 1.7857705195277582, + "learning_rate": 2.8625836595693646e-06, + "loss": 0.545462965965271, + "step": 5236 + }, + { + "epoch": 1.5313642345372132, + "grad_norm": 1.6146415057105645, + "learning_rate": 2.8591982211330073e-06, + "loss": 0.511603832244873, + "step": 5237 + }, + { + "epoch": 1.5316566749524785, + "grad_norm": 1.7935851159627383, + "learning_rate": 2.8558144518156485e-06, + "loss": 0.5076707601547241, + 
"step": 5238 + }, + { + "epoch": 1.5319491153677438, + "grad_norm": 1.7012818042378361, + "learning_rate": 2.852432352408224e-06, + "loss": 0.5923745632171631, + "step": 5239 + }, + { + "epoch": 1.5322415557830094, + "grad_norm": 1.8251553548092714, + "learning_rate": 2.849051923701279e-06, + "loss": 0.5588465332984924, + "step": 5240 + }, + { + "epoch": 1.5325339961982745, + "grad_norm": 1.6493521356208132, + "learning_rate": 2.845673166484969e-06, + "loss": 0.6681923270225525, + "step": 5241 + }, + { + "epoch": 1.53282643661354, + "grad_norm": 1.8683876960783266, + "learning_rate": 2.8422960815490564e-06, + "loss": 0.5702543258666992, + "step": 5242 + }, + { + "epoch": 1.5331188770288053, + "grad_norm": 1.8090012581479555, + "learning_rate": 2.8389206696829165e-06, + "loss": 0.5401744842529297, + "step": 5243 + }, + { + "epoch": 1.5334113174440707, + "grad_norm": 1.6641276436242072, + "learning_rate": 2.8355469316755324e-06, + "loss": 0.43371906876564026, + "step": 5244 + }, + { + "epoch": 1.5337037578593362, + "grad_norm": 1.6323739542625777, + "learning_rate": 2.8321748683154893e-06, + "loss": 0.5598163604736328, + "step": 5245 + }, + { + "epoch": 1.5339961982746015, + "grad_norm": 1.8330291281030966, + "learning_rate": 2.8288044803909896e-06, + "loss": 0.5836831331253052, + "step": 5246 + }, + { + "epoch": 1.5342886386898669, + "grad_norm": 1.6637462764959579, + "learning_rate": 2.8254357686898404e-06, + "loss": 0.5308898687362671, + "step": 5247 + }, + { + "epoch": 1.5345810791051324, + "grad_norm": 1.7589253104867197, + "learning_rate": 2.822068733999459e-06, + "loss": 0.6104828119277954, + "step": 5248 + }, + { + "epoch": 1.5348735195203977, + "grad_norm": 1.9266285032289332, + "learning_rate": 2.8187033771068685e-06, + "loss": 0.48373985290527344, + "step": 5249 + }, + { + "epoch": 1.535165959935663, + "grad_norm": 1.745809860715047, + "learning_rate": 2.8153396987987e-06, + "loss": 0.5213532447814941, + "step": 5250 + }, + { + "epoch": 1.5354584003509286, 
+ "grad_norm": 1.7052291407432676, + "learning_rate": 2.811977699861195e-06, + "loss": 0.5241051912307739, + "step": 5251 + }, + { + "epoch": 1.5357508407661937, + "grad_norm": 1.54399807563896, + "learning_rate": 2.8086173810801974e-06, + "loss": 0.48321712017059326, + "step": 5252 + }, + { + "epoch": 1.5360432811814593, + "grad_norm": 1.831716416150244, + "learning_rate": 2.8052587432411626e-06, + "loss": 0.5352765917778015, + "step": 5253 + }, + { + "epoch": 1.5363357215967248, + "grad_norm": 1.7051244593885417, + "learning_rate": 2.8019017871291522e-06, + "loss": 0.5402188301086426, + "step": 5254 + }, + { + "epoch": 1.53662816201199, + "grad_norm": 1.5780940900489064, + "learning_rate": 2.798546513528837e-06, + "loss": 0.4398813545703888, + "step": 5255 + }, + { + "epoch": 1.5369206024272555, + "grad_norm": 1.6682503262337565, + "learning_rate": 2.7951929232244855e-06, + "loss": 0.5661803483963013, + "step": 5256 + }, + { + "epoch": 1.5372130428425208, + "grad_norm": 1.9389870116334766, + "learning_rate": 2.791841016999982e-06, + "loss": 0.5051732063293457, + "step": 5257 + }, + { + "epoch": 1.5375054832577861, + "grad_norm": 1.7323475801875265, + "learning_rate": 2.788490795638815e-06, + "loss": 0.5712389945983887, + "step": 5258 + }, + { + "epoch": 1.5377979236730517, + "grad_norm": 1.7189716580722423, + "learning_rate": 2.7851422599240773e-06, + "loss": 0.6257319450378418, + "step": 5259 + }, + { + "epoch": 1.538090364088317, + "grad_norm": 1.7862483931054027, + "learning_rate": 2.7817954106384704e-06, + "loss": 0.5788396596908569, + "step": 5260 + }, + { + "epoch": 1.5383828045035823, + "grad_norm": 1.508089974245087, + "learning_rate": 2.7784502485642985e-06, + "loss": 0.37253260612487793, + "step": 5261 + }, + { + "epoch": 1.5386752449188479, + "grad_norm": 2.206166372523085, + "learning_rate": 2.7751067744834726e-06, + "loss": 0.6547001004219055, + "step": 5262 + }, + { + "epoch": 1.5389676853341132, + "grad_norm": 1.551783656656575, + "learning_rate": 
2.77176498917751e-06, + "loss": 0.510914146900177, + "step": 5263 + }, + { + "epoch": 1.5392601257493785, + "grad_norm": 1.731638922465708, + "learning_rate": 2.7684248934275327e-06, + "loss": 0.4387754201889038, + "step": 5264 + }, + { + "epoch": 1.539552566164644, + "grad_norm": 1.573259655998941, + "learning_rate": 2.765086488014268e-06, + "loss": 0.5640195608139038, + "step": 5265 + }, + { + "epoch": 1.5398450065799092, + "grad_norm": 2.3327619392306684, + "learning_rate": 2.7617497737180508e-06, + "loss": 0.5780993103981018, + "step": 5266 + }, + { + "epoch": 1.5401374469951747, + "grad_norm": 1.7296077762304434, + "learning_rate": 2.758414751318813e-06, + "loss": 0.5190057158470154, + "step": 5267 + }, + { + "epoch": 1.54042988741044, + "grad_norm": 1.6180118608432006, + "learning_rate": 2.7550814215960964e-06, + "loss": 0.4204869270324707, + "step": 5268 + }, + { + "epoch": 1.5407223278257054, + "grad_norm": 1.5345717637092124, + "learning_rate": 2.7517497853290477e-06, + "loss": 0.5649294853210449, + "step": 5269 + }, + { + "epoch": 1.541014768240971, + "grad_norm": 1.8541084629609554, + "learning_rate": 2.748419843296416e-06, + "loss": 0.49142545461654663, + "step": 5270 + }, + { + "epoch": 1.5413072086562363, + "grad_norm": 2.006144774477858, + "learning_rate": 2.745091596276557e-06, + "loss": 0.483539879322052, + "step": 5271 + }, + { + "epoch": 1.5415996490715016, + "grad_norm": 1.8772157933692841, + "learning_rate": 2.7417650450474253e-06, + "loss": 0.5400283336639404, + "step": 5272 + }, + { + "epoch": 1.5418920894867671, + "grad_norm": 1.6915167892784866, + "learning_rate": 2.7384401903865844e-06, + "loss": 0.5490765571594238, + "step": 5273 + }, + { + "epoch": 1.5421845299020325, + "grad_norm": 2.267512124400057, + "learning_rate": 2.7351170330711975e-06, + "loss": 0.5434873700141907, + "step": 5274 + }, + { + "epoch": 1.5424769703172978, + "grad_norm": 1.8064402200670897, + "learning_rate": 2.7317955738780333e-06, + "loss": 0.6195025444030762, + 
"step": 5275 + }, + { + "epoch": 1.5427694107325634, + "grad_norm": 1.6751288499310806, + "learning_rate": 2.728475813583462e-06, + "loss": 0.5552260875701904, + "step": 5276 + }, + { + "epoch": 1.5430618511478287, + "grad_norm": 1.8146552227089312, + "learning_rate": 2.725157752963461e-06, + "loss": 0.5430501699447632, + "step": 5277 + }, + { + "epoch": 1.543354291563094, + "grad_norm": 2.1339271947469047, + "learning_rate": 2.7218413927936006e-06, + "loss": 0.633337676525116, + "step": 5278 + }, + { + "epoch": 1.5436467319783596, + "grad_norm": 1.6483089945499043, + "learning_rate": 2.718526733849062e-06, + "loss": 0.4974183738231659, + "step": 5279 + }, + { + "epoch": 1.5439391723936247, + "grad_norm": 2.06701718299293, + "learning_rate": 2.715213776904628e-06, + "loss": 0.5840449929237366, + "step": 5280 + }, + { + "epoch": 1.5442316128088902, + "grad_norm": 1.480832016038464, + "learning_rate": 2.7119025227346807e-06, + "loss": 0.4684101343154907, + "step": 5281 + }, + { + "epoch": 1.5445240532241555, + "grad_norm": 1.5849030043466241, + "learning_rate": 2.7085929721132078e-06, + "loss": 0.48402637243270874, + "step": 5282 + }, + { + "epoch": 1.5448164936394209, + "grad_norm": 1.6449199299919448, + "learning_rate": 2.7052851258137936e-06, + "loss": 0.6122831106185913, + "step": 5283 + }, + { + "epoch": 1.5451089340546864, + "grad_norm": 1.6951661547391625, + "learning_rate": 2.701978984609629e-06, + "loss": 0.5731217861175537, + "step": 5284 + }, + { + "epoch": 1.5454013744699517, + "grad_norm": 1.869052563685483, + "learning_rate": 2.6986745492735044e-06, + "loss": 0.5610803961753845, + "step": 5285 + }, + { + "epoch": 1.545693814885217, + "grad_norm": 1.4190791359210344, + "learning_rate": 2.695371820577811e-06, + "loss": 0.46112626791000366, + "step": 5286 + }, + { + "epoch": 1.5459862553004826, + "grad_norm": 2.1150576387004247, + "learning_rate": 2.692070799294542e-06, + "loss": 0.5368741154670715, + "step": 5287 + }, + { + "epoch": 1.546278695715748, + 
"grad_norm": 1.905327182706658, + "learning_rate": 2.688771486195293e-06, + "loss": 0.5991438627243042, + "step": 5288 + }, + { + "epoch": 1.5465711361310133, + "grad_norm": 1.9084615434749013, + "learning_rate": 2.685473882051254e-06, + "loss": 0.5751149654388428, + "step": 5289 + }, + { + "epoch": 1.5468635765462788, + "grad_norm": 2.0751264575493247, + "learning_rate": 2.682177987633221e-06, + "loss": 0.6055437326431274, + "step": 5290 + }, + { + "epoch": 1.547156016961544, + "grad_norm": 1.8883429200709412, + "learning_rate": 2.6788838037115916e-06, + "loss": 0.6009221076965332, + "step": 5291 + }, + { + "epoch": 1.5474484573768095, + "grad_norm": 1.8170478309101001, + "learning_rate": 2.6755913310563585e-06, + "loss": 0.6071531772613525, + "step": 5292 + }, + { + "epoch": 1.547740897792075, + "grad_norm": 1.4851824864906211, + "learning_rate": 2.6723005704371164e-06, + "loss": 0.4102080464363098, + "step": 5293 + }, + { + "epoch": 1.5480333382073401, + "grad_norm": 1.861843061560023, + "learning_rate": 2.6690115226230663e-06, + "loss": 0.48021870851516724, + "step": 5294 + }, + { + "epoch": 1.5483257786226057, + "grad_norm": 1.916351154521063, + "learning_rate": 2.665724188382999e-06, + "loss": 0.4893236458301544, + "step": 5295 + }, + { + "epoch": 1.548618219037871, + "grad_norm": 1.611822755629755, + "learning_rate": 2.6624385684853095e-06, + "loss": 0.6365019083023071, + "step": 5296 + }, + { + "epoch": 1.5489106594531363, + "grad_norm": 1.8901541843584413, + "learning_rate": 2.659154663697995e-06, + "loss": 0.46510767936706543, + "step": 5297 + }, + { + "epoch": 1.5492030998684019, + "grad_norm": 1.4887188273793392, + "learning_rate": 2.655872474788641e-06, + "loss": 0.4355175495147705, + "step": 5298 + }, + { + "epoch": 1.5494955402836672, + "grad_norm": 1.3536753107928572, + "learning_rate": 2.6525920025244432e-06, + "loss": 0.5180836915969849, + "step": 5299 + }, + { + "epoch": 1.5497879806989325, + "grad_norm": 1.9072335806805663, + "learning_rate": 
2.6493132476721927e-06, + "loss": 0.5597968101501465, + "step": 5300 + }, + { + "epoch": 1.550080421114198, + "grad_norm": 1.7134796878533993, + "learning_rate": 2.646036210998276e-06, + "loss": 0.6581016778945923, + "step": 5301 + }, + { + "epoch": 1.5503728615294634, + "grad_norm": 1.8671635537156963, + "learning_rate": 2.642760893268684e-06, + "loss": 0.4875848889350891, + "step": 5302 + }, + { + "epoch": 1.5506653019447287, + "grad_norm": 1.571897962721608, + "learning_rate": 2.639487295248999e-06, + "loss": 0.4410843253135681, + "step": 5303 + }, + { + "epoch": 1.5509577423599943, + "grad_norm": 1.8113376757557438, + "learning_rate": 2.6362154177044076e-06, + "loss": 0.5829580426216125, + "step": 5304 + }, + { + "epoch": 1.5512501827752594, + "grad_norm": 1.6979805053981243, + "learning_rate": 2.6329452613996886e-06, + "loss": 0.6281459927558899, + "step": 5305 + }, + { + "epoch": 1.551542623190525, + "grad_norm": 1.6778942363253981, + "learning_rate": 2.629676827099222e-06, + "loss": 0.525640606880188, + "step": 5306 + }, + { + "epoch": 1.5518350636057903, + "grad_norm": 1.710219412838542, + "learning_rate": 2.626410115566985e-06, + "loss": 0.5219406485557556, + "step": 5307 + }, + { + "epoch": 1.5521275040210556, + "grad_norm": 1.7812622188686809, + "learning_rate": 2.623145127566555e-06, + "loss": 0.5120927691459656, + "step": 5308 + }, + { + "epoch": 1.5524199444363211, + "grad_norm": 1.856533490372594, + "learning_rate": 2.6198818638610967e-06, + "loss": 0.586410641670227, + "step": 5309 + }, + { + "epoch": 1.5527123848515865, + "grad_norm": 1.726189213717832, + "learning_rate": 2.6166203252133803e-06, + "loss": 0.5014485120773315, + "step": 5310 + }, + { + "epoch": 1.5530048252668518, + "grad_norm": 1.7251785105103856, + "learning_rate": 2.6133605123857707e-06, + "loss": 0.5087070465087891, + "step": 5311 + }, + { + "epoch": 1.5532972656821173, + "grad_norm": 1.9411711444593984, + "learning_rate": 2.610102426140231e-06, + "loss": 0.5829774737358093, + 
"step": 5312 + }, + { + "epoch": 1.5535897060973827, + "grad_norm": 1.9403338817582965, + "learning_rate": 2.6068460672383166e-06, + "loss": 0.5273870229721069, + "step": 5313 + }, + { + "epoch": 1.553882146512648, + "grad_norm": 1.6781304796241345, + "learning_rate": 2.603591436441183e-06, + "loss": 0.528778076171875, + "step": 5314 + }, + { + "epoch": 1.5541745869279135, + "grad_norm": 1.6477790459502455, + "learning_rate": 2.600338534509581e-06, + "loss": 0.4914259612560272, + "step": 5315 + }, + { + "epoch": 1.5544670273431789, + "grad_norm": 1.5838952242674544, + "learning_rate": 2.597087362203855e-06, + "loss": 0.48063480854034424, + "step": 5316 + }, + { + "epoch": 1.5547594677584442, + "grad_norm": 1.6948007690415343, + "learning_rate": 2.593837920283949e-06, + "loss": 0.4406088888645172, + "step": 5317 + }, + { + "epoch": 1.5550519081737098, + "grad_norm": 1.5839061375343884, + "learning_rate": 2.590590209509398e-06, + "loss": 0.5027159452438354, + "step": 5318 + }, + { + "epoch": 1.5553443485889749, + "grad_norm": 1.447462212774582, + "learning_rate": 2.5873442306393357e-06, + "loss": 0.3894188404083252, + "step": 5319 + }, + { + "epoch": 1.5556367890042404, + "grad_norm": 1.8834380096125083, + "learning_rate": 2.584099984432492e-06, + "loss": 0.5393104553222656, + "step": 5320 + }, + { + "epoch": 1.5559292294195057, + "grad_norm": 1.640256381642302, + "learning_rate": 2.580857471647186e-06, + "loss": 0.5701737999916077, + "step": 5321 + }, + { + "epoch": 1.556221669834771, + "grad_norm": 1.9050066043706444, + "learning_rate": 2.577616693041336e-06, + "loss": 0.6173145174980164, + "step": 5322 + }, + { + "epoch": 1.5565141102500366, + "grad_norm": 1.718666562714064, + "learning_rate": 2.5743776493724548e-06, + "loss": 0.534600555896759, + "step": 5323 + }, + { + "epoch": 1.556806550665302, + "grad_norm": 1.7258193752543447, + "learning_rate": 2.571140341397651e-06, + "loss": 0.5205268859863281, + "step": 5324 + }, + { + "epoch": 1.5570989910805673, + 
"grad_norm": 1.9160383524514086, + "learning_rate": 2.5679047698736224e-06, + "loss": 0.5631835460662842, + "step": 5325 + }, + { + "epoch": 1.5573914314958328, + "grad_norm": 1.786367865175988, + "learning_rate": 2.564670935556667e-06, + "loss": 0.5855015516281128, + "step": 5326 + }, + { + "epoch": 1.5576838719110981, + "grad_norm": 1.538967985462843, + "learning_rate": 2.5614388392026735e-06, + "loss": 0.5219928026199341, + "step": 5327 + }, + { + "epoch": 1.5579763123263635, + "grad_norm": 1.6118392863192783, + "learning_rate": 2.5582084815671225e-06, + "loss": 0.50178462266922, + "step": 5328 + }, + { + "epoch": 1.558268752741629, + "grad_norm": 1.65351304969076, + "learning_rate": 2.554979863405094e-06, + "loss": 0.643866777420044, + "step": 5329 + }, + { + "epoch": 1.5585611931568941, + "grad_norm": 1.6117676019433484, + "learning_rate": 2.5517529854712543e-06, + "loss": 0.4976714849472046, + "step": 5330 + }, + { + "epoch": 1.5588536335721597, + "grad_norm": 1.6012275122207043, + "learning_rate": 2.5485278485198716e-06, + "loss": 0.47352612018585205, + "step": 5331 + }, + { + "epoch": 1.5591460739874252, + "grad_norm": 1.5967917267320113, + "learning_rate": 2.5453044533047955e-06, + "loss": 0.6319230794906616, + "step": 5332 + }, + { + "epoch": 1.5594385144026903, + "grad_norm": 1.9005541524381997, + "learning_rate": 2.5420828005794786e-06, + "loss": 0.724555253982544, + "step": 5333 + }, + { + "epoch": 1.5597309548179559, + "grad_norm": 1.987695201205215, + "learning_rate": 2.5388628910969625e-06, + "loss": 0.6235928535461426, + "step": 5334 + }, + { + "epoch": 1.5600233952332212, + "grad_norm": 1.9501926966829706, + "learning_rate": 2.5356447256098805e-06, + "loss": 0.47880417108535767, + "step": 5335 + }, + { + "epoch": 1.5603158356484865, + "grad_norm": 1.451114547860928, + "learning_rate": 2.53242830487046e-06, + "loss": 0.3986828029155731, + "step": 5336 + }, + { + "epoch": 1.560608276063752, + "grad_norm": 1.747029246487311, + "learning_rate": 
2.529213629630519e-06, + "loss": 0.515389084815979, + "step": 5337 + }, + { + "epoch": 1.5609007164790174, + "grad_norm": 1.4773319281213657, + "learning_rate": 2.52600070064147e-06, + "loss": 0.611845076084137, + "step": 5338 + }, + { + "epoch": 1.5611931568942827, + "grad_norm": 1.4758258492307896, + "learning_rate": 2.522789518654314e-06, + "loss": 0.4417461156845093, + "step": 5339 + }, + { + "epoch": 1.5614855973095483, + "grad_norm": 1.819505142519117, + "learning_rate": 2.519580084419646e-06, + "loss": 0.5082979798316956, + "step": 5340 + }, + { + "epoch": 1.5617780377248136, + "grad_norm": 1.6547823991622836, + "learning_rate": 2.516372398687652e-06, + "loss": 0.4535973072052002, + "step": 5341 + }, + { + "epoch": 1.562070478140079, + "grad_norm": 1.5836674832459754, + "learning_rate": 2.513166462208111e-06, + "loss": 0.5528950095176697, + "step": 5342 + }, + { + "epoch": 1.5623629185553445, + "grad_norm": 1.9642626952112248, + "learning_rate": 2.5099622757303865e-06, + "loss": 0.6272662281990051, + "step": 5343 + }, + { + "epoch": 1.5626553589706096, + "grad_norm": 1.6065246572629583, + "learning_rate": 2.506759840003439e-06, + "loss": 0.602135181427002, + "step": 5344 + }, + { + "epoch": 1.5629477993858751, + "grad_norm": 1.6289588222907745, + "learning_rate": 2.5035591557758197e-06, + "loss": 0.6336733102798462, + "step": 5345 + }, + { + "epoch": 1.5632402398011405, + "grad_norm": 1.6487862192612195, + "learning_rate": 2.500360223795668e-06, + "loss": 0.5819063186645508, + "step": 5346 + }, + { + "epoch": 1.5635326802164058, + "grad_norm": 1.9625665043715836, + "learning_rate": 2.4971630448107166e-06, + "loss": 0.6384624242782593, + "step": 5347 + }, + { + "epoch": 1.5638251206316713, + "grad_norm": 1.7408709214756897, + "learning_rate": 2.493967619568285e-06, + "loss": 0.5495754480361938, + "step": 5348 + }, + { + "epoch": 1.5641175610469367, + "grad_norm": 1.7544921790911043, + "learning_rate": 2.490773948815284e-06, + "loss": 0.5661545395851135, + 
"step": 5349 + }, + { + "epoch": 1.564410001462202, + "grad_norm": 1.6122536544450556, + "learning_rate": 2.487582033298217e-06, + "loss": 0.47731083631515503, + "step": 5350 + }, + { + "epoch": 1.5647024418774675, + "grad_norm": 1.6660059461046859, + "learning_rate": 2.4843918737631724e-06, + "loss": 0.5081999897956848, + "step": 5351 + }, + { + "epoch": 1.5649948822927329, + "grad_norm": 1.7409567692793637, + "learning_rate": 2.481203470955832e-06, + "loss": 0.4803314208984375, + "step": 5352 + }, + { + "epoch": 1.5652873227079982, + "grad_norm": 1.5751543533365946, + "learning_rate": 2.4780168256214687e-06, + "loss": 0.5049692392349243, + "step": 5353 + }, + { + "epoch": 1.5655797631232637, + "grad_norm": 1.5980094392584046, + "learning_rate": 2.4748319385049346e-06, + "loss": 0.46404945850372314, + "step": 5354 + }, + { + "epoch": 1.565872203538529, + "grad_norm": 1.8809652221147528, + "learning_rate": 2.471648810350681e-06, + "loss": 0.426737904548645, + "step": 5355 + }, + { + "epoch": 1.5661646439537944, + "grad_norm": 1.8658447876398343, + "learning_rate": 2.4684674419027445e-06, + "loss": 0.511459231376648, + "step": 5356 + }, + { + "epoch": 1.56645708436906, + "grad_norm": 1.6030611377734088, + "learning_rate": 2.4652878339047516e-06, + "loss": 0.5199254155158997, + "step": 5357 + }, + { + "epoch": 1.566749524784325, + "grad_norm": 1.8647690278368902, + "learning_rate": 2.4621099870999156e-06, + "loss": 0.6220999360084534, + "step": 5358 + }, + { + "epoch": 1.5670419651995906, + "grad_norm": 1.6243824818203765, + "learning_rate": 2.4589339022310386e-06, + "loss": 0.598499059677124, + "step": 5359 + }, + { + "epoch": 1.567334405614856, + "grad_norm": 1.6070369897776633, + "learning_rate": 2.455759580040512e-06, + "loss": 0.4726351499557495, + "step": 5360 + }, + { + "epoch": 1.5676268460301213, + "grad_norm": 1.5276631939356082, + "learning_rate": 2.452587021270314e-06, + "loss": 0.4492379426956177, + "step": 5361 + }, + { + "epoch": 1.5679192864453868, + 
"grad_norm": 1.5322598639207448, + "learning_rate": 2.4494162266620105e-06, + "loss": 0.46546655893325806, + "step": 5362 + }, + { + "epoch": 1.5682117268606521, + "grad_norm": 1.5784589531224524, + "learning_rate": 2.446247196956756e-06, + "loss": 0.45048198103904724, + "step": 5363 + }, + { + "epoch": 1.5685041672759175, + "grad_norm": 1.7001549698958467, + "learning_rate": 2.4430799328952935e-06, + "loss": 0.543383002281189, + "step": 5364 + }, + { + "epoch": 1.568796607691183, + "grad_norm": 1.881054972907132, + "learning_rate": 2.4399144352179484e-06, + "loss": 0.560661256313324, + "step": 5365 + }, + { + "epoch": 1.5690890481064483, + "grad_norm": 1.7380225532335671, + "learning_rate": 2.4367507046646367e-06, + "loss": 0.4915887117385864, + "step": 5366 + }, + { + "epoch": 1.5693814885217137, + "grad_norm": 3.6756946542988396, + "learning_rate": 2.433588741974863e-06, + "loss": 0.576668918132782, + "step": 5367 + }, + { + "epoch": 1.5696739289369792, + "grad_norm": 1.9696979271734443, + "learning_rate": 2.4304285478877134e-06, + "loss": 0.615422248840332, + "step": 5368 + }, + { + "epoch": 1.5699663693522443, + "grad_norm": 1.7262412669866045, + "learning_rate": 2.4272701231418706e-06, + "loss": 0.505649209022522, + "step": 5369 + }, + { + "epoch": 1.5702588097675099, + "grad_norm": 1.6721925296757776, + "learning_rate": 2.424113468475593e-06, + "loss": 0.4803265929222107, + "step": 5370 + }, + { + "epoch": 1.5705512501827754, + "grad_norm": 1.5546849518292136, + "learning_rate": 2.4209585846267293e-06, + "loss": 0.43251073360443115, + "step": 5371 + }, + { + "epoch": 1.5708436905980405, + "grad_norm": 1.517432850414526, + "learning_rate": 2.417805472332716e-06, + "loss": 0.6021081209182739, + "step": 5372 + }, + { + "epoch": 1.571136131013306, + "grad_norm": 1.5438721648404399, + "learning_rate": 2.414654132330575e-06, + "loss": 0.5236715078353882, + "step": 5373 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 1.7272971424194805, + "learning_rate": 
2.4115045653569092e-06, + "loss": 0.45632290840148926, + "step": 5374 + }, + { + "epoch": 1.5717210118438367, + "grad_norm": 1.51681371819029, + "learning_rate": 2.408356772147912e-06, + "loss": 0.5745086669921875, + "step": 5375 + }, + { + "epoch": 1.5720134522591023, + "grad_norm": 1.7235832219181546, + "learning_rate": 2.405210753439361e-06, + "loss": 0.6032901406288147, + "step": 5376 + }, + { + "epoch": 1.5723058926743676, + "grad_norm": 1.9887425059975659, + "learning_rate": 2.40206650996662e-06, + "loss": 0.579899787902832, + "step": 5377 + }, + { + "epoch": 1.572598333089633, + "grad_norm": 1.84593228973349, + "learning_rate": 2.3989240424646355e-06, + "loss": 0.5920897722244263, + "step": 5378 + }, + { + "epoch": 1.5728907735048985, + "grad_norm": 1.6814027292095717, + "learning_rate": 2.395783351667941e-06, + "loss": 0.5080469846725464, + "step": 5379 + }, + { + "epoch": 1.5731832139201638, + "grad_norm": 1.6852885660534134, + "learning_rate": 2.392644438310654e-06, + "loss": 0.6438730955123901, + "step": 5380 + }, + { + "epoch": 1.5734756543354291, + "grad_norm": 1.5835392817230642, + "learning_rate": 2.389507303126475e-06, + "loss": 0.6496621370315552, + "step": 5381 + }, + { + "epoch": 1.5737680947506947, + "grad_norm": 2.056471050614057, + "learning_rate": 2.3863719468486925e-06, + "loss": 0.5780459642410278, + "step": 5382 + }, + { + "epoch": 1.5740605351659598, + "grad_norm": 1.6854861118133662, + "learning_rate": 2.3832383702101747e-06, + "loss": 0.47817176580429077, + "step": 5383 + }, + { + "epoch": 1.5743529755812253, + "grad_norm": 1.8294128359408837, + "learning_rate": 2.3801065739433816e-06, + "loss": 0.565629243850708, + "step": 5384 + }, + { + "epoch": 1.5746454159964907, + "grad_norm": 1.6612699899563574, + "learning_rate": 2.376976558780343e-06, + "loss": 0.6291453838348389, + "step": 5385 + }, + { + "epoch": 1.574937856411756, + "grad_norm": 1.538236610732314, + "learning_rate": 2.3738483254526856e-06, + "loss": 0.5309170484542847, + 
"step": 5386 + }, + { + "epoch": 1.5752302968270215, + "grad_norm": 1.5901478294831086, + "learning_rate": 2.370721874691614e-06, + "loss": 0.36860692501068115, + "step": 5387 + }, + { + "epoch": 1.5755227372422869, + "grad_norm": 1.4970687777761233, + "learning_rate": 2.3675972072279172e-06, + "loss": 0.4871997833251953, + "step": 5388 + }, + { + "epoch": 1.5758151776575522, + "grad_norm": 1.7243858787556505, + "learning_rate": 2.3644743237919674e-06, + "loss": 0.5318939685821533, + "step": 5389 + }, + { + "epoch": 1.5761076180728177, + "grad_norm": 1.6509311118620078, + "learning_rate": 2.3613532251137205e-06, + "loss": 0.5851289629936218, + "step": 5390 + }, + { + "epoch": 1.576400058488083, + "grad_norm": 1.7554122423009038, + "learning_rate": 2.358233911922713e-06, + "loss": 0.5535321235656738, + "step": 5391 + }, + { + "epoch": 1.5766924989033484, + "grad_norm": 1.6614076147074466, + "learning_rate": 2.3551163849480664e-06, + "loss": 0.5443980693817139, + "step": 5392 + }, + { + "epoch": 1.576984939318614, + "grad_norm": 1.7236213464789372, + "learning_rate": 2.352000644918483e-06, + "loss": 0.6381241083145142, + "step": 5393 + }, + { + "epoch": 1.5772773797338793, + "grad_norm": 1.7284545309348427, + "learning_rate": 2.348886692562248e-06, + "loss": 0.5710772275924683, + "step": 5394 + }, + { + "epoch": 1.5775698201491446, + "grad_norm": 1.5430684665624785, + "learning_rate": 2.3457745286072307e-06, + "loss": 0.5507428050041199, + "step": 5395 + }, + { + "epoch": 1.5778622605644101, + "grad_norm": 1.4206197407713899, + "learning_rate": 2.342664153780878e-06, + "loss": 0.4475744366645813, + "step": 5396 + }, + { + "epoch": 1.5781547009796753, + "grad_norm": 1.636583588423456, + "learning_rate": 2.339555568810221e-06, + "loss": 0.5237560868263245, + "step": 5397 + }, + { + "epoch": 1.5784471413949408, + "grad_norm": 1.8224385271688819, + "learning_rate": 2.3364487744218735e-06, + "loss": 0.513353705406189, + "step": 5398 + }, + { + "epoch": 1.5787395818102061, 
+ "grad_norm": 1.7286392562782233, + "learning_rate": 2.3333437713420305e-06, + "loss": 0.5986731052398682, + "step": 5399 + }, + { + "epoch": 1.5790320222254715, + "grad_norm": 1.5907081834202914, + "learning_rate": 2.330240560296466e-06, + "loss": 0.5834506750106812, + "step": 5400 + }, + { + "epoch": 1.579324462640737, + "grad_norm": 1.4316449017872799, + "learning_rate": 2.3271391420105384e-06, + "loss": 0.4756021499633789, + "step": 5401 + }, + { + "epoch": 1.5796169030560023, + "grad_norm": 1.828748410964233, + "learning_rate": 2.3240395172091847e-06, + "loss": 0.5524263978004456, + "step": 5402 + }, + { + "epoch": 1.5799093434712677, + "grad_norm": 1.7797701447484084, + "learning_rate": 2.320941686616922e-06, + "loss": 0.5689926743507385, + "step": 5403 + }, + { + "epoch": 1.5802017838865332, + "grad_norm": 2.079791124123793, + "learning_rate": 2.317845650957852e-06, + "loss": 0.5737600326538086, + "step": 5404 + }, + { + "epoch": 1.5804942243017985, + "grad_norm": 2.1591480990218406, + "learning_rate": 2.314751410955652e-06, + "loss": 0.585626482963562, + "step": 5405 + }, + { + "epoch": 1.5807866647170639, + "grad_norm": 1.3475179143489473, + "learning_rate": 2.3116589673335833e-06, + "loss": 0.4410518407821655, + "step": 5406 + }, + { + "epoch": 1.5810791051323294, + "grad_norm": 1.4002471500541231, + "learning_rate": 2.308568320814487e-06, + "loss": 0.49071764945983887, + "step": 5407 + }, + { + "epoch": 1.5813715455475945, + "grad_norm": 1.7384943405251394, + "learning_rate": 2.3054794721207796e-06, + "loss": 0.5332186818122864, + "step": 5408 + }, + { + "epoch": 1.58166398596286, + "grad_norm": 1.672632129609112, + "learning_rate": 2.3023924219744607e-06, + "loss": 0.4655637741088867, + "step": 5409 + }, + { + "epoch": 1.5819564263781256, + "grad_norm": 1.8700821530052487, + "learning_rate": 2.2993071710971115e-06, + "loss": 0.4226027727127075, + "step": 5410 + }, + { + "epoch": 1.5822488667933907, + "grad_norm": 1.662889108823369, + "learning_rate": 
2.2962237202098903e-06, + "loss": 0.5582948923110962, + "step": 5411 + }, + { + "epoch": 1.5825413072086563, + "grad_norm": 1.9177043486104604, + "learning_rate": 2.293142070033535e-06, + "loss": 0.6695314645767212, + "step": 5412 + }, + { + "epoch": 1.5828337476239216, + "grad_norm": 1.3346239854361734, + "learning_rate": 2.2900622212883617e-06, + "loss": 0.39315858483314514, + "step": 5413 + }, + { + "epoch": 1.583126188039187, + "grad_norm": 1.6781692583647863, + "learning_rate": 2.2869841746942666e-06, + "loss": 0.5034759044647217, + "step": 5414 + }, + { + "epoch": 1.5834186284544525, + "grad_norm": 1.9091862181504, + "learning_rate": 2.2839079309707256e-06, + "loss": 0.6739548444747925, + "step": 5415 + }, + { + "epoch": 1.5837110688697178, + "grad_norm": 1.700292089346711, + "learning_rate": 2.2808334908367914e-06, + "loss": 0.4091438949108124, + "step": 5416 + }, + { + "epoch": 1.5840035092849831, + "grad_norm": 1.9132208987373394, + "learning_rate": 2.277760855011094e-06, + "loss": 0.5543409585952759, + "step": 5417 + }, + { + "epoch": 1.5842959497002487, + "grad_norm": 1.5448108643055853, + "learning_rate": 2.2746900242118487e-06, + "loss": 0.44680702686309814, + "step": 5418 + }, + { + "epoch": 1.584588390115514, + "grad_norm": 1.812422444695138, + "learning_rate": 2.271620999156837e-06, + "loss": 0.604156494140625, + "step": 5419 + }, + { + "epoch": 1.5848808305307793, + "grad_norm": 1.7746704953171426, + "learning_rate": 2.268553780563427e-06, + "loss": 0.6055774688720703, + "step": 5420 + }, + { + "epoch": 1.5851732709460449, + "grad_norm": 1.6413153541100303, + "learning_rate": 2.265488369148563e-06, + "loss": 0.5826502442359924, + "step": 5421 + }, + { + "epoch": 1.58546571136131, + "grad_norm": 1.6438604610732335, + "learning_rate": 2.2624247656287658e-06, + "loss": 0.61782306432724, + "step": 5422 + }, + { + "epoch": 1.5857581517765755, + "grad_norm": 1.6412325546038886, + "learning_rate": 2.2593629707201348e-06, + "loss": 0.5561526417732239, + 
"step": 5423 + }, + { + "epoch": 1.5860505921918409, + "grad_norm": 1.934339107757701, + "learning_rate": 2.2563029851383447e-06, + "loss": 0.6122138500213623, + "step": 5424 + }, + { + "epoch": 1.5863430326071062, + "grad_norm": 1.7721974769204, + "learning_rate": 2.2532448095986504e-06, + "loss": 0.5694067478179932, + "step": 5425 + }, + { + "epoch": 1.5866354730223717, + "grad_norm": 2.0424311158796145, + "learning_rate": 2.2501884448158804e-06, + "loss": 0.5243874788284302, + "step": 5426 + }, + { + "epoch": 1.586927913437637, + "grad_norm": 1.8166715080001115, + "learning_rate": 2.2471338915044414e-06, + "loss": 0.5144485831260681, + "step": 5427 + }, + { + "epoch": 1.5872203538529024, + "grad_norm": 1.853424108367526, + "learning_rate": 2.244081150378318e-06, + "loss": 0.5013881325721741, + "step": 5428 + }, + { + "epoch": 1.587512794268168, + "grad_norm": 1.7554305935150418, + "learning_rate": 2.2410302221510704e-06, + "loss": 0.45199382305145264, + "step": 5429 + }, + { + "epoch": 1.5878052346834333, + "grad_norm": 1.7321007114143003, + "learning_rate": 2.2379811075358315e-06, + "loss": 0.4699060022830963, + "step": 5430 + }, + { + "epoch": 1.5880976750986986, + "grad_norm": 1.6542253790144112, + "learning_rate": 2.234933807245314e-06, + "loss": 0.6530928611755371, + "step": 5431 + }, + { + "epoch": 1.5883901155139641, + "grad_norm": 2.1734435533671337, + "learning_rate": 2.2318883219918075e-06, + "loss": 0.653563380241394, + "step": 5432 + }, + { + "epoch": 1.5886825559292295, + "grad_norm": 1.6977334736027891, + "learning_rate": 2.2288446524871743e-06, + "loss": 0.5283595323562622, + "step": 5433 + }, + { + "epoch": 1.5889749963444948, + "grad_norm": 1.8947978942641126, + "learning_rate": 2.2258027994428543e-06, + "loss": 0.4382442831993103, + "step": 5434 + }, + { + "epoch": 1.5892674367597603, + "grad_norm": 1.5530659953902877, + "learning_rate": 2.2227627635698624e-06, + "loss": 0.427448570728302, + "step": 5435 + }, + { + "epoch": 1.5895598771750254, 
+ "grad_norm": 1.9154573086486193, + "learning_rate": 2.2197245455787875e-06, + "loss": 0.5794345140457153, + "step": 5436 + }, + { + "epoch": 1.589852317590291, + "grad_norm": 1.7112908574959096, + "learning_rate": 2.2166881461797953e-06, + "loss": 0.4996277987957001, + "step": 5437 + }, + { + "epoch": 1.5901447580055563, + "grad_norm": 1.6191576283665394, + "learning_rate": 2.213653566082625e-06, + "loss": 0.580248236656189, + "step": 5438 + }, + { + "epoch": 1.5904371984208217, + "grad_norm": 1.7775881602556973, + "learning_rate": 2.210620805996594e-06, + "loss": 0.5173758864402771, + "step": 5439 + }, + { + "epoch": 1.5907296388360872, + "grad_norm": 1.9899233221127093, + "learning_rate": 2.2075898666305908e-06, + "loss": 0.5336873531341553, + "step": 5440 + }, + { + "epoch": 1.5910220792513525, + "grad_norm": 1.6076505919691177, + "learning_rate": 2.204560748693074e-06, + "loss": 0.44921910762786865, + "step": 5441 + }, + { + "epoch": 1.5913145196666179, + "grad_norm": 1.6059480320951056, + "learning_rate": 2.201533452892086e-06, + "loss": 0.46475526690483093, + "step": 5442 + }, + { + "epoch": 1.5916069600818834, + "grad_norm": 1.9029311381102771, + "learning_rate": 2.1985079799352383e-06, + "loss": 0.6213991045951843, + "step": 5443 + }, + { + "epoch": 1.5918994004971487, + "grad_norm": 1.7215123021526133, + "learning_rate": 2.1954843305297138e-06, + "loss": 0.5271334648132324, + "step": 5444 + }, + { + "epoch": 1.592191840912414, + "grad_norm": 2.0224569757299333, + "learning_rate": 2.192462505382277e-06, + "loss": 0.6957610249519348, + "step": 5445 + }, + { + "epoch": 1.5924842813276796, + "grad_norm": 1.474394106198892, + "learning_rate": 2.1894425051992587e-06, + "loss": 0.4935681223869324, + "step": 5446 + }, + { + "epoch": 1.5927767217429447, + "grad_norm": 1.8779887346615283, + "learning_rate": 2.1864243306865663e-06, + "loss": 0.7389976978302002, + "step": 5447 + }, + { + "epoch": 1.5930691621582103, + "grad_norm": 1.6663547791548505, + 
"learning_rate": 2.183407982549679e-06, + "loss": 0.4711039662361145, + "step": 5448 + }, + { + "epoch": 1.5933616025734758, + "grad_norm": 1.7966119865723598, + "learning_rate": 2.180393461493654e-06, + "loss": 0.5640024542808533, + "step": 5449 + }, + { + "epoch": 1.593654042988741, + "grad_norm": 1.8836976245237465, + "learning_rate": 2.1773807682231095e-06, + "loss": 0.5471343994140625, + "step": 5450 + }, + { + "epoch": 1.5939464834040065, + "grad_norm": 1.948314000978572, + "learning_rate": 2.1743699034422483e-06, + "loss": 0.4971361756324768, + "step": 5451 + }, + { + "epoch": 1.5942389238192718, + "grad_norm": 1.816359724345797, + "learning_rate": 2.1713608678548414e-06, + "loss": 0.6338681578636169, + "step": 5452 + }, + { + "epoch": 1.5945313642345371, + "grad_norm": 1.5944974938870278, + "learning_rate": 2.168353662164233e-06, + "loss": 0.5218038558959961, + "step": 5453 + }, + { + "epoch": 1.5948238046498027, + "grad_norm": 1.4835669226996004, + "learning_rate": 2.165348287073339e-06, + "loss": 0.44414108991622925, + "step": 5454 + }, + { + "epoch": 1.595116245065068, + "grad_norm": 1.741912524884312, + "learning_rate": 2.162344743284647e-06, + "loss": 0.5994665622711182, + "step": 5455 + }, + { + "epoch": 1.5954086854803333, + "grad_norm": 1.8002543813503216, + "learning_rate": 2.159343031500217e-06, + "loss": 0.6745023727416992, + "step": 5456 + }, + { + "epoch": 1.5957011258955989, + "grad_norm": 1.8583415901166598, + "learning_rate": 2.1563431524216825e-06, + "loss": 0.4678364396095276, + "step": 5457 + }, + { + "epoch": 1.5959935663108642, + "grad_norm": 1.5733048792098263, + "learning_rate": 2.1533451067502464e-06, + "loss": 0.5792031288146973, + "step": 5458 + }, + { + "epoch": 1.5962860067261295, + "grad_norm": 1.575360514250564, + "learning_rate": 2.1503488951866822e-06, + "loss": 0.48152512311935425, + "step": 5459 + }, + { + "epoch": 1.596578447141395, + "grad_norm": 1.6753593421486697, + "learning_rate": 2.147354518431339e-06, + "loss": 
0.4407780170440674, + "step": 5460 + }, + { + "epoch": 1.5968708875566602, + "grad_norm": 1.5845476508430212, + "learning_rate": 2.1443619771841308e-06, + "loss": 0.41062241792678833, + "step": 5461 + }, + { + "epoch": 1.5971633279719257, + "grad_norm": 1.6329985009235597, + "learning_rate": 2.1413712721445478e-06, + "loss": 0.4564778208732605, + "step": 5462 + }, + { + "epoch": 1.597455768387191, + "grad_norm": 1.7819738842734478, + "learning_rate": 2.1383824040116474e-06, + "loss": 0.4347888231277466, + "step": 5463 + }, + { + "epoch": 1.5977482088024564, + "grad_norm": 1.7547086253653914, + "learning_rate": 2.1353953734840615e-06, + "loss": 0.574216902256012, + "step": 5464 + }, + { + "epoch": 1.598040649217722, + "grad_norm": 1.5449681232026575, + "learning_rate": 2.1324101812599884e-06, + "loss": 0.46540650725364685, + "step": 5465 + }, + { + "epoch": 1.5983330896329873, + "grad_norm": 1.7330971380509632, + "learning_rate": 2.129426828037201e-06, + "loss": 0.5446870923042297, + "step": 5466 + }, + { + "epoch": 1.5986255300482526, + "grad_norm": 1.5387720739202952, + "learning_rate": 2.126445314513038e-06, + "loss": 0.5442406535148621, + "step": 5467 + }, + { + "epoch": 1.5989179704635181, + "grad_norm": 1.5552673745283687, + "learning_rate": 2.1234656413844114e-06, + "loss": 0.48960334062576294, + "step": 5468 + }, + { + "epoch": 1.5992104108787835, + "grad_norm": 1.6554781479614895, + "learning_rate": 2.1204878093477998e-06, + "loss": 0.5053935647010803, + "step": 5469 + }, + { + "epoch": 1.5995028512940488, + "grad_norm": 1.9853825289751812, + "learning_rate": 2.117511819099256e-06, + "loss": 0.5984711647033691, + "step": 5470 + }, + { + "epoch": 1.5997952917093143, + "grad_norm": 1.7887732493049897, + "learning_rate": 2.1145376713344e-06, + "loss": 0.6060935258865356, + "step": 5471 + }, + { + "epoch": 1.6000877321245797, + "grad_norm": 1.7731884284372257, + "learning_rate": 2.111565366748416e-06, + "loss": 0.5640311241149902, + "step": 5472 + }, + { + 
"epoch": 1.600380172539845, + "grad_norm": 1.4780823569090165, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.5127131342887878, + "step": 5473 + }, + { + "epoch": 1.6006726129551105, + "grad_norm": 1.7137118890776333, + "learning_rate": 2.1056262898916747e-06, + "loss": 0.5630159378051758, + "step": 5474 + }, + { + "epoch": 1.6009650533703756, + "grad_norm": 1.6419339983794916, + "learning_rate": 2.1026595190091403e-06, + "loss": 0.4511195421218872, + "step": 5475 + }, + { + "epoch": 1.6012574937856412, + "grad_norm": 1.5933389134682139, + "learning_rate": 2.099694594081927e-06, + "loss": 0.47073638439178467, + "step": 5476 + }, + { + "epoch": 1.6015499342009065, + "grad_norm": 1.7678159005173808, + "learning_rate": 2.0967315158030675e-06, + "loss": 0.47757452726364136, + "step": 5477 + }, + { + "epoch": 1.6018423746161718, + "grad_norm": 1.612539233178663, + "learning_rate": 2.093770284865164e-06, + "loss": 0.4703200161457062, + "step": 5478 + }, + { + "epoch": 1.6021348150314374, + "grad_norm": 1.7112390228319339, + "learning_rate": 2.090810901960385e-06, + "loss": 0.47457796335220337, + "step": 5479 + }, + { + "epoch": 1.6024272554467027, + "grad_norm": 1.6069409002673796, + "learning_rate": 2.087853367780469e-06, + "loss": 0.4907105267047882, + "step": 5480 + }, + { + "epoch": 1.602719695861968, + "grad_norm": 1.8859078577608002, + "learning_rate": 2.0848976830167224e-06, + "loss": 0.5329782962799072, + "step": 5481 + }, + { + "epoch": 1.6030121362772336, + "grad_norm": 1.8407304692969428, + "learning_rate": 2.0819438483600197e-06, + "loss": 0.45858579874038696, + "step": 5482 + }, + { + "epoch": 1.603304576692499, + "grad_norm": 1.7103287599993058, + "learning_rate": 2.0789918645007977e-06, + "loss": 0.47545814514160156, + "step": 5483 + }, + { + "epoch": 1.6035970171077643, + "grad_norm": 1.7521375813446352, + "learning_rate": 2.076041732129066e-06, + "loss": 0.5482660531997681, + "step": 5484 + }, + { + "epoch": 1.6038894575230298, + "grad_norm": 
1.650951498750666, + "learning_rate": 2.0730934519344025e-06, + "loss": 0.5252633094787598, + "step": 5485 + }, + { + "epoch": 1.604181897938295, + "grad_norm": 2.7727108215969882, + "learning_rate": 2.0701470246059472e-06, + "loss": 0.5400367379188538, + "step": 5486 + }, + { + "epoch": 1.6044743383535605, + "grad_norm": 1.5423948281806983, + "learning_rate": 2.0672024508324107e-06, + "loss": 0.4788953363895416, + "step": 5487 + }, + { + "epoch": 1.604766778768826, + "grad_norm": 1.6092306606930025, + "learning_rate": 2.0642597313020685e-06, + "loss": 0.5430850982666016, + "step": 5488 + }, + { + "epoch": 1.6050592191840911, + "grad_norm": 1.8683302543522238, + "learning_rate": 2.061318866702765e-06, + "loss": 0.5833520293235779, + "step": 5489 + }, + { + "epoch": 1.6053516595993567, + "grad_norm": 1.7369107165445012, + "learning_rate": 2.058379857721908e-06, + "loss": 0.5854958295822144, + "step": 5490 + }, + { + "epoch": 1.605644100014622, + "grad_norm": 1.6603772170749127, + "learning_rate": 2.0554427050464742e-06, + "loss": 0.5577352643013, + "step": 5491 + }, + { + "epoch": 1.6059365404298873, + "grad_norm": 1.6757677840410201, + "learning_rate": 2.052507409363004e-06, + "loss": 0.5328816175460815, + "step": 5492 + }, + { + "epoch": 1.6062289808451529, + "grad_norm": 1.7643397031335737, + "learning_rate": 2.0495739713576046e-06, + "loss": 0.5606744289398193, + "step": 5493 + }, + { + "epoch": 1.6065214212604182, + "grad_norm": 1.7836115172074085, + "learning_rate": 2.0466423917159526e-06, + "loss": 0.541358470916748, + "step": 5494 + }, + { + "epoch": 1.6068138616756835, + "grad_norm": 2.1455011977132714, + "learning_rate": 2.0437126711232826e-06, + "loss": 0.6578946709632874, + "step": 5495 + }, + { + "epoch": 1.607106302090949, + "grad_norm": 1.9512378226148355, + "learning_rate": 2.0407848102644002e-06, + "loss": 0.5967978239059448, + "step": 5496 + }, + { + "epoch": 1.6073987425062144, + "grad_norm": 1.623105883994405, + "learning_rate": 
2.037858809823675e-06, + "loss": 0.46947693824768066, + "step": 5497 + }, + { + "epoch": 1.6076911829214797, + "grad_norm": 1.5763151196056784, + "learning_rate": 2.0349346704850436e-06, + "loss": 0.5014760494232178, + "step": 5498 + }, + { + "epoch": 1.6079836233367453, + "grad_norm": 1.5417734514532708, + "learning_rate": 2.0320123929320033e-06, + "loss": 0.4399675726890564, + "step": 5499 + }, + { + "epoch": 1.6082760637520104, + "grad_norm": 1.8719036359624468, + "learning_rate": 2.0290919778476214e-06, + "loss": 0.4729107618331909, + "step": 5500 + }, + { + "epoch": 1.608568504167276, + "grad_norm": 1.5894079730285777, + "learning_rate": 2.0261734259145248e-06, + "loss": 0.5669134259223938, + "step": 5501 + }, + { + "epoch": 1.6088609445825413, + "grad_norm": 1.554035864612711, + "learning_rate": 2.0232567378149082e-06, + "loss": 0.4200817942619324, + "step": 5502 + }, + { + "epoch": 1.6091533849978066, + "grad_norm": 1.8154865090092227, + "learning_rate": 2.0203419142305303e-06, + "loss": 0.6057849526405334, + "step": 5503 + }, + { + "epoch": 1.6094458254130721, + "grad_norm": 1.7156552575659618, + "learning_rate": 2.017428955842713e-06, + "loss": 0.5644170045852661, + "step": 5504 + }, + { + "epoch": 1.6097382658283375, + "grad_norm": 1.9102243104698693, + "learning_rate": 2.014517863332345e-06, + "loss": 0.6368730068206787, + "step": 5505 + }, + { + "epoch": 1.6100307062436028, + "grad_norm": 1.5712918255487374, + "learning_rate": 2.0116086373798704e-06, + "loss": 0.4829355478286743, + "step": 5506 + }, + { + "epoch": 1.6103231466588683, + "grad_norm": 1.642541904242283, + "learning_rate": 2.0087012786653072e-06, + "loss": 0.5604796409606934, + "step": 5507 + }, + { + "epoch": 1.6106155870741337, + "grad_norm": 1.8591393596163848, + "learning_rate": 2.005795787868232e-06, + "loss": 0.5594274997711182, + "step": 5508 + }, + { + "epoch": 1.610908027489399, + "grad_norm": 1.607362999733334, + "learning_rate": 2.0028921656677857e-06, + "loss": 
0.5553449988365173, + "step": 5509 + }, + { + "epoch": 1.6112004679046645, + "grad_norm": 1.7968941470299316, + "learning_rate": 1.999990412742673e-06, + "loss": 0.5056631565093994, + "step": 5510 + }, + { + "epoch": 1.6114929083199299, + "grad_norm": 1.5654499452702673, + "learning_rate": 1.9970905297711606e-06, + "loss": 0.432037353515625, + "step": 5511 + }, + { + "epoch": 1.6117853487351952, + "grad_norm": 1.6991047972494284, + "learning_rate": 1.9941925174310773e-06, + "loss": 0.5152974128723145, + "step": 5512 + }, + { + "epoch": 1.6120777891504607, + "grad_norm": 2.1530610582321015, + "learning_rate": 1.9912963763998185e-06, + "loss": 0.59015291929245, + "step": 5513 + }, + { + "epoch": 1.6123702295657258, + "grad_norm": 2.024675130869183, + "learning_rate": 1.9884021073543368e-06, + "loss": 0.564031720161438, + "step": 5514 + }, + { + "epoch": 1.6126626699809914, + "grad_norm": 1.560415326953441, + "learning_rate": 1.985509710971152e-06, + "loss": 0.5930228233337402, + "step": 5515 + }, + { + "epoch": 1.6129551103962567, + "grad_norm": 1.6853261691368011, + "learning_rate": 1.9826191879263446e-06, + "loss": 0.540229082107544, + "step": 5516 + }, + { + "epoch": 1.613247550811522, + "grad_norm": 1.3918983021829734, + "learning_rate": 1.9797305388955547e-06, + "loss": 0.5473166704177856, + "step": 5517 + }, + { + "epoch": 1.6135399912267876, + "grad_norm": 1.888180196247059, + "learning_rate": 1.976843764553986e-06, + "loss": 0.5814535617828369, + "step": 5518 + }, + { + "epoch": 1.613832431642053, + "grad_norm": 1.556089571926902, + "learning_rate": 1.973958865576403e-06, + "loss": 0.4892576038837433, + "step": 5519 + }, + { + "epoch": 1.6141248720573182, + "grad_norm": 2.0461002845877454, + "learning_rate": 1.97107584263714e-06, + "loss": 0.5416869521141052, + "step": 5520 + }, + { + "epoch": 1.6144173124725838, + "grad_norm": 1.9685372161480885, + "learning_rate": 1.9681946964100807e-06, + "loss": 0.5956105589866638, + "step": 5521 + }, + { + "epoch": 
1.6147097528878491, + "grad_norm": 1.7885384988170376, + "learning_rate": 1.9653154275686782e-06, + "loss": 0.5722565650939941, + "step": 5522 + }, + { + "epoch": 1.6150021933031145, + "grad_norm": 1.7917880328936266, + "learning_rate": 1.962438036785942e-06, + "loss": 0.3984888195991516, + "step": 5523 + }, + { + "epoch": 1.61529463371838, + "grad_norm": 1.6334267618118792, + "learning_rate": 1.959562524734445e-06, + "loss": 0.601211428642273, + "step": 5524 + }, + { + "epoch": 1.615587074133645, + "grad_norm": 1.8080265301577823, + "learning_rate": 1.9566888920863247e-06, + "loss": 0.4803691506385803, + "step": 5525 + }, + { + "epoch": 1.6158795145489107, + "grad_norm": 1.7017865626810558, + "learning_rate": 1.9538171395132688e-06, + "loss": 0.6914256811141968, + "step": 5526 + }, + { + "epoch": 1.6161719549641762, + "grad_norm": 1.6511977253132817, + "learning_rate": 1.950947267686536e-06, + "loss": 0.49076569080352783, + "step": 5527 + }, + { + "epoch": 1.6164643953794413, + "grad_norm": 1.747888743558531, + "learning_rate": 1.9480792772769384e-06, + "loss": 0.45781368017196655, + "step": 5528 + }, + { + "epoch": 1.6167568357947069, + "grad_norm": 1.6564602036044371, + "learning_rate": 1.9452131689548547e-06, + "loss": 0.5257985591888428, + "step": 5529 + }, + { + "epoch": 1.6170492762099722, + "grad_norm": 1.6440311298220422, + "learning_rate": 1.9423489433902186e-06, + "loss": 0.4170517921447754, + "step": 5530 + }, + { + "epoch": 1.6173417166252375, + "grad_norm": 2.0566981290938386, + "learning_rate": 1.939486601252525e-06, + "loss": 0.5612319707870483, + "step": 5531 + }, + { + "epoch": 1.617634157040503, + "grad_norm": 1.6971941543602107, + "learning_rate": 1.93662614321083e-06, + "loss": 0.4543185234069824, + "step": 5532 + }, + { + "epoch": 1.6179265974557684, + "grad_norm": 1.8366998724664239, + "learning_rate": 1.933767569933749e-06, + "loss": 0.5506256222724915, + "step": 5533 + }, + { + "epoch": 1.6182190378710337, + "grad_norm": 1.9975995427991473, 
+ "learning_rate": 1.930910882089454e-06, + "loss": 0.5411139130592346, + "step": 5534 + }, + { + "epoch": 1.6185114782862993, + "grad_norm": 1.5549343206880035, + "learning_rate": 1.9280560803456794e-06, + "loss": 0.5332196950912476, + "step": 5535 + }, + { + "epoch": 1.6188039187015646, + "grad_norm": 1.6015028384804206, + "learning_rate": 1.92520316536972e-06, + "loss": 0.5159808993339539, + "step": 5536 + }, + { + "epoch": 1.61909635911683, + "grad_norm": 1.6182793083642761, + "learning_rate": 1.9223521378284227e-06, + "loss": 0.5483378767967224, + "step": 5537 + }, + { + "epoch": 1.6193887995320955, + "grad_norm": 1.748287896704832, + "learning_rate": 1.9195029983882008e-06, + "loss": 0.6451961994171143, + "step": 5538 + }, + { + "epoch": 1.6196812399473606, + "grad_norm": 1.799346834276764, + "learning_rate": 1.9166557477150227e-06, + "loss": 0.5904289484024048, + "step": 5539 + }, + { + "epoch": 1.6199736803626261, + "grad_norm": 1.7892510148499787, + "learning_rate": 1.9138103864744164e-06, + "loss": 0.6688845753669739, + "step": 5540 + }, + { + "epoch": 1.6202661207778914, + "grad_norm": 2.050448659373089, + "learning_rate": 1.910966915331467e-06, + "loss": 0.6299211382865906, + "step": 5541 + }, + { + "epoch": 1.6205585611931568, + "grad_norm": 1.7426964698819027, + "learning_rate": 1.908125334950819e-06, + "loss": 0.5502864122390747, + "step": 5542 + }, + { + "epoch": 1.6208510016084223, + "grad_norm": 1.7235241145346871, + "learning_rate": 1.905285645996674e-06, + "loss": 0.5332865118980408, + "step": 5543 + }, + { + "epoch": 1.6211434420236877, + "grad_norm": 1.6383658575716402, + "learning_rate": 1.9024478491327936e-06, + "loss": 0.43371304869651794, + "step": 5544 + }, + { + "epoch": 1.621435882438953, + "grad_norm": 1.6217362880484214, + "learning_rate": 1.8996119450224936e-06, + "loss": 0.6992501616477966, + "step": 5545 + }, + { + "epoch": 1.6217283228542185, + "grad_norm": 1.6128570910751827, + "learning_rate": 1.8967779343286507e-06, + "loss": 
0.46558254957199097, + "step": 5546 + }, + { + "epoch": 1.6220207632694839, + "grad_norm": 1.8944422030763228, + "learning_rate": 1.8939458177136994e-06, + "loss": 0.48943620920181274, + "step": 5547 + }, + { + "epoch": 1.6223132036847492, + "grad_norm": 1.7707340805275773, + "learning_rate": 1.8911155958396256e-06, + "loss": 0.6041419506072998, + "step": 5548 + }, + { + "epoch": 1.6226056441000147, + "grad_norm": 1.9134744412177116, + "learning_rate": 1.8882872693679787e-06, + "loss": 0.5695589780807495, + "step": 5549 + }, + { + "epoch": 1.62289808451528, + "grad_norm": 1.5970321508292495, + "learning_rate": 1.8854608389598616e-06, + "loss": 0.45147764682769775, + "step": 5550 + }, + { + "epoch": 1.6231905249305454, + "grad_norm": 1.6145559649024994, + "learning_rate": 1.8826363052759367e-06, + "loss": 0.45560893416404724, + "step": 5551 + }, + { + "epoch": 1.623482965345811, + "grad_norm": 2.0039561405471855, + "learning_rate": 1.8798136689764213e-06, + "loss": 0.5714661478996277, + "step": 5552 + }, + { + "epoch": 1.623775405761076, + "grad_norm": 1.963169578207157, + "learning_rate": 1.8769929307210889e-06, + "loss": 0.6074325442314148, + "step": 5553 + }, + { + "epoch": 1.6240678461763416, + "grad_norm": 1.8912554250379197, + "learning_rate": 1.8741740911692708e-06, + "loss": 0.5406322479248047, + "step": 5554 + }, + { + "epoch": 1.624360286591607, + "grad_norm": 1.6201303288999382, + "learning_rate": 1.8713571509798524e-06, + "loss": 0.6098664999008179, + "step": 5555 + }, + { + "epoch": 1.6246527270068722, + "grad_norm": 1.5939995677707521, + "learning_rate": 1.8685421108112778e-06, + "loss": 0.4424859881401062, + "step": 5556 + }, + { + "epoch": 1.6249451674221378, + "grad_norm": 1.939602482231334, + "learning_rate": 1.8657289713215442e-06, + "loss": 0.5893913507461548, + "step": 5557 + }, + { + "epoch": 1.6252376078374031, + "grad_norm": 1.7158163961672592, + "learning_rate": 1.862917733168208e-06, + "loss": 0.5462610125541687, + "step": 5558 + }, + { + 
"epoch": 1.6255300482526684, + "grad_norm": 1.6886650692922842, + "learning_rate": 1.8601083970083766e-06, + "loss": 0.6044303178787231, + "step": 5559 + }, + { + "epoch": 1.625822488667934, + "grad_norm": 1.9164005939081288, + "learning_rate": 1.857300963498715e-06, + "loss": 0.4110264778137207, + "step": 5560 + }, + { + "epoch": 1.6261149290831993, + "grad_norm": 1.863152431496286, + "learning_rate": 1.8544954332954445e-06, + "loss": 0.5640783309936523, + "step": 5561 + }, + { + "epoch": 1.6264073694984647, + "grad_norm": 1.7604141548514003, + "learning_rate": 1.851691807054342e-06, + "loss": 0.43247851729393005, + "step": 5562 + }, + { + "epoch": 1.6266998099137302, + "grad_norm": 1.5534888737518595, + "learning_rate": 1.8488900854307367e-06, + "loss": 0.4909735918045044, + "step": 5563 + }, + { + "epoch": 1.6269922503289953, + "grad_norm": 2.0368143734269983, + "learning_rate": 1.8460902690795135e-06, + "loss": 0.5705426335334778, + "step": 5564 + }, + { + "epoch": 1.6272846907442609, + "grad_norm": 1.7974229709801126, + "learning_rate": 1.8432923586551144e-06, + "loss": 0.6455575823783875, + "step": 5565 + }, + { + "epoch": 1.6275771311595264, + "grad_norm": 1.5854644304225498, + "learning_rate": 1.8404963548115318e-06, + "loss": 0.4156351089477539, + "step": 5566 + }, + { + "epoch": 1.6278695715747915, + "grad_norm": 1.5858505218040218, + "learning_rate": 1.8377022582023163e-06, + "loss": 0.5497896075248718, + "step": 5567 + }, + { + "epoch": 1.628162011990057, + "grad_norm": 1.5247094519796704, + "learning_rate": 1.8349100694805711e-06, + "loss": 0.5237758159637451, + "step": 5568 + }, + { + "epoch": 1.6284544524053224, + "grad_norm": 1.8340347816856337, + "learning_rate": 1.832119789298954e-06, + "loss": 0.5140771865844727, + "step": 5569 + }, + { + "epoch": 1.6287468928205877, + "grad_norm": 2.04051717357991, + "learning_rate": 1.8293314183096721e-06, + "loss": 0.5942349433898926, + "step": 5570 + }, + { + "epoch": 1.6290393332358533, + "grad_norm": 
1.7868670881272706, + "learning_rate": 1.8265449571644933e-06, + "loss": 0.6316613554954529, + "step": 5571 + }, + { + "epoch": 1.6293317736511186, + "grad_norm": 1.7168155291178147, + "learning_rate": 1.823760406514735e-06, + "loss": 0.4789954423904419, + "step": 5572 + }, + { + "epoch": 1.629624214066384, + "grad_norm": 1.67674259516067, + "learning_rate": 1.8209777670112706e-06, + "loss": 0.596744179725647, + "step": 5573 + }, + { + "epoch": 1.6299166544816495, + "grad_norm": 1.7162317239554103, + "learning_rate": 1.8181970393045223e-06, + "loss": 0.5785890817642212, + "step": 5574 + }, + { + "epoch": 1.6302090948969148, + "grad_norm": 1.72408279785472, + "learning_rate": 1.8154182240444706e-06, + "loss": 0.5399461388587952, + "step": 5575 + }, + { + "epoch": 1.6305015353121801, + "grad_norm": 1.789842505433769, + "learning_rate": 1.812641321880645e-06, + "loss": 0.5251961946487427, + "step": 5576 + }, + { + "epoch": 1.6307939757274457, + "grad_norm": 1.5658430659550284, + "learning_rate": 1.8098663334621314e-06, + "loss": 0.6094855070114136, + "step": 5577 + }, + { + "epoch": 1.6310864161427108, + "grad_norm": 1.7839781318616403, + "learning_rate": 1.8070932594375656e-06, + "loss": 0.5586157441139221, + "step": 5578 + }, + { + "epoch": 1.6313788565579763, + "grad_norm": 1.6074136925381057, + "learning_rate": 1.804322100455136e-06, + "loss": 0.5572035312652588, + "step": 5579 + }, + { + "epoch": 1.6316712969732416, + "grad_norm": 1.8419935059375991, + "learning_rate": 1.801552857162585e-06, + "loss": 0.5567929148674011, + "step": 5580 + }, + { + "epoch": 1.631963737388507, + "grad_norm": 1.6633256712541593, + "learning_rate": 1.79878553020721e-06, + "loss": 0.4823629558086395, + "step": 5581 + }, + { + "epoch": 1.6322561778037725, + "grad_norm": 1.6367496398860508, + "learning_rate": 1.7960201202358495e-06, + "loss": 0.52935791015625, + "step": 5582 + }, + { + "epoch": 1.6325486182190379, + "grad_norm": 1.612437469487566, + "learning_rate": 
1.7932566278949049e-06, + "loss": 0.5486055016517639, + "step": 5583 + }, + { + "epoch": 1.6328410586343032, + "grad_norm": 1.638977663987494, + "learning_rate": 1.7904950538303256e-06, + "loss": 0.5606030225753784, + "step": 5584 + }, + { + "epoch": 1.6331334990495687, + "grad_norm": 1.5693957426770746, + "learning_rate": 1.7877353986876134e-06, + "loss": 0.5394873023033142, + "step": 5585 + }, + { + "epoch": 1.633425939464834, + "grad_norm": 1.7113121312436326, + "learning_rate": 1.7849776631118198e-06, + "loss": 0.6015416383743286, + "step": 5586 + }, + { + "epoch": 1.6337183798800994, + "grad_norm": 1.308708455891742, + "learning_rate": 1.7822218477475496e-06, + "loss": 0.3476119041442871, + "step": 5587 + }, + { + "epoch": 1.634010820295365, + "grad_norm": 1.6872606261874499, + "learning_rate": 1.7794679532389569e-06, + "loss": 0.43062901496887207, + "step": 5588 + }, + { + "epoch": 1.6343032607106303, + "grad_norm": 1.5715829289628913, + "learning_rate": 1.7767159802297497e-06, + "loss": 0.5267136096954346, + "step": 5589 + }, + { + "epoch": 1.6345957011258956, + "grad_norm": 1.5843839322860915, + "learning_rate": 1.7739659293631828e-06, + "loss": 0.40477365255355835, + "step": 5590 + }, + { + "epoch": 1.6348881415411611, + "grad_norm": 1.5464703907052304, + "learning_rate": 1.7712178012820657e-06, + "loss": 0.5166594386100769, + "step": 5591 + }, + { + "epoch": 1.6351805819564262, + "grad_norm": 1.8725681057880097, + "learning_rate": 1.768471596628757e-06, + "loss": 0.577332615852356, + "step": 5592 + }, + { + "epoch": 1.6354730223716918, + "grad_norm": 1.9361068668488919, + "learning_rate": 1.7657273160451626e-06, + "loss": 0.6265558004379272, + "step": 5593 + }, + { + "epoch": 1.6357654627869571, + "grad_norm": 1.359788014623014, + "learning_rate": 1.7629849601727422e-06, + "loss": 0.46483689546585083, + "step": 5594 + }, + { + "epoch": 1.6360579032022224, + "grad_norm": 1.763922790196176, + "learning_rate": 1.760244529652504e-06, + "loss": 
0.5217114090919495, + "step": 5595 + }, + { + "epoch": 1.636350343617488, + "grad_norm": 1.6050490395737056, + "learning_rate": 1.7575060251250098e-06, + "loss": 0.40754032135009766, + "step": 5596 + }, + { + "epoch": 1.6366427840327533, + "grad_norm": 1.8321306870013994, + "learning_rate": 1.7547694472303677e-06, + "loss": 0.5153856873512268, + "step": 5597 + }, + { + "epoch": 1.6369352244480186, + "grad_norm": 1.7719174136737381, + "learning_rate": 1.7520347966082352e-06, + "loss": 0.47374534606933594, + "step": 5598 + }, + { + "epoch": 1.6372276648632842, + "grad_norm": 1.85303064846871, + "learning_rate": 1.7493020738978205e-06, + "loss": 0.375232070684433, + "step": 5599 + }, + { + "epoch": 1.6375201052785495, + "grad_norm": 1.6844665277996391, + "learning_rate": 1.746571279737884e-06, + "loss": 0.5731218457221985, + "step": 5600 + }, + { + "epoch": 1.6378125456938148, + "grad_norm": 1.7604017420749336, + "learning_rate": 1.7438424147667267e-06, + "loss": 0.4908478260040283, + "step": 5601 + }, + { + "epoch": 1.6381049861090804, + "grad_norm": 1.759771030770569, + "learning_rate": 1.741115479622205e-06, + "loss": 0.6114420890808105, + "step": 5602 + }, + { + "epoch": 1.6383974265243455, + "grad_norm": 1.6860969538693165, + "learning_rate": 1.738390474941727e-06, + "loss": 0.6207842826843262, + "step": 5603 + }, + { + "epoch": 1.638689866939611, + "grad_norm": 1.8627303036453442, + "learning_rate": 1.7356674013622431e-06, + "loss": 0.4745057225227356, + "step": 5604 + }, + { + "epoch": 1.6389823073548766, + "grad_norm": 1.7448279838579288, + "learning_rate": 1.7329462595202573e-06, + "loss": 0.5501791834831238, + "step": 5605 + }, + { + "epoch": 1.6392747477701417, + "grad_norm": 1.5723514930569527, + "learning_rate": 1.7302270500518181e-06, + "loss": 0.5497169494628906, + "step": 5606 + }, + { + "epoch": 1.6395671881854073, + "grad_norm": 1.5582550082102102, + "learning_rate": 1.7275097735925239e-06, + "loss": 0.4439499080181122, + "step": 5607 + }, + { + 
"epoch": 1.6398596286006726, + "grad_norm": 1.5421642594165323, + "learning_rate": 1.7247944307775245e-06, + "loss": 0.5869239568710327, + "step": 5608 + }, + { + "epoch": 1.640152069015938, + "grad_norm": 1.8811101308859866, + "learning_rate": 1.722081022241512e-06, + "loss": 0.6979252099990845, + "step": 5609 + }, + { + "epoch": 1.6404445094312035, + "grad_norm": 1.6052357503320651, + "learning_rate": 1.719369548618729e-06, + "loss": 0.43291550874710083, + "step": 5610 + }, + { + "epoch": 1.6407369498464688, + "grad_norm": 1.9414841639869573, + "learning_rate": 1.7166600105429676e-06, + "loss": 0.5670255422592163, + "step": 5611 + }, + { + "epoch": 1.6410293902617341, + "grad_norm": 1.8236286685742322, + "learning_rate": 1.7139524086475679e-06, + "loss": 0.5956759452819824, + "step": 5612 + }, + { + "epoch": 1.6413218306769997, + "grad_norm": 1.8379778243010318, + "learning_rate": 1.71124674356541e-06, + "loss": 0.624202311038971, + "step": 5613 + }, + { + "epoch": 1.641614271092265, + "grad_norm": 1.546976601945301, + "learning_rate": 1.7085430159289295e-06, + "loss": 0.5394845604896545, + "step": 5614 + }, + { + "epoch": 1.6419067115075303, + "grad_norm": 1.689395226298913, + "learning_rate": 1.7058412263701063e-06, + "loss": 0.5320364236831665, + "step": 5615 + }, + { + "epoch": 1.6421991519227959, + "grad_norm": 1.719103506089404, + "learning_rate": 1.7031413755204673e-06, + "loss": 0.5889087915420532, + "step": 5616 + }, + { + "epoch": 1.642491592338061, + "grad_norm": 2.1274597082343103, + "learning_rate": 1.7004434640110857e-06, + "loss": 0.63529372215271, + "step": 5617 + }, + { + "epoch": 1.6427840327533265, + "grad_norm": 1.4153766033649497, + "learning_rate": 1.6977474924725823e-06, + "loss": 0.48696887493133545, + "step": 5618 + }, + { + "epoch": 1.6430764731685918, + "grad_norm": 1.592412158520241, + "learning_rate": 1.6950534615351234e-06, + "loss": 0.5998564958572388, + "step": 5619 + }, + { + "epoch": 1.6433689135838572, + "grad_norm": 
2.0156836237169142, + "learning_rate": 1.6923613718284237e-06, + "loss": 0.5256673693656921, + "step": 5620 + }, + { + "epoch": 1.6436613539991227, + "grad_norm": 1.6125826678096948, + "learning_rate": 1.6896712239817425e-06, + "loss": 0.4609792232513428, + "step": 5621 + }, + { + "epoch": 1.643953794414388, + "grad_norm": 1.6601641389435113, + "learning_rate": 1.6869830186238846e-06, + "loss": 0.6816249489784241, + "step": 5622 + }, + { + "epoch": 1.6442462348296534, + "grad_norm": 1.7917988570441037, + "learning_rate": 1.6842967563832036e-06, + "loss": 0.4622993767261505, + "step": 5623 + }, + { + "epoch": 1.644538675244919, + "grad_norm": 1.7039192593388794, + "learning_rate": 1.6816124378875942e-06, + "loss": 0.5089092254638672, + "step": 5624 + }, + { + "epoch": 1.6448311156601843, + "grad_norm": 1.804753905417491, + "learning_rate": 1.6789300637645e-06, + "loss": 0.49178463220596313, + "step": 5625 + }, + { + "epoch": 1.6451235560754496, + "grad_norm": 1.5823189990427826, + "learning_rate": 1.676249634640912e-06, + "loss": 0.4943847358226776, + "step": 5626 + }, + { + "epoch": 1.6454159964907151, + "grad_norm": 1.368767643177251, + "learning_rate": 1.6735711511433606e-06, + "loss": 0.38509243726730347, + "step": 5627 + }, + { + "epoch": 1.6457084369059805, + "grad_norm": 1.8368901340386043, + "learning_rate": 1.6708946138979288e-06, + "loss": 0.4765651822090149, + "step": 5628 + }, + { + "epoch": 1.6460008773212458, + "grad_norm": 1.9577136986762462, + "learning_rate": 1.6682200235302383e-06, + "loss": 0.5667406916618347, + "step": 5629 + }, + { + "epoch": 1.6462933177365113, + "grad_norm": 1.7936789387136831, + "learning_rate": 1.66554738066546e-06, + "loss": 0.702905535697937, + "step": 5630 + }, + { + "epoch": 1.6465857581517764, + "grad_norm": 1.8218045279879265, + "learning_rate": 1.6628766859283064e-06, + "loss": 0.5056663155555725, + "step": 5631 + }, + { + "epoch": 1.646878198567042, + "grad_norm": 1.5059989898819282, + "learning_rate": 
1.660207939943037e-06, + "loss": 0.3949700593948364, + "step": 5632 + }, + { + "epoch": 1.6471706389823073, + "grad_norm": 1.8365180821647582, + "learning_rate": 1.6575411433334553e-06, + "loss": 0.5562522411346436, + "step": 5633 + }, + { + "epoch": 1.6474630793975726, + "grad_norm": 1.853282636299631, + "learning_rate": 1.6548762967229104e-06, + "loss": 0.5046012997627258, + "step": 5634 + }, + { + "epoch": 1.6477555198128382, + "grad_norm": 1.873405273649113, + "learning_rate": 1.6522134007342894e-06, + "loss": 0.510586678981781, + "step": 5635 + }, + { + "epoch": 1.6480479602281035, + "grad_norm": 1.7244709678320052, + "learning_rate": 1.649552455990031e-06, + "loss": 0.5587502717971802, + "step": 5636 + }, + { + "epoch": 1.6483404006433688, + "grad_norm": 1.4244703013642708, + "learning_rate": 1.6468934631121147e-06, + "loss": 0.4774302840232849, + "step": 5637 + }, + { + "epoch": 1.6486328410586344, + "grad_norm": 1.7840577383362874, + "learning_rate": 1.644236422722063e-06, + "loss": 0.5969966650009155, + "step": 5638 + }, + { + "epoch": 1.6489252814738997, + "grad_norm": 1.9166026144170052, + "learning_rate": 1.6415813354409438e-06, + "loss": 0.6344267129898071, + "step": 5639 + }, + { + "epoch": 1.649217721889165, + "grad_norm": 1.6455866581497667, + "learning_rate": 1.638928201889367e-06, + "loss": 0.5252394676208496, + "step": 5640 + }, + { + "epoch": 1.6495101623044306, + "grad_norm": 1.7211145798255698, + "learning_rate": 1.636277022687488e-06, + "loss": 0.5092496871948242, + "step": 5641 + }, + { + "epoch": 1.6498026027196957, + "grad_norm": 1.797167898340461, + "learning_rate": 1.633627798455002e-06, + "loss": 0.6530938148498535, + "step": 5642 + }, + { + "epoch": 1.6500950431349612, + "grad_norm": 1.9009398203220143, + "learning_rate": 1.6309805298111492e-06, + "loss": 0.5152128338813782, + "step": 5643 + }, + { + "epoch": 1.6503874835502268, + "grad_norm": 1.4233567646508596, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.4670040011405945, 
+ "step": 5644 + }, + { + "epoch": 1.650679923965492, + "grad_norm": 1.718468492793745, + "learning_rate": 1.625691861764024e-06, + "loss": 0.47373896837234497, + "step": 5645 + }, + { + "epoch": 1.6509723643807575, + "grad_norm": 1.7609405530750961, + "learning_rate": 1.6230504635969413e-06, + "loss": 0.44277217984199524, + "step": 5646 + }, + { + "epoch": 1.6512648047960228, + "grad_norm": 1.9781946361433098, + "learning_rate": 1.6204110234908798e-06, + "loss": 0.6000313758850098, + "step": 5647 + }, + { + "epoch": 1.651557245211288, + "grad_norm": 1.7535547863968097, + "learning_rate": 1.6177735420627939e-06, + "loss": 0.5950880646705627, + "step": 5648 + }, + { + "epoch": 1.6518496856265537, + "grad_norm": 1.6566653852646755, + "learning_rate": 1.6151380199291767e-06, + "loss": 0.6308536529541016, + "step": 5649 + }, + { + "epoch": 1.652142126041819, + "grad_norm": 1.6807098070808562, + "learning_rate": 1.6125044577060667e-06, + "loss": 0.619708240032196, + "step": 5650 + }, + { + "epoch": 1.6524345664570843, + "grad_norm": 1.5330415165514617, + "learning_rate": 1.6098728560090438e-06, + "loss": 0.4912105202674866, + "step": 5651 + }, + { + "epoch": 1.6527270068723499, + "grad_norm": 1.6147329529882195, + "learning_rate": 1.607243215453227e-06, + "loss": 0.5078046917915344, + "step": 5652 + }, + { + "epoch": 1.6530194472876152, + "grad_norm": 1.562057881587638, + "learning_rate": 1.60461553665328e-06, + "loss": 0.4845188856124878, + "step": 5653 + }, + { + "epoch": 1.6533118877028805, + "grad_norm": 1.7846716530712952, + "learning_rate": 1.6019898202234075e-06, + "loss": 0.6323055028915405, + "step": 5654 + }, + { + "epoch": 1.653604328118146, + "grad_norm": 1.6998274797888833, + "learning_rate": 1.5993660667773524e-06, + "loss": 0.5700039863586426, + "step": 5655 + }, + { + "epoch": 1.6538967685334112, + "grad_norm": 1.567032216624363, + "learning_rate": 1.596744276928406e-06, + "loss": 0.5415322780609131, + "step": 5656 + }, + { + "epoch": 1.6541892089486767, 
+ "grad_norm": 1.5705725451812174, + "learning_rate": 1.5941244512893894e-06, + "loss": 0.47339457273483276, + "step": 5657 + }, + { + "epoch": 1.654481649363942, + "grad_norm": 1.7467509306260278, + "learning_rate": 1.5915065904726735e-06, + "loss": 0.5391967296600342, + "step": 5658 + }, + { + "epoch": 1.6547740897792074, + "grad_norm": 1.4866752002795596, + "learning_rate": 1.5888906950901683e-06, + "loss": 0.4832335114479065, + "step": 5659 + }, + { + "epoch": 1.655066530194473, + "grad_norm": 1.6779111992749078, + "learning_rate": 1.5862767657533217e-06, + "loss": 0.4539526104927063, + "step": 5660 + }, + { + "epoch": 1.6553589706097382, + "grad_norm": 1.6528644114250834, + "learning_rate": 1.583664803073125e-06, + "loss": 0.5261383056640625, + "step": 5661 + }, + { + "epoch": 1.6556514110250036, + "grad_norm": 1.497445031852123, + "learning_rate": 1.5810548076601096e-06, + "loss": 0.44060665369033813, + "step": 5662 + }, + { + "epoch": 1.6559438514402691, + "grad_norm": 1.5492116356252563, + "learning_rate": 1.578446780124344e-06, + "loss": 0.4202715754508972, + "step": 5663 + }, + { + "epoch": 1.6562362918555344, + "grad_norm": 1.6539664834530166, + "learning_rate": 1.57584072107544e-06, + "loss": 0.4736124873161316, + "step": 5664 + }, + { + "epoch": 1.6565287322707998, + "grad_norm": 1.71312353367257, + "learning_rate": 1.5732366311225466e-06, + "loss": 0.46696585416793823, + "step": 5665 + }, + { + "epoch": 1.6568211726860653, + "grad_norm": 1.9879711761174368, + "learning_rate": 1.570634510874356e-06, + "loss": 0.566236138343811, + "step": 5666 + }, + { + "epoch": 1.6571136131013307, + "grad_norm": 1.8521270783851422, + "learning_rate": 1.568034360939098e-06, + "loss": 0.5486587882041931, + "step": 5667 + }, + { + "epoch": 1.657406053516596, + "grad_norm": 1.7248593346342498, + "learning_rate": 1.5654361819245423e-06, + "loss": 0.49735748767852783, + "step": 5668 + }, + { + "epoch": 1.6576984939318615, + "grad_norm": 1.7146086057727925, + 
"learning_rate": 1.562839974437993e-06, + "loss": 0.6306062340736389, + "step": 5669 + }, + { + "epoch": 1.6579909343471266, + "grad_norm": 1.524921725608077, + "learning_rate": 1.5602457390863e-06, + "loss": 0.5062750577926636, + "step": 5670 + }, + { + "epoch": 1.6582833747623922, + "grad_norm": 1.511094812843301, + "learning_rate": 1.5576534764758522e-06, + "loss": 0.5037271976470947, + "step": 5671 + }, + { + "epoch": 1.6585758151776575, + "grad_norm": 1.5731242321067331, + "learning_rate": 1.5550631872125743e-06, + "loss": 0.5749099254608154, + "step": 5672 + }, + { + "epoch": 1.6588682555929228, + "grad_norm": 1.7434864310127793, + "learning_rate": 1.5524748719019312e-06, + "loss": 0.5241814255714417, + "step": 5673 + }, + { + "epoch": 1.6591606960081884, + "grad_norm": 1.6599956977784314, + "learning_rate": 1.5498885311489243e-06, + "loss": 0.5410301685333252, + "step": 5674 + }, + { + "epoch": 1.6594531364234537, + "grad_norm": 1.6011154372945764, + "learning_rate": 1.5473041655580956e-06, + "loss": 0.6363968253135681, + "step": 5675 + }, + { + "epoch": 1.659745576838719, + "grad_norm": 1.7262614943682328, + "learning_rate": 1.5447217757335264e-06, + "loss": 0.5476758480072021, + "step": 5676 + }, + { + "epoch": 1.6600380172539846, + "grad_norm": 2.1890795829733465, + "learning_rate": 1.5421413622788328e-06, + "loss": 0.5375553369522095, + "step": 5677 + }, + { + "epoch": 1.66033045766925, + "grad_norm": 1.6882335328441256, + "learning_rate": 1.53956292579717e-06, + "loss": 0.6227232217788696, + "step": 5678 + }, + { + "epoch": 1.6606228980845152, + "grad_norm": 1.6718724900526085, + "learning_rate": 1.5369864668912327e-06, + "loss": 0.5981070399284363, + "step": 5679 + }, + { + "epoch": 1.6609153384997808, + "grad_norm": 1.6935493861773532, + "learning_rate": 1.5344119861632535e-06, + "loss": 0.5535466074943542, + "step": 5680 + }, + { + "epoch": 1.661207778915046, + "grad_norm": 1.5610811166933365, + "learning_rate": 1.5318394842150009e-06, + "loss": 
0.5562780499458313, + "step": 5681 + }, + { + "epoch": 1.6615002193303114, + "grad_norm": 2.8725866349660683, + "learning_rate": 1.5292689616477808e-06, + "loss": 0.5937552452087402, + "step": 5682 + }, + { + "epoch": 1.661792659745577, + "grad_norm": 1.578580766715999, + "learning_rate": 1.526700419062439e-06, + "loss": 0.5154576301574707, + "step": 5683 + }, + { + "epoch": 1.662085100160842, + "grad_norm": 1.691867883182686, + "learning_rate": 1.5241338570593557e-06, + "loss": 0.6887973546981812, + "step": 5684 + }, + { + "epoch": 1.6623775405761076, + "grad_norm": 1.6575847736482525, + "learning_rate": 1.5215692762384481e-06, + "loss": 0.5365385413169861, + "step": 5685 + }, + { + "epoch": 1.662669980991373, + "grad_norm": 1.8209681086307343, + "learning_rate": 1.519006677199173e-06, + "loss": 0.5906165838241577, + "step": 5686 + }, + { + "epoch": 1.6629624214066383, + "grad_norm": 1.5861180854035228, + "learning_rate": 1.5164460605405252e-06, + "loss": 0.5752634406089783, + "step": 5687 + }, + { + "epoch": 1.6632548618219039, + "grad_norm": 1.5319119666926662, + "learning_rate": 1.5138874268610259e-06, + "loss": 0.6265667676925659, + "step": 5688 + }, + { + "epoch": 1.6635473022371692, + "grad_norm": 1.9588164368468703, + "learning_rate": 1.5113307767587449e-06, + "loss": 0.5032769441604614, + "step": 5689 + }, + { + "epoch": 1.6638397426524345, + "grad_norm": 1.799708728422752, + "learning_rate": 1.5087761108312837e-06, + "loss": 0.5740037560462952, + "step": 5690 + }, + { + "epoch": 1.6641321830677, + "grad_norm": 1.7236751091410876, + "learning_rate": 1.5062234296757782e-06, + "loss": 0.5745523571968079, + "step": 5691 + }, + { + "epoch": 1.6644246234829654, + "grad_norm": 1.4898534105858117, + "learning_rate": 1.5036727338889035e-06, + "loss": 0.4448510408401489, + "step": 5692 + }, + { + "epoch": 1.6647170638982307, + "grad_norm": 1.7482222251428332, + "learning_rate": 1.5011240240668678e-06, + "loss": 0.5142196416854858, + "step": 5693 + }, + { + "epoch": 
1.6650095043134963, + "grad_norm": 1.484675519827308, + "learning_rate": 1.4985773008054184e-06, + "loss": 0.3317479193210602, + "step": 5694 + }, + { + "epoch": 1.6653019447287614, + "grad_norm": 1.9471413048062423, + "learning_rate": 1.4960325646998353e-06, + "loss": 0.5721619129180908, + "step": 5695 + }, + { + "epoch": 1.665594385144027, + "grad_norm": 1.7546330993452042, + "learning_rate": 1.4934898163449341e-06, + "loss": 0.4937021732330322, + "step": 5696 + }, + { + "epoch": 1.6658868255592922, + "grad_norm": 1.9665727915679332, + "learning_rate": 1.4909490563350694e-06, + "loss": 0.6414870023727417, + "step": 5697 + }, + { + "epoch": 1.6661792659745576, + "grad_norm": 2.144217430639584, + "learning_rate": 1.4884102852641258e-06, + "loss": 0.6265281438827515, + "step": 5698 + }, + { + "epoch": 1.6664717063898231, + "grad_norm": 1.9171094003176723, + "learning_rate": 1.48587350372553e-06, + "loss": 0.5908917784690857, + "step": 5699 + }, + { + "epoch": 1.6667641468050884, + "grad_norm": 1.5794126433874063, + "learning_rate": 1.4833387123122334e-06, + "loss": 0.6098382472991943, + "step": 5700 + }, + { + "epoch": 1.6670565872203538, + "grad_norm": 1.7930907914682574, + "learning_rate": 1.4808059116167306e-06, + "loss": 0.5106536746025085, + "step": 5701 + }, + { + "epoch": 1.6673490276356193, + "grad_norm": 1.9054848074745216, + "learning_rate": 1.4782751022310481e-06, + "loss": 0.5548620820045471, + "step": 5702 + }, + { + "epoch": 1.6676414680508846, + "grad_norm": 1.5332665904029121, + "learning_rate": 1.4757462847467475e-06, + "loss": 0.4596245288848877, + "step": 5703 + }, + { + "epoch": 1.66793390846615, + "grad_norm": 1.98056012220508, + "learning_rate": 1.4732194597549244e-06, + "loss": 0.6000612378120422, + "step": 5704 + }, + { + "epoch": 1.6682263488814155, + "grad_norm": 1.53003821533968, + "learning_rate": 1.4706946278462097e-06, + "loss": 0.5522277355194092, + "step": 5705 + }, + { + "epoch": 1.6685187892966808, + "grad_norm": 1.741978737228361, 
+ "learning_rate": 1.468171789610766e-06, + "loss": 0.4765724837779999, + "step": 5706 + }, + { + "epoch": 1.6688112297119462, + "grad_norm": 1.737640693413614, + "learning_rate": 1.4656509456382927e-06, + "loss": 0.564188539981842, + "step": 5707 + }, + { + "epoch": 1.6691036701272117, + "grad_norm": 1.8586603218062736, + "learning_rate": 1.4631320965180208e-06, + "loss": 0.4910390377044678, + "step": 5708 + }, + { + "epoch": 1.6693961105424768, + "grad_norm": 1.8232002468100077, + "learning_rate": 1.4606152428387166e-06, + "loss": 0.5992041826248169, + "step": 5709 + }, + { + "epoch": 1.6696885509577424, + "grad_norm": 1.7676126822410316, + "learning_rate": 1.4581003851886811e-06, + "loss": 0.4873291850090027, + "step": 5710 + }, + { + "epoch": 1.6699809913730077, + "grad_norm": 1.7366762629360202, + "learning_rate": 1.4555875241557426e-06, + "loss": 0.6487013101577759, + "step": 5711 + }, + { + "epoch": 1.670273431788273, + "grad_norm": 1.5436242943423213, + "learning_rate": 1.4530766603272695e-06, + "loss": 0.4624609351158142, + "step": 5712 + }, + { + "epoch": 1.6705658722035386, + "grad_norm": 1.8279691880026145, + "learning_rate": 1.4505677942901609e-06, + "loss": 0.5765592455863953, + "step": 5713 + }, + { + "epoch": 1.670858312618804, + "grad_norm": 1.6171369345434061, + "learning_rate": 1.4480609266308488e-06, + "loss": 0.6730339527130127, + "step": 5714 + }, + { + "epoch": 1.6711507530340692, + "grad_norm": 1.7616383708984635, + "learning_rate": 1.445556057935299e-06, + "loss": 0.6381770372390747, + "step": 5715 + }, + { + "epoch": 1.6714431934493348, + "grad_norm": 1.8270298461203718, + "learning_rate": 1.4430531887890076e-06, + "loss": 0.6236029863357544, + "step": 5716 + }, + { + "epoch": 1.6717356338646001, + "grad_norm": 1.8837930420569144, + "learning_rate": 1.4405523197770076e-06, + "loss": 0.521639347076416, + "step": 5717 + }, + { + "epoch": 1.6720280742798654, + "grad_norm": 1.651464369232987, + "learning_rate": 1.4380534514838596e-06, + 
"loss": 0.5912468433380127, + "step": 5718 + }, + { + "epoch": 1.672320514695131, + "grad_norm": 1.7062190862435904, + "learning_rate": 1.4355565844936602e-06, + "loss": 0.5533329248428345, + "step": 5719 + }, + { + "epoch": 1.672612955110396, + "grad_norm": 1.8368834191923704, + "learning_rate": 1.4330617193900365e-06, + "loss": 0.5901006460189819, + "step": 5720 + }, + { + "epoch": 1.6729053955256616, + "grad_norm": 1.7501848609248272, + "learning_rate": 1.4305688567561503e-06, + "loss": 0.5083344578742981, + "step": 5721 + }, + { + "epoch": 1.6731978359409272, + "grad_norm": 1.359271864269329, + "learning_rate": 1.4280779971746894e-06, + "loss": 0.4443317651748657, + "step": 5722 + }, + { + "epoch": 1.6734902763561923, + "grad_norm": 1.931591797384203, + "learning_rate": 1.4255891412278778e-06, + "loss": 0.6355078220367432, + "step": 5723 + }, + { + "epoch": 1.6737827167714578, + "grad_norm": 1.751894451134603, + "learning_rate": 1.423102289497471e-06, + "loss": 0.5403381586074829, + "step": 5724 + }, + { + "epoch": 1.6740751571867232, + "grad_norm": 1.7703719298346523, + "learning_rate": 1.4206174425647556e-06, + "loss": 0.5272151231765747, + "step": 5725 + }, + { + "epoch": 1.6743675976019885, + "grad_norm": 1.6453865684399975, + "learning_rate": 1.41813460101055e-06, + "loss": 0.6750346422195435, + "step": 5726 + }, + { + "epoch": 1.674660038017254, + "grad_norm": 1.7471006401236766, + "learning_rate": 1.4156537654152026e-06, + "loss": 0.552655816078186, + "step": 5727 + }, + { + "epoch": 1.6749524784325194, + "grad_norm": 1.5245393452927156, + "learning_rate": 1.4131749363585933e-06, + "loss": 0.3947732448577881, + "step": 5728 + }, + { + "epoch": 1.6752449188477847, + "grad_norm": 1.4115886314708204, + "learning_rate": 1.4106981144201337e-06, + "loss": 0.4910270571708679, + "step": 5729 + }, + { + "epoch": 1.6755373592630503, + "grad_norm": 1.9268335112778272, + "learning_rate": 1.408223300178767e-06, + "loss": 0.5717943906784058, + "step": 5730 + }, + { + 
"epoch": 1.6758297996783156, + "grad_norm": 1.6782914146067396, + "learning_rate": 1.4057504942129652e-06, + "loss": 0.4993055462837219, + "step": 5731 + }, + { + "epoch": 1.676122240093581, + "grad_norm": 1.851203153701759, + "learning_rate": 1.4032796971007322e-06, + "loss": 0.4772619605064392, + "step": 5732 + }, + { + "epoch": 1.6764146805088465, + "grad_norm": 1.6250346930838577, + "learning_rate": 1.400810909419601e-06, + "loss": 0.3824518322944641, + "step": 5733 + }, + { + "epoch": 1.6767071209241116, + "grad_norm": 1.815105841906862, + "learning_rate": 1.398344131746634e-06, + "loss": 0.5302368402481079, + "step": 5734 + }, + { + "epoch": 1.676999561339377, + "grad_norm": 1.738439871277175, + "learning_rate": 1.3958793646584279e-06, + "loss": 0.5776697397232056, + "step": 5735 + }, + { + "epoch": 1.6772920017546424, + "grad_norm": 1.7943869884408015, + "learning_rate": 1.3934166087311063e-06, + "loss": 0.53890061378479, + "step": 5736 + }, + { + "epoch": 1.6775844421699078, + "grad_norm": 1.71821325954837, + "learning_rate": 1.3909558645403243e-06, + "loss": 0.47210827469825745, + "step": 5737 + }, + { + "epoch": 1.6778768825851733, + "grad_norm": 1.8689211559459666, + "learning_rate": 1.388497132661264e-06, + "loss": 0.6020913124084473, + "step": 5738 + }, + { + "epoch": 1.6781693230004386, + "grad_norm": 1.8612626444994878, + "learning_rate": 1.3860404136686411e-06, + "loss": 0.4244590997695923, + "step": 5739 + }, + { + "epoch": 1.678461763415704, + "grad_norm": 1.8281285744352933, + "learning_rate": 1.3835857081366965e-06, + "loss": 0.5969624519348145, + "step": 5740 + }, + { + "epoch": 1.6787542038309695, + "grad_norm": 1.774375495611947, + "learning_rate": 1.3811330166392057e-06, + "loss": 0.6573030352592468, + "step": 5741 + }, + { + "epoch": 1.6790466442462348, + "grad_norm": 1.641770470616675, + "learning_rate": 1.3786823397494675e-06, + "loss": 0.4251132905483246, + "step": 5742 + }, + { + "epoch": 1.6793390846615002, + "grad_norm": 
1.504067842347657, + "learning_rate": 1.3762336780403163e-06, + "loss": 0.5555700659751892, + "step": 5743 + }, + { + "epoch": 1.6796315250767657, + "grad_norm": 2.1042602241156128, + "learning_rate": 1.3737870320841073e-06, + "loss": 0.5651364326477051, + "step": 5744 + }, + { + "epoch": 1.679923965492031, + "grad_norm": 1.7135873981198582, + "learning_rate": 1.371342402452731e-06, + "loss": 0.6283698678016663, + "step": 5745 + }, + { + "epoch": 1.6802164059072964, + "grad_norm": 1.4720135811876174, + "learning_rate": 1.3688997897176037e-06, + "loss": 0.47864413261413574, + "step": 5746 + }, + { + "epoch": 1.680508846322562, + "grad_norm": 1.9268157997034314, + "learning_rate": 1.366459194449674e-06, + "loss": 0.6254131197929382, + "step": 5747 + }, + { + "epoch": 1.680801286737827, + "grad_norm": 1.4626288934383243, + "learning_rate": 1.364020617219415e-06, + "loss": 0.35147637128829956, + "step": 5748 + }, + { + "epoch": 1.6810937271530926, + "grad_norm": 1.9627536321629568, + "learning_rate": 1.3615840585968287e-06, + "loss": 0.6126410961151123, + "step": 5749 + }, + { + "epoch": 1.681386167568358, + "grad_norm": 1.7732443093164585, + "learning_rate": 1.359149519151447e-06, + "loss": 0.5807974338531494, + "step": 5750 + }, + { + "epoch": 1.6816786079836232, + "grad_norm": 1.5024396656291241, + "learning_rate": 1.3567169994523277e-06, + "loss": 0.5033349990844727, + "step": 5751 + }, + { + "epoch": 1.6819710483988888, + "grad_norm": 1.5694630419560385, + "learning_rate": 1.3542865000680604e-06, + "loss": 0.47656023502349854, + "step": 5752 + }, + { + "epoch": 1.682263488814154, + "grad_norm": 1.6495545571688441, + "learning_rate": 1.3518580215667542e-06, + "loss": 0.5137293338775635, + "step": 5753 + }, + { + "epoch": 1.6825559292294194, + "grad_norm": 1.5950800812601422, + "learning_rate": 1.3494315645160539e-06, + "loss": 0.4636800289154053, + "step": 5754 + }, + { + "epoch": 1.682848369644685, + "grad_norm": 1.937366310273075, + "learning_rate": 
1.3470071294831289e-06, + "loss": 0.5825523138046265, + "step": 5755 + }, + { + "epoch": 1.6831408100599503, + "grad_norm": 1.798274160020649, + "learning_rate": 1.344584717034677e-06, + "loss": 0.49282288551330566, + "step": 5756 + }, + { + "epoch": 1.6834332504752156, + "grad_norm": 1.6393172330125654, + "learning_rate": 1.3421643277369211e-06, + "loss": 0.5551935434341431, + "step": 5757 + }, + { + "epoch": 1.6837256908904812, + "grad_norm": 1.7844394306187494, + "learning_rate": 1.339745962155613e-06, + "loss": 0.6423832178115845, + "step": 5758 + }, + { + "epoch": 1.6840181313057463, + "grad_norm": 1.7061841923170233, + "learning_rate": 1.3373296208560316e-06, + "loss": 0.6178075671195984, + "step": 5759 + }, + { + "epoch": 1.6843105717210118, + "grad_norm": 1.7099687312543272, + "learning_rate": 1.3349153044029816e-06, + "loss": 0.6781176328659058, + "step": 5760 + }, + { + "epoch": 1.6846030121362774, + "grad_norm": 1.837996192806761, + "learning_rate": 1.332503013360794e-06, + "loss": 0.6511910557746887, + "step": 5761 + }, + { + "epoch": 1.6848954525515425, + "grad_norm": 1.3861294384859772, + "learning_rate": 1.3300927482933279e-06, + "loss": 0.4980696141719818, + "step": 5762 + }, + { + "epoch": 1.685187892966808, + "grad_norm": 1.6100202697936232, + "learning_rate": 1.3276845097639702e-06, + "loss": 0.49176928400993347, + "step": 5763 + }, + { + "epoch": 1.6854803333820734, + "grad_norm": 1.893755486996651, + "learning_rate": 1.3252782983356272e-06, + "loss": 0.5198799967765808, + "step": 5764 + }, + { + "epoch": 1.6857727737973387, + "grad_norm": 1.4629269004624288, + "learning_rate": 1.322874114570739e-06, + "loss": 0.5058869123458862, + "step": 5765 + }, + { + "epoch": 1.6860652142126042, + "grad_norm": 1.7429625548536576, + "learning_rate": 1.3204719590312698e-06, + "loss": 0.46573105454444885, + "step": 5766 + }, + { + "epoch": 1.6863576546278696, + "grad_norm": 1.702952537068074, + "learning_rate": 1.3180718322787067e-06, + "loss": 
0.5033260583877563, + "step": 5767 + }, + { + "epoch": 1.686650095043135, + "grad_norm": 1.4816614067920655, + "learning_rate": 1.3156737348740655e-06, + "loss": 0.5306515693664551, + "step": 5768 + }, + { + "epoch": 1.6869425354584004, + "grad_norm": 1.5781671263541353, + "learning_rate": 1.313277667377888e-06, + "loss": 0.45660221576690674, + "step": 5769 + }, + { + "epoch": 1.6872349758736658, + "grad_norm": 1.8450654821638361, + "learning_rate": 1.3108836303502392e-06, + "loss": 0.5353757739067078, + "step": 5770 + }, + { + "epoch": 1.687527416288931, + "grad_norm": 1.719850536675987, + "learning_rate": 1.3084916243507118e-06, + "loss": 0.5415239930152893, + "step": 5771 + }, + { + "epoch": 1.6878198567041967, + "grad_norm": 1.7512787251419633, + "learning_rate": 1.3061016499384217e-06, + "loss": 0.5860229730606079, + "step": 5772 + }, + { + "epoch": 1.6881122971194618, + "grad_norm": 1.7755386406909037, + "learning_rate": 1.3037137076720107e-06, + "loss": 0.5524891018867493, + "step": 5773 + }, + { + "epoch": 1.6884047375347273, + "grad_norm": 1.9368287931198411, + "learning_rate": 1.3013277981096484e-06, + "loss": 0.5557498931884766, + "step": 5774 + }, + { + "epoch": 1.6886971779499926, + "grad_norm": 1.6092314723070207, + "learning_rate": 1.2989439218090227e-06, + "loss": 0.45877397060394287, + "step": 5775 + }, + { + "epoch": 1.688989618365258, + "grad_norm": 1.587163397973365, + "learning_rate": 1.2965620793273515e-06, + "loss": 0.5310335159301758, + "step": 5776 + }, + { + "epoch": 1.6892820587805235, + "grad_norm": 1.4561579735469703, + "learning_rate": 1.294182271221377e-06, + "loss": 0.4855915904045105, + "step": 5777 + }, + { + "epoch": 1.6895744991957888, + "grad_norm": 1.5707049211364605, + "learning_rate": 1.2918044980473643e-06, + "loss": 0.6070747971534729, + "step": 5778 + }, + { + "epoch": 1.6898669396110542, + "grad_norm": 1.6739979529118527, + "learning_rate": 1.2894287603611033e-06, + "loss": 0.5108609795570374, + "step": 5779 + }, + { + 
"epoch": 1.6901593800263197, + "grad_norm": 1.7647615756485302, + "learning_rate": 1.2870550587179087e-06, + "loss": 0.49141189455986023, + "step": 5780 + }, + { + "epoch": 1.690451820441585, + "grad_norm": 1.4176033732152467, + "learning_rate": 1.2846833936726178e-06, + "loss": 0.4239678382873535, + "step": 5781 + }, + { + "epoch": 1.6907442608568504, + "grad_norm": 1.8431022697656632, + "learning_rate": 1.2823137657795948e-06, + "loss": 0.6348937153816223, + "step": 5782 + }, + { + "epoch": 1.691036701272116, + "grad_norm": 1.4853353146024342, + "learning_rate": 1.2799461755927233e-06, + "loss": 0.4561845064163208, + "step": 5783 + }, + { + "epoch": 1.6913291416873812, + "grad_norm": 2.1521785942560197, + "learning_rate": 1.2775806236654153e-06, + "loss": 0.5663880109786987, + "step": 5784 + }, + { + "epoch": 1.6916215821026466, + "grad_norm": 1.5729874297711008, + "learning_rate": 1.275217110550604e-06, + "loss": 0.5200550556182861, + "step": 5785 + }, + { + "epoch": 1.6919140225179121, + "grad_norm": 1.7072937541006934, + "learning_rate": 1.2728556368007461e-06, + "loss": 0.5401214361190796, + "step": 5786 + }, + { + "epoch": 1.6922064629331772, + "grad_norm": 2.0089316276908917, + "learning_rate": 1.2704962029678202e-06, + "loss": 0.5409752130508423, + "step": 5787 + }, + { + "epoch": 1.6924989033484428, + "grad_norm": 1.6551832796501305, + "learning_rate": 1.2681388096033298e-06, + "loss": 0.46215158700942993, + "step": 5788 + }, + { + "epoch": 1.692791343763708, + "grad_norm": 1.508586050733543, + "learning_rate": 1.2657834572583027e-06, + "loss": 0.44687867164611816, + "step": 5789 + }, + { + "epoch": 1.6930837841789734, + "grad_norm": 1.5112474922130816, + "learning_rate": 1.2634301464832877e-06, + "loss": 0.47882723808288574, + "step": 5790 + }, + { + "epoch": 1.693376224594239, + "grad_norm": 2.0838428918534264, + "learning_rate": 1.2610788778283567e-06, + "loss": 0.6108201742172241, + "step": 5791 + }, + { + "epoch": 1.6936686650095043, + "grad_norm": 
1.4370335670353505, + "learning_rate": 1.2587296518431036e-06, + "loss": 0.45024657249450684, + "step": 5792 + }, + { + "epoch": 1.6939611054247696, + "grad_norm": 2.263053324487421, + "learning_rate": 1.256382469076648e-06, + "loss": 0.6746254563331604, + "step": 5793 + }, + { + "epoch": 1.6942535458400352, + "grad_norm": 1.7423805800598553, + "learning_rate": 1.2540373300776264e-06, + "loss": 0.6439248323440552, + "step": 5794 + }, + { + "epoch": 1.6945459862553005, + "grad_norm": 1.5101648188878154, + "learning_rate": 1.251694235394204e-06, + "loss": 0.467510461807251, + "step": 5795 + }, + { + "epoch": 1.6948384266705658, + "grad_norm": 2.0083904845815117, + "learning_rate": 1.2493531855740626e-06, + "loss": 0.5509516596794128, + "step": 5796 + }, + { + "epoch": 1.6951308670858314, + "grad_norm": 1.6291523574406077, + "learning_rate": 1.247014181164412e-06, + "loss": 0.49178194999694824, + "step": 5797 + }, + { + "epoch": 1.6954233075010965, + "grad_norm": 1.6626228068208797, + "learning_rate": 1.2446772227119753e-06, + "loss": 0.4825005531311035, + "step": 5798 + }, + { + "epoch": 1.695715747916362, + "grad_norm": 1.7609982762736733, + "learning_rate": 1.242342310763005e-06, + "loss": 0.7441064715385437, + "step": 5799 + }, + { + "epoch": 1.6960081883316276, + "grad_norm": 1.6166055740202077, + "learning_rate": 1.2400094458632717e-06, + "loss": 0.5020110011100769, + "step": 5800 + }, + { + "epoch": 1.6963006287468927, + "grad_norm": 1.6328086702132818, + "learning_rate": 1.237678628558069e-06, + "loss": 0.5439830422401428, + "step": 5801 + }, + { + "epoch": 1.6965930691621582, + "grad_norm": 2.0549338843530136, + "learning_rate": 1.235349859392211e-06, + "loss": 0.6235179901123047, + "step": 5802 + }, + { + "epoch": 1.6968855095774236, + "grad_norm": 1.7141848290041162, + "learning_rate": 1.2330231389100323e-06, + "loss": 0.6176612377166748, + "step": 5803 + }, + { + "epoch": 1.697177949992689, + "grad_norm": 1.7381769122607003, + "learning_rate": 
1.2306984676553924e-06, + "loss": 0.5956840515136719, + "step": 5804 + }, + { + "epoch": 1.6974703904079544, + "grad_norm": 1.6857909163061566, + "learning_rate": 1.2283758461716667e-06, + "loss": 0.5025947690010071, + "step": 5805 + }, + { + "epoch": 1.6977628308232198, + "grad_norm": 1.507035347865144, + "learning_rate": 1.2260552750017551e-06, + "loss": 0.5772436857223511, + "step": 5806 + }, + { + "epoch": 1.698055271238485, + "grad_norm": 1.6665432076063584, + "learning_rate": 1.223736754688075e-06, + "loss": 0.4336615204811096, + "step": 5807 + }, + { + "epoch": 1.6983477116537506, + "grad_norm": 1.695081220374435, + "learning_rate": 1.221420285772572e-06, + "loss": 0.5697668790817261, + "step": 5808 + }, + { + "epoch": 1.698640152069016, + "grad_norm": 1.8545475442236217, + "learning_rate": 1.2191058687966995e-06, + "loss": 0.4966861605644226, + "step": 5809 + }, + { + "epoch": 1.6989325924842813, + "grad_norm": 1.777484506048346, + "learning_rate": 1.2167935043014411e-06, + "loss": 0.5805951952934265, + "step": 5810 + }, + { + "epoch": 1.6992250328995469, + "grad_norm": 1.6055305498040644, + "learning_rate": 1.2144831928272994e-06, + "loss": 0.4669906497001648, + "step": 5811 + }, + { + "epoch": 1.699517473314812, + "grad_norm": 1.7730179282571827, + "learning_rate": 1.212174934914294e-06, + "loss": 0.5630965828895569, + "step": 5812 + }, + { + "epoch": 1.6998099137300775, + "grad_norm": 1.7272395334456936, + "learning_rate": 1.2098687311019663e-06, + "loss": 0.5345104932785034, + "step": 5813 + }, + { + "epoch": 1.7001023541453428, + "grad_norm": 1.9547814584710963, + "learning_rate": 1.207564581929378e-06, + "loss": 0.5760249495506287, + "step": 5814 + }, + { + "epoch": 1.7003947945606082, + "grad_norm": 1.563397994600299, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.506635308265686, + "step": 5815 + }, + { + "epoch": 1.7006872349758737, + "grad_norm": 1.792775034126629, + "learning_rate": 1.2029624496572622e-06, + "loss": 0.5107032656669617, + 
"step": 5816 + }, + { + "epoch": 1.700979675391139, + "grad_norm": 1.5891211780153636, + "learning_rate": 1.2006644676334557e-06, + "loss": 0.5888187885284424, + "step": 5817 + }, + { + "epoch": 1.7012721158064044, + "grad_norm": 1.8008314810247776, + "learning_rate": 1.1983685424008285e-06, + "loss": 0.5326075553894043, + "step": 5818 + }, + { + "epoch": 1.70156455622167, + "grad_norm": 1.6515493940564925, + "learning_rate": 1.1960746744960417e-06, + "loss": 0.5097993612289429, + "step": 5819 + }, + { + "epoch": 1.7018569966369352, + "grad_norm": 1.6532256911128915, + "learning_rate": 1.1937828644552696e-06, + "loss": 0.6001093983650208, + "step": 5820 + }, + { + "epoch": 1.7021494370522006, + "grad_norm": 1.7728326525757572, + "learning_rate": 1.1914931128142072e-06, + "loss": 0.513684093952179, + "step": 5821 + }, + { + "epoch": 1.7024418774674661, + "grad_norm": 1.6118848482453871, + "learning_rate": 1.189205420108076e-06, + "loss": 0.4688597321510315, + "step": 5822 + }, + { + "epoch": 1.7027343178827314, + "grad_norm": 1.6755720349462948, + "learning_rate": 1.1869197868716075e-06, + "loss": 0.4537498354911804, + "step": 5823 + }, + { + "epoch": 1.7030267582979968, + "grad_norm": 1.625108439053771, + "learning_rate": 1.1846362136390531e-06, + "loss": 0.43031078577041626, + "step": 5824 + }, + { + "epoch": 1.7033191987132623, + "grad_norm": 1.9244406734438975, + "learning_rate": 1.182354700944187e-06, + "loss": 0.5139330625534058, + "step": 5825 + }, + { + "epoch": 1.7036116391285274, + "grad_norm": 1.7897651312393703, + "learning_rate": 1.180075249320296e-06, + "loss": 0.6542010307312012, + "step": 5826 + }, + { + "epoch": 1.703904079543793, + "grad_norm": 1.6365189888188503, + "learning_rate": 1.1777978593001903e-06, + "loss": 0.5371676087379456, + "step": 5827 + }, + { + "epoch": 1.7041965199590583, + "grad_norm": 1.6793659914593386, + "learning_rate": 1.1755225314161967e-06, + "loss": 0.47583359479904175, + "step": 5828 + }, + { + "epoch": 
1.7044889603743236, + "grad_norm": 1.7363884838234833, + "learning_rate": 1.173249266200156e-06, + "loss": 0.5471247434616089, + "step": 5829 + }, + { + "epoch": 1.7047814007895892, + "grad_norm": 1.850508925320166, + "learning_rate": 1.1709780641834323e-06, + "loss": 0.5095713138580322, + "step": 5830 + }, + { + "epoch": 1.7050738412048545, + "grad_norm": 1.5373790027628114, + "learning_rate": 1.1687089258969041e-06, + "loss": 0.41944777965545654, + "step": 5831 + }, + { + "epoch": 1.7053662816201198, + "grad_norm": 1.5434472143224902, + "learning_rate": 1.1664418518709697e-06, + "loss": 0.42380404472351074, + "step": 5832 + }, + { + "epoch": 1.7056587220353854, + "grad_norm": 1.8798510100106, + "learning_rate": 1.1641768426355427e-06, + "loss": 0.5688038468360901, + "step": 5833 + }, + { + "epoch": 1.7059511624506507, + "grad_norm": 1.6396391570153137, + "learning_rate": 1.1619138987200562e-06, + "loss": 0.5432788133621216, + "step": 5834 + }, + { + "epoch": 1.706243602865916, + "grad_norm": 1.699260651340017, + "learning_rate": 1.1596530206534606e-06, + "loss": 0.5408512949943542, + "step": 5835 + }, + { + "epoch": 1.7065360432811816, + "grad_norm": 1.5364052920051108, + "learning_rate": 1.1573942089642198e-06, + "loss": 0.5149247646331787, + "step": 5836 + }, + { + "epoch": 1.7068284836964467, + "grad_norm": 1.6490213140214325, + "learning_rate": 1.1551374641803193e-06, + "loss": 0.36905592679977417, + "step": 5837 + }, + { + "epoch": 1.7071209241117122, + "grad_norm": 1.7960598101415164, + "learning_rate": 1.152882786829259e-06, + "loss": 0.5370720624923706, + "step": 5838 + }, + { + "epoch": 1.7074133645269778, + "grad_norm": 1.5874644037104577, + "learning_rate": 1.1506301774380578e-06, + "loss": 0.4535629153251648, + "step": 5839 + }, + { + "epoch": 1.7077058049422429, + "grad_norm": 1.79916689116012, + "learning_rate": 1.1483796365332455e-06, + "loss": 0.5456075668334961, + "step": 5840 + }, + { + "epoch": 1.7079982453575084, + "grad_norm": 
1.4286640626946725, + "learning_rate": 1.1461311646408756e-06, + "loss": 0.5884554386138916, + "step": 5841 + }, + { + "epoch": 1.7082906857727738, + "grad_norm": 1.6397329737807809, + "learning_rate": 1.1438847622865125e-06, + "loss": 0.605168879032135, + "step": 5842 + }, + { + "epoch": 1.708583126188039, + "grad_norm": 1.5178839829112376, + "learning_rate": 1.14164042999524e-06, + "loss": 0.43739163875579834, + "step": 5843 + }, + { + "epoch": 1.7088755666033046, + "grad_norm": 1.46949260133067, + "learning_rate": 1.1393981682916578e-06, + "loss": 0.4508574306964874, + "step": 5844 + }, + { + "epoch": 1.70916800701857, + "grad_norm": 1.822138537734332, + "learning_rate": 1.1371579776998798e-06, + "loss": 0.5918034315109253, + "step": 5845 + }, + { + "epoch": 1.7094604474338353, + "grad_norm": 2.0746386130567873, + "learning_rate": 1.1349198587435373e-06, + "loss": 0.5668582320213318, + "step": 5846 + }, + { + "epoch": 1.7097528878491008, + "grad_norm": 1.9282537614980426, + "learning_rate": 1.1326838119457784e-06, + "loss": 0.6374846696853638, + "step": 5847 + }, + { + "epoch": 1.7100453282643662, + "grad_norm": 1.8903891011788552, + "learning_rate": 1.130449837829264e-06, + "loss": 0.5074985027313232, + "step": 5848 + }, + { + "epoch": 1.7103377686796315, + "grad_norm": 1.5190057242638555, + "learning_rate": 1.1282179369161717e-06, + "loss": 0.5012484788894653, + "step": 5849 + }, + { + "epoch": 1.710630209094897, + "grad_norm": 1.632090745734556, + "learning_rate": 1.1259881097281977e-06, + "loss": 0.4417869746685028, + "step": 5850 + }, + { + "epoch": 1.7109226495101622, + "grad_norm": 1.8294483106085377, + "learning_rate": 1.1237603567865452e-06, + "loss": 0.6032637357711792, + "step": 5851 + }, + { + "epoch": 1.7112150899254277, + "grad_norm": 1.688609377749929, + "learning_rate": 1.121534678611942e-06, + "loss": 0.5790234804153442, + "step": 5852 + }, + { + "epoch": 1.711507530340693, + "grad_norm": 1.591889646615377, + "learning_rate": 
1.1193110757246251e-06, + "loss": 0.5436397194862366, + "step": 5853 + }, + { + "epoch": 1.7117999707559584, + "grad_norm": 1.7117652881589365, + "learning_rate": 1.11708954864435e-06, + "loss": 0.5088083744049072, + "step": 5854 + }, + { + "epoch": 1.712092411171224, + "grad_norm": 1.889980799223528, + "learning_rate": 1.1148700978903826e-06, + "loss": 0.5907719135284424, + "step": 5855 + }, + { + "epoch": 1.7123848515864892, + "grad_norm": 1.567722853126729, + "learning_rate": 1.1126527239815078e-06, + "loss": 0.4744384288787842, + "step": 5856 + }, + { + "epoch": 1.7126772920017546, + "grad_norm": 1.9312865174889629, + "learning_rate": 1.110437427436023e-06, + "loss": 0.6644346714019775, + "step": 5857 + }, + { + "epoch": 1.71296973241702, + "grad_norm": 1.6765623554239069, + "learning_rate": 1.10822420877174e-06, + "loss": 0.4926042854785919, + "step": 5858 + }, + { + "epoch": 1.7132621728322854, + "grad_norm": 1.6722485452227753, + "learning_rate": 1.1060130685059845e-06, + "loss": 0.47684335708618164, + "step": 5859 + }, + { + "epoch": 1.7135546132475508, + "grad_norm": 1.8519560247307543, + "learning_rate": 1.1038040071555988e-06, + "loss": 0.5574014186859131, + "step": 5860 + }, + { + "epoch": 1.7138470536628163, + "grad_norm": 1.737717748392033, + "learning_rate": 1.101597025236939e-06, + "loss": 0.6276485323905945, + "step": 5861 + }, + { + "epoch": 1.7141394940780816, + "grad_norm": 1.7853097232505406, + "learning_rate": 1.099392123265869e-06, + "loss": 0.558611273765564, + "step": 5862 + }, + { + "epoch": 1.714431934493347, + "grad_norm": 1.8318989515664625, + "learning_rate": 1.097189301757773e-06, + "loss": 0.5561566948890686, + "step": 5863 + }, + { + "epoch": 1.7147243749086125, + "grad_norm": 1.7772127580066208, + "learning_rate": 1.094988561227548e-06, + "loss": 0.5360273122787476, + "step": 5864 + }, + { + "epoch": 1.7150168153238776, + "grad_norm": 1.9869672499266697, + "learning_rate": 1.0927899021896038e-06, + "loss": 0.5572026968002319, + 
"step": 5865 + }, + { + "epoch": 1.7153092557391432, + "grad_norm": 1.696631763346783, + "learning_rate": 1.0905933251578626e-06, + "loss": 0.4593105912208557, + "step": 5866 + }, + { + "epoch": 1.7156016961544085, + "grad_norm": 1.7954251083275348, + "learning_rate": 1.0883988306457627e-06, + "loss": 0.5017558336257935, + "step": 5867 + }, + { + "epoch": 1.7158941365696738, + "grad_norm": 1.6294086582619267, + "learning_rate": 1.0862064191662524e-06, + "loss": 0.4982030391693115, + "step": 5868 + }, + { + "epoch": 1.7161865769849394, + "grad_norm": 1.5832146918310879, + "learning_rate": 1.0840160912317943e-06, + "loss": 0.5563114881515503, + "step": 5869 + }, + { + "epoch": 1.7164790174002047, + "grad_norm": 1.6522408781609719, + "learning_rate": 1.0818278473543652e-06, + "loss": 0.4817348122596741, + "step": 5870 + }, + { + "epoch": 1.71677145781547, + "grad_norm": 1.6923338540004815, + "learning_rate": 1.079641688045453e-06, + "loss": 0.47907108068466187, + "step": 5871 + }, + { + "epoch": 1.7170638982307356, + "grad_norm": 1.985278987997586, + "learning_rate": 1.0774576138160596e-06, + "loss": 0.6158252954483032, + "step": 5872 + }, + { + "epoch": 1.717356338646001, + "grad_norm": 1.712800633970283, + "learning_rate": 1.0752756251767015e-06, + "loss": 0.5336505174636841, + "step": 5873 + }, + { + "epoch": 1.7176487790612662, + "grad_norm": 1.6889119428738892, + "learning_rate": 1.0730957226374006e-06, + "loss": 0.5806115865707397, + "step": 5874 + }, + { + "epoch": 1.7179412194765318, + "grad_norm": 1.7163109676688793, + "learning_rate": 1.070917906707698e-06, + "loss": 0.3701411485671997, + "step": 5875 + }, + { + "epoch": 1.7182336598917969, + "grad_norm": 1.5519162070562529, + "learning_rate": 1.0687421778966445e-06, + "loss": 0.5779517292976379, + "step": 5876 + }, + { + "epoch": 1.7185261003070624, + "grad_norm": 1.5444011974555767, + "learning_rate": 1.0665685367128041e-06, + "loss": 0.43965232372283936, + "step": 5877 + }, + { + "epoch": 
1.718818540722328, + "grad_norm": 1.7154722678485648, + "learning_rate": 1.064396983664253e-06, + "loss": 0.4768058657646179, + "step": 5878 + }, + { + "epoch": 1.719110981137593, + "grad_norm": 1.6286437020829267, + "learning_rate": 1.0622275192585773e-06, + "loss": 0.5331600904464722, + "step": 5879 + }, + { + "epoch": 1.7194034215528586, + "grad_norm": 1.6603687606186237, + "learning_rate": 1.0600601440028758e-06, + "loss": 0.5495625734329224, + "step": 5880 + }, + { + "epoch": 1.719695861968124, + "grad_norm": 1.6915455937474744, + "learning_rate": 1.0578948584037608e-06, + "loss": 0.4244312345981598, + "step": 5881 + }, + { + "epoch": 1.7199883023833893, + "grad_norm": 1.7562786480710206, + "learning_rate": 1.0557316629673531e-06, + "loss": 0.4618447721004486, + "step": 5882 + }, + { + "epoch": 1.7202807427986548, + "grad_norm": 1.3835850144546908, + "learning_rate": 1.0535705581992873e-06, + "loss": 0.4226785898208618, + "step": 5883 + }, + { + "epoch": 1.7205731832139202, + "grad_norm": 1.8373576265806915, + "learning_rate": 1.0514115446047101e-06, + "loss": 0.5813404321670532, + "step": 5884 + }, + { + "epoch": 1.7208656236291855, + "grad_norm": 1.774672318962678, + "learning_rate": 1.0492546226882738e-06, + "loss": 0.6700260639190674, + "step": 5885 + }, + { + "epoch": 1.721158064044451, + "grad_norm": 1.8100136828076652, + "learning_rate": 1.0470997929541494e-06, + "loss": 0.6024131178855896, + "step": 5886 + }, + { + "epoch": 1.7214505044597164, + "grad_norm": 1.8033126749427817, + "learning_rate": 1.0449470559060125e-06, + "loss": 0.6015123724937439, + "step": 5887 + }, + { + "epoch": 1.7217429448749817, + "grad_norm": 1.714487906410119, + "learning_rate": 1.0427964120470534e-06, + "loss": 0.6631267070770264, + "step": 5888 + }, + { + "epoch": 1.7220353852902472, + "grad_norm": 1.7445362923992234, + "learning_rate": 1.0406478618799731e-06, + "loss": 0.5267488956451416, + "step": 5889 + }, + { + "epoch": 1.7223278257055123, + "grad_norm": 
1.6683876570881706, + "learning_rate": 1.038501405906982e-06, + "loss": 0.5190263986587524, + "step": 5890 + }, + { + "epoch": 1.722620266120778, + "grad_norm": 1.6678272928853268, + "learning_rate": 1.0363570446297999e-06, + "loss": 0.5253189206123352, + "step": 5891 + }, + { + "epoch": 1.7229127065360432, + "grad_norm": 1.6306770585402846, + "learning_rate": 1.0342147785496581e-06, + "loss": 0.5271278619766235, + "step": 5892 + }, + { + "epoch": 1.7232051469513086, + "grad_norm": 1.7373442044536598, + "learning_rate": 1.0320746081672994e-06, + "loss": 0.5284109711647034, + "step": 5893 + }, + { + "epoch": 1.723497587366574, + "grad_norm": 1.764049872395232, + "learning_rate": 1.0299365339829747e-06, + "loss": 0.6119050979614258, + "step": 5894 + }, + { + "epoch": 1.7237900277818394, + "grad_norm": 1.583925980020329, + "learning_rate": 1.0278005564964488e-06, + "loss": 0.42297711968421936, + "step": 5895 + }, + { + "epoch": 1.7240824681971048, + "grad_norm": 1.7105013452989373, + "learning_rate": 1.02566667620699e-06, + "loss": 0.5923792123794556, + "step": 5896 + }, + { + "epoch": 1.7243749086123703, + "grad_norm": 1.6831938137571334, + "learning_rate": 1.023534893613377e-06, + "loss": 0.4999189078807831, + "step": 5897 + }, + { + "epoch": 1.7246673490276356, + "grad_norm": 1.6907699986400666, + "learning_rate": 1.0214052092139082e-06, + "loss": 0.49083560705184937, + "step": 5898 + }, + { + "epoch": 1.724959789442901, + "grad_norm": 3.9391609144586437, + "learning_rate": 1.0192776235063795e-06, + "loss": 0.6001632213592529, + "step": 5899 + }, + { + "epoch": 1.7252522298581665, + "grad_norm": 1.473933103211581, + "learning_rate": 1.0171521369881044e-06, + "loss": 0.4897228479385376, + "step": 5900 + }, + { + "epoch": 1.7255446702734318, + "grad_norm": 1.6762179044603425, + "learning_rate": 1.0150287501558997e-06, + "loss": 0.44784292578697205, + "step": 5901 + }, + { + "epoch": 1.7258371106886972, + "grad_norm": 1.542625612657722, + "learning_rate": 
1.0129074635060943e-06, + "loss": 0.46105432510375977, + "step": 5902 + }, + { + "epoch": 1.7261295511039627, + "grad_norm": 1.9028079699425045, + "learning_rate": 1.0107882775345278e-06, + "loss": 0.5805546045303345, + "step": 5903 + }, + { + "epoch": 1.7264219915192278, + "grad_norm": 1.719859761694945, + "learning_rate": 1.0086711927365488e-06, + "loss": 0.560761570930481, + "step": 5904 + }, + { + "epoch": 1.7267144319344934, + "grad_norm": 1.475103420661766, + "learning_rate": 1.006556209607007e-06, + "loss": 0.533979058265686, + "step": 5905 + }, + { + "epoch": 1.7270068723497587, + "grad_norm": 1.7039894712110264, + "learning_rate": 1.004443328640271e-06, + "loss": 0.5742807984352112, + "step": 5906 + }, + { + "epoch": 1.727299312765024, + "grad_norm": 1.9394101910903232, + "learning_rate": 1.0023325503302129e-06, + "loss": 0.5617523789405823, + "step": 5907 + }, + { + "epoch": 1.7275917531802896, + "grad_norm": 1.620137966655423, + "learning_rate": 1.0002238751702143e-06, + "loss": 0.45596855878829956, + "step": 5908 + }, + { + "epoch": 1.727884193595555, + "grad_norm": 1.523715810181856, + "learning_rate": 9.981173036531655e-07, + "loss": 0.4917908012866974, + "step": 5909 + }, + { + "epoch": 1.7281766340108202, + "grad_norm": 1.8089323806924067, + "learning_rate": 9.960128362714637e-07, + "loss": 0.6204911470413208, + "step": 5910 + }, + { + "epoch": 1.7284690744260858, + "grad_norm": 1.615074466715287, + "learning_rate": 9.93910473517018e-07, + "loss": 0.47288352251052856, + "step": 5911 + }, + { + "epoch": 1.728761514841351, + "grad_norm": 1.9414111913595387, + "learning_rate": 9.918102158812404e-07, + "loss": 0.48668670654296875, + "step": 5912 + }, + { + "epoch": 1.7290539552566164, + "grad_norm": 1.723740686191889, + "learning_rate": 9.89712063855054e-07, + "loss": 0.43311381340026855, + "step": 5913 + }, + { + "epoch": 1.729346395671882, + "grad_norm": 1.748275288399291, + "learning_rate": 9.876160179288886e-07, + "loss": 0.5066087245941162, + 
"step": 5914 + }, + { + "epoch": 1.729638836087147, + "grad_norm": 1.6099318260759374, + "learning_rate": 9.855220785926856e-07, + "loss": 0.6022528409957886, + "step": 5915 + }, + { + "epoch": 1.7299312765024126, + "grad_norm": 1.6809686879748886, + "learning_rate": 9.834302463358858e-07, + "loss": 0.5288707613945007, + "step": 5916 + }, + { + "epoch": 1.7302237169176782, + "grad_norm": 1.7087060764928856, + "learning_rate": 9.813405216474436e-07, + "loss": 0.6150302290916443, + "step": 5917 + }, + { + "epoch": 1.7305161573329433, + "grad_norm": 1.7234099983807605, + "learning_rate": 9.792529050158218e-07, + "loss": 0.5431156158447266, + "step": 5918 + }, + { + "epoch": 1.7308085977482088, + "grad_norm": 1.7871856102017598, + "learning_rate": 9.771673969289851e-07, + "loss": 0.6844080686569214, + "step": 5919 + }, + { + "epoch": 1.7311010381634742, + "grad_norm": 1.9483136158091563, + "learning_rate": 9.750839978744098e-07, + "loss": 0.4778372049331665, + "step": 5920 + }, + { + "epoch": 1.7313934785787395, + "grad_norm": 1.803034120095624, + "learning_rate": 9.73002708339077e-07, + "loss": 0.6913809776306152, + "step": 5921 + }, + { + "epoch": 1.731685918994005, + "grad_norm": 1.5934425203745812, + "learning_rate": 9.709235288094765e-07, + "loss": 0.6289864778518677, + "step": 5922 + }, + { + "epoch": 1.7319783594092704, + "grad_norm": 1.7803434049533318, + "learning_rate": 9.68846459771604e-07, + "loss": 0.4735794961452484, + "step": 5923 + }, + { + "epoch": 1.7322707998245357, + "grad_norm": 1.7329775832839742, + "learning_rate": 9.667715017109614e-07, + "loss": 0.53554767370224, + "step": 5924 + }, + { + "epoch": 1.7325632402398012, + "grad_norm": 1.9726609824515038, + "learning_rate": 9.64698655112557e-07, + "loss": 0.5118460655212402, + "step": 5925 + }, + { + "epoch": 1.7328556806550666, + "grad_norm": 1.819236864509276, + "learning_rate": 9.626279204609079e-07, + "loss": 0.5739814043045044, + "step": 5926 + }, + { + "epoch": 1.733148121070332, + 
"grad_norm": 1.6784156679062403, + "learning_rate": 9.605592982400325e-07, + "loss": 0.5716123580932617, + "step": 5927 + }, + { + "epoch": 1.7334405614855974, + "grad_norm": 1.7916971306174196, + "learning_rate": 9.584927889334605e-07, + "loss": 0.5091898441314697, + "step": 5928 + }, + { + "epoch": 1.7337330019008625, + "grad_norm": 1.6267503374739263, + "learning_rate": 9.564283930242258e-07, + "loss": 0.46946650743484497, + "step": 5929 + }, + { + "epoch": 1.734025442316128, + "grad_norm": 1.6625890698419732, + "learning_rate": 9.543661109948688e-07, + "loss": 0.6238217353820801, + "step": 5930 + }, + { + "epoch": 1.7343178827313934, + "grad_norm": 1.8870256552743607, + "learning_rate": 9.52305943327434e-07, + "loss": 0.7464175224304199, + "step": 5931 + }, + { + "epoch": 1.7346103231466588, + "grad_norm": 2.417177332317345, + "learning_rate": 9.502478905034751e-07, + "loss": 0.6064578294754028, + "step": 5932 + }, + { + "epoch": 1.7349027635619243, + "grad_norm": 1.5711166860907437, + "learning_rate": 9.481919530040484e-07, + "loss": 0.5703303813934326, + "step": 5933 + }, + { + "epoch": 1.7351952039771896, + "grad_norm": 1.773413757718004, + "learning_rate": 9.461381313097162e-07, + "loss": 0.5570278167724609, + "step": 5934 + }, + { + "epoch": 1.735487644392455, + "grad_norm": 1.795987369299435, + "learning_rate": 9.440864259005477e-07, + "loss": 0.54972243309021, + "step": 5935 + }, + { + "epoch": 1.7357800848077205, + "grad_norm": 1.6140356285907533, + "learning_rate": 9.420368372561161e-07, + "loss": 0.5670010447502136, + "step": 5936 + }, + { + "epoch": 1.7360725252229858, + "grad_norm": 1.642113144044588, + "learning_rate": 9.399893658555026e-07, + "loss": 0.5306927561759949, + "step": 5937 + }, + { + "epoch": 1.7363649656382512, + "grad_norm": 1.5565759572639428, + "learning_rate": 9.379440121772876e-07, + "loss": 0.5080308318138123, + "step": 5938 + }, + { + "epoch": 1.7366574060535167, + "grad_norm": 1.5576076668453387, + "learning_rate": 
9.359007766995609e-07, + "loss": 0.5444519519805908, + "step": 5939 + }, + { + "epoch": 1.736949846468782, + "grad_norm": 1.737287044912212, + "learning_rate": 9.338596598999172e-07, + "loss": 0.5353262424468994, + "step": 5940 + }, + { + "epoch": 1.7372422868840474, + "grad_norm": 1.5405646785157867, + "learning_rate": 9.318206622554549e-07, + "loss": 0.4766794443130493, + "step": 5941 + }, + { + "epoch": 1.737534727299313, + "grad_norm": 1.5818937282065444, + "learning_rate": 9.29783784242777e-07, + "loss": 0.4913482666015625, + "step": 5942 + }, + { + "epoch": 1.737827167714578, + "grad_norm": 1.5030657740252151, + "learning_rate": 9.277490263379918e-07, + "loss": 0.47637009620666504, + "step": 5943 + }, + { + "epoch": 1.7381196081298436, + "grad_norm": 1.8131560819786492, + "learning_rate": 9.25716389016712e-07, + "loss": 0.5122126936912537, + "step": 5944 + }, + { + "epoch": 1.738412048545109, + "grad_norm": 1.51801570238093, + "learning_rate": 9.236858727540543e-07, + "loss": 0.5263532400131226, + "step": 5945 + }, + { + "epoch": 1.7387044889603742, + "grad_norm": 1.607505719698849, + "learning_rate": 9.216574780246379e-07, + "loss": 0.5214182734489441, + "step": 5946 + }, + { + "epoch": 1.7389969293756398, + "grad_norm": 1.877073258708154, + "learning_rate": 9.196312053025891e-07, + "loss": 0.5955429077148438, + "step": 5947 + }, + { + "epoch": 1.739289369790905, + "grad_norm": 1.6543213511410424, + "learning_rate": 9.176070550615379e-07, + "loss": 0.4809807538986206, + "step": 5948 + }, + { + "epoch": 1.7395818102061704, + "grad_norm": 1.882804975326707, + "learning_rate": 9.155850277746148e-07, + "loss": 0.4769969582557678, + "step": 5949 + }, + { + "epoch": 1.739874250621436, + "grad_norm": 2.4545379886365954, + "learning_rate": 9.135651239144561e-07, + "loss": 0.48527538776397705, + "step": 5950 + }, + { + "epoch": 1.7401666910367013, + "grad_norm": 1.696389032166004, + "learning_rate": 9.115473439532041e-07, + "loss": 0.6703393459320068, + "step": 5951 
+ }, + { + "epoch": 1.7404591314519666, + "grad_norm": 1.807366721076005, + "learning_rate": 9.095316883625016e-07, + "loss": 0.5742951035499573, + "step": 5952 + }, + { + "epoch": 1.7407515718672322, + "grad_norm": 1.9552666950039521, + "learning_rate": 9.075181576134961e-07, + "loss": 0.6285614967346191, + "step": 5953 + }, + { + "epoch": 1.7410440122824973, + "grad_norm": 1.6961990538831806, + "learning_rate": 9.055067521768379e-07, + "loss": 0.5872488021850586, + "step": 5954 + }, + { + "epoch": 1.7413364526977628, + "grad_norm": 1.6900638369260592, + "learning_rate": 9.034974725226808e-07, + "loss": 0.5483776330947876, + "step": 5955 + }, + { + "epoch": 1.7416288931130284, + "grad_norm": 1.838843211951185, + "learning_rate": 9.014903191206825e-07, + "loss": 0.4913061261177063, + "step": 5956 + }, + { + "epoch": 1.7419213335282935, + "grad_norm": 1.6413412279440867, + "learning_rate": 8.994852924400022e-07, + "loss": 0.5431212186813354, + "step": 5957 + }, + { + "epoch": 1.742213773943559, + "grad_norm": 1.735940615294129, + "learning_rate": 8.974823929493015e-07, + "loss": 0.5391141176223755, + "step": 5958 + }, + { + "epoch": 1.7425062143588244, + "grad_norm": 1.455007956070738, + "learning_rate": 8.954816211167483e-07, + "loss": 0.48980265855789185, + "step": 5959 + }, + { + "epoch": 1.7427986547740897, + "grad_norm": 1.6465722416646151, + "learning_rate": 8.934829774100118e-07, + "loss": 0.6747336387634277, + "step": 5960 + }, + { + "epoch": 1.7430910951893552, + "grad_norm": 2.048914745001018, + "learning_rate": 8.914864622962582e-07, + "loss": 0.4911282956600189, + "step": 5961 + }, + { + "epoch": 1.7433835356046206, + "grad_norm": 1.6999465895023511, + "learning_rate": 8.894920762421644e-07, + "loss": 0.5863965153694153, + "step": 5962 + }, + { + "epoch": 1.7436759760198859, + "grad_norm": 1.6964011957158196, + "learning_rate": 8.87499819713904e-07, + "loss": 0.5413792729377747, + "step": 5963 + }, + { + "epoch": 1.7439684164351514, + "grad_norm": 
1.8650590121272839, + "learning_rate": 8.855096931771568e-07, + "loss": 0.5288723707199097, + "step": 5964 + }, + { + "epoch": 1.7442608568504168, + "grad_norm": 1.72339918808855, + "learning_rate": 8.835216970971006e-07, + "loss": 0.5129783749580383, + "step": 5965 + }, + { + "epoch": 1.744553297265682, + "grad_norm": 1.7489856693904517, + "learning_rate": 8.815358319384193e-07, + "loss": 0.5606918334960938, + "step": 5966 + }, + { + "epoch": 1.7448457376809476, + "grad_norm": 1.9036006380739827, + "learning_rate": 8.79552098165296e-07, + "loss": 0.6277288198471069, + "step": 5967 + }, + { + "epoch": 1.7451381780962127, + "grad_norm": 1.7432749923566282, + "learning_rate": 8.775704962414167e-07, + "loss": 0.5390176773071289, + "step": 5968 + }, + { + "epoch": 1.7454306185114783, + "grad_norm": 1.8645943677337757, + "learning_rate": 8.755910266299684e-07, + "loss": 0.680462121963501, + "step": 5969 + }, + { + "epoch": 1.7457230589267436, + "grad_norm": 1.678166381653785, + "learning_rate": 8.736136897936398e-07, + "loss": 0.5134397149085999, + "step": 5970 + }, + { + "epoch": 1.746015499342009, + "grad_norm": 1.796274905651791, + "learning_rate": 8.716384861946248e-07, + "loss": 0.6280460357666016, + "step": 5971 + }, + { + "epoch": 1.7463079397572745, + "grad_norm": 1.8396010080181593, + "learning_rate": 8.696654162946094e-07, + "loss": 0.5425370931625366, + "step": 5972 + }, + { + "epoch": 1.7466003801725398, + "grad_norm": 1.7657169836698012, + "learning_rate": 8.676944805547882e-07, + "loss": 0.5831055045127869, + "step": 5973 + }, + { + "epoch": 1.7468928205878052, + "grad_norm": 1.3865571188404813, + "learning_rate": 8.657256794358592e-07, + "loss": 0.4615570306777954, + "step": 5974 + }, + { + "epoch": 1.7471852610030707, + "grad_norm": 1.6631260131171646, + "learning_rate": 8.637590133980145e-07, + "loss": 0.5727440118789673, + "step": 5975 + }, + { + "epoch": 1.747477701418336, + "grad_norm": 1.6981377401436724, + "learning_rate": 8.617944829009517e-07, + 
"loss": 0.5652801990509033, + "step": 5976 + }, + { + "epoch": 1.7477701418336014, + "grad_norm": 1.6704888560345945, + "learning_rate": 8.59832088403868e-07, + "loss": 0.42455562949180603, + "step": 5977 + }, + { + "epoch": 1.748062582248867, + "grad_norm": 1.8565352683598422, + "learning_rate": 8.578718303654588e-07, + "loss": 0.526951789855957, + "step": 5978 + }, + { + "epoch": 1.7483550226641322, + "grad_norm": 1.5113931171346078, + "learning_rate": 8.559137092439252e-07, + "loss": 0.45547354221343994, + "step": 5979 + }, + { + "epoch": 1.7486474630793976, + "grad_norm": 1.843493314178274, + "learning_rate": 8.539577254969667e-07, + "loss": 0.5470790863037109, + "step": 5980 + }, + { + "epoch": 1.748939903494663, + "grad_norm": 1.6766357010415907, + "learning_rate": 8.520038795817798e-07, + "loss": 0.5608032941818237, + "step": 5981 + }, + { + "epoch": 1.7492323439099282, + "grad_norm": 1.7251948475523264, + "learning_rate": 8.500521719550648e-07, + "loss": 0.5243809223175049, + "step": 5982 + }, + { + "epoch": 1.7495247843251938, + "grad_norm": 1.7843504248813373, + "learning_rate": 8.481026030730222e-07, + "loss": 0.5040958523750305, + "step": 5983 + }, + { + "epoch": 1.749817224740459, + "grad_norm": 1.9016982717852353, + "learning_rate": 8.461551733913509e-07, + "loss": 0.5026291012763977, + "step": 5984 + }, + { + "epoch": 1.7501096651557244, + "grad_norm": 1.578287817505696, + "learning_rate": 8.442098833652523e-07, + "loss": 0.5273059010505676, + "step": 5985 + }, + { + "epoch": 1.75040210557099, + "grad_norm": 1.7872787423587175, + "learning_rate": 8.42266733449425e-07, + "loss": 0.5811910629272461, + "step": 5986 + }, + { + "epoch": 1.7506945459862553, + "grad_norm": 1.9383664928260165, + "learning_rate": 8.4032572409807e-07, + "loss": 0.6078274250030518, + "step": 5987 + }, + { + "epoch": 1.7509869864015206, + "grad_norm": 1.8956639494069205, + "learning_rate": 8.383868557648833e-07, + "loss": 0.5214031338691711, + "step": 5988 + }, + { + "epoch": 
1.7512794268167862, + "grad_norm": 1.8686899180431094, + "learning_rate": 8.364501289030669e-07, + "loss": 0.5464918613433838, + "step": 5989 + }, + { + "epoch": 1.7515718672320515, + "grad_norm": 1.9577387999849984, + "learning_rate": 8.345155439653175e-07, + "loss": 0.48296278715133667, + "step": 5990 + }, + { + "epoch": 1.7518643076473168, + "grad_norm": 1.6097156536359971, + "learning_rate": 8.325831014038344e-07, + "loss": 0.5441919565200806, + "step": 5991 + }, + { + "epoch": 1.7521567480625824, + "grad_norm": 1.524249865256617, + "learning_rate": 8.306528016703097e-07, + "loss": 0.4623905420303345, + "step": 5992 + }, + { + "epoch": 1.7524491884778475, + "grad_norm": 1.7850630013083288, + "learning_rate": 8.287246452159437e-07, + "loss": 0.5671495795249939, + "step": 5993 + }, + { + "epoch": 1.752741628893113, + "grad_norm": 1.7371249179959158, + "learning_rate": 8.267986324914278e-07, + "loss": 0.5400685667991638, + "step": 5994 + }, + { + "epoch": 1.7530340693083786, + "grad_norm": 1.7239850907759944, + "learning_rate": 8.24874763946959e-07, + "loss": 0.4362148642539978, + "step": 5995 + }, + { + "epoch": 1.7533265097236437, + "grad_norm": 1.7548276097653166, + "learning_rate": 8.229530400322283e-07, + "loss": 0.554877519607544, + "step": 5996 + }, + { + "epoch": 1.7536189501389092, + "grad_norm": 1.6421753593232726, + "learning_rate": 8.210334611964266e-07, + "loss": 0.5239896774291992, + "step": 5997 + }, + { + "epoch": 1.7539113905541746, + "grad_norm": 1.9442998633220852, + "learning_rate": 8.191160278882438e-07, + "loss": 0.4729669988155365, + "step": 5998 + }, + { + "epoch": 1.7542038309694399, + "grad_norm": 1.5789777380903094, + "learning_rate": 8.172007405558702e-07, + "loss": 0.5449322462081909, + "step": 5999 + }, + { + "epoch": 1.7544962713847054, + "grad_norm": 1.6329056939447448, + "learning_rate": 8.1528759964699e-07, + "loss": 0.5422194600105286, + "step": 6000 + }, + { + "epoch": 1.7547887117999708, + "grad_norm": 1.4174806038648198, + 
"learning_rate": 8.1337660560879e-07, + "loss": 0.3854302763938904, + "step": 6001 + }, + { + "epoch": 1.755081152215236, + "grad_norm": 1.7209418471597917, + "learning_rate": 8.114677588879549e-07, + "loss": 0.4678449034690857, + "step": 6002 + }, + { + "epoch": 1.7553735926305016, + "grad_norm": 1.5464176931987315, + "learning_rate": 8.095610599306614e-07, + "loss": 0.5462471842765808, + "step": 6003 + }, + { + "epoch": 1.755666033045767, + "grad_norm": 2.204727836372247, + "learning_rate": 8.076565091825916e-07, + "loss": 0.6314511299133301, + "step": 6004 + }, + { + "epoch": 1.7559584734610323, + "grad_norm": 1.5748030564701405, + "learning_rate": 8.057541070889229e-07, + "loss": 0.6373077630996704, + "step": 6005 + }, + { + "epoch": 1.7562509138762978, + "grad_norm": 1.3531361411828478, + "learning_rate": 8.038538540943297e-07, + "loss": 0.5279273986816406, + "step": 6006 + }, + { + "epoch": 1.756543354291563, + "grad_norm": 1.8888532901747122, + "learning_rate": 8.019557506429843e-07, + "loss": 0.5645443201065063, + "step": 6007 + }, + { + "epoch": 1.7568357947068285, + "grad_norm": 1.776791412383221, + "learning_rate": 8.000597971785573e-07, + "loss": 0.552385151386261, + "step": 6008 + }, + { + "epoch": 1.757128235122094, + "grad_norm": 1.8476397874412334, + "learning_rate": 7.981659941442154e-07, + "loss": 0.5790541172027588, + "step": 6009 + }, + { + "epoch": 1.7574206755373591, + "grad_norm": 1.8909444917759248, + "learning_rate": 7.962743419826247e-07, + "loss": 0.550809383392334, + "step": 6010 + }, + { + "epoch": 1.7577131159526247, + "grad_norm": 1.845124979293289, + "learning_rate": 7.943848411359479e-07, + "loss": 0.4659814238548279, + "step": 6011 + }, + { + "epoch": 1.75800555636789, + "grad_norm": 1.8856668900422473, + "learning_rate": 7.924974920458428e-07, + "loss": 0.5099040269851685, + "step": 6012 + }, + { + "epoch": 1.7582979967831553, + "grad_norm": 1.8389637809818669, + "learning_rate": 7.906122951534678e-07, + "loss": 
0.4819038510322571, + "step": 6013 + }, + { + "epoch": 1.758590437198421, + "grad_norm": 1.6198962208116707, + "learning_rate": 7.887292508994737e-07, + "loss": 0.4033840298652649, + "step": 6014 + }, + { + "epoch": 1.7588828776136862, + "grad_norm": 2.0694906070649397, + "learning_rate": 7.868483597240117e-07, + "loss": 0.6316418647766113, + "step": 6015 + }, + { + "epoch": 1.7591753180289516, + "grad_norm": 1.574018695954754, + "learning_rate": 7.84969622066728e-07, + "loss": 0.5141040682792664, + "step": 6016 + }, + { + "epoch": 1.759467758444217, + "grad_norm": 1.625714616568423, + "learning_rate": 7.830930383667668e-07, + "loss": 0.44808077812194824, + "step": 6017 + }, + { + "epoch": 1.7597601988594824, + "grad_norm": 1.6060850378753004, + "learning_rate": 7.812186090627694e-07, + "loss": 0.5661089420318604, + "step": 6018 + }, + { + "epoch": 1.7600526392747478, + "grad_norm": 1.553528332849771, + "learning_rate": 7.793463345928697e-07, + "loss": 0.487590491771698, + "step": 6019 + }, + { + "epoch": 1.7603450796900133, + "grad_norm": 1.9699234516767667, + "learning_rate": 7.774762153947024e-07, + "loss": 0.5775022506713867, + "step": 6020 + }, + { + "epoch": 1.7606375201052784, + "grad_norm": 1.7091892859281639, + "learning_rate": 7.756082519053965e-07, + "loss": 0.5714563131332397, + "step": 6021 + }, + { + "epoch": 1.760929960520544, + "grad_norm": 1.8764497127243964, + "learning_rate": 7.73742444561576e-07, + "loss": 0.6063593626022339, + "step": 6022 + }, + { + "epoch": 1.7612224009358093, + "grad_norm": 1.7254357706950765, + "learning_rate": 7.718787937993622e-07, + "loss": 0.48034632205963135, + "step": 6023 + }, + { + "epoch": 1.7615148413510746, + "grad_norm": 1.4591503666123415, + "learning_rate": 7.700173000543742e-07, + "loss": 0.6003588438034058, + "step": 6024 + }, + { + "epoch": 1.7618072817663402, + "grad_norm": 1.6378303717233282, + "learning_rate": 7.681579637617209e-07, + "loss": 0.42842140793800354, + "step": 6025 + }, + { + "epoch": 
1.7620997221816055, + "grad_norm": 1.502196803812996, + "learning_rate": 7.663007853560145e-07, + "loss": 0.5235859155654907, + "step": 6026 + }, + { + "epoch": 1.7623921625968708, + "grad_norm": 1.6904284507093605, + "learning_rate": 7.644457652713566e-07, + "loss": 0.47140365839004517, + "step": 6027 + }, + { + "epoch": 1.7626846030121364, + "grad_norm": 1.6014343948293654, + "learning_rate": 7.625929039413483e-07, + "loss": 0.53680419921875, + "step": 6028 + }, + { + "epoch": 1.7629770434274017, + "grad_norm": 1.6173156649426792, + "learning_rate": 7.60742201799084e-07, + "loss": 0.5280998349189758, + "step": 6029 + }, + { + "epoch": 1.763269483842667, + "grad_norm": 1.58299525140219, + "learning_rate": 7.588936592771545e-07, + "loss": 0.49653276801109314, + "step": 6030 + }, + { + "epoch": 1.7635619242579326, + "grad_norm": 1.6031325431493386, + "learning_rate": 7.570472768076464e-07, + "loss": 0.511070966720581, + "step": 6031 + }, + { + "epoch": 1.7638543646731977, + "grad_norm": 1.9173967106238505, + "learning_rate": 7.552030548221379e-07, + "loss": 0.6601030826568604, + "step": 6032 + }, + { + "epoch": 1.7641468050884632, + "grad_norm": 1.7630822043129881, + "learning_rate": 7.533609937517072e-07, + "loss": 0.6216480731964111, + "step": 6033 + }, + { + "epoch": 1.7644392455037288, + "grad_norm": 2.1444282721386765, + "learning_rate": 7.515210940269224e-07, + "loss": 0.7237618565559387, + "step": 6034 + }, + { + "epoch": 1.7647316859189939, + "grad_norm": 1.9895778147794236, + "learning_rate": 7.496833560778527e-07, + "loss": 0.4979498088359833, + "step": 6035 + }, + { + "epoch": 1.7650241263342594, + "grad_norm": 1.5816967377469986, + "learning_rate": 7.478477803340533e-07, + "loss": 0.49408191442489624, + "step": 6036 + }, + { + "epoch": 1.7653165667495248, + "grad_norm": 1.8439349693338256, + "learning_rate": 7.460143672245823e-07, + "loss": 0.524259626865387, + "step": 6037 + }, + { + "epoch": 1.76560900716479, + "grad_norm": 1.8574151410796558, + 
"learning_rate": 7.441831171779878e-07, + "loss": 0.625320315361023, + "step": 6038 + }, + { + "epoch": 1.7659014475800556, + "grad_norm": 1.7217980866482836, + "learning_rate": 7.42354030622312e-07, + "loss": 0.5971028804779053, + "step": 6039 + }, + { + "epoch": 1.766193887995321, + "grad_norm": 1.5069481360511938, + "learning_rate": 7.405271079850951e-07, + "loss": 0.48935002088546753, + "step": 6040 + }, + { + "epoch": 1.7664863284105863, + "grad_norm": 1.7616973297205794, + "learning_rate": 7.387023496933687e-07, + "loss": 0.46346336603164673, + "step": 6041 + }, + { + "epoch": 1.7667787688258518, + "grad_norm": 1.5425066644175864, + "learning_rate": 7.368797561736574e-07, + "loss": 0.5135314464569092, + "step": 6042 + }, + { + "epoch": 1.7670712092411172, + "grad_norm": 1.7938719309176694, + "learning_rate": 7.350593278519824e-07, + "loss": 0.45815128087997437, + "step": 6043 + }, + { + "epoch": 1.7673636496563825, + "grad_norm": 1.8253657375894647, + "learning_rate": 7.332410651538591e-07, + "loss": 0.5663015246391296, + "step": 6044 + }, + { + "epoch": 1.767656090071648, + "grad_norm": 1.6737365706300193, + "learning_rate": 7.314249685042929e-07, + "loss": 0.5323490500450134, + "step": 6045 + }, + { + "epoch": 1.7679485304869131, + "grad_norm": 1.8380863614801877, + "learning_rate": 7.296110383277866e-07, + "loss": 0.5489768981933594, + "step": 6046 + }, + { + "epoch": 1.7682409709021787, + "grad_norm": 1.867533811207324, + "learning_rate": 7.277992750483364e-07, + "loss": 0.5951086282730103, + "step": 6047 + }, + { + "epoch": 1.7685334113174442, + "grad_norm": 1.6688539257267474, + "learning_rate": 7.259896790894271e-07, + "loss": 0.48228102922439575, + "step": 6048 + }, + { + "epoch": 1.7688258517327093, + "grad_norm": 1.7579049817410466, + "learning_rate": 7.241822508740448e-07, + "loss": 0.6318891644477844, + "step": 6049 + }, + { + "epoch": 1.769118292147975, + "grad_norm": 1.967894881109258, + "learning_rate": 7.223769908246636e-07, + "loss": 
0.4966656267642975, + "step": 6050 + }, + { + "epoch": 1.7694107325632402, + "grad_norm": 1.7465352091582635, + "learning_rate": 7.205738993632516e-07, + "loss": 0.5645290613174438, + "step": 6051 + }, + { + "epoch": 1.7697031729785055, + "grad_norm": 1.8324400656837103, + "learning_rate": 7.187729769112717e-07, + "loss": 0.560075044631958, + "step": 6052 + }, + { + "epoch": 1.769995613393771, + "grad_norm": 1.658346896913261, + "learning_rate": 7.169742238896771e-07, + "loss": 0.6375163793563843, + "step": 6053 + }, + { + "epoch": 1.7702880538090364, + "grad_norm": 1.9991114191844357, + "learning_rate": 7.15177640718916e-07, + "loss": 0.5620392560958862, + "step": 6054 + }, + { + "epoch": 1.7705804942243017, + "grad_norm": 1.7885795694198106, + "learning_rate": 7.133832278189301e-07, + "loss": 0.5382653474807739, + "step": 6055 + }, + { + "epoch": 1.7708729346395673, + "grad_norm": 1.954649524899457, + "learning_rate": 7.115909856091497e-07, + "loss": 0.502597451210022, + "step": 6056 + }, + { + "epoch": 1.7711653750548326, + "grad_norm": 1.782753780230982, + "learning_rate": 7.098009145085016e-07, + "loss": 0.5876599550247192, + "step": 6057 + }, + { + "epoch": 1.771457815470098, + "grad_norm": 1.7624219528533958, + "learning_rate": 7.080130149354048e-07, + "loss": 0.5164280533790588, + "step": 6058 + }, + { + "epoch": 1.7717502558853635, + "grad_norm": 1.7004652166347358, + "learning_rate": 7.062272873077691e-07, + "loss": 0.5192137360572815, + "step": 6059 + }, + { + "epoch": 1.7720426963006286, + "grad_norm": 1.6924472823946135, + "learning_rate": 7.044437320429987e-07, + "loss": 0.5298370122909546, + "step": 6060 + }, + { + "epoch": 1.7723351367158942, + "grad_norm": 1.671988873461514, + "learning_rate": 7.026623495579876e-07, + "loss": 0.5099462270736694, + "step": 6061 + }, + { + "epoch": 1.7726275771311595, + "grad_norm": 1.8314661737989666, + "learning_rate": 7.00883140269123e-07, + "loss": 0.6061269640922546, + "step": 6062 + }, + { + "epoch": 
1.7729200175464248, + "grad_norm": 1.9189229950794147, + "learning_rate": 6.991061045922854e-07, + "loss": 0.683641254901886, + "step": 6063 + }, + { + "epoch": 1.7732124579616904, + "grad_norm": 2.089118565246571, + "learning_rate": 6.973312429428458e-07, + "loss": 0.6294830441474915, + "step": 6064 + }, + { + "epoch": 1.7735048983769557, + "grad_norm": 1.6252098698149335, + "learning_rate": 6.95558555735667e-07, + "loss": 0.40493613481521606, + "step": 6065 + }, + { + "epoch": 1.773797338792221, + "grad_norm": 1.7745752298261492, + "learning_rate": 6.93788043385103e-07, + "loss": 0.501255452632904, + "step": 6066 + }, + { + "epoch": 1.7740897792074866, + "grad_norm": 1.7883463098117711, + "learning_rate": 6.920197063050038e-07, + "loss": 0.6004104614257812, + "step": 6067 + }, + { + "epoch": 1.7743822196227519, + "grad_norm": 1.5939834110995985, + "learning_rate": 6.902535449087023e-07, + "loss": 0.48683321475982666, + "step": 6068 + }, + { + "epoch": 1.7746746600380172, + "grad_norm": 1.7279814402431617, + "learning_rate": 6.884895596090302e-07, + "loss": 0.6048111319541931, + "step": 6069 + }, + { + "epoch": 1.7749671004532828, + "grad_norm": 1.8759604993064984, + "learning_rate": 6.867277508183101e-07, + "loss": 0.5532732009887695, + "step": 6070 + }, + { + "epoch": 1.7752595408685479, + "grad_norm": 2.066556008321799, + "learning_rate": 6.849681189483515e-07, + "loss": 0.544552206993103, + "step": 6071 + }, + { + "epoch": 1.7755519812838134, + "grad_norm": 1.9161876673278242, + "learning_rate": 6.832106644104586e-07, + "loss": 0.5114158391952515, + "step": 6072 + }, + { + "epoch": 1.775844421699079, + "grad_norm": 1.6996182780694216, + "learning_rate": 6.814553876154273e-07, + "loss": 0.45777493715286255, + "step": 6073 + }, + { + "epoch": 1.776136862114344, + "grad_norm": 1.6209289540377791, + "learning_rate": 6.797022889735405e-07, + "loss": 0.5449005365371704, + "step": 6074 + }, + { + "epoch": 1.7764293025296096, + "grad_norm": 1.8749070330960134, + 
"learning_rate": 6.779513688945749e-07, + "loss": 0.6308485865592957, + "step": 6075 + }, + { + "epoch": 1.776721742944875, + "grad_norm": 1.951122544814841, + "learning_rate": 6.762026277877986e-07, + "loss": 0.5904842019081116, + "step": 6076 + }, + { + "epoch": 1.7770141833601403, + "grad_norm": 1.8358819377761475, + "learning_rate": 6.744560660619681e-07, + "loss": 0.6681115627288818, + "step": 6077 + }, + { + "epoch": 1.7773066237754058, + "grad_norm": 1.7337774705028348, + "learning_rate": 6.727116841253334e-07, + "loss": 0.5084429979324341, + "step": 6078 + }, + { + "epoch": 1.7775990641906712, + "grad_norm": 1.706737040250044, + "learning_rate": 6.709694823856305e-07, + "loss": 0.5705291032791138, + "step": 6079 + }, + { + "epoch": 1.7778915046059365, + "grad_norm": 1.541912819246542, + "learning_rate": 6.692294612500894e-07, + "loss": 0.6481744050979614, + "step": 6080 + }, + { + "epoch": 1.778183945021202, + "grad_norm": 1.5164317234096627, + "learning_rate": 6.67491621125429e-07, + "loss": 0.5236573815345764, + "step": 6081 + }, + { + "epoch": 1.7784763854364674, + "grad_norm": 1.761941770239031, + "learning_rate": 6.657559624178611e-07, + "loss": 0.5169326663017273, + "step": 6082 + }, + { + "epoch": 1.7787688258517327, + "grad_norm": 1.7653960525219785, + "learning_rate": 6.640224855330824e-07, + "loss": 0.5304254293441772, + "step": 6083 + }, + { + "epoch": 1.7790612662669982, + "grad_norm": 1.7073706399680681, + "learning_rate": 6.622911908762852e-07, + "loss": 0.457882285118103, + "step": 6084 + }, + { + "epoch": 1.7793537066822633, + "grad_norm": 1.4459810475641077, + "learning_rate": 6.605620788521472e-07, + "loss": 0.48427796363830566, + "step": 6085 + }, + { + "epoch": 1.7796461470975289, + "grad_norm": 1.7511368613506917, + "learning_rate": 6.588351498648382e-07, + "loss": 0.598512589931488, + "step": 6086 + }, + { + "epoch": 1.7799385875127944, + "grad_norm": 1.6445184894388314, + "learning_rate": 6.571104043180188e-07, + "loss": 
0.5065094232559204, + "step": 6087 + }, + { + "epoch": 1.7802310279280595, + "grad_norm": 1.7505635404599922, + "learning_rate": 6.553878426148364e-07, + "loss": 0.5493142008781433, + "step": 6088 + }, + { + "epoch": 1.780523468343325, + "grad_norm": 1.5236545905427594, + "learning_rate": 6.5366746515793e-07, + "loss": 0.40520578622817993, + "step": 6089 + }, + { + "epoch": 1.7808159087585904, + "grad_norm": 1.6562045226817075, + "learning_rate": 6.51949272349427e-07, + "loss": 0.5416547656059265, + "step": 6090 + }, + { + "epoch": 1.7811083491738557, + "grad_norm": 1.5389792406208165, + "learning_rate": 6.502332645909438e-07, + "loss": 0.4531989097595215, + "step": 6091 + }, + { + "epoch": 1.7814007895891213, + "grad_norm": 1.9811412419033423, + "learning_rate": 6.485194422835872e-07, + "loss": 0.6385304927825928, + "step": 6092 + }, + { + "epoch": 1.7816932300043866, + "grad_norm": 1.631678357707061, + "learning_rate": 6.468078058279537e-07, + "loss": 0.5503095388412476, + "step": 6093 + }, + { + "epoch": 1.781985670419652, + "grad_norm": 1.810992666384156, + "learning_rate": 6.450983556241264e-07, + "loss": 0.5184366703033447, + "step": 6094 + }, + { + "epoch": 1.7822781108349175, + "grad_norm": 1.8021498649724184, + "learning_rate": 6.433910920716813e-07, + "loss": 0.5211689472198486, + "step": 6095 + }, + { + "epoch": 1.7825705512501828, + "grad_norm": 1.5495698877916986, + "learning_rate": 6.416860155696781e-07, + "loss": 0.7357909679412842, + "step": 6096 + }, + { + "epoch": 1.7828629916654481, + "grad_norm": 1.6814949660424658, + "learning_rate": 6.399831265166689e-07, + "loss": 0.6283953189849854, + "step": 6097 + }, + { + "epoch": 1.7831554320807137, + "grad_norm": 1.7274003515879492, + "learning_rate": 6.382824253106945e-07, + "loss": 0.45040953159332275, + "step": 6098 + }, + { + "epoch": 1.7834478724959788, + "grad_norm": 1.9179221464776945, + "learning_rate": 6.365839123492834e-07, + "loss": 0.5056609511375427, + "step": 6099 + }, + { + "epoch": 
1.7837403129112444, + "grad_norm": 1.4295507016254647, + "learning_rate": 6.348875880294536e-07, + "loss": 0.4940416216850281, + "step": 6100 + }, + { + "epoch": 1.7840327533265097, + "grad_norm": 1.487738102541406, + "learning_rate": 6.33193452747708e-07, + "loss": 0.45796072483062744, + "step": 6101 + }, + { + "epoch": 1.784325193741775, + "grad_norm": 1.5314389713015535, + "learning_rate": 6.315015069000408e-07, + "loss": 0.4828432500362396, + "step": 6102 + }, + { + "epoch": 1.7846176341570406, + "grad_norm": 1.7652995666195541, + "learning_rate": 6.298117508819357e-07, + "loss": 0.5564515590667725, + "step": 6103 + }, + { + "epoch": 1.7849100745723059, + "grad_norm": 1.7672116497467336, + "learning_rate": 6.281241850883624e-07, + "loss": 0.5160977840423584, + "step": 6104 + }, + { + "epoch": 1.7852025149875712, + "grad_norm": 1.6835388368372863, + "learning_rate": 6.264388099137775e-07, + "loss": 0.585543155670166, + "step": 6105 + }, + { + "epoch": 1.7854949554028368, + "grad_norm": 1.9025389414417693, + "learning_rate": 6.247556257521303e-07, + "loss": 0.5377194881439209, + "step": 6106 + }, + { + "epoch": 1.785787395818102, + "grad_norm": 1.6124331818311004, + "learning_rate": 6.230746329968518e-07, + "loss": 0.46788060665130615, + "step": 6107 + }, + { + "epoch": 1.7860798362333674, + "grad_norm": 1.481941465563148, + "learning_rate": 6.213958320408664e-07, + "loss": 0.511722207069397, + "step": 6108 + }, + { + "epoch": 1.786372276648633, + "grad_norm": 1.7380505303184415, + "learning_rate": 6.197192232765814e-07, + "loss": 0.5609079599380493, + "step": 6109 + }, + { + "epoch": 1.786664717063898, + "grad_norm": 1.5715739237199864, + "learning_rate": 6.180448070958955e-07, + "loss": 0.47641855478286743, + "step": 6110 + }, + { + "epoch": 1.7869571574791636, + "grad_norm": 1.4072609352957208, + "learning_rate": 6.163725838901946e-07, + "loss": 0.4209919273853302, + "step": 6111 + }, + { + "epoch": 1.7872495978944292, + "grad_norm": 1.7120783337900378, + 
"learning_rate": 6.147025540503459e-07, + "loss": 0.6012829542160034, + "step": 6112 + }, + { + "epoch": 1.7875420383096943, + "grad_norm": 1.8789998564305304, + "learning_rate": 6.130347179667129e-07, + "loss": 0.6112918853759766, + "step": 6113 + }, + { + "epoch": 1.7878344787249598, + "grad_norm": 1.8641199827985835, + "learning_rate": 6.113690760291402e-07, + "loss": 0.6370030641555786, + "step": 6114 + }, + { + "epoch": 1.7881269191402251, + "grad_norm": 1.837749741108103, + "learning_rate": 6.097056286269631e-07, + "loss": 0.5385129451751709, + "step": 6115 + }, + { + "epoch": 1.7884193595554905, + "grad_norm": 1.7733960362556163, + "learning_rate": 6.080443761490007e-07, + "loss": 0.4707196354866028, + "step": 6116 + }, + { + "epoch": 1.788711799970756, + "grad_norm": 1.8302621423982353, + "learning_rate": 6.063853189835611e-07, + "loss": 0.5361602306365967, + "step": 6117 + }, + { + "epoch": 1.7890042403860213, + "grad_norm": 1.592603561791519, + "learning_rate": 6.047284575184398e-07, + "loss": 0.48841261863708496, + "step": 6118 + }, + { + "epoch": 1.7892966808012867, + "grad_norm": 1.6413123655048356, + "learning_rate": 6.030737921409169e-07, + "loss": 0.47491973638534546, + "step": 6119 + }, + { + "epoch": 1.7895891212165522, + "grad_norm": 1.608045516338794, + "learning_rate": 6.014213232377608e-07, + "loss": 0.4579542875289917, + "step": 6120 + }, + { + "epoch": 1.7898815616318176, + "grad_norm": 1.7739986275669979, + "learning_rate": 5.997710511952259e-07, + "loss": 0.4517485499382019, + "step": 6121 + }, + { + "epoch": 1.7901740020470829, + "grad_norm": 1.61243285020885, + "learning_rate": 5.981229763990559e-07, + "loss": 0.5656695365905762, + "step": 6122 + }, + { + "epoch": 1.7904664424623484, + "grad_norm": 1.8328920976142473, + "learning_rate": 5.964770992344737e-07, + "loss": 0.5000064373016357, + "step": 6123 + }, + { + "epoch": 1.7907588828776135, + "grad_norm": 1.691423776793607, + "learning_rate": 5.948334200861927e-07, + "loss": 
0.4823925495147705, + "step": 6124 + }, + { + "epoch": 1.791051323292879, + "grad_norm": 1.6081373509153076, + "learning_rate": 5.931919393384189e-07, + "loss": 0.45079779624938965, + "step": 6125 + }, + { + "epoch": 1.7913437637081446, + "grad_norm": 1.7368976771393152, + "learning_rate": 5.915526573748331e-07, + "loss": 0.5887237787246704, + "step": 6126 + }, + { + "epoch": 1.7916362041234097, + "grad_norm": 1.5326002891728705, + "learning_rate": 5.8991557457861e-07, + "loss": 0.5625102519989014, + "step": 6127 + }, + { + "epoch": 1.7919286445386753, + "grad_norm": 1.773152580661058, + "learning_rate": 5.882806913324079e-07, + "loss": 0.5290789604187012, + "step": 6128 + }, + { + "epoch": 1.7922210849539406, + "grad_norm": 1.8240731968563617, + "learning_rate": 5.86648008018369e-07, + "loss": 0.47694748640060425, + "step": 6129 + }, + { + "epoch": 1.792513525369206, + "grad_norm": 1.7480468996944738, + "learning_rate": 5.850175250181244e-07, + "loss": 0.6297628879547119, + "step": 6130 + }, + { + "epoch": 1.7928059657844715, + "grad_norm": 1.767468792446569, + "learning_rate": 5.833892427127908e-07, + "loss": 0.5748087167739868, + "step": 6131 + }, + { + "epoch": 1.7930984061997368, + "grad_norm": 2.0367130445902313, + "learning_rate": 5.817631614829666e-07, + "loss": 0.552059531211853, + "step": 6132 + }, + { + "epoch": 1.7933908466150021, + "grad_norm": 1.881082319886368, + "learning_rate": 5.801392817087392e-07, + "loss": 0.5980287790298462, + "step": 6133 + }, + { + "epoch": 1.7936832870302677, + "grad_norm": 1.7948740811393897, + "learning_rate": 5.785176037696815e-07, + "loss": 0.5682743191719055, + "step": 6134 + }, + { + "epoch": 1.793975727445533, + "grad_norm": 1.6227048981437364, + "learning_rate": 5.768981280448494e-07, + "loss": 0.6907520294189453, + "step": 6135 + }, + { + "epoch": 1.7942681678607983, + "grad_norm": 1.82613812962419, + "learning_rate": 5.752808549127875e-07, + "loss": 0.5939712524414062, + "step": 6136 + }, + { + "epoch": 
1.794560608276064, + "grad_norm": 1.961952469296216, + "learning_rate": 5.736657847515215e-07, + "loss": 0.5169910192489624, + "step": 6137 + }, + { + "epoch": 1.794853048691329, + "grad_norm": 1.7101466149490088, + "learning_rate": 5.720529179385659e-07, + "loss": 0.5795155167579651, + "step": 6138 + }, + { + "epoch": 1.7951454891065945, + "grad_norm": 1.6643593680063449, + "learning_rate": 5.704422548509181e-07, + "loss": 0.4296284317970276, + "step": 6139 + }, + { + "epoch": 1.7954379295218599, + "grad_norm": 1.780840768711558, + "learning_rate": 5.688337958650603e-07, + "loss": 0.5175303220748901, + "step": 6140 + }, + { + "epoch": 1.7957303699371252, + "grad_norm": 1.5534990300027502, + "learning_rate": 5.672275413569605e-07, + "loss": 0.49900466203689575, + "step": 6141 + }, + { + "epoch": 1.7960228103523908, + "grad_norm": 1.741229060320259, + "learning_rate": 5.65623491702072e-07, + "loss": 0.5047665238380432, + "step": 6142 + }, + { + "epoch": 1.796315250767656, + "grad_norm": 1.6004175896698871, + "learning_rate": 5.64021647275329e-07, + "loss": 0.5309686660766602, + "step": 6143 + }, + { + "epoch": 1.7966076911829214, + "grad_norm": 1.84753723892279, + "learning_rate": 5.624220084511544e-07, + "loss": 0.7270892858505249, + "step": 6144 + }, + { + "epoch": 1.796900131598187, + "grad_norm": 1.8607152469266723, + "learning_rate": 5.608245756034536e-07, + "loss": 0.515272319316864, + "step": 6145 + }, + { + "epoch": 1.7971925720134523, + "grad_norm": 1.5111910050436628, + "learning_rate": 5.592293491056167e-07, + "loss": 0.4919237196445465, + "step": 6146 + }, + { + "epoch": 1.7974850124287176, + "grad_norm": 1.8345189418412804, + "learning_rate": 5.576363293305187e-07, + "loss": 0.5812259316444397, + "step": 6147 + }, + { + "epoch": 1.7977774528439832, + "grad_norm": 1.7464814721572284, + "learning_rate": 5.560455166505185e-07, + "loss": 0.434345006942749, + "step": 6148 + }, + { + "epoch": 1.7980698932592483, + "grad_norm": 1.6287087584719833, + 
"learning_rate": 5.544569114374588e-07, + "loss": 0.4670771360397339, + "step": 6149 + }, + { + "epoch": 1.7983623336745138, + "grad_norm": 1.5038620849892772, + "learning_rate": 5.528705140626667e-07, + "loss": 0.5867526531219482, + "step": 6150 + }, + { + "epoch": 1.7986547740897794, + "grad_norm": 1.8981858755166237, + "learning_rate": 5.512863248969513e-07, + "loss": 0.5453605651855469, + "step": 6151 + }, + { + "epoch": 1.7989472145050445, + "grad_norm": 1.9030067654858334, + "learning_rate": 5.497043443106087e-07, + "loss": 0.5535463690757751, + "step": 6152 + }, + { + "epoch": 1.79923965492031, + "grad_norm": 1.72031713178446, + "learning_rate": 5.481245726734174e-07, + "loss": 0.6250847578048706, + "step": 6153 + }, + { + "epoch": 1.7995320953355753, + "grad_norm": 1.625961067284692, + "learning_rate": 5.465470103546399e-07, + "loss": 0.45504581928253174, + "step": 6154 + }, + { + "epoch": 1.7998245357508407, + "grad_norm": 2.039802523536217, + "learning_rate": 5.449716577230202e-07, + "loss": 0.6192604303359985, + "step": 6155 + }, + { + "epoch": 1.8001169761661062, + "grad_norm": 1.8695276161806251, + "learning_rate": 5.433985151467869e-07, + "loss": 0.5624358654022217, + "step": 6156 + }, + { + "epoch": 1.8004094165813715, + "grad_norm": 1.7494457460727728, + "learning_rate": 5.418275829936537e-07, + "loss": 0.5759576559066772, + "step": 6157 + }, + { + "epoch": 1.8007018569966369, + "grad_norm": 1.752894288026352, + "learning_rate": 5.402588616308169e-07, + "loss": 0.5710508227348328, + "step": 6158 + }, + { + "epoch": 1.8009942974119024, + "grad_norm": 1.6781697189669698, + "learning_rate": 5.386923514249542e-07, + "loss": 0.6146141290664673, + "step": 6159 + }, + { + "epoch": 1.8012867378271677, + "grad_norm": 1.618055518270054, + "learning_rate": 5.371280527422296e-07, + "loss": 0.425834983587265, + "step": 6160 + }, + { + "epoch": 1.801579178242433, + "grad_norm": 1.8062077594882358, + "learning_rate": 5.35565965948287e-07, + "loss": 
0.4353194236755371, + "step": 6161 + }, + { + "epoch": 1.8018716186576986, + "grad_norm": 2.0598668441022037, + "learning_rate": 5.340060914082546e-07, + "loss": 0.7202355861663818, + "step": 6162 + }, + { + "epoch": 1.8021640590729637, + "grad_norm": 1.552014134498689, + "learning_rate": 5.324484294867449e-07, + "loss": 0.5371845960617065, + "step": 6163 + }, + { + "epoch": 1.8024564994882293, + "grad_norm": 1.7812688374701713, + "learning_rate": 5.308929805478513e-07, + "loss": 0.4995431900024414, + "step": 6164 + }, + { + "epoch": 1.8027489399034948, + "grad_norm": 1.9376433940202618, + "learning_rate": 5.293397449551519e-07, + "loss": 0.6503393650054932, + "step": 6165 + }, + { + "epoch": 1.80304138031876, + "grad_norm": 1.608511841040304, + "learning_rate": 5.277887230717027e-07, + "loss": 0.5083032250404358, + "step": 6166 + }, + { + "epoch": 1.8033338207340255, + "grad_norm": 1.7910725457082355, + "learning_rate": 5.262399152600473e-07, + "loss": 0.6067851781845093, + "step": 6167 + }, + { + "epoch": 1.8036262611492908, + "grad_norm": 1.6601362559713981, + "learning_rate": 5.246933218822104e-07, + "loss": 0.6446479558944702, + "step": 6168 + }, + { + "epoch": 1.8039187015645561, + "grad_norm": 1.9668874595165033, + "learning_rate": 5.231489432996984e-07, + "loss": 0.6940749883651733, + "step": 6169 + }, + { + "epoch": 1.8042111419798217, + "grad_norm": 1.6254914024201104, + "learning_rate": 5.216067798735014e-07, + "loss": 0.558691143989563, + "step": 6170 + }, + { + "epoch": 1.804503582395087, + "grad_norm": 1.706821795047188, + "learning_rate": 5.2006683196409e-07, + "loss": 0.4561213254928589, + "step": 6171 + }, + { + "epoch": 1.8047960228103523, + "grad_norm": 1.5741713506995776, + "learning_rate": 5.185290999314174e-07, + "loss": 0.514278769493103, + "step": 6172 + }, + { + "epoch": 1.805088463225618, + "grad_norm": 1.7438493762338294, + "learning_rate": 5.169935841349194e-07, + "loss": 0.41933614015579224, + "step": 6173 + }, + { + "epoch": 
1.8053809036408832, + "grad_norm": 1.5639626592195386, + "learning_rate": 5.154602849335133e-07, + "loss": 0.5590407848358154, + "step": 6174 + }, + { + "epoch": 1.8056733440561485, + "grad_norm": 1.7923343761763981, + "learning_rate": 5.139292026855991e-07, + "loss": 0.49428898096084595, + "step": 6175 + }, + { + "epoch": 1.805965784471414, + "grad_norm": 1.6980318077322492, + "learning_rate": 5.124003377490582e-07, + "loss": 0.4737596809864044, + "step": 6176 + }, + { + "epoch": 1.8062582248866792, + "grad_norm": 1.6716862203734568, + "learning_rate": 5.108736904812517e-07, + "loss": 0.5017397403717041, + "step": 6177 + }, + { + "epoch": 1.8065506653019447, + "grad_norm": 1.733919571237643, + "learning_rate": 5.09349261239026e-07, + "loss": 0.4509057402610779, + "step": 6178 + }, + { + "epoch": 1.80684310571721, + "grad_norm": 1.9095997808768526, + "learning_rate": 5.078270503787053e-07, + "loss": 0.4440206289291382, + "step": 6179 + }, + { + "epoch": 1.8071355461324754, + "grad_norm": 1.6672235625660048, + "learning_rate": 5.063070582560991e-07, + "loss": 0.4981609582901001, + "step": 6180 + }, + { + "epoch": 1.807427986547741, + "grad_norm": 1.4041701397189061, + "learning_rate": 5.047892852264946e-07, + "loss": 0.4057808518409729, + "step": 6181 + }, + { + "epoch": 1.8077204269630063, + "grad_norm": 1.8238388895662465, + "learning_rate": 5.032737316446634e-07, + "loss": 0.5770435333251953, + "step": 6182 + }, + { + "epoch": 1.8080128673782716, + "grad_norm": 1.5817149529336438, + "learning_rate": 5.017603978648567e-07, + "loss": 0.5431563258171082, + "step": 6183 + }, + { + "epoch": 1.8083053077935372, + "grad_norm": 1.7959973431061746, + "learning_rate": 5.002492842408058e-07, + "loss": 0.469868928194046, + "step": 6184 + }, + { + "epoch": 1.8085977482088025, + "grad_norm": 1.6470575782998251, + "learning_rate": 4.98740391125726e-07, + "loss": 0.4581238925457001, + "step": 6185 + }, + { + "epoch": 1.8088901886240678, + "grad_norm": 1.5613704220145663, + 
"learning_rate": 4.972337188723108e-07, + "loss": 0.43255913257598877, + "step": 6186 + }, + { + "epoch": 1.8091826290393334, + "grad_norm": 1.6405804521880538, + "learning_rate": 4.957292678327374e-07, + "loss": 0.5817975997924805, + "step": 6187 + }, + { + "epoch": 1.8094750694545985, + "grad_norm": 1.701175567145501, + "learning_rate": 4.9422703835866e-07, + "loss": 0.506614089012146, + "step": 6188 + }, + { + "epoch": 1.809767509869864, + "grad_norm": 1.8093255501568073, + "learning_rate": 4.927270308012155e-07, + "loss": 0.5245084762573242, + "step": 6189 + }, + { + "epoch": 1.8100599502851296, + "grad_norm": 1.9638481802757681, + "learning_rate": 4.912292455110235e-07, + "loss": 0.48700785636901855, + "step": 6190 + }, + { + "epoch": 1.8103523907003947, + "grad_norm": 1.7084108143801102, + "learning_rate": 4.897336828381794e-07, + "loss": 0.5512829422950745, + "step": 6191 + }, + { + "epoch": 1.8106448311156602, + "grad_norm": 1.9425355962156208, + "learning_rate": 4.882403431322647e-07, + "loss": 0.444965660572052, + "step": 6192 + }, + { + "epoch": 1.8109372715309255, + "grad_norm": 1.6773870360526466, + "learning_rate": 4.86749226742338e-07, + "loss": 0.49120527505874634, + "step": 6193 + }, + { + "epoch": 1.8112297119461909, + "grad_norm": 1.5444026883137385, + "learning_rate": 4.852603340169371e-07, + "loss": 0.47114405035972595, + "step": 6194 + }, + { + "epoch": 1.8115221523614564, + "grad_norm": 1.3641759741105037, + "learning_rate": 4.837736653040825e-07, + "loss": 0.41404014825820923, + "step": 6195 + }, + { + "epoch": 1.8118145927767217, + "grad_norm": 1.5779692763243462, + "learning_rate": 4.822892209512742e-07, + "loss": 0.5773917436599731, + "step": 6196 + }, + { + "epoch": 1.812107033191987, + "grad_norm": 1.5867022738126413, + "learning_rate": 4.808070013054911e-07, + "loss": 0.5048927068710327, + "step": 6197 + }, + { + "epoch": 1.8123994736072526, + "grad_norm": 1.4880382186782968, + "learning_rate": 4.793270067131961e-07, + "loss": 
0.48112595081329346, + "step": 6198 + }, + { + "epoch": 1.812691914022518, + "grad_norm": 1.5982708355484612, + "learning_rate": 4.778492375203236e-07, + "loss": 0.465067982673645, + "step": 6199 + }, + { + "epoch": 1.8129843544377833, + "grad_norm": 2.10382956966043, + "learning_rate": 4.763736940722985e-07, + "loss": 0.5456488132476807, + "step": 6200 + }, + { + "epoch": 1.8132767948530488, + "grad_norm": 1.7197696401081977, + "learning_rate": 4.74900376714017e-07, + "loss": 0.5078476071357727, + "step": 6201 + }, + { + "epoch": 1.813569235268314, + "grad_norm": 1.8035895737751002, + "learning_rate": 4.7342928578985814e-07, + "loss": 0.5087896585464478, + "step": 6202 + }, + { + "epoch": 1.8138616756835795, + "grad_norm": 1.8289842367399733, + "learning_rate": 4.719604216436824e-07, + "loss": 0.5734537243843079, + "step": 6203 + }, + { + "epoch": 1.814154116098845, + "grad_norm": 1.8255387764821909, + "learning_rate": 4.704937846188262e-07, + "loss": 0.5163359045982361, + "step": 6204 + }, + { + "epoch": 1.8144465565141101, + "grad_norm": 1.7367361746759034, + "learning_rate": 4.6902937505810765e-07, + "loss": 0.5884007811546326, + "step": 6205 + }, + { + "epoch": 1.8147389969293757, + "grad_norm": 1.459881439563451, + "learning_rate": 4.675671933038228e-07, + "loss": 0.454215407371521, + "step": 6206 + }, + { + "epoch": 1.815031437344641, + "grad_norm": 1.4834270754413148, + "learning_rate": 4.661072396977506e-07, + "loss": 0.4380212426185608, + "step": 6207 + }, + { + "epoch": 1.8153238777599063, + "grad_norm": 1.5724796080178702, + "learning_rate": 4.646495145811425e-07, + "loss": 0.6138126850128174, + "step": 6208 + }, + { + "epoch": 1.8156163181751719, + "grad_norm": 1.7578891144089137, + "learning_rate": 4.6319401829473366e-07, + "loss": 0.560515284538269, + "step": 6209 + }, + { + "epoch": 1.8159087585904372, + "grad_norm": 1.6717823771103892, + "learning_rate": 4.6174075117873976e-07, + "loss": 0.4744090735912323, + "step": 6210 + }, + { + "epoch": 
1.8162011990057025, + "grad_norm": 1.566667953265204, + "learning_rate": 4.6028971357285126e-07, + "loss": 0.4508114457130432, + "step": 6211 + }, + { + "epoch": 1.816493639420968, + "grad_norm": 1.6686159118306128, + "learning_rate": 4.5884090581623906e-07, + "loss": 0.5437598824501038, + "step": 6212 + }, + { + "epoch": 1.8167860798362334, + "grad_norm": 1.871048661690424, + "learning_rate": 4.5739432824755456e-07, + "loss": 0.608635425567627, + "step": 6213 + }, + { + "epoch": 1.8170785202514987, + "grad_norm": 1.683927429440131, + "learning_rate": 4.5594998120492505e-07, + "loss": 0.45614784955978394, + "step": 6214 + }, + { + "epoch": 1.8173709606667643, + "grad_norm": 1.8175326303925177, + "learning_rate": 4.5450786502595933e-07, + "loss": 0.46722525358200073, + "step": 6215 + }, + { + "epoch": 1.8176634010820294, + "grad_norm": 1.6729337536988582, + "learning_rate": 4.5306798004774333e-07, + "loss": 0.5424127578735352, + "step": 6216 + }, + { + "epoch": 1.817955841497295, + "grad_norm": 1.8512870023540355, + "learning_rate": 4.5163032660684e-07, + "loss": 0.4360300302505493, + "step": 6217 + }, + { + "epoch": 1.8182482819125603, + "grad_norm": 1.4671759860658016, + "learning_rate": 4.5019490503929395e-07, + "loss": 0.43406206369400024, + "step": 6218 + }, + { + "epoch": 1.8185407223278256, + "grad_norm": 1.5669201854687904, + "learning_rate": 4.4876171568062346e-07, + "loss": 0.5435998439788818, + "step": 6219 + }, + { + "epoch": 1.8188331627430911, + "grad_norm": 1.7571994730111475, + "learning_rate": 4.4733075886583043e-07, + "loss": 0.4555914103984833, + "step": 6220 + }, + { + "epoch": 1.8191256031583565, + "grad_norm": 1.9267993644134682, + "learning_rate": 4.4590203492939076e-07, + "loss": 0.5246081352233887, + "step": 6221 + }, + { + "epoch": 1.8194180435736218, + "grad_norm": 1.4234567063452161, + "learning_rate": 4.4447554420525954e-07, + "loss": 0.5093664526939392, + "step": 6222 + }, + { + "epoch": 1.8197104839888874, + "grad_norm": 
1.9251138549109805, + "learning_rate": 4.430512870268733e-07, + "loss": 0.5759550333023071, + "step": 6223 + }, + { + "epoch": 1.8200029244041527, + "grad_norm": 2.2446814471076184, + "learning_rate": 4.416292637271402e-07, + "loss": 0.5477207899093628, + "step": 6224 + }, + { + "epoch": 1.820295364819418, + "grad_norm": 1.7579783947323675, + "learning_rate": 4.402094746384511e-07, + "loss": 0.5786882638931274, + "step": 6225 + }, + { + "epoch": 1.8205878052346836, + "grad_norm": 1.6652775403735034, + "learning_rate": 4.3879192009267266e-07, + "loss": 0.36909428238868713, + "step": 6226 + }, + { + "epoch": 1.8208802456499487, + "grad_norm": 1.6359565015929571, + "learning_rate": 4.3737660042114993e-07, + "loss": 0.5471982955932617, + "step": 6227 + }, + { + "epoch": 1.8211726860652142, + "grad_norm": 1.633893653092529, + "learning_rate": 4.3596351595470596e-07, + "loss": 0.49737733602523804, + "step": 6228 + }, + { + "epoch": 1.8214651264804798, + "grad_norm": 1.8445639233475513, + "learning_rate": 4.3455266702363997e-07, + "loss": 0.70830237865448, + "step": 6229 + }, + { + "epoch": 1.8217575668957449, + "grad_norm": 1.5312305470870462, + "learning_rate": 4.331440539577281e-07, + "loss": 0.5844424962997437, + "step": 6230 + }, + { + "epoch": 1.8220500073110104, + "grad_norm": 1.5427896071730656, + "learning_rate": 4.317376770862269e-07, + "loss": 0.42457354068756104, + "step": 6231 + }, + { + "epoch": 1.8223424477262757, + "grad_norm": 2.058390634719774, + "learning_rate": 4.3033353673786695e-07, + "loss": 0.5154321193695068, + "step": 6232 + }, + { + "epoch": 1.822634888141541, + "grad_norm": 1.7898699548834731, + "learning_rate": 4.2893163324085886e-07, + "loss": 0.5896856784820557, + "step": 6233 + }, + { + "epoch": 1.8229273285568066, + "grad_norm": 1.8303948048078211, + "learning_rate": 4.2753196692288835e-07, + "loss": 0.5032835006713867, + "step": 6234 + }, + { + "epoch": 1.823219768972072, + "grad_norm": 1.8584560183845538, + "learning_rate": 
4.2613453811111814e-07, + "loss": 0.4691713750362396, + "step": 6235 + }, + { + "epoch": 1.8235122093873373, + "grad_norm": 1.5627513261590378, + "learning_rate": 4.2473934713219033e-07, + "loss": 0.595095694065094, + "step": 6236 + }, + { + "epoch": 1.8238046498026028, + "grad_norm": 1.6531612719483142, + "learning_rate": 4.233463943122218e-07, + "loss": 0.5004895329475403, + "step": 6237 + }, + { + "epoch": 1.8240970902178681, + "grad_norm": 1.7047690953050751, + "learning_rate": 4.2195567997680654e-07, + "loss": 0.4924081563949585, + "step": 6238 + }, + { + "epoch": 1.8243895306331335, + "grad_norm": 1.7572886707576447, + "learning_rate": 4.2056720445101565e-07, + "loss": 0.5350006818771362, + "step": 6239 + }, + { + "epoch": 1.824681971048399, + "grad_norm": 1.9485734179206806, + "learning_rate": 4.191809680593961e-07, + "loss": 0.5404629707336426, + "step": 6240 + }, + { + "epoch": 1.8249744114636641, + "grad_norm": 1.6023324600099473, + "learning_rate": 4.177969711259744e-07, + "loss": 0.727859377861023, + "step": 6241 + }, + { + "epoch": 1.8252668518789297, + "grad_norm": 1.553973004264676, + "learning_rate": 4.164152139742494e-07, + "loss": 0.4805057644844055, + "step": 6242 + }, + { + "epoch": 1.8255592922941952, + "grad_norm": 1.7536116301732134, + "learning_rate": 4.1503569692719847e-07, + "loss": 0.5520761013031006, + "step": 6243 + }, + { + "epoch": 1.8258517327094603, + "grad_norm": 1.8327055737656117, + "learning_rate": 4.1365842030727576e-07, + "loss": 0.6130107641220093, + "step": 6244 + }, + { + "epoch": 1.8261441731247259, + "grad_norm": 1.7887203227793926, + "learning_rate": 4.122833844364116e-07, + "loss": 0.6048229932785034, + "step": 6245 + }, + { + "epoch": 1.8264366135399912, + "grad_norm": 1.717414490213998, + "learning_rate": 4.1091058963601214e-07, + "loss": 0.667324960231781, + "step": 6246 + }, + { + "epoch": 1.8267290539552565, + "grad_norm": 2.083699506724501, + "learning_rate": 4.095400362269597e-07, + "loss": 0.45595815777778625, + 
"step": 6247 + }, + { + "epoch": 1.827021494370522, + "grad_norm": 1.7162831332631867, + "learning_rate": 4.081717245296124e-07, + "loss": 0.49015533924102783, + "step": 6248 + }, + { + "epoch": 1.8273139347857874, + "grad_norm": 2.1906207360630763, + "learning_rate": 4.068056548638055e-07, + "loss": 0.5230038166046143, + "step": 6249 + }, + { + "epoch": 1.8276063752010527, + "grad_norm": 1.6860531929221865, + "learning_rate": 4.054418275488492e-07, + "loss": 0.5025942325592041, + "step": 6250 + }, + { + "epoch": 1.8278988156163183, + "grad_norm": 1.736980191753769, + "learning_rate": 4.0408024290352955e-07, + "loss": 0.5136677026748657, + "step": 6251 + }, + { + "epoch": 1.8281912560315836, + "grad_norm": 1.7988212644666006, + "learning_rate": 4.0272090124611086e-07, + "loss": 0.6209211945533752, + "step": 6252 + }, + { + "epoch": 1.828483696446849, + "grad_norm": 1.9742781188768104, + "learning_rate": 4.0136380289432784e-07, + "loss": 0.5913738012313843, + "step": 6253 + }, + { + "epoch": 1.8287761368621145, + "grad_norm": 1.9710058674803597, + "learning_rate": 4.000089481653946e-07, + "loss": 0.5745095610618591, + "step": 6254 + }, + { + "epoch": 1.8290685772773796, + "grad_norm": 1.4867167586867893, + "learning_rate": 3.9865633737600105e-07, + "loss": 0.4566704034805298, + "step": 6255 + }, + { + "epoch": 1.8293610176926451, + "grad_norm": 1.672257025513455, + "learning_rate": 3.9730597084231105e-07, + "loss": 0.49784860014915466, + "step": 6256 + }, + { + "epoch": 1.8296534581079105, + "grad_norm": 1.7381596787517106, + "learning_rate": 3.9595784887996647e-07, + "loss": 0.4489399790763855, + "step": 6257 + }, + { + "epoch": 1.8299458985231758, + "grad_norm": 1.9703484082158151, + "learning_rate": 3.946119718040797e-07, + "loss": 0.6335956454277039, + "step": 6258 + }, + { + "epoch": 1.8302383389384413, + "grad_norm": 1.4097270774574866, + "learning_rate": 3.932683399292436e-07, + "loss": 0.44865918159484863, + "step": 6259 + }, + { + "epoch": 
1.8305307793537067, + "grad_norm": 1.6485718017332285, + "learning_rate": 3.919269535695225e-07, + "loss": 0.4328421354293823, + "step": 6260 + }, + { + "epoch": 1.830823219768972, + "grad_norm": 1.6528043958881276, + "learning_rate": 3.9058781303845886e-07, + "loss": 0.463814377784729, + "step": 6261 + }, + { + "epoch": 1.8311156601842375, + "grad_norm": 1.9336577936651187, + "learning_rate": 3.892509186490667e-07, + "loss": 0.5857536196708679, + "step": 6262 + }, + { + "epoch": 1.8314081005995029, + "grad_norm": 1.4512027972560333, + "learning_rate": 3.879162707138395e-07, + "loss": 0.4873831272125244, + "step": 6263 + }, + { + "epoch": 1.8317005410147682, + "grad_norm": 1.89367526659171, + "learning_rate": 3.8658386954474104e-07, + "loss": 0.5428040027618408, + "step": 6264 + }, + { + "epoch": 1.8319929814300338, + "grad_norm": 1.759804366679343, + "learning_rate": 3.852537154532121e-07, + "loss": 0.49092623591423035, + "step": 6265 + }, + { + "epoch": 1.8322854218452989, + "grad_norm": 1.7919708064212196, + "learning_rate": 3.839258087501685e-07, + "loss": 0.5515817999839783, + "step": 6266 + }, + { + "epoch": 1.8325778622605644, + "grad_norm": 1.5550731443697672, + "learning_rate": 3.8260014974600077e-07, + "loss": 0.48080340027809143, + "step": 6267 + }, + { + "epoch": 1.83287030267583, + "grad_norm": 2.292962123842254, + "learning_rate": 3.812767387505734e-07, + "loss": 0.6129888296127319, + "step": 6268 + }, + { + "epoch": 1.833162743091095, + "grad_norm": 1.8203026764024284, + "learning_rate": 3.7995557607322543e-07, + "loss": 0.5843402147293091, + "step": 6269 + }, + { + "epoch": 1.8334551835063606, + "grad_norm": 1.9423893526281284, + "learning_rate": 3.7863666202276996e-07, + "loss": 0.5573143362998962, + "step": 6270 + }, + { + "epoch": 1.833747623921626, + "grad_norm": 1.9386384718546945, + "learning_rate": 3.773199969074959e-07, + "loss": 0.552756667137146, + "step": 6271 + }, + { + "epoch": 1.8340400643368913, + "grad_norm": 1.7629811878645265, + 
"learning_rate": 3.7600558103516706e-07, + "loss": 0.5559083223342896, + "step": 6272 + }, + { + "epoch": 1.8343325047521568, + "grad_norm": 1.9388416947858518, + "learning_rate": 3.746934147130177e-07, + "loss": 0.5388067364692688, + "step": 6273 + }, + { + "epoch": 1.8346249451674221, + "grad_norm": 1.694909278172827, + "learning_rate": 3.7338349824776133e-07, + "loss": 0.5816110968589783, + "step": 6274 + }, + { + "epoch": 1.8349173855826875, + "grad_norm": 1.9312358476553817, + "learning_rate": 3.720758319455786e-07, + "loss": 0.5720102787017822, + "step": 6275 + }, + { + "epoch": 1.835209825997953, + "grad_norm": 1.5440220572809102, + "learning_rate": 3.707704161121328e-07, + "loss": 0.46005699038505554, + "step": 6276 + }, + { + "epoch": 1.8355022664132183, + "grad_norm": 2.0613584980065776, + "learning_rate": 3.6946725105255656e-07, + "loss": 0.5602168440818787, + "step": 6277 + }, + { + "epoch": 1.8357947068284837, + "grad_norm": 1.6156922208810771, + "learning_rate": 3.68166337071455e-07, + "loss": 0.5390583276748657, + "step": 6278 + }, + { + "epoch": 1.8360871472437492, + "grad_norm": 1.558407958302267, + "learning_rate": 3.668676744729094e-07, + "loss": 0.48980700969696045, + "step": 6279 + }, + { + "epoch": 1.8363795876590143, + "grad_norm": 1.5853357453165142, + "learning_rate": 3.655712635604747e-07, + "loss": 0.6565061807632446, + "step": 6280 + }, + { + "epoch": 1.8366720280742799, + "grad_norm": 1.5692146512642422, + "learning_rate": 3.642771046371785e-07, + "loss": 0.465609610080719, + "step": 6281 + }, + { + "epoch": 1.8369644684895454, + "grad_norm": 1.7219983092976099, + "learning_rate": 3.6298519800552434e-07, + "loss": 0.5698891282081604, + "step": 6282 + }, + { + "epoch": 1.8372569089048105, + "grad_norm": 2.103680074754177, + "learning_rate": 3.616955439674863e-07, + "loss": 0.5885399580001831, + "step": 6283 + }, + { + "epoch": 1.837549349320076, + "grad_norm": 1.7028861151189467, + "learning_rate": 3.60408142824511e-07, + "loss": 
0.5158063173294067, + "step": 6284 + }, + { + "epoch": 1.8378417897353414, + "grad_norm": 1.6728867893623607, + "learning_rate": 3.5912299487752434e-07, + "loss": 0.49203822016716003, + "step": 6285 + }, + { + "epoch": 1.8381342301506067, + "grad_norm": 1.991753525300203, + "learning_rate": 3.578401004269183e-07, + "loss": 0.5756489038467407, + "step": 6286 + }, + { + "epoch": 1.8384266705658723, + "grad_norm": 1.9424738806131756, + "learning_rate": 3.565594597725652e-07, + "loss": 0.5970584154129028, + "step": 6287 + }, + { + "epoch": 1.8387191109811376, + "grad_norm": 1.4438564684738853, + "learning_rate": 3.552810732138046e-07, + "loss": 0.48702481389045715, + "step": 6288 + }, + { + "epoch": 1.839011551396403, + "grad_norm": 1.6632334435868308, + "learning_rate": 3.540049410494517e-07, + "loss": 0.4818963408470154, + "step": 6289 + }, + { + "epoch": 1.8393039918116685, + "grad_norm": 1.6617150886827665, + "learning_rate": 3.5273106357779585e-07, + "loss": 0.389699786901474, + "step": 6290 + }, + { + "epoch": 1.8395964322269338, + "grad_norm": 1.7654595369504777, + "learning_rate": 3.514594410965977e-07, + "loss": 0.6438174247741699, + "step": 6291 + }, + { + "epoch": 1.8398888726421991, + "grad_norm": 1.9409260673022277, + "learning_rate": 3.501900739030906e-07, + "loss": 0.654021143913269, + "step": 6292 + }, + { + "epoch": 1.8401813130574647, + "grad_norm": 1.921461492738401, + "learning_rate": 3.489229622939827e-07, + "loss": 0.748673677444458, + "step": 6293 + }, + { + "epoch": 1.8404737534727298, + "grad_norm": 1.850157344469969, + "learning_rate": 3.476581065654527e-07, + "loss": 0.47883105278015137, + "step": 6294 + }, + { + "epoch": 1.8407661938879953, + "grad_norm": 1.555147241743972, + "learning_rate": 3.4639550701315303e-07, + "loss": 0.5221554040908813, + "step": 6295 + }, + { + "epoch": 1.8410586343032607, + "grad_norm": 1.7256564846330384, + "learning_rate": 3.451351639322087e-07, + "loss": 0.482231080532074, + "step": 6296 + }, + { + "epoch": 
1.841351074718526, + "grad_norm": 1.797442509245834, + "learning_rate": 3.4387707761721625e-07, + "loss": 0.5407366752624512, + "step": 6297 + }, + { + "epoch": 1.8416435151337915, + "grad_norm": 1.9177358417772523, + "learning_rate": 3.426212483622482e-07, + "loss": 0.626631498336792, + "step": 6298 + }, + { + "epoch": 1.8419359555490569, + "grad_norm": 1.4729327167263073, + "learning_rate": 3.4136767646084424e-07, + "loss": 0.4401513338088989, + "step": 6299 + }, + { + "epoch": 1.8422283959643222, + "grad_norm": 1.756926078765411, + "learning_rate": 3.4011636220602106e-07, + "loss": 0.48130229115486145, + "step": 6300 + }, + { + "epoch": 1.8425208363795877, + "grad_norm": 1.9010914484665373, + "learning_rate": 3.3886730589026475e-07, + "loss": 0.7132935523986816, + "step": 6301 + }, + { + "epoch": 1.842813276794853, + "grad_norm": 1.692313625720156, + "learning_rate": 3.37620507805535e-07, + "loss": 0.6665343642234802, + "step": 6302 + }, + { + "epoch": 1.8431057172101184, + "grad_norm": 1.7909091838212496, + "learning_rate": 3.3637596824326435e-07, + "loss": 0.4313231408596039, + "step": 6303 + }, + { + "epoch": 1.843398157625384, + "grad_norm": 1.6745971926171657, + "learning_rate": 3.3513368749435447e-07, + "loss": 0.6263744235038757, + "step": 6304 + }, + { + "epoch": 1.843690598040649, + "grad_norm": 1.6133043168174617, + "learning_rate": 3.3389366584918313e-07, + "loss": 0.6215947866439819, + "step": 6305 + }, + { + "epoch": 1.8439830384559146, + "grad_norm": 1.6349014502820445, + "learning_rate": 3.3265590359759517e-07, + "loss": 0.45956021547317505, + "step": 6306 + }, + { + "epoch": 1.8442754788711802, + "grad_norm": 1.6194578088821072, + "learning_rate": 3.3142040102891126e-07, + "loss": 0.5363642573356628, + "step": 6307 + }, + { + "epoch": 1.8445679192864453, + "grad_norm": 1.7115305858843777, + "learning_rate": 3.3018715843192273e-07, + "loss": 0.4574592709541321, + "step": 6308 + }, + { + "epoch": 1.8448603597017108, + "grad_norm": 
1.6684239678735615, + "learning_rate": 3.2895617609489337e-07, + "loss": 0.43236005306243896, + "step": 6309 + }, + { + "epoch": 1.8451528001169761, + "grad_norm": 1.574172974777944, + "learning_rate": 3.277274543055564e-07, + "loss": 0.46349820494651794, + "step": 6310 + }, + { + "epoch": 1.8454452405322415, + "grad_norm": 1.9135327602518888, + "learning_rate": 3.265009933511176e-07, + "loss": 0.5233386754989624, + "step": 6311 + }, + { + "epoch": 1.845737680947507, + "grad_norm": 1.5165768096310508, + "learning_rate": 3.252767935182566e-07, + "loss": 0.44902727007865906, + "step": 6312 + }, + { + "epoch": 1.8460301213627723, + "grad_norm": 1.9281348385682333, + "learning_rate": 3.240548550931222e-07, + "loss": 0.709855854511261, + "step": 6313 + }, + { + "epoch": 1.8463225617780377, + "grad_norm": 1.8532989008830933, + "learning_rate": 3.228351783613348e-07, + "loss": 0.5194632411003113, + "step": 6314 + }, + { + "epoch": 1.8466150021933032, + "grad_norm": 1.750242735396334, + "learning_rate": 3.2161776360798535e-07, + "loss": 0.6027804017066956, + "step": 6315 + }, + { + "epoch": 1.8469074426085685, + "grad_norm": 1.591118544218686, + "learning_rate": 3.2040261111763946e-07, + "loss": 0.5047632455825806, + "step": 6316 + }, + { + "epoch": 1.8471998830238339, + "grad_norm": 2.082041129535105, + "learning_rate": 3.1918972117433e-07, + "loss": 0.5763708353042603, + "step": 6317 + }, + { + "epoch": 1.8474923234390994, + "grad_norm": 1.7701935148884373, + "learning_rate": 3.1797909406156234e-07, + "loss": 0.4725028872489929, + "step": 6318 + }, + { + "epoch": 1.8477847638543645, + "grad_norm": 1.5419878667068574, + "learning_rate": 3.167707300623135e-07, + "loss": 0.523047924041748, + "step": 6319 + }, + { + "epoch": 1.84807720426963, + "grad_norm": 1.6321175932285703, + "learning_rate": 3.15564629459032e-07, + "loss": 0.5100070238113403, + "step": 6320 + }, + { + "epoch": 1.8483696446848956, + "grad_norm": 1.7375024362733555, + "learning_rate": 
3.143607925336356e-07, + "loss": 0.6019359827041626, + "step": 6321 + }, + { + "epoch": 1.8486620851001607, + "grad_norm": 1.8195133886893664, + "learning_rate": 3.1315921956751483e-07, + "loss": 0.5514570474624634, + "step": 6322 + }, + { + "epoch": 1.8489545255154263, + "grad_norm": 1.6002643586013279, + "learning_rate": 3.1195991084152944e-07, + "loss": 0.49585646390914917, + "step": 6323 + }, + { + "epoch": 1.8492469659306916, + "grad_norm": 1.724322382501938, + "learning_rate": 3.1076286663601076e-07, + "loss": 0.5738509297370911, + "step": 6324 + }, + { + "epoch": 1.849539406345957, + "grad_norm": 1.8621720995112787, + "learning_rate": 3.095680872307605e-07, + "loss": 0.5149112939834595, + "step": 6325 + }, + { + "epoch": 1.8498318467612225, + "grad_norm": 1.6738148879498993, + "learning_rate": 3.0837557290505083e-07, + "loss": 0.45808184146881104, + "step": 6326 + }, + { + "epoch": 1.8501242871764878, + "grad_norm": 1.6155317269058609, + "learning_rate": 3.0718532393762435e-07, + "loss": 0.5173396468162537, + "step": 6327 + }, + { + "epoch": 1.8504167275917531, + "grad_norm": 1.6905273546590853, + "learning_rate": 3.059973406066963e-07, + "loss": 0.6229383945465088, + "step": 6328 + }, + { + "epoch": 1.8507091680070187, + "grad_norm": 1.6794531990129002, + "learning_rate": 3.0481162318994894e-07, + "loss": 0.45520371198654175, + "step": 6329 + }, + { + "epoch": 1.851001608422284, + "grad_norm": 1.5024073523898138, + "learning_rate": 3.036281719645373e-07, + "loss": 0.43216121196746826, + "step": 6330 + }, + { + "epoch": 1.8512940488375493, + "grad_norm": 1.9238309164883824, + "learning_rate": 3.0244698720708456e-07, + "loss": 0.5440583825111389, + "step": 6331 + }, + { + "epoch": 1.8515864892528149, + "grad_norm": 1.8189444343843324, + "learning_rate": 3.0126806919368756e-07, + "loss": 0.5474626421928406, + "step": 6332 + }, + { + "epoch": 1.85187892966808, + "grad_norm": 1.7800420936387606, + "learning_rate": 3.000914181999093e-07, + "loss": 
0.5122883915901184, + "step": 6333 + }, + { + "epoch": 1.8521713700833455, + "grad_norm": 1.776220435476035, + "learning_rate": 2.989170345007852e-07, + "loss": 0.48304370045661926, + "step": 6334 + }, + { + "epoch": 1.8524638104986109, + "grad_norm": 1.6949801188317577, + "learning_rate": 2.977449183708214e-07, + "loss": 0.566180408000946, + "step": 6335 + }, + { + "epoch": 1.8527562509138762, + "grad_norm": 1.7482351137010406, + "learning_rate": 2.96575070083992e-07, + "loss": 0.5218988656997681, + "step": 6336 + }, + { + "epoch": 1.8530486913291417, + "grad_norm": 1.8289145949576808, + "learning_rate": 2.954074899137427e-07, + "loss": 0.49669283628463745, + "step": 6337 + }, + { + "epoch": 1.853341131744407, + "grad_norm": 1.6012219042297557, + "learning_rate": 2.942421781329874e-07, + "loss": 0.5505487322807312, + "step": 6338 + }, + { + "epoch": 1.8536335721596724, + "grad_norm": 1.6156483149639533, + "learning_rate": 2.930791350141116e-07, + "loss": 0.5386735200881958, + "step": 6339 + }, + { + "epoch": 1.853926012574938, + "grad_norm": 2.0764057670166776, + "learning_rate": 2.919183608289689e-07, + "loss": 0.5266523957252502, + "step": 6340 + }, + { + "epoch": 1.8542184529902033, + "grad_norm": 1.573480922837112, + "learning_rate": 2.907598558488822e-07, + "loss": 0.5335103273391724, + "step": 6341 + }, + { + "epoch": 1.8545108934054686, + "grad_norm": 1.8447961626822076, + "learning_rate": 2.896036203446473e-07, + "loss": 0.6155405044555664, + "step": 6342 + }, + { + "epoch": 1.8548033338207341, + "grad_norm": 1.5602039082453873, + "learning_rate": 2.884496545865245e-07, + "loss": 0.5258159041404724, + "step": 6343 + }, + { + "epoch": 1.8550957742359993, + "grad_norm": 1.7894466773590292, + "learning_rate": 2.8729795884424927e-07, + "loss": 0.5428795218467712, + "step": 6344 + }, + { + "epoch": 1.8553882146512648, + "grad_norm": 1.4344098630811726, + "learning_rate": 2.8614853338702066e-07, + "loss": 0.4876418709754944, + "step": 6345 + }, + { + "epoch": 
1.8556806550665303, + "grad_norm": 1.606511441088432, + "learning_rate": 2.850013784835115e-07, + "loss": 0.49640393257141113, + "step": 6346 + }, + { + "epoch": 1.8559730954817955, + "grad_norm": 1.8316843043903746, + "learning_rate": 2.838564944018618e-07, + "loss": 0.5726122260093689, + "step": 6347 + }, + { + "epoch": 1.856265535897061, + "grad_norm": 1.653087716973347, + "learning_rate": 2.827138814096819e-07, + "loss": 0.5106557011604309, + "step": 6348 + }, + { + "epoch": 1.8565579763123263, + "grad_norm": 1.5025453294784719, + "learning_rate": 2.8157353977405044e-07, + "loss": 0.45941129326820374, + "step": 6349 + }, + { + "epoch": 1.8568504167275917, + "grad_norm": 1.781767756464568, + "learning_rate": 2.8043546976151414e-07, + "loss": 0.488609254360199, + "step": 6350 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 1.764244860072195, + "learning_rate": 2.7929967163809135e-07, + "loss": 0.639745831489563, + "step": 6351 + }, + { + "epoch": 1.8574352975581225, + "grad_norm": 1.498822179909691, + "learning_rate": 2.7816614566926747e-07, + "loss": 0.45327228307724, + "step": 6352 + }, + { + "epoch": 1.8577277379733879, + "grad_norm": 1.625074504661963, + "learning_rate": 2.7703489211999725e-07, + "loss": 0.5606091022491455, + "step": 6353 + }, + { + "epoch": 1.8580201783886534, + "grad_norm": 1.7312129624633084, + "learning_rate": 2.759059112547047e-07, + "loss": 0.5078528523445129, + "step": 6354 + }, + { + "epoch": 1.8583126188039187, + "grad_norm": 1.6687258508972733, + "learning_rate": 2.74779203337282e-07, + "loss": 0.5558253526687622, + "step": 6355 + }, + { + "epoch": 1.858605059219184, + "grad_norm": 1.7014892476807573, + "learning_rate": 2.7365476863108974e-07, + "loss": 0.3962102234363556, + "step": 6356 + }, + { + "epoch": 1.8588974996344496, + "grad_norm": 1.6957699860554467, + "learning_rate": 2.725326073989587e-07, + "loss": 0.4737718105316162, + "step": 6357 + }, + { + "epoch": 1.8591899400497147, + "grad_norm": 3.313281560384309, + 
"learning_rate": 2.7141271990318576e-07, + "loss": 0.5389090180397034, + "step": 6358 + }, + { + "epoch": 1.8594823804649803, + "grad_norm": 1.7840378938084138, + "learning_rate": 2.7029510640554033e-07, + "loss": 0.5311479568481445, + "step": 6359 + }, + { + "epoch": 1.8597748208802458, + "grad_norm": 1.631290291956445, + "learning_rate": 2.691797671672558e-07, + "loss": 0.4753482937812805, + "step": 6360 + }, + { + "epoch": 1.860067261295511, + "grad_norm": 1.581254208029566, + "learning_rate": 2.6806670244903577e-07, + "loss": 0.5192427635192871, + "step": 6361 + }, + { + "epoch": 1.8603597017107765, + "grad_norm": 1.9540580966263197, + "learning_rate": 2.6695591251105214e-07, + "loss": 0.5910875797271729, + "step": 6362 + }, + { + "epoch": 1.8606521421260418, + "grad_norm": 1.7486575397054567, + "learning_rate": 2.658473976129472e-07, + "loss": 0.5465212464332581, + "step": 6363 + }, + { + "epoch": 1.8609445825413071, + "grad_norm": 1.7446293681201037, + "learning_rate": 2.647411580138282e-07, + "loss": 0.43188267946243286, + "step": 6364 + }, + { + "epoch": 1.8612370229565727, + "grad_norm": 2.144472636918694, + "learning_rate": 2.636371939722715e-07, + "loss": 0.5723724365234375, + "step": 6365 + }, + { + "epoch": 1.861529463371838, + "grad_norm": 1.6310859619397844, + "learning_rate": 2.62535505746323e-07, + "loss": 0.47383856773376465, + "step": 6366 + }, + { + "epoch": 1.8618219037871033, + "grad_norm": 1.764378835172625, + "learning_rate": 2.6143609359349566e-07, + "loss": 0.502855658531189, + "step": 6367 + }, + { + "epoch": 1.8621143442023689, + "grad_norm": 2.265501418087609, + "learning_rate": 2.6033895777077043e-07, + "loss": 0.5934205055236816, + "step": 6368 + }, + { + "epoch": 1.8624067846176342, + "grad_norm": 1.469455820490925, + "learning_rate": 2.5924409853459455e-07, + "loss": 0.4157971143722534, + "step": 6369 + }, + { + "epoch": 1.8626992250328995, + "grad_norm": 1.8051847044948597, + "learning_rate": 2.5815151614088764e-07, + "loss": 
0.5944307446479797, + "step": 6370 + }, + { + "epoch": 1.862991665448165, + "grad_norm": 2.0081645135491812, + "learning_rate": 2.57061210845031e-07, + "loss": 0.5603153705596924, + "step": 6371 + }, + { + "epoch": 1.8632841058634302, + "grad_norm": 1.752999497142634, + "learning_rate": 2.559731829018786e-07, + "loss": 0.49231380224227905, + "step": 6372 + }, + { + "epoch": 1.8635765462786957, + "grad_norm": 1.666251917997058, + "learning_rate": 2.548874325657502e-07, + "loss": 0.46984565258026123, + "step": 6373 + }, + { + "epoch": 1.863868986693961, + "grad_norm": 1.7373025752546019, + "learning_rate": 2.5380396009043297e-07, + "loss": 0.5088338255882263, + "step": 6374 + }, + { + "epoch": 1.8641614271092264, + "grad_norm": 1.7554684094014161, + "learning_rate": 2.52722765729182e-07, + "loss": 0.4760589599609375, + "step": 6375 + }, + { + "epoch": 1.864453867524492, + "grad_norm": 1.6521606786384044, + "learning_rate": 2.5164384973471954e-07, + "loss": 0.44232040643692017, + "step": 6376 + }, + { + "epoch": 1.8647463079397573, + "grad_norm": 1.736903879415624, + "learning_rate": 2.505672123592373e-07, + "loss": 0.46714338660240173, + "step": 6377 + }, + { + "epoch": 1.8650387483550226, + "grad_norm": 1.9333860177281759, + "learning_rate": 2.494928538543917e-07, + "loss": 0.5527149438858032, + "step": 6378 + }, + { + "epoch": 1.8653311887702881, + "grad_norm": 1.690422887605866, + "learning_rate": 2.484207744713074e-07, + "loss": 0.5006313323974609, + "step": 6379 + }, + { + "epoch": 1.8656236291855535, + "grad_norm": 1.5247883016042734, + "learning_rate": 2.473509744605751e-07, + "loss": 0.5007860660552979, + "step": 6380 + }, + { + "epoch": 1.8659160696008188, + "grad_norm": 1.683063597354387, + "learning_rate": 2.4628345407225804e-07, + "loss": 0.4354132413864136, + "step": 6381 + }, + { + "epoch": 1.8662085100160843, + "grad_norm": 1.718309113338333, + "learning_rate": 2.452182135558789e-07, + "loss": 0.5199555158615112, + "step": 6382 + }, + { + "epoch": 
1.8665009504313494, + "grad_norm": 1.6260046663066803, + "learning_rate": 2.441552531604319e-07, + "loss": 0.5117326974868774, + "step": 6383 + }, + { + "epoch": 1.866793390846615, + "grad_norm": 1.803024051218915, + "learning_rate": 2.43094573134377e-07, + "loss": 0.5169814825057983, + "step": 6384 + }, + { + "epoch": 1.8670858312618805, + "grad_norm": 1.7012998015666523, + "learning_rate": 2.420361737256438e-07, + "loss": 0.563339352607727, + "step": 6385 + }, + { + "epoch": 1.8673782716771457, + "grad_norm": 2.1248949598274325, + "learning_rate": 2.409800551816255e-07, + "loss": 0.710465133190155, + "step": 6386 + }, + { + "epoch": 1.8676707120924112, + "grad_norm": 1.6580658731053397, + "learning_rate": 2.3992621774918343e-07, + "loss": 0.6894562244415283, + "step": 6387 + }, + { + "epoch": 1.8679631525076765, + "grad_norm": 1.7380197058585787, + "learning_rate": 2.388746616746462e-07, + "loss": 0.5105183124542236, + "step": 6388 + }, + { + "epoch": 1.8682555929229419, + "grad_norm": 2.0034985048956684, + "learning_rate": 2.3782538720380722e-07, + "loss": 0.4602908492088318, + "step": 6389 + }, + { + "epoch": 1.8685480333382074, + "grad_norm": 1.7787197864367217, + "learning_rate": 2.3677839458192908e-07, + "loss": 0.5395161509513855, + "step": 6390 + }, + { + "epoch": 1.8688404737534727, + "grad_norm": 1.6121023481071262, + "learning_rate": 2.3573368405374054e-07, + "loss": 0.5842725038528442, + "step": 6391 + }, + { + "epoch": 1.869132914168738, + "grad_norm": 1.6354709739233064, + "learning_rate": 2.346912558634362e-07, + "loss": 0.5837947130203247, + "step": 6392 + }, + { + "epoch": 1.8694253545840036, + "grad_norm": 1.8136211176417363, + "learning_rate": 2.3365111025467568e-07, + "loss": 0.5255596041679382, + "step": 6393 + }, + { + "epoch": 1.869717794999269, + "grad_norm": 1.5586602271443384, + "learning_rate": 2.326132474705889e-07, + "loss": 0.5614485144615173, + "step": 6394 + }, + { + "epoch": 1.8700102354145343, + "grad_norm": 1.5895893761997042, + 
"learning_rate": 2.3157766775376733e-07, + "loss": 0.5510128736495972, + "step": 6395 + }, + { + "epoch": 1.8703026758297998, + "grad_norm": 2.295988070565878, + "learning_rate": 2.3054437134627406e-07, + "loss": 0.690884530544281, + "step": 6396 + }, + { + "epoch": 1.870595116245065, + "grad_norm": 1.94960784120805, + "learning_rate": 2.2951335848963364e-07, + "loss": 0.637476921081543, + "step": 6397 + }, + { + "epoch": 1.8708875566603305, + "grad_norm": 1.6526446878259382, + "learning_rate": 2.2848462942484108e-07, + "loss": 0.5254319906234741, + "step": 6398 + }, + { + "epoch": 1.871179997075596, + "grad_norm": 1.7552717813182315, + "learning_rate": 2.27458184392354e-07, + "loss": 0.5038233995437622, + "step": 6399 + }, + { + "epoch": 1.8714724374908611, + "grad_norm": 1.4123258498894362, + "learning_rate": 2.2643402363209832e-07, + "loss": 0.43701431155204773, + "step": 6400 + }, + { + "epoch": 1.8717648779061267, + "grad_norm": 1.8138198755485717, + "learning_rate": 2.2541214738346583e-07, + "loss": 0.5490877628326416, + "step": 6401 + }, + { + "epoch": 1.872057318321392, + "grad_norm": 1.5452561215431913, + "learning_rate": 2.2439255588531327e-07, + "loss": 0.48393410444259644, + "step": 6402 + }, + { + "epoch": 1.8723497587366573, + "grad_norm": 1.6213926610567049, + "learning_rate": 2.2337524937596444e-07, + "loss": 0.5439243912696838, + "step": 6403 + }, + { + "epoch": 1.8726421991519229, + "grad_norm": 1.6026974016529494, + "learning_rate": 2.22360228093208e-07, + "loss": 0.5272157192230225, + "step": 6404 + }, + { + "epoch": 1.8729346395671882, + "grad_norm": 1.6750451870732375, + "learning_rate": 2.2134749227429864e-07, + "loss": 0.6323473453521729, + "step": 6405 + }, + { + "epoch": 1.8732270799824535, + "grad_norm": 1.6749139186520705, + "learning_rate": 2.2033704215595808e-07, + "loss": 0.4568995237350464, + "step": 6406 + }, + { + "epoch": 1.873519520397719, + "grad_norm": 1.8331627672377568, + "learning_rate": 2.1932887797437296e-07, + "loss": 
0.5817153453826904, + "step": 6407 + }, + { + "epoch": 1.8738119608129844, + "grad_norm": 1.4674902238035163, + "learning_rate": 2.183229999651948e-07, + "loss": 0.5104260444641113, + "step": 6408 + }, + { + "epoch": 1.8741044012282497, + "grad_norm": 1.7946613600749395, + "learning_rate": 2.1731940836354105e-07, + "loss": 0.44944921135902405, + "step": 6409 + }, + { + "epoch": 1.8743968416435153, + "grad_norm": 1.794977484250215, + "learning_rate": 2.163181034039974e-07, + "loss": 0.6935169696807861, + "step": 6410 + }, + { + "epoch": 1.8746892820587804, + "grad_norm": 1.7330999339843873, + "learning_rate": 2.1531908532060998e-07, + "loss": 0.55609130859375, + "step": 6411 + }, + { + "epoch": 1.874981722474046, + "grad_norm": 1.6428359107019144, + "learning_rate": 2.143223543468953e-07, + "loss": 0.5402215719223022, + "step": 6412 + }, + { + "epoch": 1.8752741628893113, + "grad_norm": 1.8163043216263146, + "learning_rate": 2.1332791071583258e-07, + "loss": 0.5669365525245667, + "step": 6413 + }, + { + "epoch": 1.8755666033045766, + "grad_norm": 2.2122008806914044, + "learning_rate": 2.123357546598659e-07, + "loss": 0.46257615089416504, + "step": 6414 + }, + { + "epoch": 1.8758590437198421, + "grad_norm": 1.6308794717153283, + "learning_rate": 2.1134588641090858e-07, + "loss": 0.4596136212348938, + "step": 6415 + }, + { + "epoch": 1.8761514841351075, + "grad_norm": 1.6758615624094995, + "learning_rate": 2.1035830620033227e-07, + "loss": 0.5086819529533386, + "step": 6416 + }, + { + "epoch": 1.8764439245503728, + "grad_norm": 1.8974547658257448, + "learning_rate": 2.0937301425898115e-07, + "loss": 0.6008501052856445, + "step": 6417 + }, + { + "epoch": 1.8767363649656383, + "grad_norm": 1.8448672190670345, + "learning_rate": 2.0839001081715882e-07, + "loss": 0.5943784713745117, + "step": 6418 + }, + { + "epoch": 1.8770288053809037, + "grad_norm": 1.3203141385144623, + "learning_rate": 2.0740929610463813e-07, + "loss": 0.5006660223007202, + "step": 6419 + }, + { + 
"epoch": 1.877321245796169, + "grad_norm": 1.7508035137785818, + "learning_rate": 2.0643087035065458e-07, + "loss": 0.5434073805809021, + "step": 6420 + }, + { + "epoch": 1.8776136862114345, + "grad_norm": 1.8446497118213794, + "learning_rate": 2.0545473378390858e-07, + "loss": 0.6426963210105896, + "step": 6421 + }, + { + "epoch": 1.8779061266266996, + "grad_norm": 1.7388169538440008, + "learning_rate": 2.044808866325676e-07, + "loss": 0.5190218687057495, + "step": 6422 + }, + { + "epoch": 1.8781985670419652, + "grad_norm": 1.5291942184143035, + "learning_rate": 2.035093291242607e-07, + "loss": 0.40918534994125366, + "step": 6423 + }, + { + "epoch": 1.8784910074572307, + "grad_norm": 1.719713887519883, + "learning_rate": 2.0254006148608507e-07, + "loss": 0.5403652191162109, + "step": 6424 + }, + { + "epoch": 1.8787834478724958, + "grad_norm": 1.3839892041506006, + "learning_rate": 2.0157308394460062e-07, + "loss": 0.49781516194343567, + "step": 6425 + }, + { + "epoch": 1.8790758882877614, + "grad_norm": 1.8332751958303748, + "learning_rate": 2.006083967258321e-07, + "loss": 0.5841303467750549, + "step": 6426 + }, + { + "epoch": 1.8793683287030267, + "grad_norm": 1.679945923485487, + "learning_rate": 1.9964600005527024e-07, + "loss": 0.5054808855056763, + "step": 6427 + }, + { + "epoch": 1.879660769118292, + "grad_norm": 1.7695393284467882, + "learning_rate": 1.9868589415786843e-07, + "loss": 0.4801362454891205, + "step": 6428 + }, + { + "epoch": 1.8799532095335576, + "grad_norm": 1.8547174560912147, + "learning_rate": 1.9772807925804494e-07, + "loss": 0.4709380269050598, + "step": 6429 + }, + { + "epoch": 1.880245649948823, + "grad_norm": 1.8447220446699908, + "learning_rate": 1.9677255557968511e-07, + "loss": 0.665968120098114, + "step": 6430 + }, + { + "epoch": 1.8805380903640883, + "grad_norm": 1.7494009698963573, + "learning_rate": 1.9581932334613585e-07, + "loss": 0.515839159488678, + "step": 6431 + }, + { + "epoch": 1.8808305307793538, + "grad_norm": 
1.6699738562759978, + "learning_rate": 1.948683827802089e-07, + "loss": 0.5399242043495178, + "step": 6432 + }, + { + "epoch": 1.8811229711946191, + "grad_norm": 1.7478095955612059, + "learning_rate": 1.9391973410418097e-07, + "loss": 0.6167087554931641, + "step": 6433 + }, + { + "epoch": 1.8814154116098845, + "grad_norm": 1.826500337038364, + "learning_rate": 1.9297337753979462e-07, + "loss": 0.6139745116233826, + "step": 6434 + }, + { + "epoch": 1.88170785202515, + "grad_norm": 2.0873679343118257, + "learning_rate": 1.9202931330825292e-07, + "loss": 0.7103149890899658, + "step": 6435 + }, + { + "epoch": 1.8820002924404151, + "grad_norm": 1.6777685812633742, + "learning_rate": 1.9108754163022602e-07, + "loss": 0.5958741903305054, + "step": 6436 + }, + { + "epoch": 1.8822927328556807, + "grad_norm": 1.2489160599157765, + "learning_rate": 1.9014806272584673e-07, + "loss": 0.32660478353500366, + "step": 6437 + }, + { + "epoch": 1.8825851732709462, + "grad_norm": 1.822465954469875, + "learning_rate": 1.8921087681471272e-07, + "loss": 0.49485981464385986, + "step": 6438 + }, + { + "epoch": 1.8828776136862113, + "grad_norm": 1.5404253681507418, + "learning_rate": 1.8827598411588544e-07, + "loss": 0.5106277465820312, + "step": 6439 + }, + { + "epoch": 1.8831700541014769, + "grad_norm": 1.5696470040532076, + "learning_rate": 1.8734338484789115e-07, + "loss": 0.50006502866745, + "step": 6440 + }, + { + "epoch": 1.8834624945167422, + "grad_norm": 1.5827360977472946, + "learning_rate": 1.8641307922871887e-07, + "loss": 0.47097745537757874, + "step": 6441 + }, + { + "epoch": 1.8837549349320075, + "grad_norm": 1.718260594389779, + "learning_rate": 1.854850674758213e-07, + "loss": 0.5874402523040771, + "step": 6442 + }, + { + "epoch": 1.884047375347273, + "grad_norm": 1.7055917291229012, + "learning_rate": 1.8455934980611602e-07, + "loss": 0.45705318450927734, + "step": 6443 + }, + { + "epoch": 1.8843398157625384, + "grad_norm": 1.8262667617041222, + "learning_rate": 
1.8363592643598328e-07, + "loss": 0.4949952960014343, + "step": 6444 + }, + { + "epoch": 1.8846322561778037, + "grad_norm": 2.0005095204142056, + "learning_rate": 1.827147975812693e-07, + "loss": 0.5311721563339233, + "step": 6445 + }, + { + "epoch": 1.8849246965930693, + "grad_norm": 1.8075375628836245, + "learning_rate": 1.817959634572819e-07, + "loss": 0.5652828216552734, + "step": 6446 + }, + { + "epoch": 1.8852171370083346, + "grad_norm": 1.7007026167846622, + "learning_rate": 1.8087942427879146e-07, + "loss": 0.4856044054031372, + "step": 6447 + }, + { + "epoch": 1.8855095774236, + "grad_norm": 1.6920105837383546, + "learning_rate": 1.799651802600344e-07, + "loss": 0.55420982837677, + "step": 6448 + }, + { + "epoch": 1.8858020178388655, + "grad_norm": 1.8804834035548856, + "learning_rate": 1.7905323161470867e-07, + "loss": 0.5869326591491699, + "step": 6449 + }, + { + "epoch": 1.8860944582541306, + "grad_norm": 1.761061751635786, + "learning_rate": 1.781435785559793e-07, + "loss": 0.4505504369735718, + "step": 6450 + }, + { + "epoch": 1.8863868986693961, + "grad_norm": 1.7194415376329713, + "learning_rate": 1.7723622129646955e-07, + "loss": 0.5460773706436157, + "step": 6451 + }, + { + "epoch": 1.8866793390846615, + "grad_norm": 1.7253684204963688, + "learning_rate": 1.7633116004826978e-07, + "loss": 0.6214778423309326, + "step": 6452 + }, + { + "epoch": 1.8869717794999268, + "grad_norm": 1.786722853658628, + "learning_rate": 1.7542839502293297e-07, + "loss": 0.4900703430175781, + "step": 6453 + }, + { + "epoch": 1.8872642199151923, + "grad_norm": 1.8351888114829378, + "learning_rate": 1.7452792643147364e-07, + "loss": 0.5177547931671143, + "step": 6454 + }, + { + "epoch": 1.8875566603304577, + "grad_norm": 1.6033594290974305, + "learning_rate": 1.7362975448437236e-07, + "loss": 0.3914458453655243, + "step": 6455 + }, + { + "epoch": 1.887849100745723, + "grad_norm": 1.7306995937297311, + "learning_rate": 1.7273387939157116e-07, + "loss": 0.5222523212432861, + 
"step": 6456 + }, + { + "epoch": 1.8881415411609885, + "grad_norm": 1.8351026582741266, + "learning_rate": 1.7184030136247477e-07, + "loss": 0.5097587704658508, + "step": 6457 + }, + { + "epoch": 1.8884339815762539, + "grad_norm": 1.711376264331189, + "learning_rate": 1.7094902060595053e-07, + "loss": 0.517410397529602, + "step": 6458 + }, + { + "epoch": 1.8887264219915192, + "grad_norm": 1.5054067124169248, + "learning_rate": 1.7006003733033182e-07, + "loss": 0.4951689839363098, + "step": 6459 + }, + { + "epoch": 1.8890188624067847, + "grad_norm": 1.8698243351971042, + "learning_rate": 1.6917335174341242e-07, + "loss": 0.5530004501342773, + "step": 6460 + }, + { + "epoch": 1.8893113028220498, + "grad_norm": 1.3793759581483827, + "learning_rate": 1.6828896405244988e-07, + "loss": 0.5231990814208984, + "step": 6461 + }, + { + "epoch": 1.8896037432373154, + "grad_norm": 1.7109665283076239, + "learning_rate": 1.6740687446416326e-07, + "loss": 0.5142268538475037, + "step": 6462 + }, + { + "epoch": 1.889896183652581, + "grad_norm": 1.5939124952252972, + "learning_rate": 1.6652708318473765e-07, + "loss": 0.4803999364376068, + "step": 6463 + }, + { + "epoch": 1.890188624067846, + "grad_norm": 1.8261203070041963, + "learning_rate": 1.6564959041981743e-07, + "loss": 0.38822099566459656, + "step": 6464 + }, + { + "epoch": 1.8904810644831116, + "grad_norm": 1.7158195687276572, + "learning_rate": 1.6477439637451186e-07, + "loss": 0.4778556823730469, + "step": 6465 + }, + { + "epoch": 1.890773504898377, + "grad_norm": 1.548976438279917, + "learning_rate": 1.6390150125339178e-07, + "loss": 0.5083664059638977, + "step": 6466 + }, + { + "epoch": 1.8910659453136422, + "grad_norm": 2.298817115631298, + "learning_rate": 1.6303090526049058e-07, + "loss": 0.6592142581939697, + "step": 6467 + }, + { + "epoch": 1.8913583857289078, + "grad_norm": 1.7188849828284447, + "learning_rate": 1.6216260859930776e-07, + "loss": 0.6350588798522949, + "step": 6468 + }, + { + "epoch": 
1.8916508261441731, + "grad_norm": 1.900981319900476, + "learning_rate": 1.6129661147279763e-07, + "loss": 0.5542852282524109, + "step": 6469 + }, + { + "epoch": 1.8919432665594385, + "grad_norm": 1.7094379727839777, + "learning_rate": 1.6043291408338602e-07, + "loss": 0.572988748550415, + "step": 6470 + }, + { + "epoch": 1.892235706974704, + "grad_norm": 1.578693569659532, + "learning_rate": 1.5957151663295367e-07, + "loss": 0.4801466763019562, + "step": 6471 + }, + { + "epoch": 1.8925281473899693, + "grad_norm": 2.0149025268161207, + "learning_rate": 1.5871241932284953e-07, + "loss": 0.6286160349845886, + "step": 6472 + }, + { + "epoch": 1.8928205878052347, + "grad_norm": 1.8739502258074872, + "learning_rate": 1.5785562235388074e-07, + "loss": 0.5731645822525024, + "step": 6473 + }, + { + "epoch": 1.8931130282205002, + "grad_norm": 2.02559646967304, + "learning_rate": 1.5700112592631933e-07, + "loss": 0.47890836000442505, + "step": 6474 + }, + { + "epoch": 1.8934054686357653, + "grad_norm": 1.8833158182705436, + "learning_rate": 1.5614893023989886e-07, + "loss": 0.4379703998565674, + "step": 6475 + }, + { + "epoch": 1.8936979090510309, + "grad_norm": 1.886508266764503, + "learning_rate": 1.5529903549381331e-07, + "loss": 0.5629044771194458, + "step": 6476 + }, + { + "epoch": 1.8939903494662964, + "grad_norm": 1.6388873220258502, + "learning_rate": 1.5445144188672268e-07, + "loss": 0.4995439052581787, + "step": 6477 + }, + { + "epoch": 1.8942827898815615, + "grad_norm": 1.54762620576383, + "learning_rate": 1.5360614961674403e-07, + "loss": 0.5350549221038818, + "step": 6478 + }, + { + "epoch": 1.894575230296827, + "grad_norm": 1.636976407400752, + "learning_rate": 1.5276315888146266e-07, + "loss": 0.5245925188064575, + "step": 6479 + }, + { + "epoch": 1.8948676707120924, + "grad_norm": 1.870112790684546, + "learning_rate": 1.519224698779198e-07, + "loss": 0.5159675478935242, + "step": 6480 + }, + { + "epoch": 1.8951601111273577, + "grad_norm": 1.6327790205426773, 
+ "learning_rate": 1.5108408280262276e-07, + "loss": 0.5046014189720154, + "step": 6481 + }, + { + "epoch": 1.8954525515426233, + "grad_norm": 1.5658787677393426, + "learning_rate": 1.502479978515381e-07, + "loss": 0.35977911949157715, + "step": 6482 + }, + { + "epoch": 1.8957449919578886, + "grad_norm": 1.6374646749200208, + "learning_rate": 1.4941421522009725e-07, + "loss": 0.4689600467681885, + "step": 6483 + }, + { + "epoch": 1.896037432373154, + "grad_norm": 1.713919299692529, + "learning_rate": 1.485827351031899e-07, + "loss": 0.5729683637619019, + "step": 6484 + }, + { + "epoch": 1.8963298727884195, + "grad_norm": 2.081397285004385, + "learning_rate": 1.4775355769517163e-07, + "loss": 0.5929673314094543, + "step": 6485 + }, + { + "epoch": 1.8966223132036848, + "grad_norm": 1.6003411415494537, + "learning_rate": 1.4692668318985636e-07, + "loss": 0.43075594305992126, + "step": 6486 + }, + { + "epoch": 1.8969147536189501, + "grad_norm": 1.7646064022155787, + "learning_rate": 1.461021117805217e-07, + "loss": 0.5247992277145386, + "step": 6487 + }, + { + "epoch": 1.8972071940342157, + "grad_norm": 1.7947400732319756, + "learning_rate": 1.4527984365990455e-07, + "loss": 0.4930630326271057, + "step": 6488 + }, + { + "epoch": 1.8974996344494808, + "grad_norm": 2.490399223660391, + "learning_rate": 1.4445987902020676e-07, + "loss": 0.7183758616447449, + "step": 6489 + }, + { + "epoch": 1.8977920748647463, + "grad_norm": 1.6603594705802933, + "learning_rate": 1.4364221805309052e-07, + "loss": 0.4766094982624054, + "step": 6490 + }, + { + "epoch": 1.8980845152800117, + "grad_norm": 1.7401365125544646, + "learning_rate": 1.4282686094967747e-07, + "loss": 0.43594151735305786, + "step": 6491 + }, + { + "epoch": 1.898376955695277, + "grad_norm": 1.4953976915814553, + "learning_rate": 1.4201380790055397e-07, + "loss": 0.49320366978645325, + "step": 6492 + }, + { + "epoch": 1.8986693961105425, + "grad_norm": 1.7835092237734465, + "learning_rate": 1.4120305909576359e-07, + 
"loss": 0.600296139717102, + "step": 6493 + }, + { + "epoch": 1.8989618365258079, + "grad_norm": 1.8325915671317163, + "learning_rate": 1.4039461472481696e-07, + "loss": 0.6692827939987183, + "step": 6494 + }, + { + "epoch": 1.8992542769410732, + "grad_norm": 1.5707947665490356, + "learning_rate": 1.395884749766807e-07, + "loss": 0.49206262826919556, + "step": 6495 + }, + { + "epoch": 1.8995467173563387, + "grad_norm": 1.5801197568349268, + "learning_rate": 1.3878464003978741e-07, + "loss": 0.4987361431121826, + "step": 6496 + }, + { + "epoch": 1.899839157771604, + "grad_norm": 1.5345056226134064, + "learning_rate": 1.3798311010202681e-07, + "loss": 0.5020350217819214, + "step": 6497 + }, + { + "epoch": 1.9001315981868694, + "grad_norm": 1.804856300616187, + "learning_rate": 1.3718388535075123e-07, + "loss": 0.5906451344490051, + "step": 6498 + }, + { + "epoch": 1.900424038602135, + "grad_norm": 1.7402170644717794, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.5089905858039856, + "step": 6499 + }, + { + "epoch": 1.9007164790174, + "grad_norm": 1.7322257732687294, + "learning_rate": 1.3559235215437672e-07, + "loss": 0.4633820056915283, + "step": 6500 + }, + { + "epoch": 1.9010089194326656, + "grad_norm": 1.760704522549711, + "learning_rate": 1.34800044081288e-07, + "loss": 0.4214053750038147, + "step": 6501 + }, + { + "epoch": 1.9013013598479311, + "grad_norm": 1.8502899980656935, + "learning_rate": 1.3401004193870694e-07, + "loss": 0.6652689576148987, + "step": 6502 + }, + { + "epoch": 1.9015938002631962, + "grad_norm": 1.9902189127655232, + "learning_rate": 1.3322234591129247e-07, + "loss": 0.610877275466919, + "step": 6503 + }, + { + "epoch": 1.9018862406784618, + "grad_norm": 1.8174576160077263, + "learning_rate": 1.324369561831651e-07, + "loss": 0.5051777958869934, + "step": 6504 + }, + { + "epoch": 1.9021786810937271, + "grad_norm": 1.679424427708786, + "learning_rate": 1.3165387293790133e-07, + "loss": 0.5004675984382629, + "step": 6505 + }, + { + 
"epoch": 1.9024711215089924, + "grad_norm": 1.7771913721647656, + "learning_rate": 1.3087309635854583e-07, + "loss": 0.5778615474700928, + "step": 6506 + }, + { + "epoch": 1.902763561924258, + "grad_norm": 2.0292378822767807, + "learning_rate": 1.300946266275982e-07, + "loss": 0.5282145738601685, + "step": 6507 + }, + { + "epoch": 1.9030560023395233, + "grad_norm": 1.7978860546574749, + "learning_rate": 1.2931846392702174e-07, + "loss": 0.5965359807014465, + "step": 6508 + }, + { + "epoch": 1.9033484427547886, + "grad_norm": 1.6652110616876246, + "learning_rate": 1.2854460843823912e-07, + "loss": 0.5891281366348267, + "step": 6509 + }, + { + "epoch": 1.9036408831700542, + "grad_norm": 1.6248785946895787, + "learning_rate": 1.2777306034213677e-07, + "loss": 0.516204297542572, + "step": 6510 + }, + { + "epoch": 1.9039333235853195, + "grad_norm": 1.6814946652270708, + "learning_rate": 1.2700381981905486e-07, + "loss": 0.5148355960845947, + "step": 6511 + }, + { + "epoch": 1.9042257640005849, + "grad_norm": 1.8013354973349966, + "learning_rate": 1.2623688704880287e-07, + "loss": 0.5599791407585144, + "step": 6512 + }, + { + "epoch": 1.9045182044158504, + "grad_norm": 1.4831223871376102, + "learning_rate": 1.2547226221064412e-07, + "loss": 0.44349417090415955, + "step": 6513 + }, + { + "epoch": 1.9048106448311155, + "grad_norm": 1.8442502212828862, + "learning_rate": 1.2470994548330672e-07, + "loss": 0.5919830799102783, + "step": 6514 + }, + { + "epoch": 1.905103085246381, + "grad_norm": 1.5907952124169482, + "learning_rate": 1.2394993704497592e-07, + "loss": 0.4615499675273895, + "step": 6515 + }, + { + "epoch": 1.9053955256616466, + "grad_norm": 2.080938429630683, + "learning_rate": 1.2319223707330074e-07, + "loss": 0.5217719674110413, + "step": 6516 + }, + { + "epoch": 1.9056879660769117, + "grad_norm": 1.7218384779241716, + "learning_rate": 1.2243684574538838e-07, + "loss": 0.510722279548645, + "step": 6517 + }, + { + "epoch": 1.9059804064921773, + "grad_norm": 
1.7577870608967676, + "learning_rate": 1.2168376323780652e-07, + "loss": 0.6744403839111328, + "step": 6518 + }, + { + "epoch": 1.9062728469074426, + "grad_norm": 1.852387461765699, + "learning_rate": 1.209329897265832e-07, + "loss": 0.4991394281387329, + "step": 6519 + }, + { + "epoch": 1.906565287322708, + "grad_norm": 1.5123359691224252, + "learning_rate": 1.2018452538720805e-07, + "loss": 0.43237754702568054, + "step": 6520 + }, + { + "epoch": 1.9068577277379735, + "grad_norm": 1.929873331270624, + "learning_rate": 1.1943837039463112e-07, + "loss": 0.6042662262916565, + "step": 6521 + }, + { + "epoch": 1.9071501681532388, + "grad_norm": 1.5924831654811167, + "learning_rate": 1.186945249232585e-07, + "loss": 0.4275910556316376, + "step": 6522 + }, + { + "epoch": 1.9074426085685041, + "grad_norm": 1.6404715584331906, + "learning_rate": 1.1795298914696219e-07, + "loss": 0.4368266463279724, + "step": 6523 + }, + { + "epoch": 1.9077350489837697, + "grad_norm": 1.7092717646033881, + "learning_rate": 1.172137632390713e-07, + "loss": 0.49492496252059937, + "step": 6524 + }, + { + "epoch": 1.908027489399035, + "grad_norm": 1.587478317321713, + "learning_rate": 1.164768473723743e-07, + "loss": 0.4296407103538513, + "step": 6525 + }, + { + "epoch": 1.9083199298143003, + "grad_norm": 1.746911434628144, + "learning_rate": 1.1574224171912118e-07, + "loss": 0.4609370231628418, + "step": 6526 + }, + { + "epoch": 1.9086123702295659, + "grad_norm": 1.687321204236502, + "learning_rate": 1.1500994645102237e-07, + "loss": 0.5201660394668579, + "step": 6527 + }, + { + "epoch": 1.908904810644831, + "grad_norm": 1.686028014701993, + "learning_rate": 1.1427996173924649e-07, + "loss": 0.49946731328964233, + "step": 6528 + }, + { + "epoch": 1.9091972510600965, + "grad_norm": 1.700984250030961, + "learning_rate": 1.1355228775442262e-07, + "loss": 0.5479187369346619, + "step": 6529 + }, + { + "epoch": 1.9094896914753618, + "grad_norm": 1.6485232123504545, + "learning_rate": 
1.1282692466664247e-07, + "loss": 0.5227243900299072, + "step": 6530 + }, + { + "epoch": 1.9097821318906272, + "grad_norm": 1.896983089459967, + "learning_rate": 1.1210387264545264e-07, + "loss": 0.42863208055496216, + "step": 6531 + }, + { + "epoch": 1.9100745723058927, + "grad_norm": 1.552171259240321, + "learning_rate": 1.113831318598635e-07, + "loss": 0.37858498096466064, + "step": 6532 + }, + { + "epoch": 1.910367012721158, + "grad_norm": 1.852509398879224, + "learning_rate": 1.1066470247834471e-07, + "loss": 0.6447315216064453, + "step": 6533 + }, + { + "epoch": 1.9106594531364234, + "grad_norm": 1.4833597844037574, + "learning_rate": 1.0994858466882197e-07, + "loss": 0.4159877300262451, + "step": 6534 + }, + { + "epoch": 1.910951893551689, + "grad_norm": 1.7056274655886765, + "learning_rate": 1.0923477859868581e-07, + "loss": 0.5042530298233032, + "step": 6535 + }, + { + "epoch": 1.9112443339669543, + "grad_norm": 1.6939120860687955, + "learning_rate": 1.0852328443478278e-07, + "loss": 0.35955798625946045, + "step": 6536 + }, + { + "epoch": 1.9115367743822196, + "grad_norm": 1.6272843503399623, + "learning_rate": 1.0781410234342093e-07, + "loss": 0.561823308467865, + "step": 6537 + }, + { + "epoch": 1.9118292147974851, + "grad_norm": 1.6724333597123697, + "learning_rate": 1.0710723249036659e-07, + "loss": 0.44518136978149414, + "step": 6538 + }, + { + "epoch": 1.9121216552127502, + "grad_norm": 2.0012454930429397, + "learning_rate": 1.0640267504084756e-07, + "loss": 0.5657057166099548, + "step": 6539 + }, + { + "epoch": 1.9124140956280158, + "grad_norm": 1.5762808769057957, + "learning_rate": 1.0570043015954989e-07, + "loss": 0.5659947395324707, + "step": 6540 + }, + { + "epoch": 1.9127065360432813, + "grad_norm": 1.5811137035723222, + "learning_rate": 1.0500049801061784e-07, + "loss": 0.45648419857025146, + "step": 6541 + }, + { + "epoch": 1.9129989764585464, + "grad_norm": 1.8646406465839787, + "learning_rate": 1.0430287875765611e-07, + "loss": 
0.4978141784667969, + "step": 6542 + }, + { + "epoch": 1.913291416873812, + "grad_norm": 1.54273033799953, + "learning_rate": 1.0360757256372977e-07, + "loss": 0.5397627949714661, + "step": 6543 + }, + { + "epoch": 1.9135838572890773, + "grad_norm": 1.8918413526412523, + "learning_rate": 1.029145795913633e-07, + "loss": 0.6359304189682007, + "step": 6544 + }, + { + "epoch": 1.9138762977043426, + "grad_norm": 1.70706044627556, + "learning_rate": 1.0222390000253824e-07, + "loss": 0.5023899078369141, + "step": 6545 + }, + { + "epoch": 1.9141687381196082, + "grad_norm": 1.8668808073409142, + "learning_rate": 1.0153553395869654e-07, + "loss": 0.5231877565383911, + "step": 6546 + }, + { + "epoch": 1.9144611785348735, + "grad_norm": 1.7146199886416342, + "learning_rate": 1.008494816207406e-07, + "loss": 0.5925711393356323, + "step": 6547 + }, + { + "epoch": 1.9147536189501388, + "grad_norm": 1.5881527564838034, + "learning_rate": 1.0016574314902993e-07, + "loss": 0.42732810974121094, + "step": 6548 + }, + { + "epoch": 1.9150460593654044, + "grad_norm": 1.8539790257850415, + "learning_rate": 9.948431870338559e-08, + "loss": 0.5011821985244751, + "step": 6549 + }, + { + "epoch": 1.9153384997806697, + "grad_norm": 1.7063021653673758, + "learning_rate": 9.88052084430846e-08, + "loss": 0.5112487077713013, + "step": 6550 + }, + { + "epoch": 1.915630940195935, + "grad_norm": 1.8003514575818433, + "learning_rate": 9.812841252686667e-08, + "loss": 0.4751431345939636, + "step": 6551 + }, + { + "epoch": 1.9159233806112006, + "grad_norm": 1.9933791417538373, + "learning_rate": 9.745393111292745e-08, + "loss": 0.5343109369277954, + "step": 6552 + }, + { + "epoch": 1.9162158210264657, + "grad_norm": 1.4980785147509508, + "learning_rate": 9.678176435892417e-08, + "loss": 0.4602724015712738, + "step": 6553 + }, + { + "epoch": 1.9165082614417313, + "grad_norm": 1.5436966250785777, + "learning_rate": 9.611191242197005e-08, + "loss": 0.4756245017051697, + "step": 6554 + }, + { + "epoch": 
1.9168007018569968, + "grad_norm": 1.6531719135209273, + "learning_rate": 9.544437545864093e-08, + "loss": 0.5291459560394287, + "step": 6555 + }, + { + "epoch": 1.917093142272262, + "grad_norm": 2.0976196168420946, + "learning_rate": 9.47791536249676e-08, + "loss": 0.5357412099838257, + "step": 6556 + }, + { + "epoch": 1.9173855826875275, + "grad_norm": 2.238353466121697, + "learning_rate": 9.411624707644229e-08, + "loss": 0.6298913955688477, + "step": 6557 + }, + { + "epoch": 1.9176780231027928, + "grad_norm": 1.4485326554294644, + "learning_rate": 9.345565596801553e-08, + "loss": 0.5150517225265503, + "step": 6558 + }, + { + "epoch": 1.917970463518058, + "grad_norm": 1.8563821954536717, + "learning_rate": 9.279738045409603e-08, + "loss": 0.6264858245849609, + "step": 6559 + }, + { + "epoch": 1.9182629039333237, + "grad_norm": 1.46383829182073, + "learning_rate": 9.214142068855292e-08, + "loss": 0.33123475313186646, + "step": 6560 + }, + { + "epoch": 1.918555344348589, + "grad_norm": 1.6384165039446617, + "learning_rate": 9.148777682471133e-08, + "loss": 0.5540212392807007, + "step": 6561 + }, + { + "epoch": 1.9188477847638543, + "grad_norm": 1.8427168178125763, + "learning_rate": 9.083644901535793e-08, + "loss": 0.5633922219276428, + "step": 6562 + }, + { + "epoch": 1.9191402251791199, + "grad_norm": 1.7743383669625796, + "learning_rate": 9.018743741273428e-08, + "loss": 0.58629310131073, + "step": 6563 + }, + { + "epoch": 1.9194326655943852, + "grad_norm": 1.8674136448530827, + "learning_rate": 8.95407421685457e-08, + "loss": 0.5985243320465088, + "step": 6564 + }, + { + "epoch": 1.9197251060096505, + "grad_norm": 1.6803719834498339, + "learning_rate": 8.889636343395235e-08, + "loss": 0.5344138741493225, + "step": 6565 + }, + { + "epoch": 1.920017546424916, + "grad_norm": 1.783895238536977, + "learning_rate": 8.825430135957381e-08, + "loss": 0.6139744520187378, + "step": 6566 + }, + { + "epoch": 1.9203099868401812, + "grad_norm": 1.4220884637268112, + 
"learning_rate": 8.761455609548663e-08, + "loss": 0.46376854181289673, + "step": 6567 + }, + { + "epoch": 1.9206024272554467, + "grad_norm": 1.7412635159811354, + "learning_rate": 8.697712779122902e-08, + "loss": 0.5053622722625732, + "step": 6568 + }, + { + "epoch": 1.920894867670712, + "grad_norm": 1.52795636278423, + "learning_rate": 8.634201659579622e-08, + "loss": 0.4363771080970764, + "step": 6569 + }, + { + "epoch": 1.9211873080859774, + "grad_norm": 1.6799265353987254, + "learning_rate": 8.570922265764059e-08, + "loss": 0.4167904853820801, + "step": 6570 + }, + { + "epoch": 1.921479748501243, + "grad_norm": 1.7506509667217935, + "learning_rate": 8.507874612467382e-08, + "loss": 0.525320291519165, + "step": 6571 + }, + { + "epoch": 1.9217721889165083, + "grad_norm": 1.5127507314447914, + "learning_rate": 8.445058714426691e-08, + "loss": 0.4087376594543457, + "step": 6572 + }, + { + "epoch": 1.9220646293317736, + "grad_norm": 1.975359435328043, + "learning_rate": 8.382474586324796e-08, + "loss": 0.471457839012146, + "step": 6573 + }, + { + "epoch": 1.9223570697470391, + "grad_norm": 1.5584377744842253, + "learning_rate": 8.32012224279033e-08, + "loss": 0.6125116348266602, + "step": 6574 + }, + { + "epoch": 1.9226495101623045, + "grad_norm": 1.8527915049964467, + "learning_rate": 8.258001698397744e-08, + "loss": 0.3800301253795624, + "step": 6575 + }, + { + "epoch": 1.9229419505775698, + "grad_norm": 1.7927235022665284, + "learning_rate": 8.196112967667313e-08, + "loss": 0.561034083366394, + "step": 6576 + }, + { + "epoch": 1.9232343909928353, + "grad_norm": 1.8012018638552385, + "learning_rate": 8.134456065065354e-08, + "loss": 0.5768460631370544, + "step": 6577 + }, + { + "epoch": 1.9235268314081004, + "grad_norm": 1.809882879975094, + "learning_rate": 8.073031005003562e-08, + "loss": 0.47440657019615173, + "step": 6578 + }, + { + "epoch": 1.923819271823366, + "grad_norm": 1.4902012429082565, + "learning_rate": 8.011837801839672e-08, + "loss": 
0.5315208435058594, + "step": 6579 + }, + { + "epoch": 1.9241117122386315, + "grad_norm": 1.7054296975282524, + "learning_rate": 7.950876469877467e-08, + "loss": 0.4587036371231079, + "step": 6580 + }, + { + "epoch": 1.9244041526538966, + "grad_norm": 1.6717861291166198, + "learning_rate": 7.890147023366101e-08, + "loss": 0.5356466770172119, + "step": 6581 + }, + { + "epoch": 1.9246965930691622, + "grad_norm": 1.8066170712430372, + "learning_rate": 7.829649476500667e-08, + "loss": 0.48034095764160156, + "step": 6582 + }, + { + "epoch": 1.9249890334844275, + "grad_norm": 1.9403707417182101, + "learning_rate": 7.769383843422185e-08, + "loss": 0.502929151058197, + "step": 6583 + }, + { + "epoch": 1.9252814738996928, + "grad_norm": 1.5994546211401888, + "learning_rate": 7.709350138217386e-08, + "loss": 0.44771361351013184, + "step": 6584 + }, + { + "epoch": 1.9255739143149584, + "grad_norm": 1.7058923530240673, + "learning_rate": 7.649548374918824e-08, + "loss": 0.462479829788208, + "step": 6585 + }, + { + "epoch": 1.9258663547302237, + "grad_norm": 1.7481939511400157, + "learning_rate": 7.589978567504763e-08, + "loss": 0.4758496880531311, + "step": 6586 + }, + { + "epoch": 1.926158795145489, + "grad_norm": 1.8447645858435646, + "learning_rate": 7.530640729899174e-08, + "loss": 0.521172285079956, + "step": 6587 + }, + { + "epoch": 1.9264512355607546, + "grad_norm": 1.685029384432281, + "learning_rate": 7.471534875971964e-08, + "loss": 0.5274392366409302, + "step": 6588 + }, + { + "epoch": 1.92674367597602, + "grad_norm": 1.5547682278755586, + "learning_rate": 7.412661019538858e-08, + "loss": 0.4350961446762085, + "step": 6589 + }, + { + "epoch": 1.9270361163912852, + "grad_norm": 1.5773569785123847, + "learning_rate": 7.354019174361183e-08, + "loss": 0.6298524737358093, + "step": 6590 + }, + { + "epoch": 1.9273285568065508, + "grad_norm": 1.7494178023153484, + "learning_rate": 7.295609354146194e-08, + "loss": 0.5451292395591736, + "step": 6591 + }, + { + "epoch": 
1.927620997221816, + "grad_norm": 1.8824055292173802, + "learning_rate": 7.23743157254675e-08, + "loss": 0.5371264219284058, + "step": 6592 + }, + { + "epoch": 1.9279134376370815, + "grad_norm": 1.714393478017535, + "learning_rate": 7.179485843161526e-08, + "loss": 0.5805129408836365, + "step": 6593 + }, + { + "epoch": 1.928205878052347, + "grad_norm": 1.9692321834579947, + "learning_rate": 7.121772179535135e-08, + "loss": 0.5542718172073364, + "step": 6594 + }, + { + "epoch": 1.928498318467612, + "grad_norm": 1.7503350699121312, + "learning_rate": 7.064290595157675e-08, + "loss": 0.5668192505836487, + "step": 6595 + }, + { + "epoch": 1.9287907588828777, + "grad_norm": 1.6293975396756264, + "learning_rate": 7.007041103465062e-08, + "loss": 0.5107895731925964, + "step": 6596 + }, + { + "epoch": 1.929083199298143, + "grad_norm": 1.847055531354174, + "learning_rate": 6.950023717839261e-08, + "loss": 0.47974276542663574, + "step": 6597 + }, + { + "epoch": 1.9293756397134083, + "grad_norm": 1.5624753949857668, + "learning_rate": 6.893238451607387e-08, + "loss": 0.5641148090362549, + "step": 6598 + }, + { + "epoch": 1.9296680801286739, + "grad_norm": 1.7181332365296518, + "learning_rate": 6.836685318042935e-08, + "loss": 0.5940253734588623, + "step": 6599 + }, + { + "epoch": 1.9299605205439392, + "grad_norm": 1.6880020580834156, + "learning_rate": 6.780364330364775e-08, + "loss": 0.46844422817230225, + "step": 6600 + }, + { + "epoch": 1.9302529609592045, + "grad_norm": 1.6235992853167036, + "learning_rate": 6.724275501737487e-08, + "loss": 0.3933336138725281, + "step": 6601 + }, + { + "epoch": 1.93054540137447, + "grad_norm": 1.4538666395679365, + "learning_rate": 6.668418845271695e-08, + "loss": 0.4786602258682251, + "step": 6602 + }, + { + "epoch": 1.9308378417897354, + "grad_norm": 1.798637107768398, + "learning_rate": 6.612794374023402e-08, + "loss": 0.49695518612861633, + "step": 6603 + }, + { + "epoch": 1.9311302822050007, + "grad_norm": 1.5049309556488495, + 
"learning_rate": 6.557402100994426e-08, + "loss": 0.4798729121685028, + "step": 6604 + }, + { + "epoch": 1.9314227226202663, + "grad_norm": 1.7300127457609986, + "learning_rate": 6.502242039132634e-08, + "loss": 0.4187319278717041, + "step": 6605 + }, + { + "epoch": 1.9317151630355314, + "grad_norm": 2.050722935709042, + "learning_rate": 6.447314201331156e-08, + "loss": 0.4945526719093323, + "step": 6606 + }, + { + "epoch": 1.932007603450797, + "grad_norm": 1.8976456851513979, + "learning_rate": 6.392618600429057e-08, + "loss": 0.5721586346626282, + "step": 6607 + }, + { + "epoch": 1.9323000438660622, + "grad_norm": 1.6286185694607815, + "learning_rate": 6.338155249211109e-08, + "loss": 0.45542022585868835, + "step": 6608 + }, + { + "epoch": 1.9325924842813276, + "grad_norm": 1.7597762099762242, + "learning_rate": 6.283924160407796e-08, + "loss": 0.5627170205116272, + "step": 6609 + }, + { + "epoch": 1.9328849246965931, + "grad_norm": 1.6951677907486626, + "learning_rate": 6.22992534669542e-08, + "loss": 0.5369620323181152, + "step": 6610 + }, + { + "epoch": 1.9331773651118584, + "grad_norm": 1.619968087818578, + "learning_rate": 6.176158820695665e-08, + "loss": 0.5268368124961853, + "step": 6611 + }, + { + "epoch": 1.9334698055271238, + "grad_norm": 1.6828649754520415, + "learning_rate": 6.122624594976257e-08, + "loss": 0.5734575986862183, + "step": 6612 + }, + { + "epoch": 1.9337622459423893, + "grad_norm": 1.86766787540182, + "learning_rate": 6.069322682050516e-08, + "loss": 0.5066978931427002, + "step": 6613 + }, + { + "epoch": 1.9340546863576547, + "grad_norm": 1.68962846891993, + "learning_rate": 6.016253094377366e-08, + "loss": 0.5462731719017029, + "step": 6614 + }, + { + "epoch": 1.93434712677292, + "grad_norm": 1.8689912619353801, + "learning_rate": 5.963415844361553e-08, + "loss": 0.5407041311264038, + "step": 6615 + }, + { + "epoch": 1.9346395671881855, + "grad_norm": 1.792133188360025, + "learning_rate": 5.910810944353418e-08, + "loss": 
0.48977869749069214, + "step": 6616 + }, + { + "epoch": 1.9349320076034506, + "grad_norm": 1.8900630995604775, + "learning_rate": 5.858438406649125e-08, + "loss": 0.5320937037467957, + "step": 6617 + }, + { + "epoch": 1.9352244480187162, + "grad_norm": 1.6602834270947344, + "learning_rate": 5.806298243490327e-08, + "loss": 0.5860059261322021, + "step": 6618 + }, + { + "epoch": 1.9355168884339817, + "grad_norm": 1.7299178033338176, + "learning_rate": 5.7543904670644965e-08, + "loss": 0.49517208337783813, + "step": 6619 + }, + { + "epoch": 1.9358093288492468, + "grad_norm": 1.4975030277698207, + "learning_rate": 5.7027150895049286e-08, + "loss": 0.5060882568359375, + "step": 6620 + }, + { + "epoch": 1.9361017692645124, + "grad_norm": 1.7387399518104565, + "learning_rate": 5.651272122890184e-08, + "loss": 0.5887798070907593, + "step": 6621 + }, + { + "epoch": 1.9363942096797777, + "grad_norm": 2.006477050241073, + "learning_rate": 5.600061579244753e-08, + "loss": 0.6567577123641968, + "step": 6622 + }, + { + "epoch": 1.936686650095043, + "grad_norm": 1.7419376875296542, + "learning_rate": 5.549083470538952e-08, + "loss": 0.5672584176063538, + "step": 6623 + }, + { + "epoch": 1.9369790905103086, + "grad_norm": 1.6312975104255192, + "learning_rate": 5.4983378086885806e-08, + "loss": 0.5166369676589966, + "step": 6624 + }, + { + "epoch": 1.937271530925574, + "grad_norm": 1.7351407182284893, + "learning_rate": 5.447824605555041e-08, + "loss": 0.5157661437988281, + "step": 6625 + }, + { + "epoch": 1.9375639713408392, + "grad_norm": 1.5452343867654343, + "learning_rate": 5.397543872945443e-08, + "loss": 0.5001711845397949, + "step": 6626 + }, + { + "epoch": 1.9378564117561048, + "grad_norm": 1.5666441918912, + "learning_rate": 5.34749562261272e-08, + "loss": 0.48944878578186035, + "step": 6627 + }, + { + "epoch": 1.9381488521713701, + "grad_norm": 1.8943450842549039, + "learning_rate": 5.297679866255401e-08, + "loss": 0.5400780439376831, + "step": 6628 + }, + { + "epoch": 
1.9384412925866354, + "grad_norm": 1.6944930575034618, + "learning_rate": 5.248096615517395e-08, + "loss": 0.544346809387207, + "step": 6629 + }, + { + "epoch": 1.938733733001901, + "grad_norm": 1.8360261063384646, + "learning_rate": 5.1987458819886535e-08, + "loss": 0.5283153653144836, + "step": 6630 + }, + { + "epoch": 1.939026173417166, + "grad_norm": 1.8162414803988312, + "learning_rate": 5.149627677204616e-08, + "loss": 0.555808424949646, + "step": 6631 + }, + { + "epoch": 1.9393186138324316, + "grad_norm": 1.7068645601820531, + "learning_rate": 5.10074201264632e-08, + "loss": 0.5230466723442078, + "step": 6632 + }, + { + "epoch": 1.9396110542476972, + "grad_norm": 1.592321180041504, + "learning_rate": 5.052088899740515e-08, + "loss": 0.4810416102409363, + "step": 6633 + }, + { + "epoch": 1.9399034946629623, + "grad_norm": 1.2489690563293379, + "learning_rate": 5.0036683498594365e-08, + "loss": 0.35233962535858154, + "step": 6634 + }, + { + "epoch": 1.9401959350782279, + "grad_norm": 1.5949248677680616, + "learning_rate": 4.955480374321253e-08, + "loss": 0.5250035524368286, + "step": 6635 + }, + { + "epoch": 1.9404883754934932, + "grad_norm": 1.5547636594172098, + "learning_rate": 4.907524984389622e-08, + "loss": 0.5896221399307251, + "step": 6636 + }, + { + "epoch": 1.9407808159087585, + "grad_norm": 1.5725705573586048, + "learning_rate": 4.859802191273688e-08, + "loss": 0.5410518050193787, + "step": 6637 + }, + { + "epoch": 1.941073256324024, + "grad_norm": 1.5273512663488045, + "learning_rate": 4.812312006128528e-08, + "loss": 0.5044152736663818, + "step": 6638 + }, + { + "epoch": 1.9413656967392894, + "grad_norm": 1.6537481992077037, + "learning_rate": 4.765054440054484e-08, + "loss": 0.5388177633285522, + "step": 6639 + }, + { + "epoch": 1.9416581371545547, + "grad_norm": 2.0702365693466485, + "learning_rate": 4.718029504097943e-08, + "loss": 0.5074491500854492, + "step": 6640 + }, + { + "epoch": 1.9419505775698203, + "grad_norm": 1.6224415285858116, + 
"learning_rate": 4.671237209250557e-08, + "loss": 0.47772669792175293, + "step": 6641 + }, + { + "epoch": 1.9422430179850856, + "grad_norm": 1.6570845374645817, + "learning_rate": 4.624677566449798e-08, + "loss": 0.4682825207710266, + "step": 6642 + }, + { + "epoch": 1.942535458400351, + "grad_norm": 1.5100328644654928, + "learning_rate": 4.578350586578628e-08, + "loss": 0.48880642652511597, + "step": 6643 + }, + { + "epoch": 1.9428278988156165, + "grad_norm": 1.6890744037677652, + "learning_rate": 4.532256280465719e-08, + "loss": 0.4590389132499695, + "step": 6644 + }, + { + "epoch": 1.9431203392308816, + "grad_norm": 1.903981857624826, + "learning_rate": 4.48639465888534e-08, + "loss": 0.5893105268478394, + "step": 6645 + }, + { + "epoch": 1.9434127796461471, + "grad_norm": 1.7274912065627603, + "learning_rate": 4.4407657325574725e-08, + "loss": 0.561900794506073, + "step": 6646 + }, + { + "epoch": 1.9437052200614124, + "grad_norm": 1.662019693277273, + "learning_rate": 4.395369512147474e-08, + "loss": 0.4140210747718811, + "step": 6647 + }, + { + "epoch": 1.9439976604766778, + "grad_norm": 1.7955978434650512, + "learning_rate": 4.350206008266522e-08, + "loss": 0.6220303773880005, + "step": 6648 + }, + { + "epoch": 1.9442901008919433, + "grad_norm": 1.771531678180808, + "learning_rate": 4.3052752314712844e-08, + "loss": 0.4903472065925598, + "step": 6649 + }, + { + "epoch": 1.9445825413072086, + "grad_norm": 1.889992657698585, + "learning_rate": 4.260577192263915e-08, + "loss": 0.4519340991973877, + "step": 6650 + }, + { + "epoch": 1.944874981722474, + "grad_norm": 1.7435292517018475, + "learning_rate": 4.216111901092501e-08, + "loss": 0.49067920446395874, + "step": 6651 + }, + { + "epoch": 1.9451674221377395, + "grad_norm": 1.8654652047797853, + "learning_rate": 4.1718793683505066e-08, + "loss": 0.5935854911804199, + "step": 6652 + }, + { + "epoch": 1.9454598625530048, + "grad_norm": 1.7744411864937968, + "learning_rate": 4.127879604376883e-08, + "loss": 
0.5209576487541199, + "step": 6653 + }, + { + "epoch": 1.9457523029682702, + "grad_norm": 1.50564473891113, + "learning_rate": 4.084112619456515e-08, + "loss": 0.4454221725463867, + "step": 6654 + }, + { + "epoch": 1.9460447433835357, + "grad_norm": 1.8157940398905494, + "learning_rate": 4.0405784238194415e-08, + "loss": 0.5129591226577759, + "step": 6655 + }, + { + "epoch": 1.9463371837988008, + "grad_norm": 1.63185696744402, + "learning_rate": 3.997277027641744e-08, + "loss": 0.48704665899276733, + "step": 6656 + }, + { + "epoch": 1.9466296242140664, + "grad_norm": 1.8037751571098388, + "learning_rate": 3.95420844104466e-08, + "loss": 0.4510651230812073, + "step": 6657 + }, + { + "epoch": 1.946922064629332, + "grad_norm": 1.7817975919339482, + "learning_rate": 3.911372674095249e-08, + "loss": 0.5116807222366333, + "step": 6658 + }, + { + "epoch": 1.947214505044597, + "grad_norm": 1.7985765763419883, + "learning_rate": 3.868769736806277e-08, + "loss": 0.592056393623352, + "step": 6659 + }, + { + "epoch": 1.9475069454598626, + "grad_norm": 1.7881377609654638, + "learning_rate": 3.8263996391357805e-08, + "loss": 0.579146146774292, + "step": 6660 + }, + { + "epoch": 1.947799385875128, + "grad_norm": 1.6202416659647267, + "learning_rate": 3.784262390987503e-08, + "loss": 0.5253209471702576, + "step": 6661 + }, + { + "epoch": 1.9480918262903932, + "grad_norm": 2.008309380522338, + "learning_rate": 3.742358002210789e-08, + "loss": 0.5614888072013855, + "step": 6662 + }, + { + "epoch": 1.9483842667056588, + "grad_norm": 1.6491223001780133, + "learning_rate": 3.7006864826005796e-08, + "loss": 0.5630952715873718, + "step": 6663 + }, + { + "epoch": 1.9486767071209241, + "grad_norm": 2.1390311477096944, + "learning_rate": 3.659247841897306e-08, + "loss": 0.5990846157073975, + "step": 6664 + }, + { + "epoch": 1.9489691475361894, + "grad_norm": 1.6162006621933969, + "learning_rate": 3.6180420897868886e-08, + "loss": 0.5290813446044922, + "step": 6665 + }, + { + "epoch": 
1.949261587951455, + "grad_norm": 2.6144126732722803, + "learning_rate": 3.577069235901176e-08, + "loss": 0.6710211038589478, + "step": 6666 + }, + { + "epoch": 1.9495540283667203, + "grad_norm": 1.71689411729531, + "learning_rate": 3.536329289817064e-08, + "loss": 0.4802299737930298, + "step": 6667 + }, + { + "epoch": 1.9498464687819856, + "grad_norm": 1.6268319596207468, + "learning_rate": 3.495822261057491e-08, + "loss": 0.5432649850845337, + "step": 6668 + }, + { + "epoch": 1.9501389091972512, + "grad_norm": 1.9426982793491434, + "learning_rate": 3.4555481590905495e-08, + "loss": 0.5824951529502869, + "step": 6669 + }, + { + "epoch": 1.9504313496125163, + "grad_norm": 1.5773733844612365, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.48428961634635925, + "step": 6670 + }, + { + "epoch": 1.9507237900277818, + "grad_norm": 1.7258198741312958, + "learning_rate": 3.375698773135705e-08, + "loss": 0.5684780478477478, + "step": 6671 + }, + { + "epoch": 1.9510162304430474, + "grad_norm": 1.7742355369350526, + "learning_rate": 3.336123507811983e-08, + "loss": 0.5658689737319946, + "step": 6672 + }, + { + "epoch": 1.9513086708583125, + "grad_norm": 1.7743474017748566, + "learning_rate": 3.2967812066097006e-08, + "loss": 0.6265745162963867, + "step": 6673 + }, + { + "epoch": 1.951601111273578, + "grad_norm": 1.768397532537575, + "learning_rate": 3.257671878724722e-08, + "loss": 0.5732975006103516, + "step": 6674 + }, + { + "epoch": 1.9518935516888434, + "grad_norm": 2.3801499199920273, + "learning_rate": 3.218795533298624e-08, + "loss": 0.46968942880630493, + "step": 6675 + }, + { + "epoch": 1.9521859921041087, + "grad_norm": 1.9250466851177817, + "learning_rate": 3.180152179418472e-08, + "loss": 0.5651586055755615, + "step": 6676 + }, + { + "epoch": 1.9524784325193743, + "grad_norm": 1.4699414350235678, + "learning_rate": 3.141741826117151e-08, + "loss": 0.46789437532424927, + "step": 6677 + }, + { + "epoch": 1.9527708729346396, + "grad_norm": 1.6701838665271502, 
+ "learning_rate": 3.1035644823725896e-08, + "loss": 0.5332610011100769, + "step": 6678 + }, + { + "epoch": 1.953063313349905, + "grad_norm": 1.825129394239336, + "learning_rate": 3.06562015710854e-08, + "loss": 0.49613600969314575, + "step": 6679 + }, + { + "epoch": 1.9533557537651705, + "grad_norm": 2.1340240197713265, + "learning_rate": 3.027908859194351e-08, + "loss": 0.5498408079147339, + "step": 6680 + }, + { + "epoch": 1.9536481941804358, + "grad_norm": 1.8887907896186948, + "learning_rate": 2.99043059744486e-08, + "loss": 0.6802657842636108, + "step": 6681 + }, + { + "epoch": 1.953940634595701, + "grad_norm": 1.8609256911752867, + "learning_rate": 2.9531853806201716e-08, + "loss": 0.5149989724159241, + "step": 6682 + }, + { + "epoch": 1.9542330750109667, + "grad_norm": 1.7262483706342455, + "learning_rate": 2.9161732174263212e-08, + "loss": 0.5249730944633484, + "step": 6683 + }, + { + "epoch": 1.9545255154262318, + "grad_norm": 1.7003943133697261, + "learning_rate": 2.8793941165147222e-08, + "loss": 0.5711483359336853, + "step": 6684 + }, + { + "epoch": 1.9548179558414973, + "grad_norm": 1.7303037823896377, + "learning_rate": 2.842848086482053e-08, + "loss": 0.4591020345687866, + "step": 6685 + }, + { + "epoch": 1.9551103962567626, + "grad_norm": 1.887004603599524, + "learning_rate": 2.8065351358708136e-08, + "loss": 0.575869083404541, + "step": 6686 + }, + { + "epoch": 1.955402836672028, + "grad_norm": 1.7563501117497715, + "learning_rate": 2.7704552731688816e-08, + "loss": 0.5664101839065552, + "step": 6687 + }, + { + "epoch": 1.9556952770872935, + "grad_norm": 1.5280681451949298, + "learning_rate": 2.7346085068098437e-08, + "loss": 0.5739811062812805, + "step": 6688 + }, + { + "epoch": 1.9559877175025588, + "grad_norm": 1.64304520297204, + "learning_rate": 2.6989948451726643e-08, + "loss": 0.4707348942756653, + "step": 6689 + }, + { + "epoch": 1.9562801579178242, + "grad_norm": 1.4347028954089904, + "learning_rate": 2.6636142965816848e-08, + "loss": 
0.38842523097991943, + "step": 6690 + }, + { + "epoch": 1.9565725983330897, + "grad_norm": 1.9429266961932796, + "learning_rate": 2.628466869306956e-08, + "loss": 0.4295673668384552, + "step": 6691 + }, + { + "epoch": 1.956865038748355, + "grad_norm": 1.9886421076178336, + "learning_rate": 2.5935525715640176e-08, + "loss": 0.5358999967575073, + "step": 6692 + }, + { + "epoch": 1.9571574791636204, + "grad_norm": 1.8207487442928234, + "learning_rate": 2.5588714115137857e-08, + "loss": 0.49730730056762695, + "step": 6693 + }, + { + "epoch": 1.957449919578886, + "grad_norm": 1.8975782350563493, + "learning_rate": 2.5244233972627762e-08, + "loss": 0.5368232131004333, + "step": 6694 + }, + { + "epoch": 1.957742359994151, + "grad_norm": 1.6616905607648789, + "learning_rate": 2.4902085368632144e-08, + "loss": 0.48084500432014465, + "step": 6695 + }, + { + "epoch": 1.9580348004094166, + "grad_norm": 1.6503756551181779, + "learning_rate": 2.45622683831237e-08, + "loss": 0.5197296142578125, + "step": 6696 + }, + { + "epoch": 1.9583272408246821, + "grad_norm": 1.7005704554604877, + "learning_rate": 2.4224783095532224e-08, + "loss": 0.4807678163051605, + "step": 6697 + }, + { + "epoch": 1.9586196812399472, + "grad_norm": 1.5200854711140026, + "learning_rate": 2.388962958474461e-08, + "loss": 0.5117641687393188, + "step": 6698 + }, + { + "epoch": 1.9589121216552128, + "grad_norm": 1.5153035364420055, + "learning_rate": 2.355680792910153e-08, + "loss": 0.5318149328231812, + "step": 6699 + }, + { + "epoch": 1.959204562070478, + "grad_norm": 1.642749755305391, + "learning_rate": 2.3226318206395206e-08, + "loss": 0.5590193271636963, + "step": 6700 + }, + { + "epoch": 1.9594970024857434, + "grad_norm": 1.9273854799208605, + "learning_rate": 2.2898160493878275e-08, + "loss": 0.7686688899993896, + "step": 6701 + }, + { + "epoch": 1.959789442901009, + "grad_norm": 1.7479554033366604, + "learning_rate": 2.257233486825383e-08, + "loss": 0.5085177421569824, + "step": 6702 + }, + { + 
"epoch": 1.9600818833162743, + "grad_norm": 1.4224817781801729, + "learning_rate": 2.2248841405683176e-08, + "loss": 0.44002413749694824, + "step": 6703 + }, + { + "epoch": 1.9603743237315396, + "grad_norm": 1.6541616903883845, + "learning_rate": 2.1927680181779154e-08, + "loss": 0.5369126796722412, + "step": 6704 + }, + { + "epoch": 1.9606667641468052, + "grad_norm": 1.5811100430561291, + "learning_rate": 2.1608851271612828e-08, + "loss": 0.516021728515625, + "step": 6705 + }, + { + "epoch": 1.9609592045620705, + "grad_norm": 1.577385822778267, + "learning_rate": 2.1292354749707922e-08, + "loss": 0.5215185284614563, + "step": 6706 + }, + { + "epoch": 1.9612516449773358, + "grad_norm": 1.7926842955012665, + "learning_rate": 2.0978190690043032e-08, + "loss": 0.6051908731460571, + "step": 6707 + }, + { + "epoch": 1.9615440853926014, + "grad_norm": 1.8529134419730404, + "learning_rate": 2.066635916605386e-08, + "loss": 0.5426267385482788, + "step": 6708 + }, + { + "epoch": 1.9618365258078665, + "grad_norm": 2.064852719580073, + "learning_rate": 2.0356860250626554e-08, + "loss": 0.5888626575469971, + "step": 6709 + }, + { + "epoch": 1.962128966223132, + "grad_norm": 1.4287637894797525, + "learning_rate": 2.004969401610657e-08, + "loss": 0.5225001573562622, + "step": 6710 + }, + { + "epoch": 1.9624214066383976, + "grad_norm": 1.616132198436982, + "learning_rate": 1.974486053429092e-08, + "loss": 0.5735136270523071, + "step": 6711 + }, + { + "epoch": 1.9627138470536627, + "grad_norm": 1.6327567238976746, + "learning_rate": 1.9442359876433724e-08, + "loss": 0.5302764177322388, + "step": 6712 + }, + { + "epoch": 1.9630062874689282, + "grad_norm": 1.625182085046959, + "learning_rate": 1.9142192113241752e-08, + "loss": 0.5078837871551514, + "step": 6713 + }, + { + "epoch": 1.9632987278841936, + "grad_norm": 1.7110845788062152, + "learning_rate": 1.884435731487888e-08, + "loss": 0.5772985219955444, + "step": 6714 + }, + { + "epoch": 1.963591168299459, + "grad_norm": 
2.2561904758082925, + "learning_rate": 1.8548855550959423e-08, + "loss": 0.5974931716918945, + "step": 6715 + }, + { + "epoch": 1.9638836087147244, + "grad_norm": 1.8105323667501525, + "learning_rate": 1.8255686890558123e-08, + "loss": 0.5065072774887085, + "step": 6716 + }, + { + "epoch": 1.9641760491299898, + "grad_norm": 1.4374806170365766, + "learning_rate": 1.7964851402199058e-08, + "loss": 0.4729428291320801, + "step": 6717 + }, + { + "epoch": 1.964468489545255, + "grad_norm": 1.7924892088352824, + "learning_rate": 1.7676349153864515e-08, + "loss": 0.46363723278045654, + "step": 6718 + }, + { + "epoch": 1.9647609299605207, + "grad_norm": 1.6096201158909726, + "learning_rate": 1.7390180212990547e-08, + "loss": 0.5436959266662598, + "step": 6719 + }, + { + "epoch": 1.965053370375786, + "grad_norm": 1.8570609869736334, + "learning_rate": 1.7106344646465877e-08, + "loss": 0.7571452856063843, + "step": 6720 + }, + { + "epoch": 1.9653458107910513, + "grad_norm": 1.7203125443062617, + "learning_rate": 1.682484252063632e-08, + "loss": 0.5724680423736572, + "step": 6721 + }, + { + "epoch": 1.9656382512063169, + "grad_norm": 1.5552868811193872, + "learning_rate": 1.654567390130146e-08, + "loss": 0.46937745809555054, + "step": 6722 + }, + { + "epoch": 1.965930691621582, + "grad_norm": 1.4639592826813614, + "learning_rate": 1.6268838853713552e-08, + "loss": 0.5764822363853455, + "step": 6723 + }, + { + "epoch": 1.9662231320368475, + "grad_norm": 1.8890557259087926, + "learning_rate": 1.5994337442584164e-08, + "loss": 0.6074192523956299, + "step": 6724 + }, + { + "epoch": 1.9665155724521128, + "grad_norm": 1.8156005720173343, + "learning_rate": 1.572216973207419e-08, + "loss": 0.6001715064048767, + "step": 6725 + }, + { + "epoch": 1.9668080128673782, + "grad_norm": 1.8635551001096793, + "learning_rate": 1.545233578580163e-08, + "loss": 0.5819540619850159, + "step": 6726 + }, + { + "epoch": 1.9671004532826437, + "grad_norm": 1.509757451229315, + "learning_rate": 
1.518483566683826e-08, + "loss": 0.4745405912399292, + "step": 6727 + }, + { + "epoch": 1.967392893697909, + "grad_norm": 1.5301158686504193, + "learning_rate": 1.4919669437710725e-08, + "loss": 0.4438042640686035, + "step": 6728 + }, + { + "epoch": 1.9676853341131744, + "grad_norm": 1.6058873643565785, + "learning_rate": 1.465683716040056e-08, + "loss": 0.45798003673553467, + "step": 6729 + }, + { + "epoch": 1.96797777452844, + "grad_norm": 1.5582798501168125, + "learning_rate": 1.4396338896341955e-08, + "loss": 0.3918766379356384, + "step": 6730 + }, + { + "epoch": 1.9682702149437052, + "grad_norm": 1.6253936447718431, + "learning_rate": 1.4138174706426199e-08, + "loss": 0.5266170501708984, + "step": 6731 + }, + { + "epoch": 1.9685626553589706, + "grad_norm": 1.733772185361853, + "learning_rate": 1.3882344650998359e-08, + "loss": 0.5166668891906738, + "step": 6732 + }, + { + "epoch": 1.9688550957742361, + "grad_norm": 1.7595735268115036, + "learning_rate": 1.3628848789853932e-08, + "loss": 0.39324697852134705, + "step": 6733 + }, + { + "epoch": 1.9691475361895012, + "grad_norm": 1.8212233848125128, + "learning_rate": 1.3377687182248855e-08, + "loss": 0.4915732443332672, + "step": 6734 + }, + { + "epoch": 1.9694399766047668, + "grad_norm": 1.7689973508355645, + "learning_rate": 1.31288598868895e-08, + "loss": 0.5416492819786072, + "step": 6735 + }, + { + "epoch": 1.9697324170200323, + "grad_norm": 1.6021351256215517, + "learning_rate": 1.288236696193823e-08, + "loss": 0.4713748097419739, + "step": 6736 + }, + { + "epoch": 1.9700248574352974, + "grad_norm": 1.7411270752119496, + "learning_rate": 1.263820846501118e-08, + "loss": 0.44074663519859314, + "step": 6737 + }, + { + "epoch": 1.970317297850563, + "grad_norm": 1.7164561827524085, + "learning_rate": 1.2396384453179366e-08, + "loss": 0.4694680869579315, + "step": 6738 + }, + { + "epoch": 1.9706097382658283, + "grad_norm": 1.8691907501418656, + "learning_rate": 1.215689498296535e-08, + "loss": 0.553142786026001, 
+ "step": 6739 + }, + { + "epoch": 1.9709021786810936, + "grad_norm": 1.7953149807008746, + "learning_rate": 1.1919740110351019e-08, + "loss": 0.533849835395813, + "step": 6740 + }, + { + "epoch": 1.9711946190963592, + "grad_norm": 1.9503927011602655, + "learning_rate": 1.1684919890768698e-08, + "loss": 0.5448808670043945, + "step": 6741 + }, + { + "epoch": 1.9714870595116245, + "grad_norm": 1.6447356703420446, + "learning_rate": 1.1452434379106703e-08, + "loss": 0.46860289573669434, + "step": 6742 + }, + { + "epoch": 1.9717794999268898, + "grad_norm": 2.0052944353876696, + "learning_rate": 1.122228362970712e-08, + "loss": 0.5552232265472412, + "step": 6743 + }, + { + "epoch": 1.9720719403421554, + "grad_norm": 1.7151457677082285, + "learning_rate": 1.0994467696364698e-08, + "loss": 0.4639692008495331, + "step": 6744 + }, + { + "epoch": 1.9723643807574207, + "grad_norm": 2.0905035821875746, + "learning_rate": 1.076898663233239e-08, + "loss": 0.7129387259483337, + "step": 6745 + }, + { + "epoch": 1.972656821172686, + "grad_norm": 1.6674482501618961, + "learning_rate": 1.0545840490313597e-08, + "loss": 0.6637833118438721, + "step": 6746 + }, + { + "epoch": 1.9729492615879516, + "grad_norm": 2.029336881837252, + "learning_rate": 1.0325029322467705e-08, + "loss": 0.6215991973876953, + "step": 6747 + }, + { + "epoch": 1.9732417020032167, + "grad_norm": 1.878624196936373, + "learning_rate": 1.0106553180407874e-08, + "loss": 0.48594456911087036, + "step": 6748 + }, + { + "epoch": 1.9735341424184822, + "grad_norm": 1.9063825585940108, + "learning_rate": 9.890412115202142e-09, + "loss": 0.5443629622459412, + "step": 6749 + }, + { + "epoch": 1.9738265828337478, + "grad_norm": 1.7053157420855176, + "learning_rate": 9.676606177371207e-09, + "loss": 0.643796443939209, + "step": 6750 + }, + { + "epoch": 1.974119023249013, + "grad_norm": 1.6282972872252912, + "learning_rate": 9.465135416891757e-09, + "loss": 0.6305385828018188, + "step": 6751 + }, + { + "epoch": 
1.9744114636642784, + "grad_norm": 1.5632532849336644, + "learning_rate": 9.255999883193146e-09, + "loss": 0.5120108723640442, + "step": 6752 + }, + { + "epoch": 1.9747039040795438, + "grad_norm": 1.6718955354026932, + "learning_rate": 9.0491996251596e-09, + "loss": 0.5552967190742493, + "step": 6753 + }, + { + "epoch": 1.974996344494809, + "grad_norm": 1.935016742711985, + "learning_rate": 8.84473469113023e-09, + "loss": 0.6341986656188965, + "step": 6754 + }, + { + "epoch": 1.9752887849100746, + "grad_norm": 1.9011990155600869, + "learning_rate": 8.642605128896808e-09, + "loss": 0.5204262137413025, + "step": 6755 + }, + { + "epoch": 1.97558122532534, + "grad_norm": 1.75594319264598, + "learning_rate": 8.442810985705984e-09, + "loss": 0.4980974793434143, + "step": 6756 + }, + { + "epoch": 1.9758736657406053, + "grad_norm": 1.9165104575442982, + "learning_rate": 8.245352308258181e-09, + "loss": 0.5432465076446533, + "step": 6757 + }, + { + "epoch": 1.9761661061558708, + "grad_norm": 1.7852742537308695, + "learning_rate": 8.0502291427087e-09, + "loss": 0.813039243221283, + "step": 6758 + }, + { + "epoch": 1.9764585465711362, + "grad_norm": 1.8018799007975157, + "learning_rate": 7.85744153466661e-09, + "loss": 0.5723720788955688, + "step": 6759 + }, + { + "epoch": 1.9767509869864015, + "grad_norm": 1.8628448153664545, + "learning_rate": 7.666989529193647e-09, + "loss": 0.5562596321105957, + "step": 6760 + }, + { + "epoch": 1.977043427401667, + "grad_norm": 1.796195928066652, + "learning_rate": 7.478873170807532e-09, + "loss": 0.5455175638198853, + "step": 6761 + }, + { + "epoch": 1.9773358678169322, + "grad_norm": 1.791853318736957, + "learning_rate": 7.2930925034797595e-09, + "loss": 0.5753832459449768, + "step": 6762 + }, + { + "epoch": 1.9776283082321977, + "grad_norm": 1.786340662775674, + "learning_rate": 7.109647570634482e-09, + "loss": 0.49962282180786133, + "step": 6763 + }, + { + "epoch": 1.977920748647463, + "grad_norm": 1.4222417158044076, + 
"learning_rate": 6.9285384151507316e-09, + "loss": 0.44443345069885254, + "step": 6764 + }, + { + "epoch": 1.9782131890627284, + "grad_norm": 1.5729694345436978, + "learning_rate": 6.749765079363535e-09, + "loss": 0.3236424922943115, + "step": 6765 + }, + { + "epoch": 1.978505629477994, + "grad_norm": 1.766865850057596, + "learning_rate": 6.573327605057245e-09, + "loss": 0.5246942639350891, + "step": 6766 + }, + { + "epoch": 1.9787980698932592, + "grad_norm": 1.6890664092399734, + "learning_rate": 6.399226033475536e-09, + "loss": 0.6525053381919861, + "step": 6767 + }, + { + "epoch": 1.9790905103085246, + "grad_norm": 1.5450928873923104, + "learning_rate": 6.227460405312524e-09, + "loss": 0.502121090888977, + "step": 6768 + }, + { + "epoch": 1.9793829507237901, + "grad_norm": 2.00727430176714, + "learning_rate": 6.058030760718314e-09, + "loss": 0.6137609481811523, + "step": 6769 + }, + { + "epoch": 1.9796753911390554, + "grad_norm": 2.274345342275455, + "learning_rate": 5.890937139294561e-09, + "loss": 0.6673166751861572, + "step": 6770 + }, + { + "epoch": 1.9799678315543208, + "grad_norm": 1.8444636633461322, + "learning_rate": 5.726179580098912e-09, + "loss": 0.5888657569885254, + "step": 6771 + }, + { + "epoch": 1.9802602719695863, + "grad_norm": 1.6693157475267608, + "learning_rate": 5.563758121642781e-09, + "loss": 0.5239546298980713, + "step": 6772 + }, + { + "epoch": 1.9805527123848514, + "grad_norm": 1.8912704609026834, + "learning_rate": 5.403672801890247e-09, + "loss": 0.5446778535842896, + "step": 6773 + }, + { + "epoch": 1.980845152800117, + "grad_norm": 1.9927071253973727, + "learning_rate": 5.245923658262486e-09, + "loss": 0.6198326349258423, + "step": 6774 + }, + { + "epoch": 1.9811375932153825, + "grad_norm": 2.15235475034657, + "learning_rate": 5.090510727630005e-09, + "loss": 0.586353063583374, + "step": 6775 + }, + { + "epoch": 1.9814300336306476, + "grad_norm": 1.7195990521736408, + "learning_rate": 4.93743404632041e-09, + "loss": 
0.6344239711761475, + "step": 6776 + }, + { + "epoch": 1.9817224740459132, + "grad_norm": 1.7280364585810115, + "learning_rate": 4.7866936501150816e-09, + "loss": 0.529091477394104, + "step": 6777 + }, + { + "epoch": 1.9820149144611785, + "grad_norm": 1.7357230298596742, + "learning_rate": 4.6382895742491665e-09, + "loss": 0.50063157081604, + "step": 6778 + }, + { + "epoch": 1.9823073548764438, + "grad_norm": 1.526019679238999, + "learning_rate": 4.492221853409362e-09, + "loss": 0.48398512601852417, + "step": 6779 + }, + { + "epoch": 1.9825997952917094, + "grad_norm": 1.5319705226915326, + "learning_rate": 4.348490521738358e-09, + "loss": 0.5330454707145691, + "step": 6780 + }, + { + "epoch": 1.9828922357069747, + "grad_norm": 1.829830860451363, + "learning_rate": 4.207095612833723e-09, + "loss": 0.4562032222747803, + "step": 6781 + }, + { + "epoch": 1.98318467612224, + "grad_norm": 1.7011927258883048, + "learning_rate": 4.0680371597456855e-09, + "loss": 0.47456252574920654, + "step": 6782 + }, + { + "epoch": 1.9834771165375056, + "grad_norm": 1.8486724201847988, + "learning_rate": 3.931315194977137e-09, + "loss": 0.6283844709396362, + "step": 6783 + }, + { + "epoch": 1.983769556952771, + "grad_norm": 1.7243813126388492, + "learning_rate": 3.7969297504858445e-09, + "loss": 0.5886485576629639, + "step": 6784 + }, + { + "epoch": 1.9840619973680362, + "grad_norm": 1.679651544361786, + "learning_rate": 3.664880857685571e-09, + "loss": 0.4711921811103821, + "step": 6785 + }, + { + "epoch": 1.9843544377833018, + "grad_norm": 1.8051937774075772, + "learning_rate": 3.5351685474394048e-09, + "loss": 0.5372034311294556, + "step": 6786 + }, + { + "epoch": 1.9846468781985669, + "grad_norm": 1.7143010926050217, + "learning_rate": 3.4077928500686473e-09, + "loss": 0.5314334034919739, + "step": 6787 + }, + { + "epoch": 1.9849393186138324, + "grad_norm": 1.7988305575744603, + "learning_rate": 3.2827537953461496e-09, + "loss": 0.6022863984107971, + "step": 6788 + }, + { + "epoch": 
1.985231759029098, + "grad_norm": 1.844296066004364, + "learning_rate": 3.160051412499643e-09, + "loss": 0.6739746928215027, + "step": 6789 + }, + { + "epoch": 1.985524199444363, + "grad_norm": 1.795022844462659, + "learning_rate": 3.0396857302084082e-09, + "loss": 0.6454254388809204, + "step": 6790 + }, + { + "epoch": 1.9858166398596286, + "grad_norm": 1.7777744811692944, + "learning_rate": 2.9216567766088276e-09, + "loss": 0.567995011806488, + "step": 6791 + }, + { + "epoch": 1.986109080274894, + "grad_norm": 1.7916482396337698, + "learning_rate": 2.8059645792877233e-09, + "loss": 0.568576455116272, + "step": 6792 + }, + { + "epoch": 1.9864015206901593, + "grad_norm": 1.5789903561856604, + "learning_rate": 2.6926091652890175e-09, + "loss": 0.5053816437721252, + "step": 6793 + }, + { + "epoch": 1.9866939611054248, + "grad_norm": 1.4966825154239165, + "learning_rate": 2.5815905611081825e-09, + "loss": 0.47705504298210144, + "step": 6794 + }, + { + "epoch": 1.9869864015206902, + "grad_norm": 1.7555838648022946, + "learning_rate": 2.472908792695572e-09, + "loss": 0.48271438479423523, + "step": 6795 + }, + { + "epoch": 1.9872788419359555, + "grad_norm": 1.675207035758499, + "learning_rate": 2.3665638854541982e-09, + "loss": 0.5694486498832703, + "step": 6796 + }, + { + "epoch": 1.987571282351221, + "grad_norm": 1.6539598401922624, + "learning_rate": 2.2625558642419553e-09, + "loss": 0.4940011501312256, + "step": 6797 + }, + { + "epoch": 1.9878637227664864, + "grad_norm": 1.8961348890729253, + "learning_rate": 2.160884753370507e-09, + "loss": 0.5536549091339111, + "step": 6798 + }, + { + "epoch": 1.9881561631817517, + "grad_norm": 1.923836316704977, + "learning_rate": 2.0615505766041765e-09, + "loss": 0.5354948043823242, + "step": 6799 + }, + { + "epoch": 1.9884486035970173, + "grad_norm": 1.9901895658271425, + "learning_rate": 1.9645533571610585e-09, + "loss": 0.6246936321258545, + "step": 6800 + } + ], + "logging_steps": 1, + "max_steps": 6840, + 
"num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2077336313905152.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-6800/training_args.bin b/checkpoint-6800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..81f2336f2b4301fde755bb2ff1a553c0af833dc6 --- /dev/null +++ b/checkpoint-6800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18a5144102f6d607705c76873c9b6b52fea03ff40dc71ea0f2bf5e2547fe44f +size 6968 diff --git a/checkpoint-6800/zero_to_fp32.py b/checkpoint-6800/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-6800/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
@dataclass
class zero_model_state:
    """Model-side contents of one ``*_model_states.pt`` file, as built by ``parse_model_states``."""
    # The annotations are real types; the previous ``dict()`` annotations were empty dict
    # instances, which the dataclass machinery accepts but which describe nothing.
    buffers: dict  # buffer name -> tensor converted with ``.float()``
    param_shapes: list  # sequence of ``{param name: shape}`` mappings, zipped against the merged fp32 groups
    shared_params: list  # ``[key, value]`` pairs copied from ``state_dict["shared_params"]``
    ds_version: int  # NOTE(review): read with ``state_dict.get(DS_VERSION)``, so it can be None - confirm the real type
    frozen_param_shapes: dict  # None when the checkpoint has no FROZEN_PARAM_SHAPES entry
    frozen_param_fragments: dict  # None when the checkpoint has no FROZEN_PARAM_FRAGMENTS entry


debug = 0

# load to cpu
device = torch.device('cpu')


def atoi(text):
    """Return ``int(text)`` when ``text`` consists only of digits, otherwise ``text`` unchanged."""
    return int(text) if text.isdigit() else text


def natural_keys(text):
    """
    Sort key that orders embedded numbers numerically ("human" order), e.g.
    ``alist.sort(key=natural_keys)`` puts ``rank_2`` before ``rank_10``.

    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    """
    # the capturing group keeps the digit runs in the split result so atoi can convert them
    return [atoi(chunk) for chunk in re.split(r'(\d+)', text)]


def get_model_state_file(checkpoint_dir, zero_stage):
    """
    Return the path of the model-states file expected for ``zero_stage``.

    Args:
        - ``checkpoint_dir``: directory that holds the checkpoint files
        - ``zero_stage``: ZeRO stage; stages <= 2 and stage 3 use different file names

    Raises:
        - ``FileNotFoundError``: ``checkpoint_dir`` is not a directory, or the expected file is missing
        - ``ValueError``: ``zero_stage`` is neither <= 2 nor 3
    """
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
    else:
        # without this branch `file` stayed unbound and the check below died with UnboundLocalError
        raise ValueError(f"unknown zero stage {zero_stage}")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file


def get_checkpoint_files(checkpoint_dir, glob_pattern):
    """
    Return the files in ``checkpoint_dir`` matching ``glob_pattern``, in natural (numeric-aware) order.

    Raises:
        - ``FileNotFoundError``: nothing matches the pattern
    """
    # XXX: need to test that this simple glob rule works for multi-node setup too
    ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)

    if not ckpt_files:
        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")

    return ckpt_files


def get_optim_files(checkpoint_dir):
    """Return the ``*_optim_states.pt`` files of ``checkpoint_dir`` in natural order."""
    return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")


def get_model_state_files(checkpoint_dir):
    """Return the ``*_model_states.pt`` files of ``checkpoint_dir`` in natural order."""
    return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")


def parse_model_states(files):
    """
    Load every model-states file and return one ``zero_model_state`` per file, in input order.

    Args:
        - ``files``: paths of ``*_model_states.pt`` files

    Raises:
        - ``ValueError``: a file has no ``BUFFER_NAMES`` entry, i.e. it is not a model state checkpoint
    """
    zero_model_states = []
    for file in files:
        # NOTE(security): weights_only=False unpickles arbitrary objects - only load trusted checkpoints
        state_dict = torch.load(file, map_location=device, weights_only=False)

        if BUFFER_NAMES not in state_dict:
            raise ValueError(f"{file} is not a model state checkpoint")
        buffer_names = state_dict[BUFFER_NAMES]
        if debug:
            print("Found buffers:", buffer_names)

        # recover just the buffers while restoring them to fp32 if they were saved in fp16
        buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}

        # frozen parameters are optional in the checkpoint
        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
        if debug and frozen_param_shapes is not None:
            print(f"Found frozen_param_shapes: {frozen_param_shapes}")

        # handle shared params
        shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]

        zero_model_states.append(
            zero_model_state(buffers=buffers,
                             param_shapes=state_dict[PARAM_SHAPES],
                             shared_params=shared_params,
                             ds_version=state_dict.get(DS_VERSION, None),
                             frozen_param_shapes=frozen_param_shapes,
                             frozen_param_fragments=state_dict.get(FROZEN_PARAM_FRAGMENTS, None)))

    return zero_model_states
ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensors instead of torch tensors, which is more memory efficient. + Convert a pseudo tensor to a torch tensor with ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint.
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint.
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-6840/README.md b/checkpoint-6840/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-6840/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-6840/adapter_config.json b/checkpoint-6840/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2358f759370f40d042b47e8407cdc2843daac45e --- /dev/null +++ b/checkpoint-6840/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.6.mlp.up_proj", + "layers.24.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.24.mlp.down_proj", + "layers.12.mlp.up_proj", + "layers.20.mlp.gate_proj", + "layers.23.mlp.up_proj", + "layers.19.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.11.mlp.gate_proj", + "layers.8.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.7.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.13.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.9.mlp.gate_proj", + "layers.0.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.0.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.10.mlp.gate_proj", + "layers.10.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.25.mlp.up_proj", + "layers.12.mlp.gate_proj", + "layers.20.mlp.down_proj", + "layers.0.mlp.down_proj", + "layers.5.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.19.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.21.mlp.up_proj", + "layers.2.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.21.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "q_proj", + "layers.20.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.21.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.18.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.27.mlp.up_proj", + "v_proj", + "layers.7.mlp.gate_proj", + "layers.10.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.17.mlp.up_proj", + "layers.5.mlp.gate_proj", + "layers.3.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.26.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.22.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.17.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.13.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.15.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.16.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.4.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.15.mlp.gate_proj", + 
"layers.27.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.25.mlp.gate_proj", + "layers.26.mlp.gate_proj", + "o_proj", + "layers.6.mlp.gate_proj", + "layers.11.mlp.up_proj", + "layers.9.mlp.up_proj", + "layers.16.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.8.mlp.down_proj", + "layers.1.mlp.up_proj", + "k_proj", + "layers.16.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.9.mlp.down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-6840/adapter_model.safetensors b/checkpoint-6840/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1934484a824571d95faca5f608a429363d9dce22 --- /dev/null +++ b/checkpoint-6840/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae497a5fd3e2f347345d7a0e67639f7faf92e4f40e35c1c3afa4428486cbc1b +size 323020440 diff --git a/checkpoint-6840/chat_template.jinja b/checkpoint-6840/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-6840/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% 
set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-6840/global_step6840/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-6840/global_step6840/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c81b8a4775d1e8323a1877bec685f2ecaf764337 --- /dev/null +++ b/checkpoint-6840/global_step6840/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e5da5f16ffce5c427bd89a1e0f5fb0267c440f1044e9c479444c669470401b5 +size 1937772272 diff --git a/checkpoint-6840/global_step6840/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-6840/global_step6840/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9094e104cc78641274870965741dd2c8784c1e0f --- /dev/null +++ b/checkpoint-6840/global_step6840/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b459431d465a2bf28b79fa10f9273c438ac551179a2ef27dface53bcbc06cbb8 +size 460630 diff --git a/checkpoint-6840/latest b/checkpoint-6840/latest new file mode 100644 index 0000000000000000000000000000000000000000..501a0d32f85efb3fdb6a88c7e47a679f097d63a6 --- /dev/null +++ b/checkpoint-6840/latest @@ -0,0 +1 @@ +global_step6840 \ No newline at end of file diff --git a/checkpoint-6840/processor_config.json b/checkpoint-6840/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-6840/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-6840/rng_state.pth b/checkpoint-6840/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0dd1d9c7c4f8569b18e8806aa39f509f0f1ad46e --- /dev/null +++ b/checkpoint-6840/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79643ee7cf57469a295b64ae2dc27838c57f1e42c542739f21a01a16be1445e1 +size 14244 diff --git a/checkpoint-6840/scheduler.pt b/checkpoint-6840/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5711268ca528fbbc590d9d12f606e789cb391ba --- /dev/null +++ b/checkpoint-6840/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30244252e909dbfddbcc3906dc199f032e34da7fec08615b5a27f1abeaccf9cb +size 
1000 diff --git a/checkpoint-6840/tokenizer.json b/checkpoint-6840/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-6840/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-6840/tokenizer_config.json b/checkpoint-6840/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-6840/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-6840/trainer_state.json b/checkpoint-6840/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9380012d18be21b05f629199973240ffc23ad3b4 --- /dev/null +++ b/checkpoint-6840/trainer_state.json @@ -0,0 +1,47914 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 6840, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00029244041526538966, + "grad_norm": 1.376689135449382, + 
"learning_rate": 0.0, + "loss": 1.2599382400512695, + "step": 1 + }, + { + "epoch": 0.0005848808305307793, + "grad_norm": 1.234681838317607, + "learning_rate": 5.847953216374269e-08, + "loss": 0.9314937591552734, + "step": 2 + }, + { + "epoch": 0.000877321245796169, + "grad_norm": 1.3874138849382744, + "learning_rate": 1.1695906432748539e-07, + "loss": 1.1433629989624023, + "step": 3 + }, + { + "epoch": 0.0011697616610615586, + "grad_norm": 1.4618979511530414, + "learning_rate": 1.7543859649122808e-07, + "loss": 1.2224640846252441, + "step": 4 + }, + { + "epoch": 0.0014622020763269484, + "grad_norm": 1.236340065064986, + "learning_rate": 2.3391812865497077e-07, + "loss": 1.0468370914459229, + "step": 5 + }, + { + "epoch": 0.001754642491592338, + "grad_norm": 1.358651453520776, + "learning_rate": 2.9239766081871344e-07, + "loss": 1.1314436197280884, + "step": 6 + }, + { + "epoch": 0.0020470829068577278, + "grad_norm": 1.3850033876300505, + "learning_rate": 3.5087719298245616e-07, + "loss": 0.9903597831726074, + "step": 7 + }, + { + "epoch": 0.0023395233221231173, + "grad_norm": 1.326993456005612, + "learning_rate": 4.093567251461988e-07, + "loss": 1.1988611221313477, + "step": 8 + }, + { + "epoch": 0.002631963737388507, + "grad_norm": 1.3313234883955534, + "learning_rate": 4.6783625730994155e-07, + "loss": 1.1209533214569092, + "step": 9 + }, + { + "epoch": 0.0029244041526538967, + "grad_norm": 1.3471142230235869, + "learning_rate": 5.263157894736843e-07, + "loss": 1.1582586765289307, + "step": 10 + }, + { + "epoch": 0.0032168445679192866, + "grad_norm": 1.3073172655293792, + "learning_rate": 5.847953216374269e-07, + "loss": 1.2469007968902588, + "step": 11 + }, + { + "epoch": 0.003509284983184676, + "grad_norm": 1.500493931988472, + "learning_rate": 6.432748538011696e-07, + "loss": 1.115494728088379, + "step": 12 + }, + { + "epoch": 0.0038017253984500656, + "grad_norm": 1.4157975190751417, + "learning_rate": 7.017543859649123e-07, + "loss": 1.1927871704101562, + 
"step": 13 + }, + { + "epoch": 0.0040941658137154556, + "grad_norm": 1.4273551735693608, + "learning_rate": 7.60233918128655e-07, + "loss": 1.1014869213104248, + "step": 14 + }, + { + "epoch": 0.004386606228980845, + "grad_norm": 1.214320734942881, + "learning_rate": 8.187134502923977e-07, + "loss": 1.1055865287780762, + "step": 15 + }, + { + "epoch": 0.0046790466442462346, + "grad_norm": 1.2962699407775686, + "learning_rate": 8.771929824561404e-07, + "loss": 1.1071349382400513, + "step": 16 + }, + { + "epoch": 0.004971487059511625, + "grad_norm": 1.2885224717964352, + "learning_rate": 9.356725146198831e-07, + "loss": 1.1737473011016846, + "step": 17 + }, + { + "epoch": 0.005263927474777014, + "grad_norm": 1.407390623938155, + "learning_rate": 9.941520467836258e-07, + "loss": 1.283717155456543, + "step": 18 + }, + { + "epoch": 0.005556367890042404, + "grad_norm": 1.4470139877184414, + "learning_rate": 1.0526315789473685e-06, + "loss": 1.2509160041809082, + "step": 19 + }, + { + "epoch": 0.005848808305307793, + "grad_norm": 1.3242663031296102, + "learning_rate": 1.111111111111111e-06, + "loss": 0.9722317457199097, + "step": 20 + }, + { + "epoch": 0.006141248720573183, + "grad_norm": 1.7221218211796423, + "learning_rate": 1.1695906432748538e-06, + "loss": 1.1927049160003662, + "step": 21 + }, + { + "epoch": 0.006433689135838573, + "grad_norm": 1.4346324267765085, + "learning_rate": 1.2280701754385965e-06, + "loss": 1.2133033275604248, + "step": 22 + }, + { + "epoch": 0.006726129551103963, + "grad_norm": 1.449278395489955, + "learning_rate": 1.2865497076023392e-06, + "loss": 1.2373273372650146, + "step": 23 + }, + { + "epoch": 0.007018569966369352, + "grad_norm": 1.6650860096596214, + "learning_rate": 1.345029239766082e-06, + "loss": 0.9476668834686279, + "step": 24 + }, + { + "epoch": 0.007311010381634742, + "grad_norm": 1.2748998150534738, + "learning_rate": 1.4035087719298246e-06, + "loss": 1.1171324253082275, + "step": 25 + }, + { + "epoch": 0.007603450796900131, 
+ "grad_norm": 1.4396688825039674, + "learning_rate": 1.4619883040935674e-06, + "loss": 1.1276075839996338, + "step": 26 + }, + { + "epoch": 0.007895891212165522, + "grad_norm": 1.4009443443291978, + "learning_rate": 1.52046783625731e-06, + "loss": 1.190751314163208, + "step": 27 + }, + { + "epoch": 0.008188331627430911, + "grad_norm": 1.3912141798418658, + "learning_rate": 1.5789473684210526e-06, + "loss": 1.2171813249588013, + "step": 28 + }, + { + "epoch": 0.0084807720426963, + "grad_norm": 1.3073224250652524, + "learning_rate": 1.6374269005847953e-06, + "loss": 0.8595987558364868, + "step": 29 + }, + { + "epoch": 0.00877321245796169, + "grad_norm": 1.2671914308960317, + "learning_rate": 1.695906432748538e-06, + "loss": 1.0270106792449951, + "step": 30 + }, + { + "epoch": 0.00906565287322708, + "grad_norm": 1.5005896829818803, + "learning_rate": 1.7543859649122807e-06, + "loss": 1.068537712097168, + "step": 31 + }, + { + "epoch": 0.009358093288492469, + "grad_norm": 1.2766478202995049, + "learning_rate": 1.8128654970760235e-06, + "loss": 1.1307867765426636, + "step": 32 + }, + { + "epoch": 0.009650533703757859, + "grad_norm": 1.5582616996952416, + "learning_rate": 1.8713450292397662e-06, + "loss": 1.0837950706481934, + "step": 33 + }, + { + "epoch": 0.00994297411902325, + "grad_norm": 1.4304945053464713, + "learning_rate": 1.929824561403509e-06, + "loss": 1.1506178379058838, + "step": 34 + }, + { + "epoch": 0.01023541453428864, + "grad_norm": 1.4722243618391941, + "learning_rate": 1.9883040935672516e-06, + "loss": 0.9450151324272156, + "step": 35 + }, + { + "epoch": 0.010527854949554029, + "grad_norm": 1.4847744449229108, + "learning_rate": 2.0467836257309943e-06, + "loss": 1.2040901184082031, + "step": 36 + }, + { + "epoch": 0.010820295364819418, + "grad_norm": 1.4600954284408973, + "learning_rate": 2.105263157894737e-06, + "loss": 1.2316429615020752, + "step": 37 + }, + { + "epoch": 0.011112735780084808, + "grad_norm": 1.479845514016971, + "learning_rate": 
2.1637426900584798e-06, + "loss": 1.2119100093841553, + "step": 38 + }, + { + "epoch": 0.011405176195350197, + "grad_norm": 1.353351745720387, + "learning_rate": 2.222222222222222e-06, + "loss": 1.276926875114441, + "step": 39 + }, + { + "epoch": 0.011697616610615587, + "grad_norm": 1.256680621146734, + "learning_rate": 2.280701754385965e-06, + "loss": 0.9357824921607971, + "step": 40 + }, + { + "epoch": 0.011990057025880976, + "grad_norm": 1.3348703609284243, + "learning_rate": 2.3391812865497075e-06, + "loss": 1.1861131191253662, + "step": 41 + }, + { + "epoch": 0.012282497441146366, + "grad_norm": 1.3287978940598948, + "learning_rate": 2.3976608187134502e-06, + "loss": 1.1745539903640747, + "step": 42 + }, + { + "epoch": 0.012574937856411755, + "grad_norm": 1.1561631937443322, + "learning_rate": 2.456140350877193e-06, + "loss": 1.0291770696640015, + "step": 43 + }, + { + "epoch": 0.012867378271677147, + "grad_norm": 1.2176771446345134, + "learning_rate": 2.5146198830409357e-06, + "loss": 1.2361294031143188, + "step": 44 + }, + { + "epoch": 0.013159818686942536, + "grad_norm": 1.3295063710563702, + "learning_rate": 2.5730994152046784e-06, + "loss": 1.1909143924713135, + "step": 45 + }, + { + "epoch": 0.013452259102207926, + "grad_norm": 1.2650643173778968, + "learning_rate": 2.631578947368421e-06, + "loss": 1.1998133659362793, + "step": 46 + }, + { + "epoch": 0.013744699517473315, + "grad_norm": 1.1278701463292995, + "learning_rate": 2.690058479532164e-06, + "loss": 1.0011268854141235, + "step": 47 + }, + { + "epoch": 0.014037139932738705, + "grad_norm": 1.4726969666937608, + "learning_rate": 2.7485380116959066e-06, + "loss": 1.0552136898040771, + "step": 48 + }, + { + "epoch": 0.014329580348004094, + "grad_norm": 1.0797124442917296, + "learning_rate": 2.8070175438596493e-06, + "loss": 0.9727921485900879, + "step": 49 + }, + { + "epoch": 0.014622020763269484, + "grad_norm": 1.1798592697113668, + "learning_rate": 2.865497076023392e-06, + "loss": 
0.9361351728439331, + "step": 50 + }, + { + "epoch": 0.014914461178534873, + "grad_norm": 1.1254749584923542, + "learning_rate": 2.9239766081871347e-06, + "loss": 1.140329360961914, + "step": 51 + }, + { + "epoch": 0.015206901593800263, + "grad_norm": 1.1050662639156084, + "learning_rate": 2.9824561403508774e-06, + "loss": 0.991325855255127, + "step": 52 + }, + { + "epoch": 0.015499342009065652, + "grad_norm": 1.364923415701691, + "learning_rate": 3.04093567251462e-06, + "loss": 1.3082914352416992, + "step": 53 + }, + { + "epoch": 0.015791782424331043, + "grad_norm": 1.1357483626397489, + "learning_rate": 3.0994152046783624e-06, + "loss": 0.9767723083496094, + "step": 54 + }, + { + "epoch": 0.016084222839596433, + "grad_norm": 1.1338887919712684, + "learning_rate": 3.157894736842105e-06, + "loss": 1.193568229675293, + "step": 55 + }, + { + "epoch": 0.016376663254861822, + "grad_norm": 1.176328275981774, + "learning_rate": 3.216374269005848e-06, + "loss": 0.9767440557479858, + "step": 56 + }, + { + "epoch": 0.016669103670127212, + "grad_norm": 1.0263265896491178, + "learning_rate": 3.2748538011695906e-06, + "loss": 0.8888605833053589, + "step": 57 + }, + { + "epoch": 0.0169615440853926, + "grad_norm": 1.0668435517314094, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.087357997894287, + "step": 58 + }, + { + "epoch": 0.01725398450065799, + "grad_norm": 1.1952584851106463, + "learning_rate": 3.391812865497076e-06, + "loss": 1.0217459201812744, + "step": 59 + }, + { + "epoch": 0.01754642491592338, + "grad_norm": 1.1279843674972485, + "learning_rate": 3.4502923976608188e-06, + "loss": 1.0783777236938477, + "step": 60 + }, + { + "epoch": 0.01783886533118877, + "grad_norm": 0.9080265579264722, + "learning_rate": 3.5087719298245615e-06, + "loss": 0.85099196434021, + "step": 61 + }, + { + "epoch": 0.01813130574645416, + "grad_norm": 1.0228765689803359, + "learning_rate": 3.567251461988304e-06, + "loss": 0.9322569966316223, + "step": 62 + }, + { + "epoch": 
0.01842374616171955, + "grad_norm": 0.991842254830473, + "learning_rate": 3.625730994152047e-06, + "loss": 0.8749685287475586, + "step": 63 + }, + { + "epoch": 0.018716186576984938, + "grad_norm": 0.9789077968505817, + "learning_rate": 3.6842105263157896e-06, + "loss": 0.857900857925415, + "step": 64 + }, + { + "epoch": 0.019008626992250328, + "grad_norm": 0.8092242526335478, + "learning_rate": 3.7426900584795324e-06, + "loss": 0.8891770243644714, + "step": 65 + }, + { + "epoch": 0.019301067407515717, + "grad_norm": 1.0526332302181824, + "learning_rate": 3.801169590643275e-06, + "loss": 1.0730159282684326, + "step": 66 + }, + { + "epoch": 0.019593507822781107, + "grad_norm": 1.124329301516788, + "learning_rate": 3.859649122807018e-06, + "loss": 1.108138084411621, + "step": 67 + }, + { + "epoch": 0.0198859482380465, + "grad_norm": 1.3581659451048562, + "learning_rate": 3.9181286549707605e-06, + "loss": 1.2126305103302002, + "step": 68 + }, + { + "epoch": 0.02017838865331189, + "grad_norm": 1.1108109420327934, + "learning_rate": 3.976608187134503e-06, + "loss": 0.9527193307876587, + "step": 69 + }, + { + "epoch": 0.02047082906857728, + "grad_norm": 0.9965971604796123, + "learning_rate": 4.035087719298246e-06, + "loss": 1.0454832315444946, + "step": 70 + }, + { + "epoch": 0.020763269483842668, + "grad_norm": 0.821178202034714, + "learning_rate": 4.093567251461989e-06, + "loss": 0.7075237035751343, + "step": 71 + }, + { + "epoch": 0.021055709899108058, + "grad_norm": 1.2413273222740282, + "learning_rate": 4.152046783625731e-06, + "loss": 1.0972111225128174, + "step": 72 + }, + { + "epoch": 0.021348150314373447, + "grad_norm": 0.9838475362870381, + "learning_rate": 4.210526315789474e-06, + "loss": 1.0400984287261963, + "step": 73 + }, + { + "epoch": 0.021640590729638837, + "grad_norm": 0.8577987626348056, + "learning_rate": 4.269005847953217e-06, + "loss": 0.7712557315826416, + "step": 74 + }, + { + "epoch": 0.021933031144904226, + "grad_norm": 1.0937426764383058, + 
"learning_rate": 4.3274853801169596e-06, + "loss": 1.1733636856079102, + "step": 75 + }, + { + "epoch": 0.022225471560169616, + "grad_norm": 0.9896291906902066, + "learning_rate": 4.385964912280702e-06, + "loss": 0.8653621673583984, + "step": 76 + }, + { + "epoch": 0.022517911975435005, + "grad_norm": 0.9059062097735997, + "learning_rate": 4.444444444444444e-06, + "loss": 0.8797299861907959, + "step": 77 + }, + { + "epoch": 0.022810352390700395, + "grad_norm": 1.0128235878781693, + "learning_rate": 4.502923976608187e-06, + "loss": 0.8357750177383423, + "step": 78 + }, + { + "epoch": 0.023102792805965784, + "grad_norm": 1.241636412088512, + "learning_rate": 4.56140350877193e-06, + "loss": 1.1249456405639648, + "step": 79 + }, + { + "epoch": 0.023395233221231174, + "grad_norm": 1.2743547410748093, + "learning_rate": 4.619883040935672e-06, + "loss": 0.9920758008956909, + "step": 80 + }, + { + "epoch": 0.023687673636496563, + "grad_norm": 1.0290847197991744, + "learning_rate": 4.678362573099415e-06, + "loss": 0.8115094900131226, + "step": 81 + }, + { + "epoch": 0.023980114051761953, + "grad_norm": 0.9339898981913745, + "learning_rate": 4.736842105263158e-06, + "loss": 1.060575246810913, + "step": 82 + }, + { + "epoch": 0.024272554467027342, + "grad_norm": 1.1898301512766587, + "learning_rate": 4.7953216374269005e-06, + "loss": 1.028218150138855, + "step": 83 + }, + { + "epoch": 0.02456499488229273, + "grad_norm": 0.9840324243241313, + "learning_rate": 4.853801169590643e-06, + "loss": 1.090872049331665, + "step": 84 + }, + { + "epoch": 0.02485743529755812, + "grad_norm": 1.110956193223445, + "learning_rate": 4.912280701754386e-06, + "loss": 1.0069574117660522, + "step": 85 + }, + { + "epoch": 0.02514987571282351, + "grad_norm": 1.0134868000559825, + "learning_rate": 4.970760233918129e-06, + "loss": 0.9391698837280273, + "step": 86 + }, + { + "epoch": 0.025442316128088904, + "grad_norm": 1.0912235029106665, + "learning_rate": 5.029239766081871e-06, + "loss": 
0.881995677947998, + "step": 87 + }, + { + "epoch": 0.025734756543354293, + "grad_norm": 1.0399116507679627, + "learning_rate": 5.087719298245615e-06, + "loss": 0.87871253490448, + "step": 88 + }, + { + "epoch": 0.026027196958619683, + "grad_norm": 1.0265015868708693, + "learning_rate": 5.146198830409357e-06, + "loss": 1.005904197692871, + "step": 89 + }, + { + "epoch": 0.026319637373885072, + "grad_norm": 1.0161210383553128, + "learning_rate": 5.2046783625731e-06, + "loss": 0.8624223470687866, + "step": 90 + }, + { + "epoch": 0.02661207778915046, + "grad_norm": 1.0154040401745301, + "learning_rate": 5.263157894736842e-06, + "loss": 0.9976427555084229, + "step": 91 + }, + { + "epoch": 0.02690451820441585, + "grad_norm": 1.157266795240935, + "learning_rate": 5.321637426900586e-06, + "loss": 0.7743148803710938, + "step": 92 + }, + { + "epoch": 0.02719695861968124, + "grad_norm": 1.0027983307117943, + "learning_rate": 5.380116959064328e-06, + "loss": 0.8541792631149292, + "step": 93 + }, + { + "epoch": 0.02748939903494663, + "grad_norm": 1.0195872536359372, + "learning_rate": 5.438596491228071e-06, + "loss": 0.9141846895217896, + "step": 94 + }, + { + "epoch": 0.02778183945021202, + "grad_norm": 0.9964676811589505, + "learning_rate": 5.497076023391813e-06, + "loss": 0.9762974977493286, + "step": 95 + }, + { + "epoch": 0.02807427986547741, + "grad_norm": 1.086834377136063, + "learning_rate": 5.555555555555557e-06, + "loss": 0.8039775490760803, + "step": 96 + }, + { + "epoch": 0.0283667202807428, + "grad_norm": 1.0288673358640383, + "learning_rate": 5.6140350877192985e-06, + "loss": 0.9464477300643921, + "step": 97 + }, + { + "epoch": 0.028659160696008188, + "grad_norm": 0.9989091266376411, + "learning_rate": 5.672514619883041e-06, + "loss": 0.8264896869659424, + "step": 98 + }, + { + "epoch": 0.028951601111273578, + "grad_norm": 1.239452647422259, + "learning_rate": 5.730994152046784e-06, + "loss": 0.8347363471984863, + "step": 99 + }, + { + "epoch": 
0.029244041526538967, + "grad_norm": 1.1482101557047766, + "learning_rate": 5.789473684210527e-06, + "loss": 0.7974327802658081, + "step": 100 + }, + { + "epoch": 0.029536481941804357, + "grad_norm": 1.040746567320999, + "learning_rate": 5.847953216374269e-06, + "loss": 0.7953752875328064, + "step": 101 + }, + { + "epoch": 0.029828922357069746, + "grad_norm": 1.0186289029859024, + "learning_rate": 5.906432748538012e-06, + "loss": 0.8652607798576355, + "step": 102 + }, + { + "epoch": 0.030121362772335136, + "grad_norm": 1.0719829766550855, + "learning_rate": 5.964912280701755e-06, + "loss": 0.973792552947998, + "step": 103 + }, + { + "epoch": 0.030413803187600525, + "grad_norm": 0.9226382056883017, + "learning_rate": 6.023391812865498e-06, + "loss": 0.8093612194061279, + "step": 104 + }, + { + "epoch": 0.030706243602865915, + "grad_norm": 0.9154711374479992, + "learning_rate": 6.08187134502924e-06, + "loss": 0.8463394045829773, + "step": 105 + }, + { + "epoch": 0.030998684018131304, + "grad_norm": 1.2769916053670627, + "learning_rate": 6.140350877192983e-06, + "loss": 0.7898350358009338, + "step": 106 + }, + { + "epoch": 0.0312911244333967, + "grad_norm": 1.298220618549192, + "learning_rate": 6.198830409356725e-06, + "loss": 0.9750698804855347, + "step": 107 + }, + { + "epoch": 0.031583564848662087, + "grad_norm": 1.000315516155276, + "learning_rate": 6.2573099415204685e-06, + "loss": 0.8137387633323669, + "step": 108 + }, + { + "epoch": 0.031876005263927476, + "grad_norm": 1.082436003075408, + "learning_rate": 6.31578947368421e-06, + "loss": 1.0641593933105469, + "step": 109 + }, + { + "epoch": 0.032168445679192866, + "grad_norm": 1.0363310086535433, + "learning_rate": 6.374269005847954e-06, + "loss": 0.9647193551063538, + "step": 110 + }, + { + "epoch": 0.032460886094458255, + "grad_norm": 1.1062097211432278, + "learning_rate": 6.432748538011696e-06, + "loss": 0.9693200588226318, + "step": 111 + }, + { + "epoch": 0.032753326509723645, + "grad_norm": 
1.145031857661525, + "learning_rate": 6.491228070175439e-06, + "loss": 0.9600590467453003, + "step": 112 + }, + { + "epoch": 0.033045766924989034, + "grad_norm": 1.0203404188427831, + "learning_rate": 6.549707602339181e-06, + "loss": 0.8908880949020386, + "step": 113 + }, + { + "epoch": 0.033338207340254424, + "grad_norm": 1.2162435709165451, + "learning_rate": 6.608187134502925e-06, + "loss": 0.9803124666213989, + "step": 114 + }, + { + "epoch": 0.03363064775551981, + "grad_norm": 1.1738875143751093, + "learning_rate": 6.666666666666667e-06, + "loss": 0.8288271427154541, + "step": 115 + }, + { + "epoch": 0.0339230881707852, + "grad_norm": 0.9490473067752526, + "learning_rate": 6.72514619883041e-06, + "loss": 0.7203798890113831, + "step": 116 + }, + { + "epoch": 0.03421552858605059, + "grad_norm": 1.0046253156347025, + "learning_rate": 6.783625730994152e-06, + "loss": 0.7670629024505615, + "step": 117 + }, + { + "epoch": 0.03450796900131598, + "grad_norm": 1.0563125407630551, + "learning_rate": 6.842105263157896e-06, + "loss": 0.8487929105758667, + "step": 118 + }, + { + "epoch": 0.03480040941658137, + "grad_norm": 1.1292147521599132, + "learning_rate": 6.9005847953216375e-06, + "loss": 0.8332704305648804, + "step": 119 + }, + { + "epoch": 0.03509284983184676, + "grad_norm": 1.2138847310663696, + "learning_rate": 6.959064327485381e-06, + "loss": 0.9984017610549927, + "step": 120 + }, + { + "epoch": 0.03538529024711215, + "grad_norm": 1.126543099330432, + "learning_rate": 7.017543859649123e-06, + "loss": 0.788459062576294, + "step": 121 + }, + { + "epoch": 0.03567773066237754, + "grad_norm": 1.5166585395762038, + "learning_rate": 7.0760233918128665e-06, + "loss": 1.0288443565368652, + "step": 122 + }, + { + "epoch": 0.03597017107764293, + "grad_norm": 1.0086777607738802, + "learning_rate": 7.134502923976608e-06, + "loss": 0.7939552664756775, + "step": 123 + }, + { + "epoch": 0.03626261149290832, + "grad_norm": 1.0254521267017753, + "learning_rate": 
7.192982456140352e-06, + "loss": 0.8816506862640381, + "step": 124 + }, + { + "epoch": 0.03655505190817371, + "grad_norm": 1.0223917066157164, + "learning_rate": 7.251461988304094e-06, + "loss": 0.8864353895187378, + "step": 125 + }, + { + "epoch": 0.0368474923234391, + "grad_norm": 1.2363556273996017, + "learning_rate": 7.309941520467837e-06, + "loss": 0.9817954897880554, + "step": 126 + }, + { + "epoch": 0.03713993273870449, + "grad_norm": 1.0757650534793346, + "learning_rate": 7.368421052631579e-06, + "loss": 0.8423842787742615, + "step": 127 + }, + { + "epoch": 0.037432373153969876, + "grad_norm": 1.1636915661730252, + "learning_rate": 7.426900584795322e-06, + "loss": 0.8375135660171509, + "step": 128 + }, + { + "epoch": 0.037724813569235266, + "grad_norm": 1.2215328884976426, + "learning_rate": 7.485380116959065e-06, + "loss": 0.9105685949325562, + "step": 129 + }, + { + "epoch": 0.038017253984500655, + "grad_norm": 1.1346801425180852, + "learning_rate": 7.5438596491228074e-06, + "loss": 0.8784557580947876, + "step": 130 + }, + { + "epoch": 0.038309694399766045, + "grad_norm": 1.0071578019284073, + "learning_rate": 7.60233918128655e-06, + "loss": 0.7557879686355591, + "step": 131 + }, + { + "epoch": 0.038602134815031434, + "grad_norm": 1.228942961434803, + "learning_rate": 7.660818713450294e-06, + "loss": 0.8966819047927856, + "step": 132 + }, + { + "epoch": 0.038894575230296824, + "grad_norm": 1.0961114842309465, + "learning_rate": 7.719298245614036e-06, + "loss": 0.7642185091972351, + "step": 133 + }, + { + "epoch": 0.03918701564556221, + "grad_norm": 1.062961529950125, + "learning_rate": 7.77777777777778e-06, + "loss": 0.8313230276107788, + "step": 134 + }, + { + "epoch": 0.0394794560608276, + "grad_norm": 1.3350623914867434, + "learning_rate": 7.836257309941521e-06, + "loss": 0.8388677835464478, + "step": 135 + }, + { + "epoch": 0.039771896476093, + "grad_norm": 1.2027686314521255, + "learning_rate": 7.894736842105265e-06, + "loss": 0.9065952301025391, + 
"step": 136 + }, + { + "epoch": 0.04006433689135839, + "grad_norm": 1.123144368922916, + "learning_rate": 7.953216374269006e-06, + "loss": 0.8153767585754395, + "step": 137 + }, + { + "epoch": 0.04035677730662378, + "grad_norm": 1.163761684167935, + "learning_rate": 8.01169590643275e-06, + "loss": 0.8976421356201172, + "step": 138 + }, + { + "epoch": 0.04064921772188917, + "grad_norm": 1.1354333989669174, + "learning_rate": 8.070175438596492e-06, + "loss": 0.7360264658927917, + "step": 139 + }, + { + "epoch": 0.04094165813715456, + "grad_norm": 1.1009203930924998, + "learning_rate": 8.128654970760235e-06, + "loss": 0.8442148566246033, + "step": 140 + }, + { + "epoch": 0.04123409855241995, + "grad_norm": 1.0872796831159965, + "learning_rate": 8.187134502923977e-06, + "loss": 0.6541435718536377, + "step": 141 + }, + { + "epoch": 0.041526538967685336, + "grad_norm": 1.2792221696979318, + "learning_rate": 8.24561403508772e-06, + "loss": 0.7492353916168213, + "step": 142 + }, + { + "epoch": 0.041818979382950726, + "grad_norm": 1.0406728730985955, + "learning_rate": 8.304093567251463e-06, + "loss": 0.6681893467903137, + "step": 143 + }, + { + "epoch": 0.042111419798216115, + "grad_norm": 1.2507905783247102, + "learning_rate": 8.362573099415205e-06, + "loss": 0.8384866714477539, + "step": 144 + }, + { + "epoch": 0.042403860213481505, + "grad_norm": 1.125680624680095, + "learning_rate": 8.421052631578948e-06, + "loss": 0.8338214159011841, + "step": 145 + }, + { + "epoch": 0.042696300628746894, + "grad_norm": 1.3441065562284606, + "learning_rate": 8.47953216374269e-06, + "loss": 0.8549021482467651, + "step": 146 + }, + { + "epoch": 0.042988741044012284, + "grad_norm": 1.0226139512096055, + "learning_rate": 8.538011695906434e-06, + "loss": 0.8324464559555054, + "step": 147 + }, + { + "epoch": 0.04328118145927767, + "grad_norm": 1.3742681865566602, + "learning_rate": 8.596491228070176e-06, + "loss": 0.9247474670410156, + "step": 148 + }, + { + "epoch": 0.04357362187454306, + 
"grad_norm": 1.3295257009133983, + "learning_rate": 8.654970760233919e-06, + "loss": 0.8488880395889282, + "step": 149 + }, + { + "epoch": 0.04386606228980845, + "grad_norm": 1.244174459745273, + "learning_rate": 8.713450292397661e-06, + "loss": 0.7844473123550415, + "step": 150 + }, + { + "epoch": 0.04415850270507384, + "grad_norm": 1.3605735346558072, + "learning_rate": 8.771929824561405e-06, + "loss": 1.0540976524353027, + "step": 151 + }, + { + "epoch": 0.04445094312033923, + "grad_norm": 1.096092225329518, + "learning_rate": 8.830409356725146e-06, + "loss": 0.7919446229934692, + "step": 152 + }, + { + "epoch": 0.04474338353560462, + "grad_norm": 1.1577837223865697, + "learning_rate": 8.888888888888888e-06, + "loss": 0.818670928478241, + "step": 153 + }, + { + "epoch": 0.04503582395087001, + "grad_norm": 1.4320201209257988, + "learning_rate": 8.947368421052632e-06, + "loss": 0.8491114377975464, + "step": 154 + }, + { + "epoch": 0.0453282643661354, + "grad_norm": 1.8326606844764444, + "learning_rate": 9.005847953216374e-06, + "loss": 0.660563588142395, + "step": 155 + }, + { + "epoch": 0.04562070478140079, + "grad_norm": 1.1838649114458772, + "learning_rate": 9.064327485380117e-06, + "loss": 0.8559159636497498, + "step": 156 + }, + { + "epoch": 0.04591314519666618, + "grad_norm": 1.0968958293675206, + "learning_rate": 9.12280701754386e-06, + "loss": 0.8478386402130127, + "step": 157 + }, + { + "epoch": 0.04620558561193157, + "grad_norm": 1.1272218094040445, + "learning_rate": 9.181286549707603e-06, + "loss": 0.758915901184082, + "step": 158 + }, + { + "epoch": 0.04649802602719696, + "grad_norm": 1.3159367769875163, + "learning_rate": 9.239766081871345e-06, + "loss": 0.773307204246521, + "step": 159 + }, + { + "epoch": 0.04679046644246235, + "grad_norm": 1.29739510285095, + "learning_rate": 9.298245614035088e-06, + "loss": 0.8948490023612976, + "step": 160 + }, + { + "epoch": 0.04708290685772774, + "grad_norm": 1.2170406448830853, + "learning_rate": 
9.35672514619883e-06, + "loss": 0.83086097240448, + "step": 161 + }, + { + "epoch": 0.047375347272993126, + "grad_norm": 1.474814122834776, + "learning_rate": 9.415204678362574e-06, + "loss": 0.7683168649673462, + "step": 162 + }, + { + "epoch": 0.047667787688258516, + "grad_norm": 1.2546637555360107, + "learning_rate": 9.473684210526315e-06, + "loss": 0.9267748594284058, + "step": 163 + }, + { + "epoch": 0.047960228103523905, + "grad_norm": 1.1945733924353639, + "learning_rate": 9.532163742690059e-06, + "loss": 0.9243365526199341, + "step": 164 + }, + { + "epoch": 0.048252668518789295, + "grad_norm": 1.1508961292698372, + "learning_rate": 9.590643274853801e-06, + "loss": 0.7841176986694336, + "step": 165 + }, + { + "epoch": 0.048545108934054684, + "grad_norm": 1.1853174404309834, + "learning_rate": 9.649122807017545e-06, + "loss": 0.8318643569946289, + "step": 166 + }, + { + "epoch": 0.048837549349320074, + "grad_norm": 1.3089312801161905, + "learning_rate": 9.707602339181286e-06, + "loss": 0.866286039352417, + "step": 167 + }, + { + "epoch": 0.04912998976458546, + "grad_norm": 1.32215003396801, + "learning_rate": 9.76608187134503e-06, + "loss": 0.8232241868972778, + "step": 168 + }, + { + "epoch": 0.04942243017985085, + "grad_norm": 1.4759162272800292, + "learning_rate": 9.824561403508772e-06, + "loss": 0.874968945980072, + "step": 169 + }, + { + "epoch": 0.04971487059511624, + "grad_norm": 1.3247540509223557, + "learning_rate": 9.883040935672515e-06, + "loss": 0.9048999547958374, + "step": 170 + }, + { + "epoch": 0.05000731101038163, + "grad_norm": 1.4647995646715117, + "learning_rate": 9.941520467836257e-06, + "loss": 0.9220215082168579, + "step": 171 + }, + { + "epoch": 0.05029975142564702, + "grad_norm": 1.3290504006044366, + "learning_rate": 1e-05, + "loss": 0.8326996564865112, + "step": 172 + }, + { + "epoch": 0.05059219184091241, + "grad_norm": 1.0687285940591045, + "learning_rate": 1.0058479532163743e-05, + "loss": 0.8023662567138672, + "step": 173 + }, + 
{ + "epoch": 0.05088463225617781, + "grad_norm": 1.4370267362244613, + "learning_rate": 1.0116959064327488e-05, + "loss": 0.9172271490097046, + "step": 174 + }, + { + "epoch": 0.0511770726714432, + "grad_norm": 1.2538172153184461, + "learning_rate": 1.017543859649123e-05, + "loss": 0.8016377687454224, + "step": 175 + }, + { + "epoch": 0.051469513086708586, + "grad_norm": 1.1436252675754246, + "learning_rate": 1.0233918128654972e-05, + "loss": 0.7656369805335999, + "step": 176 + }, + { + "epoch": 0.051761953501973976, + "grad_norm": 1.1951944941269466, + "learning_rate": 1.0292397660818714e-05, + "loss": 0.7769640684127808, + "step": 177 + }, + { + "epoch": 0.052054393917239365, + "grad_norm": 1.3791114600068226, + "learning_rate": 1.0350877192982459e-05, + "loss": 0.9830589294433594, + "step": 178 + }, + { + "epoch": 0.052346834332504755, + "grad_norm": 1.1501081025808126, + "learning_rate": 1.04093567251462e-05, + "loss": 0.8002523183822632, + "step": 179 + }, + { + "epoch": 0.052639274747770144, + "grad_norm": 1.3726838653365003, + "learning_rate": 1.0467836257309943e-05, + "loss": 0.879243016242981, + "step": 180 + }, + { + "epoch": 0.052931715163035534, + "grad_norm": 1.2863425151805854, + "learning_rate": 1.0526315789473684e-05, + "loss": 0.7266525030136108, + "step": 181 + }, + { + "epoch": 0.05322415557830092, + "grad_norm": 1.350994010752117, + "learning_rate": 1.0584795321637428e-05, + "loss": 0.784702479839325, + "step": 182 + }, + { + "epoch": 0.05351659599356631, + "grad_norm": 1.415897619399055, + "learning_rate": 1.0643274853801172e-05, + "loss": 0.8419734239578247, + "step": 183 + }, + { + "epoch": 0.0538090364088317, + "grad_norm": 1.201782404599289, + "learning_rate": 1.0701754385964913e-05, + "loss": 0.8462855815887451, + "step": 184 + }, + { + "epoch": 0.05410147682409709, + "grad_norm": 1.361501494219251, + "learning_rate": 1.0760233918128655e-05, + "loss": 0.8888737559318542, + "step": 185 + }, + { + "epoch": 0.05439391723936248, + "grad_norm": 
1.3305576553150047, + "learning_rate": 1.0818713450292399e-05, + "loss": 0.8063781261444092, + "step": 186 + }, + { + "epoch": 0.05468635765462787, + "grad_norm": 1.2109684966022718, + "learning_rate": 1.0877192982456142e-05, + "loss": 0.7981499433517456, + "step": 187 + }, + { + "epoch": 0.05497879806989326, + "grad_norm": 1.5415785509759563, + "learning_rate": 1.0935672514619884e-05, + "loss": 0.8474490642547607, + "step": 188 + }, + { + "epoch": 0.05527123848515865, + "grad_norm": 1.300197838887535, + "learning_rate": 1.0994152046783626e-05, + "loss": 0.818732500076294, + "step": 189 + }, + { + "epoch": 0.05556367890042404, + "grad_norm": 1.3192619521811115, + "learning_rate": 1.105263157894737e-05, + "loss": 0.7660291194915771, + "step": 190 + }, + { + "epoch": 0.05585611931568943, + "grad_norm": 1.2626389127660034, + "learning_rate": 1.1111111111111113e-05, + "loss": 0.8240147233009338, + "step": 191 + }, + { + "epoch": 0.05614855973095482, + "grad_norm": 1.340830231936402, + "learning_rate": 1.1169590643274855e-05, + "loss": 0.9377203583717346, + "step": 192 + }, + { + "epoch": 0.05644100014622021, + "grad_norm": 1.416661564809907, + "learning_rate": 1.1228070175438597e-05, + "loss": 0.8662704229354858, + "step": 193 + }, + { + "epoch": 0.0567334405614856, + "grad_norm": 1.3274611257173192, + "learning_rate": 1.128654970760234e-05, + "loss": 0.717308759689331, + "step": 194 + }, + { + "epoch": 0.05702588097675099, + "grad_norm": 1.1942152308113003, + "learning_rate": 1.1345029239766083e-05, + "loss": 0.8538037538528442, + "step": 195 + }, + { + "epoch": 0.057318321392016376, + "grad_norm": 1.4411136610170212, + "learning_rate": 1.1403508771929826e-05, + "loss": 0.9016960859298706, + "step": 196 + }, + { + "epoch": 0.057610761807281766, + "grad_norm": 1.4664426354083508, + "learning_rate": 1.1461988304093568e-05, + "loss": 0.9313502311706543, + "step": 197 + }, + { + "epoch": 0.057903202222547155, + "grad_norm": 1.2885330427126278, + "learning_rate": 
1.1520467836257312e-05, + "loss": 0.7330124974250793, + "step": 198 + }, + { + "epoch": 0.058195642637812545, + "grad_norm": 1.272277327326545, + "learning_rate": 1.1578947368421053e-05, + "loss": 0.8904056549072266, + "step": 199 + }, + { + "epoch": 0.058488083053077934, + "grad_norm": 1.4761275028472136, + "learning_rate": 1.1637426900584797e-05, + "loss": 0.7816377878189087, + "step": 200 + }, + { + "epoch": 0.058780523468343324, + "grad_norm": 1.3244130760300052, + "learning_rate": 1.1695906432748539e-05, + "loss": 0.7109910249710083, + "step": 201 + }, + { + "epoch": 0.05907296388360871, + "grad_norm": 1.499082853070359, + "learning_rate": 1.1754385964912282e-05, + "loss": 0.7657924890518188, + "step": 202 + }, + { + "epoch": 0.0593654042988741, + "grad_norm": 1.5632309821036996, + "learning_rate": 1.1812865497076024e-05, + "loss": 0.8521978259086609, + "step": 203 + }, + { + "epoch": 0.05965784471413949, + "grad_norm": 1.3625729366507646, + "learning_rate": 1.1871345029239766e-05, + "loss": 0.7558364868164062, + "step": 204 + }, + { + "epoch": 0.05995028512940488, + "grad_norm": 1.3362044158661328, + "learning_rate": 1.192982456140351e-05, + "loss": 0.8488497734069824, + "step": 205 + }, + { + "epoch": 0.06024272554467027, + "grad_norm": 1.5823695803446844, + "learning_rate": 1.1988304093567253e-05, + "loss": 0.7905591726303101, + "step": 206 + }, + { + "epoch": 0.06053516595993566, + "grad_norm": 1.324069880941127, + "learning_rate": 1.2046783625730995e-05, + "loss": 0.747936487197876, + "step": 207 + }, + { + "epoch": 0.06082760637520105, + "grad_norm": 1.3370127883002023, + "learning_rate": 1.2105263157894737e-05, + "loss": 0.8653486967086792, + "step": 208 + }, + { + "epoch": 0.06112004679046644, + "grad_norm": 1.295171295812896, + "learning_rate": 1.216374269005848e-05, + "loss": 0.8662437200546265, + "step": 209 + }, + { + "epoch": 0.06141248720573183, + "grad_norm": 1.6369328366726996, + "learning_rate": 1.2222222222222224e-05, + "loss": 
0.9567133188247681, + "step": 210 + }, + { + "epoch": 0.06170492762099722, + "grad_norm": 1.4011109813275144, + "learning_rate": 1.2280701754385966e-05, + "loss": 0.8994660377502441, + "step": 211 + }, + { + "epoch": 0.06199736803626261, + "grad_norm": 1.2989562892904951, + "learning_rate": 1.2339181286549708e-05, + "loss": 0.7889316082000732, + "step": 212 + }, + { + "epoch": 0.062289808451528005, + "grad_norm": 1.2266327731037636, + "learning_rate": 1.239766081871345e-05, + "loss": 0.883985161781311, + "step": 213 + }, + { + "epoch": 0.0625822488667934, + "grad_norm": 1.2190679056716556, + "learning_rate": 1.2456140350877195e-05, + "loss": 0.7780495882034302, + "step": 214 + }, + { + "epoch": 0.06287468928205878, + "grad_norm": 1.3596314866008754, + "learning_rate": 1.2514619883040937e-05, + "loss": 0.6514906883239746, + "step": 215 + }, + { + "epoch": 0.06316712969732417, + "grad_norm": 1.3008367711622892, + "learning_rate": 1.2573099415204679e-05, + "loss": 0.750559389591217, + "step": 216 + }, + { + "epoch": 0.06345957011258956, + "grad_norm": 1.4761536100726258, + "learning_rate": 1.263157894736842e-05, + "loss": 0.8330573439598083, + "step": 217 + }, + { + "epoch": 0.06375201052785495, + "grad_norm": 1.4144186396910836, + "learning_rate": 1.2690058479532166e-05, + "loss": 0.8075361847877502, + "step": 218 + }, + { + "epoch": 0.06404445094312033, + "grad_norm": 1.2867265784947997, + "learning_rate": 1.2748538011695908e-05, + "loss": 0.7636772394180298, + "step": 219 + }, + { + "epoch": 0.06433689135838573, + "grad_norm": 1.1905704140813884, + "learning_rate": 1.280701754385965e-05, + "loss": 0.8241903185844421, + "step": 220 + }, + { + "epoch": 0.06462933177365111, + "grad_norm": 1.261461662230418, + "learning_rate": 1.2865497076023392e-05, + "loss": 0.6582514047622681, + "step": 221 + }, + { + "epoch": 0.06492177218891651, + "grad_norm": 1.461492259499335, + "learning_rate": 1.2923976608187137e-05, + "loss": 0.6363992691040039, + "step": 222 + }, + { + 
"epoch": 0.06521421260418189, + "grad_norm": 1.5776709499534403, + "learning_rate": 1.2982456140350879e-05, + "loss": 0.8093860149383545, + "step": 223 + }, + { + "epoch": 0.06550665301944729, + "grad_norm": 1.5281675606912017, + "learning_rate": 1.304093567251462e-05, + "loss": 0.7719511985778809, + "step": 224 + }, + { + "epoch": 0.06579909343471267, + "grad_norm": 1.4484434101459598, + "learning_rate": 1.3099415204678362e-05, + "loss": 0.8314809799194336, + "step": 225 + }, + { + "epoch": 0.06609153384997807, + "grad_norm": 1.3751378156667435, + "learning_rate": 1.3157894736842108e-05, + "loss": 0.8752902746200562, + "step": 226 + }, + { + "epoch": 0.06638397426524345, + "grad_norm": 1.4660956062146326, + "learning_rate": 1.321637426900585e-05, + "loss": 0.7564839124679565, + "step": 227 + }, + { + "epoch": 0.06667641468050885, + "grad_norm": 1.6744274403459947, + "learning_rate": 1.3274853801169591e-05, + "loss": 0.7377971410751343, + "step": 228 + }, + { + "epoch": 0.06696885509577423, + "grad_norm": 1.3046915227989528, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.7298087477684021, + "step": 229 + }, + { + "epoch": 0.06726129551103963, + "grad_norm": 1.4026797729918719, + "learning_rate": 1.3391812865497079e-05, + "loss": 0.7291176915168762, + "step": 230 + }, + { + "epoch": 0.06755373592630501, + "grad_norm": 1.3421785664914363, + "learning_rate": 1.345029239766082e-05, + "loss": 0.8226944208145142, + "step": 231 + }, + { + "epoch": 0.0678461763415704, + "grad_norm": 1.4277073905518047, + "learning_rate": 1.3508771929824562e-05, + "loss": 0.7185185551643372, + "step": 232 + }, + { + "epoch": 0.0681386167568358, + "grad_norm": 1.2950151686673683, + "learning_rate": 1.3567251461988304e-05, + "loss": 0.7028212547302246, + "step": 233 + }, + { + "epoch": 0.06843105717210118, + "grad_norm": 1.6157016450339874, + "learning_rate": 1.362573099415205e-05, + "loss": 0.8809897899627686, + "step": 234 + }, + { + "epoch": 0.06872349758736658, + "grad_norm": 
1.388536739112073, + "learning_rate": 1.3684210526315791e-05, + "loss": 0.7779085040092468, + "step": 235 + }, + { + "epoch": 0.06901593800263196, + "grad_norm": 1.5070530641919806, + "learning_rate": 1.3742690058479533e-05, + "loss": 0.731019139289856, + "step": 236 + }, + { + "epoch": 0.06930837841789736, + "grad_norm": 1.4005389899518954, + "learning_rate": 1.3801169590643275e-05, + "loss": 0.7495850920677185, + "step": 237 + }, + { + "epoch": 0.06960081883316274, + "grad_norm": 1.2241508662035476, + "learning_rate": 1.385964912280702e-05, + "loss": 0.7018189430236816, + "step": 238 + }, + { + "epoch": 0.06989325924842814, + "grad_norm": 1.2596692368793962, + "learning_rate": 1.3918128654970762e-05, + "loss": 0.7072417736053467, + "step": 239 + }, + { + "epoch": 0.07018569966369352, + "grad_norm": 1.3606864903220994, + "learning_rate": 1.3976608187134504e-05, + "loss": 0.8125720620155334, + "step": 240 + }, + { + "epoch": 0.07047814007895892, + "grad_norm": 1.442924901417446, + "learning_rate": 1.4035087719298246e-05, + "loss": 0.6101655960083008, + "step": 241 + }, + { + "epoch": 0.0707705804942243, + "grad_norm": 1.3725413795436465, + "learning_rate": 1.409356725146199e-05, + "loss": 0.9005568623542786, + "step": 242 + }, + { + "epoch": 0.0710630209094897, + "grad_norm": 1.4215646059439664, + "learning_rate": 1.4152046783625733e-05, + "loss": 0.7678338289260864, + "step": 243 + }, + { + "epoch": 0.07135546132475508, + "grad_norm": 1.4745728838056915, + "learning_rate": 1.4210526315789475e-05, + "loss": 0.7563410997390747, + "step": 244 + }, + { + "epoch": 0.07164790174002048, + "grad_norm": 1.3043448641122064, + "learning_rate": 1.4269005847953217e-05, + "loss": 0.7497583627700806, + "step": 245 + }, + { + "epoch": 0.07194034215528586, + "grad_norm": 1.8237088246729396, + "learning_rate": 1.432748538011696e-05, + "loss": 0.8913442492485046, + "step": 246 + }, + { + "epoch": 0.07223278257055125, + "grad_norm": 1.446976759622428, + "learning_rate": 
1.4385964912280704e-05, + "loss": 0.7714704871177673, + "step": 247 + }, + { + "epoch": 0.07252522298581664, + "grad_norm": 1.4721214924941617, + "learning_rate": 1.4444444444444446e-05, + "loss": 0.6752789616584778, + "step": 248 + }, + { + "epoch": 0.07281766340108203, + "grad_norm": 1.4015875441769006, + "learning_rate": 1.4502923976608188e-05, + "loss": 0.6092795133590698, + "step": 249 + }, + { + "epoch": 0.07311010381634742, + "grad_norm": 1.4602535650914903, + "learning_rate": 1.4561403508771931e-05, + "loss": 0.9300343990325928, + "step": 250 + }, + { + "epoch": 0.07340254423161281, + "grad_norm": 1.3884630911660603, + "learning_rate": 1.4619883040935675e-05, + "loss": 0.8005613088607788, + "step": 251 + }, + { + "epoch": 0.0736949846468782, + "grad_norm": 1.2918508056771596, + "learning_rate": 1.4678362573099417e-05, + "loss": 0.7188931703567505, + "step": 252 + }, + { + "epoch": 0.07398742506214359, + "grad_norm": 1.3258314938186555, + "learning_rate": 1.4736842105263159e-05, + "loss": 0.6967242956161499, + "step": 253 + }, + { + "epoch": 0.07427986547740897, + "grad_norm": 1.300875000270566, + "learning_rate": 1.4795321637426902e-05, + "loss": 0.6921653747558594, + "step": 254 + }, + { + "epoch": 0.07457230589267437, + "grad_norm": 1.4258732788152875, + "learning_rate": 1.4853801169590644e-05, + "loss": 0.8498743772506714, + "step": 255 + }, + { + "epoch": 0.07486474630793975, + "grad_norm": 1.4311730434285577, + "learning_rate": 1.4912280701754388e-05, + "loss": 0.6420027017593384, + "step": 256 + }, + { + "epoch": 0.07515718672320515, + "grad_norm": 1.3747073212413874, + "learning_rate": 1.497076023391813e-05, + "loss": 0.7101434469223022, + "step": 257 + }, + { + "epoch": 0.07544962713847053, + "grad_norm": 1.562801712624193, + "learning_rate": 1.5029239766081873e-05, + "loss": 0.740740180015564, + "step": 258 + }, + { + "epoch": 0.07574206755373593, + "grad_norm": 1.726645998674187, + "learning_rate": 1.5087719298245615e-05, + "loss": 
0.891905665397644, + "step": 259 + }, + { + "epoch": 0.07603450796900131, + "grad_norm": 1.5486677390214905, + "learning_rate": 1.5146198830409358e-05, + "loss": 0.867740273475647, + "step": 260 + }, + { + "epoch": 0.07632694838426671, + "grad_norm": 1.5072500165891534, + "learning_rate": 1.52046783625731e-05, + "loss": 0.7895220518112183, + "step": 261 + }, + { + "epoch": 0.07661938879953209, + "grad_norm": 1.5579945503860015, + "learning_rate": 1.5263157894736846e-05, + "loss": 0.7987008094787598, + "step": 262 + }, + { + "epoch": 0.07691182921479749, + "grad_norm": 1.4014455476427317, + "learning_rate": 1.5321637426900587e-05, + "loss": 0.7780282497406006, + "step": 263 + }, + { + "epoch": 0.07720426963006287, + "grad_norm": 1.2290290646079385, + "learning_rate": 1.538011695906433e-05, + "loss": 0.6265891194343567, + "step": 264 + }, + { + "epoch": 0.07749671004532827, + "grad_norm": 1.4917276843875658, + "learning_rate": 1.543859649122807e-05, + "loss": 0.6559646129608154, + "step": 265 + }, + { + "epoch": 0.07778915046059365, + "grad_norm": 1.4406503206723986, + "learning_rate": 1.5497076023391816e-05, + "loss": 0.8362047672271729, + "step": 266 + }, + { + "epoch": 0.07808159087585904, + "grad_norm": 1.481487764499426, + "learning_rate": 1.555555555555556e-05, + "loss": 0.707663357257843, + "step": 267 + }, + { + "epoch": 0.07837403129112443, + "grad_norm": 1.398507930714671, + "learning_rate": 1.56140350877193e-05, + "loss": 0.67903071641922, + "step": 268 + }, + { + "epoch": 0.07866647170638982, + "grad_norm": 1.3187056037490035, + "learning_rate": 1.5672514619883042e-05, + "loss": 0.7634894251823425, + "step": 269 + }, + { + "epoch": 0.0789589121216552, + "grad_norm": 1.3791372975152867, + "learning_rate": 1.5730994152046787e-05, + "loss": 0.6395117044448853, + "step": 270 + }, + { + "epoch": 0.0792513525369206, + "grad_norm": 1.4273746235266698, + "learning_rate": 1.578947368421053e-05, + "loss": 0.6948165893554688, + "step": 271 + }, + { + "epoch": 
0.079543792952186, + "grad_norm": 1.342718294320327, + "learning_rate": 1.584795321637427e-05, + "loss": 0.9288383722305298, + "step": 272 + }, + { + "epoch": 0.07983623336745138, + "grad_norm": 1.4727633207578312, + "learning_rate": 1.5906432748538013e-05, + "loss": 0.9291346073150635, + "step": 273 + }, + { + "epoch": 0.08012867378271678, + "grad_norm": 1.3613936763496384, + "learning_rate": 1.5964912280701755e-05, + "loss": 0.7399512529373169, + "step": 274 + }, + { + "epoch": 0.08042111419798216, + "grad_norm": 1.5856072060707183, + "learning_rate": 1.60233918128655e-05, + "loss": 0.6890764236450195, + "step": 275 + }, + { + "epoch": 0.08071355461324756, + "grad_norm": 1.1844012071470522, + "learning_rate": 1.6081871345029242e-05, + "loss": 0.6520324349403381, + "step": 276 + }, + { + "epoch": 0.08100599502851294, + "grad_norm": 1.4161353486782806, + "learning_rate": 1.6140350877192984e-05, + "loss": 0.6726658344268799, + "step": 277 + }, + { + "epoch": 0.08129843544377834, + "grad_norm": 1.5076627116667636, + "learning_rate": 1.6198830409356726e-05, + "loss": 0.7453294992446899, + "step": 278 + }, + { + "epoch": 0.08159087585904372, + "grad_norm": 1.6796077609043067, + "learning_rate": 1.625730994152047e-05, + "loss": 0.755578875541687, + "step": 279 + }, + { + "epoch": 0.08188331627430911, + "grad_norm": 1.576837195920435, + "learning_rate": 1.6315789473684213e-05, + "loss": 0.713086724281311, + "step": 280 + }, + { + "epoch": 0.0821757566895745, + "grad_norm": 1.5223162841340931, + "learning_rate": 1.6374269005847955e-05, + "loss": 0.8714310526847839, + "step": 281 + }, + { + "epoch": 0.0824681971048399, + "grad_norm": 1.4999918578300349, + "learning_rate": 1.6432748538011697e-05, + "loss": 0.6827348470687866, + "step": 282 + }, + { + "epoch": 0.08276063752010528, + "grad_norm": 1.5263417760460645, + "learning_rate": 1.649122807017544e-05, + "loss": 0.8613482713699341, + "step": 283 + }, + { + "epoch": 0.08305307793537067, + "grad_norm": 1.3847261162959308, 
+ "learning_rate": 1.6549707602339184e-05, + "loss": 0.7442763447761536, + "step": 284 + }, + { + "epoch": 0.08334551835063606, + "grad_norm": 1.3784508201309091, + "learning_rate": 1.6608187134502926e-05, + "loss": 0.7505494356155396, + "step": 285 + }, + { + "epoch": 0.08363795876590145, + "grad_norm": 1.3042392110114591, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.7720779776573181, + "step": 286 + }, + { + "epoch": 0.08393039918116683, + "grad_norm": 1.5516828033558783, + "learning_rate": 1.672514619883041e-05, + "loss": 0.7746216654777527, + "step": 287 + }, + { + "epoch": 0.08422283959643223, + "grad_norm": 1.4429865955911445, + "learning_rate": 1.6783625730994155e-05, + "loss": 0.8471436500549316, + "step": 288 + }, + { + "epoch": 0.08451528001169761, + "grad_norm": 1.4116704654777366, + "learning_rate": 1.6842105263157896e-05, + "loss": 0.7117248773574829, + "step": 289 + }, + { + "epoch": 0.08480772042696301, + "grad_norm": 1.4428575448924124, + "learning_rate": 1.690058479532164e-05, + "loss": 0.758680522441864, + "step": 290 + }, + { + "epoch": 0.08510016084222839, + "grad_norm": 1.4632326474117294, + "learning_rate": 1.695906432748538e-05, + "loss": 0.9083560705184937, + "step": 291 + }, + { + "epoch": 0.08539260125749379, + "grad_norm": 1.3444847997489586, + "learning_rate": 1.7017543859649125e-05, + "loss": 0.7457551956176758, + "step": 292 + }, + { + "epoch": 0.08568504167275917, + "grad_norm": 1.423532632485526, + "learning_rate": 1.7076023391812867e-05, + "loss": 0.7463638782501221, + "step": 293 + }, + { + "epoch": 0.08597748208802457, + "grad_norm": 1.4584931442713187, + "learning_rate": 1.713450292397661e-05, + "loss": 0.6983559131622314, + "step": 294 + }, + { + "epoch": 0.08626992250328995, + "grad_norm": 1.3612667828489424, + "learning_rate": 1.719298245614035e-05, + "loss": 0.8043842911720276, + "step": 295 + }, + { + "epoch": 0.08656236291855535, + "grad_norm": 1.5042924331122234, + "learning_rate": 1.7251461988304093e-05, + 
"loss": 0.7150747776031494, + "step": 296 + }, + { + "epoch": 0.08685480333382073, + "grad_norm": 2.0308017082996326, + "learning_rate": 1.7309941520467838e-05, + "loss": 0.7805558443069458, + "step": 297 + }, + { + "epoch": 0.08714724374908613, + "grad_norm": 1.4326584270734728, + "learning_rate": 1.736842105263158e-05, + "loss": 0.7158486843109131, + "step": 298 + }, + { + "epoch": 0.08743968416435151, + "grad_norm": 1.2329719748746066, + "learning_rate": 1.7426900584795322e-05, + "loss": 0.6496458053588867, + "step": 299 + }, + { + "epoch": 0.0877321245796169, + "grad_norm": 1.3255444740397837, + "learning_rate": 1.7485380116959064e-05, + "loss": 0.7488506436347961, + "step": 300 + }, + { + "epoch": 0.08802456499488229, + "grad_norm": 1.5658056782887144, + "learning_rate": 1.754385964912281e-05, + "loss": 0.8370999097824097, + "step": 301 + }, + { + "epoch": 0.08831700541014768, + "grad_norm": 1.3342670844496862, + "learning_rate": 1.760233918128655e-05, + "loss": 0.6624353528022766, + "step": 302 + }, + { + "epoch": 0.08860944582541307, + "grad_norm": 1.4627534576360353, + "learning_rate": 1.7660818713450293e-05, + "loss": 0.6861047148704529, + "step": 303 + }, + { + "epoch": 0.08890188624067846, + "grad_norm": 1.6532053166188327, + "learning_rate": 1.7719298245614035e-05, + "loss": 0.746711015701294, + "step": 304 + }, + { + "epoch": 0.08919432665594385, + "grad_norm": 1.554160121250669, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.7794955968856812, + "step": 305 + }, + { + "epoch": 0.08948676707120924, + "grad_norm": 1.7649976265227958, + "learning_rate": 1.7836257309941522e-05, + "loss": 0.7202489972114563, + "step": 306 + }, + { + "epoch": 0.08977920748647462, + "grad_norm": 1.6262384567896693, + "learning_rate": 1.7894736842105264e-05, + "loss": 0.7252119183540344, + "step": 307 + }, + { + "epoch": 0.09007164790174002, + "grad_norm": 1.5452508352574224, + "learning_rate": 1.7953216374269006e-05, + "loss": 0.9168737530708313, + "step": 308 + }, + 
{ + "epoch": 0.0903640883170054, + "grad_norm": 1.487069935429652, + "learning_rate": 1.8011695906432747e-05, + "loss": 0.7647944688796997, + "step": 309 + }, + { + "epoch": 0.0906565287322708, + "grad_norm": 1.7447386842901849, + "learning_rate": 1.8070175438596493e-05, + "loss": 0.7836136817932129, + "step": 310 + }, + { + "epoch": 0.0909489691475362, + "grad_norm": 1.2604562921756688, + "learning_rate": 1.8128654970760235e-05, + "loss": 0.6495587825775146, + "step": 311 + }, + { + "epoch": 0.09124140956280158, + "grad_norm": 1.5613577023920442, + "learning_rate": 1.8187134502923976e-05, + "loss": 0.7266290187835693, + "step": 312 + }, + { + "epoch": 0.09153384997806698, + "grad_norm": 1.9984801625992445, + "learning_rate": 1.824561403508772e-05, + "loss": 0.8417587876319885, + "step": 313 + }, + { + "epoch": 0.09182629039333236, + "grad_norm": 1.5767499272635297, + "learning_rate": 1.8304093567251464e-05, + "loss": 0.8431564569473267, + "step": 314 + }, + { + "epoch": 0.09211873080859775, + "grad_norm": 1.4390326104450535, + "learning_rate": 1.8362573099415205e-05, + "loss": 0.7724050283432007, + "step": 315 + }, + { + "epoch": 0.09241117122386314, + "grad_norm": 1.4145032164176374, + "learning_rate": 1.8421052631578947e-05, + "loss": 0.6687352657318115, + "step": 316 + }, + { + "epoch": 0.09270361163912853, + "grad_norm": 1.3696816256616517, + "learning_rate": 1.847953216374269e-05, + "loss": 0.7465454339981079, + "step": 317 + }, + { + "epoch": 0.09299605205439392, + "grad_norm": 1.507661205433782, + "learning_rate": 1.8538011695906434e-05, + "loss": 0.6944088935852051, + "step": 318 + }, + { + "epoch": 0.09328849246965931, + "grad_norm": 1.2922205760098913, + "learning_rate": 1.8596491228070176e-05, + "loss": 0.6692598462104797, + "step": 319 + }, + { + "epoch": 0.0935809328849247, + "grad_norm": 1.4345621362788812, + "learning_rate": 1.8654970760233918e-05, + "loss": 0.7287981510162354, + "step": 320 + }, + { + "epoch": 0.09387337330019009, + "grad_norm": 
1.426362426046858, + "learning_rate": 1.871345029239766e-05, + "loss": 0.704437255859375, + "step": 321 + }, + { + "epoch": 0.09416581371545547, + "grad_norm": 1.2757141813139592, + "learning_rate": 1.8771929824561405e-05, + "loss": 0.6425009965896606, + "step": 322 + }, + { + "epoch": 0.09445825413072087, + "grad_norm": 1.4929466314279891, + "learning_rate": 1.8830409356725147e-05, + "loss": 0.765799880027771, + "step": 323 + }, + { + "epoch": 0.09475069454598625, + "grad_norm": 1.482293870539422, + "learning_rate": 1.888888888888889e-05, + "loss": 0.9151520133018494, + "step": 324 + }, + { + "epoch": 0.09504313496125165, + "grad_norm": 1.5087468194478204, + "learning_rate": 1.894736842105263e-05, + "loss": 0.8753486275672913, + "step": 325 + }, + { + "epoch": 0.09533557537651703, + "grad_norm": 1.649363404228967, + "learning_rate": 1.9005847953216376e-05, + "loss": 0.7652826309204102, + "step": 326 + }, + { + "epoch": 0.09562801579178243, + "grad_norm": 1.405975419146797, + "learning_rate": 1.9064327485380118e-05, + "loss": 0.7309015393257141, + "step": 327 + }, + { + "epoch": 0.09592045620704781, + "grad_norm": 1.6766609888433524, + "learning_rate": 1.912280701754386e-05, + "loss": 0.7656553983688354, + "step": 328 + }, + { + "epoch": 0.09621289662231321, + "grad_norm": 1.4942542074310006, + "learning_rate": 1.9181286549707602e-05, + "loss": 0.7400631904602051, + "step": 329 + }, + { + "epoch": 0.09650533703757859, + "grad_norm": 1.4740815055784118, + "learning_rate": 1.9239766081871347e-05, + "loss": 0.6812465190887451, + "step": 330 + }, + { + "epoch": 0.09679777745284399, + "grad_norm": 1.4394939888427052, + "learning_rate": 1.929824561403509e-05, + "loss": 0.6820628046989441, + "step": 331 + }, + { + "epoch": 0.09709021786810937, + "grad_norm": 1.9824484648298863, + "learning_rate": 1.935672514619883e-05, + "loss": 0.7437758445739746, + "step": 332 + }, + { + "epoch": 0.09738265828337477, + "grad_norm": 1.4755288186056683, + "learning_rate": 
1.9415204678362573e-05, + "loss": 0.8011504411697388, + "step": 333 + }, + { + "epoch": 0.09767509869864015, + "grad_norm": 1.3829561395962537, + "learning_rate": 1.9473684210526318e-05, + "loss": 0.7437810301780701, + "step": 334 + }, + { + "epoch": 0.09796753911390554, + "grad_norm": 1.328838303483977, + "learning_rate": 1.953216374269006e-05, + "loss": 0.7419568300247192, + "step": 335 + }, + { + "epoch": 0.09825997952917093, + "grad_norm": 1.4291436246188844, + "learning_rate": 1.9590643274853802e-05, + "loss": 0.7805042266845703, + "step": 336 + }, + { + "epoch": 0.09855241994443632, + "grad_norm": 1.3104711543583085, + "learning_rate": 1.9649122807017544e-05, + "loss": 0.6952530145645142, + "step": 337 + }, + { + "epoch": 0.0988448603597017, + "grad_norm": 1.313224719465845, + "learning_rate": 1.970760233918129e-05, + "loss": 0.7669289112091064, + "step": 338 + }, + { + "epoch": 0.0991373007749671, + "grad_norm": 1.4101609769639065, + "learning_rate": 1.976608187134503e-05, + "loss": 0.8033919930458069, + "step": 339 + }, + { + "epoch": 0.09942974119023248, + "grad_norm": 1.2883543538345825, + "learning_rate": 1.9824561403508773e-05, + "loss": 0.6523177623748779, + "step": 340 + }, + { + "epoch": 0.09972218160549788, + "grad_norm": 1.3960808628411998, + "learning_rate": 1.9883040935672515e-05, + "loss": 0.7221896648406982, + "step": 341 + }, + { + "epoch": 0.10001462202076326, + "grad_norm": 1.2255647850534943, + "learning_rate": 1.994152046783626e-05, + "loss": 0.6054700016975403, + "step": 342 + }, + { + "epoch": 0.10030706243602866, + "grad_norm": 1.6303566611100393, + "learning_rate": 2e-05, + "loss": 0.8368290662765503, + "step": 343 + }, + { + "epoch": 0.10059950285129404, + "grad_norm": 1.4276425594743465, + "learning_rate": 1.99999988312804e-05, + "loss": 0.9075677990913391, + "step": 344 + }, + { + "epoch": 0.10089194326655944, + "grad_norm": 1.4517524210925274, + "learning_rate": 1.999999532512188e-05, + "loss": 0.7202495336532593, + "step": 345 + 
}, + { + "epoch": 0.10118438368182482, + "grad_norm": 1.5340311782896001, + "learning_rate": 1.9999989481525245e-05, + "loss": 0.7373536229133606, + "step": 346 + }, + { + "epoch": 0.10147682409709022, + "grad_norm": 1.3128585037330316, + "learning_rate": 1.9999981300491873e-05, + "loss": 0.7292035222053528, + "step": 347 + }, + { + "epoch": 0.10176926451235561, + "grad_norm": 1.2681362139682877, + "learning_rate": 1.9999970782023673e-05, + "loss": 0.8970675468444824, + "step": 348 + }, + { + "epoch": 0.102061704927621, + "grad_norm": 1.384714606589521, + "learning_rate": 1.9999957926123104e-05, + "loss": 0.7909846305847168, + "step": 349 + }, + { + "epoch": 0.1023541453428864, + "grad_norm": 1.3537270396362884, + "learning_rate": 1.999994273279317e-05, + "loss": 0.7784097790718079, + "step": 350 + }, + { + "epoch": 0.10264658575815178, + "grad_norm": 1.4008631296209513, + "learning_rate": 1.9999925202037422e-05, + "loss": 0.7129874229431152, + "step": 351 + }, + { + "epoch": 0.10293902617341717, + "grad_norm": 1.3322666039831734, + "learning_rate": 1.999990533385996e-05, + "loss": 0.7185519337654114, + "step": 352 + }, + { + "epoch": 0.10323146658868255, + "grad_norm": 1.379111892126872, + "learning_rate": 1.9999883128265428e-05, + "loss": 0.812228798866272, + "step": 353 + }, + { + "epoch": 0.10352390700394795, + "grad_norm": 1.2831139743741589, + "learning_rate": 1.999985858525901e-05, + "loss": 0.7187886238098145, + "step": 354 + }, + { + "epoch": 0.10381634741921333, + "grad_norm": 1.133776070922858, + "learning_rate": 1.9999831704846452e-05, + "loss": 0.6618789434432983, + "step": 355 + }, + { + "epoch": 0.10410878783447873, + "grad_norm": 1.5601168208020613, + "learning_rate": 1.999980248703403e-05, + "loss": 0.9226458072662354, + "step": 356 + }, + { + "epoch": 0.10440122824974411, + "grad_norm": 1.3702611517072447, + "learning_rate": 1.9999770931828578e-05, + "loss": 0.7326352596282959, + "step": 357 + }, + { + "epoch": 0.10469366866500951, + "grad_norm": 
1.4755549813416367, + "learning_rate": 1.9999737039237472e-05, + "loss": 0.719240128993988, + "step": 358 + }, + { + "epoch": 0.10498610908027489, + "grad_norm": 1.2914576093532248, + "learning_rate": 1.999970080926863e-05, + "loss": 0.7380290031433105, + "step": 359 + }, + { + "epoch": 0.10527854949554029, + "grad_norm": 1.6255135036531254, + "learning_rate": 1.9999662241930523e-05, + "loss": 0.736219048500061, + "step": 360 + }, + { + "epoch": 0.10557098991080567, + "grad_norm": 1.381933387611508, + "learning_rate": 1.999962133723217e-05, + "loss": 0.8160735368728638, + "step": 361 + }, + { + "epoch": 0.10586343032607107, + "grad_norm": 1.4607575491849774, + "learning_rate": 1.9999578095183126e-05, + "loss": 0.6679781675338745, + "step": 362 + }, + { + "epoch": 0.10615587074133645, + "grad_norm": 1.551414308388604, + "learning_rate": 1.9999532515793498e-05, + "loss": 0.7670542001724243, + "step": 363 + }, + { + "epoch": 0.10644831115660185, + "grad_norm": 1.2802491712211252, + "learning_rate": 1.9999484599073945e-05, + "loss": 0.6395057439804077, + "step": 364 + }, + { + "epoch": 0.10674075157186723, + "grad_norm": 1.571289013739176, + "learning_rate": 1.9999434345035666e-05, + "loss": 0.7226368188858032, + "step": 365 + }, + { + "epoch": 0.10703319198713263, + "grad_norm": 1.4755023089198305, + "learning_rate": 1.9999381753690403e-05, + "loss": 0.6236128211021423, + "step": 366 + }, + { + "epoch": 0.10732563240239801, + "grad_norm": 1.2507526885979663, + "learning_rate": 1.9999326825050455e-05, + "loss": 0.5937299132347107, + "step": 367 + }, + { + "epoch": 0.1076180728176634, + "grad_norm": 1.294239826855842, + "learning_rate": 1.999926955912866e-05, + "loss": 0.6014857292175293, + "step": 368 + }, + { + "epoch": 0.10791051323292879, + "grad_norm": 1.1031323946933334, + "learning_rate": 1.9999209955938394e-05, + "loss": 0.5898704528808594, + "step": 369 + }, + { + "epoch": 0.10820295364819418, + "grad_norm": 1.475520460275832, + "learning_rate": 
1.9999148015493602e-05, + "loss": 0.6879048943519592, + "step": 370 + }, + { + "epoch": 0.10849539406345957, + "grad_norm": 1.5235484717330832, + "learning_rate": 1.999908373780876e-05, + "loss": 0.781298041343689, + "step": 371 + }, + { + "epoch": 0.10878783447872496, + "grad_norm": 1.2913472995661532, + "learning_rate": 1.9999017122898886e-05, + "loss": 0.6997531652450562, + "step": 372 + }, + { + "epoch": 0.10908027489399034, + "grad_norm": 1.2104967688689228, + "learning_rate": 1.9998948170779556e-05, + "loss": 0.6979694366455078, + "step": 373 + }, + { + "epoch": 0.10937271530925574, + "grad_norm": 1.6154905149339498, + "learning_rate": 1.999887688146689e-05, + "loss": 0.8069214820861816, + "step": 374 + }, + { + "epoch": 0.10966515572452112, + "grad_norm": 1.4534879205249425, + "learning_rate": 1.9998803254977538e-05, + "loss": 0.875137448310852, + "step": 375 + }, + { + "epoch": 0.10995759613978652, + "grad_norm": 1.4252221781216903, + "learning_rate": 1.9998727291328725e-05, + "loss": 0.8267173767089844, + "step": 376 + }, + { + "epoch": 0.1102500365550519, + "grad_norm": 1.3704709368430794, + "learning_rate": 1.99986489905382e-05, + "loss": 0.7589337825775146, + "step": 377 + }, + { + "epoch": 0.1105424769703173, + "grad_norm": 1.7248131297126135, + "learning_rate": 1.999856835262427e-05, + "loss": 0.7479992508888245, + "step": 378 + }, + { + "epoch": 0.11083491738558268, + "grad_norm": 1.2827951417341936, + "learning_rate": 1.999848537760577e-05, + "loss": 0.7315084934234619, + "step": 379 + }, + { + "epoch": 0.11112735780084808, + "grad_norm": 1.2954297558049002, + "learning_rate": 1.9998400065502113e-05, + "loss": 0.6256793737411499, + "step": 380 + }, + { + "epoch": 0.11141979821611346, + "grad_norm": 1.3569633064170001, + "learning_rate": 1.999831241633323e-05, + "loss": 0.7521710395812988, + "step": 381 + }, + { + "epoch": 0.11171223863137886, + "grad_norm": 1.0851029845548303, + "learning_rate": 1.999822243011961e-05, + "loss": 0.6824651956558228, + 
"step": 382 + }, + { + "epoch": 0.11200467904664424, + "grad_norm": 1.4206429861314096, + "learning_rate": 1.9998130106882286e-05, + "loss": 0.7254977226257324, + "step": 383 + }, + { + "epoch": 0.11229711946190964, + "grad_norm": 1.4795080730717471, + "learning_rate": 1.999803544664284e-05, + "loss": 0.8263741731643677, + "step": 384 + }, + { + "epoch": 0.11258955987717502, + "grad_norm": 1.3096519492267191, + "learning_rate": 1.9997938449423397e-05, + "loss": 0.6829507350921631, + "step": 385 + }, + { + "epoch": 0.11288200029244042, + "grad_norm": 1.2970935037264724, + "learning_rate": 1.9997839115246632e-05, + "loss": 0.7452428340911865, + "step": 386 + }, + { + "epoch": 0.11317444070770581, + "grad_norm": 1.322513824449788, + "learning_rate": 1.999773744413576e-05, + "loss": 0.7900702953338623, + "step": 387 + }, + { + "epoch": 0.1134668811229712, + "grad_norm": 1.288312120065537, + "learning_rate": 1.9997633436114547e-05, + "loss": 0.6215303540229797, + "step": 388 + }, + { + "epoch": 0.11375932153823659, + "grad_norm": 1.3132613017546322, + "learning_rate": 1.999752709120731e-05, + "loss": 0.798041820526123, + "step": 389 + }, + { + "epoch": 0.11405176195350197, + "grad_norm": 1.1590478323977431, + "learning_rate": 1.9997418409438893e-05, + "loss": 0.6033064126968384, + "step": 390 + }, + { + "epoch": 0.11434420236876737, + "grad_norm": 1.0686988063553795, + "learning_rate": 1.9997307390834712e-05, + "loss": 0.6358453631401062, + "step": 391 + }, + { + "epoch": 0.11463664278403275, + "grad_norm": 1.2775095189945147, + "learning_rate": 1.999719403542071e-05, + "loss": 0.6544308662414551, + "step": 392 + }, + { + "epoch": 0.11492908319929815, + "grad_norm": 1.3305771925144483, + "learning_rate": 1.9997078343223393e-05, + "loss": 0.73077392578125, + "step": 393 + }, + { + "epoch": 0.11522152361456353, + "grad_norm": 1.1914838503287841, + "learning_rate": 1.9996960314269792e-05, + "loss": 0.5874192118644714, + "step": 394 + }, + { + "epoch": 0.11551396402982893, 
+ "grad_norm": 1.420658082184349, + "learning_rate": 1.9996839948587503e-05, + "loss": 0.8242438435554504, + "step": 395 + }, + { + "epoch": 0.11580640444509431, + "grad_norm": 1.705790457884444, + "learning_rate": 1.9996717246204655e-05, + "loss": 0.9496668577194214, + "step": 396 + }, + { + "epoch": 0.1160988448603597, + "grad_norm": 1.2258839048083405, + "learning_rate": 1.9996592207149933e-05, + "loss": 0.6940287351608276, + "step": 397 + }, + { + "epoch": 0.11639128527562509, + "grad_norm": 1.4226760671412086, + "learning_rate": 1.999646483145256e-05, + "loss": 0.7403827905654907, + "step": 398 + }, + { + "epoch": 0.11668372569089049, + "grad_norm": 1.441557495225195, + "learning_rate": 1.9996335119142315e-05, + "loss": 0.7493172287940979, + "step": 399 + }, + { + "epoch": 0.11697616610615587, + "grad_norm": 1.1233068749163333, + "learning_rate": 1.9996203070249516e-05, + "loss": 0.6048015356063843, + "step": 400 + }, + { + "epoch": 0.11726860652142126, + "grad_norm": 1.218449987518831, + "learning_rate": 1.9996068684805025e-05, + "loss": 0.7220426797866821, + "step": 401 + }, + { + "epoch": 0.11756104693668665, + "grad_norm": 1.4820269559236292, + "learning_rate": 1.9995931962840255e-05, + "loss": 0.7294620275497437, + "step": 402 + }, + { + "epoch": 0.11785348735195204, + "grad_norm": 1.2693334480850886, + "learning_rate": 1.999579290438717e-05, + "loss": 0.7075647115707397, + "step": 403 + }, + { + "epoch": 0.11814592776721743, + "grad_norm": 1.4353448940274405, + "learning_rate": 1.9995651509478264e-05, + "loss": 0.7396657466888428, + "step": 404 + }, + { + "epoch": 0.11843836818248282, + "grad_norm": 1.5214596029668779, + "learning_rate": 1.999550777814659e-05, + "loss": 0.8240506649017334, + "step": 405 + }, + { + "epoch": 0.1187308085977482, + "grad_norm": 1.3463253886040645, + "learning_rate": 1.9995361710425752e-05, + "loss": 0.7518147826194763, + "step": 406 + }, + { + "epoch": 0.1190232490130136, + "grad_norm": 1.3938258800517485, + "learning_rate": 
1.9995213306349886e-05, + "loss": 0.6998933553695679, + "step": 407 + }, + { + "epoch": 0.11931568942827898, + "grad_norm": 2.8811625928277134, + "learning_rate": 1.999506256595368e-05, + "loss": 0.659205973148346, + "step": 408 + }, + { + "epoch": 0.11960812984354438, + "grad_norm": 1.6815673603725616, + "learning_rate": 1.9994909489272372e-05, + "loss": 0.7826964259147644, + "step": 409 + }, + { + "epoch": 0.11990057025880976, + "grad_norm": 1.4225942370637599, + "learning_rate": 1.999475407634174e-05, + "loss": 0.770768404006958, + "step": 410 + }, + { + "epoch": 0.12019301067407516, + "grad_norm": 1.4031411556955713, + "learning_rate": 1.9994596327198113e-05, + "loss": 0.7390692234039307, + "step": 411 + }, + { + "epoch": 0.12048545108934054, + "grad_norm": 1.238945633280151, + "learning_rate": 1.999443624187836e-05, + "loss": 0.7092628479003906, + "step": 412 + }, + { + "epoch": 0.12077789150460594, + "grad_norm": 1.2795019723948553, + "learning_rate": 1.9994273820419903e-05, + "loss": 0.5252765417098999, + "step": 413 + }, + { + "epoch": 0.12107033191987132, + "grad_norm": 1.389583747663469, + "learning_rate": 1.9994109062860707e-05, + "loss": 0.8131704330444336, + "step": 414 + }, + { + "epoch": 0.12136277233513672, + "grad_norm": 1.490804798338551, + "learning_rate": 1.9993941969239284e-05, + "loss": 0.8257562518119812, + "step": 415 + }, + { + "epoch": 0.1216552127504021, + "grad_norm": 1.5541597255876767, + "learning_rate": 1.999377253959469e-05, + "loss": 0.7163048982620239, + "step": 416 + }, + { + "epoch": 0.1219476531656675, + "grad_norm": 1.590877283394053, + "learning_rate": 1.9993600773966528e-05, + "loss": 0.7216504812240601, + "step": 417 + }, + { + "epoch": 0.12224009358093288, + "grad_norm": 1.6748981575800963, + "learning_rate": 1.9993426672394945e-05, + "loss": 0.7831340432167053, + "step": 418 + }, + { + "epoch": 0.12253253399619828, + "grad_norm": 1.3976993960000088, + "learning_rate": 1.9993250234920638e-05, + "loss": 0.7675709128379822, + 
"step": 419 + }, + { + "epoch": 0.12282497441146366, + "grad_norm": 1.454911379398845, + "learning_rate": 1.999307146158485e-05, + "loss": 0.8085238337516785, + "step": 420 + }, + { + "epoch": 0.12311741482672905, + "grad_norm": 1.2979608734451222, + "learning_rate": 1.9992890352429368e-05, + "loss": 0.735150933265686, + "step": 421 + }, + { + "epoch": 0.12340985524199444, + "grad_norm": 1.2046206432187132, + "learning_rate": 1.9992706907496523e-05, + "loss": 0.612186074256897, + "step": 422 + }, + { + "epoch": 0.12370229565725983, + "grad_norm": 1.364838486847665, + "learning_rate": 1.9992521126829194e-05, + "loss": 0.6636590957641602, + "step": 423 + }, + { + "epoch": 0.12399473607252522, + "grad_norm": 1.4068215451581474, + "learning_rate": 1.9992333010470806e-05, + "loss": 0.6814526319503784, + "step": 424 + }, + { + "epoch": 0.12428717648779061, + "grad_norm": 1.3620595505436823, + "learning_rate": 1.9992142558465335e-05, + "loss": 0.6940894722938538, + "step": 425 + }, + { + "epoch": 0.12457961690305601, + "grad_norm": 1.3427645949787534, + "learning_rate": 1.9991949770857294e-05, + "loss": 0.7485121488571167, + "step": 426 + }, + { + "epoch": 0.12487205731832139, + "grad_norm": 1.266832638558228, + "learning_rate": 1.9991754647691744e-05, + "loss": 0.5315885543823242, + "step": 427 + }, + { + "epoch": 0.1251644977335868, + "grad_norm": 1.2511757429133081, + "learning_rate": 1.9991557189014297e-05, + "loss": 0.7416529655456543, + "step": 428 + }, + { + "epoch": 0.12545693814885217, + "grad_norm": 1.4031357379707678, + "learning_rate": 1.9991357394871106e-05, + "loss": 0.7937026023864746, + "step": 429 + }, + { + "epoch": 0.12574937856411755, + "grad_norm": 1.3448962462478107, + "learning_rate": 1.9991155265308872e-05, + "loss": 0.7009662389755249, + "step": 430 + }, + { + "epoch": 0.12604181897938296, + "grad_norm": 1.3042132277590721, + "learning_rate": 1.999095080037484e-05, + "loss": 0.6577681303024292, + "step": 431 + }, + { + "epoch": 
0.12633425939464835, + "grad_norm": 1.4036627734956777, + "learning_rate": 1.9990744000116808e-05, + "loss": 0.7372399568557739, + "step": 432 + }, + { + "epoch": 0.12662669980991373, + "grad_norm": 1.3819832545517663, + "learning_rate": 1.999053486458311e-05, + "loss": 0.5959814190864563, + "step": 433 + }, + { + "epoch": 0.1269191402251791, + "grad_norm": 1.424207998116027, + "learning_rate": 1.999032339382263e-05, + "loss": 0.6684107780456543, + "step": 434 + }, + { + "epoch": 0.12721158064044452, + "grad_norm": 1.7048493578408517, + "learning_rate": 1.99901095878848e-05, + "loss": 0.8837687373161316, + "step": 435 + }, + { + "epoch": 0.1275040210557099, + "grad_norm": 3.7468635382669717, + "learning_rate": 1.9989893446819594e-05, + "loss": 0.7128579616546631, + "step": 436 + }, + { + "epoch": 0.1277964614709753, + "grad_norm": 1.2617709714670788, + "learning_rate": 1.9989674970677533e-05, + "loss": 0.6634687185287476, + "step": 437 + }, + { + "epoch": 0.12808890188624067, + "grad_norm": 1.626814629507008, + "learning_rate": 1.998945415950969e-05, + "loss": 0.7866299152374268, + "step": 438 + }, + { + "epoch": 0.12838134230150608, + "grad_norm": 1.6912246432889755, + "learning_rate": 1.998923101336767e-05, + "loss": 0.8104820251464844, + "step": 439 + }, + { + "epoch": 0.12867378271677146, + "grad_norm": 1.3163679319076276, + "learning_rate": 1.9989005532303637e-05, + "loss": 0.6643097400665283, + "step": 440 + }, + { + "epoch": 0.12896622313203684, + "grad_norm": 1.304280975921877, + "learning_rate": 1.9988777716370293e-05, + "loss": 0.7663843631744385, + "step": 441 + }, + { + "epoch": 0.12925866354730223, + "grad_norm": 1.4275530439491644, + "learning_rate": 1.9988547565620896e-05, + "loss": 0.8831629753112793, + "step": 442 + }, + { + "epoch": 0.12955110396256764, + "grad_norm": 1.2581390355141424, + "learning_rate": 1.9988315080109233e-05, + "loss": 0.6889798045158386, + "step": 443 + }, + { + "epoch": 0.12984354437783302, + "grad_norm": 1.2589816711321935, 
+ "learning_rate": 1.9988080259889652e-05, + "loss": 0.8173589706420898, + "step": 444 + }, + { + "epoch": 0.1301359847930984, + "grad_norm": 1.437216407920067, + "learning_rate": 1.998784310501704e-05, + "loss": 0.7444369196891785, + "step": 445 + }, + { + "epoch": 0.13042842520836379, + "grad_norm": 1.2527388287385341, + "learning_rate": 1.998760361554682e-05, + "loss": 0.6728573441505432, + "step": 446 + }, + { + "epoch": 0.1307208656236292, + "grad_norm": 1.4620149588082576, + "learning_rate": 1.998736179153499e-05, + "loss": 0.6398168802261353, + "step": 447 + }, + { + "epoch": 0.13101330603889458, + "grad_norm": 1.3925962417611275, + "learning_rate": 1.9987117633038063e-05, + "loss": 0.7367146015167236, + "step": 448 + }, + { + "epoch": 0.13130574645415996, + "grad_norm": 1.3497781950543108, + "learning_rate": 1.998687114011311e-05, + "loss": 0.7072159051895142, + "step": 449 + }, + { + "epoch": 0.13159818686942534, + "grad_norm": 1.402234544131691, + "learning_rate": 1.998662231281775e-05, + "loss": 0.7899993062019348, + "step": 450 + }, + { + "epoch": 0.13189062728469075, + "grad_norm": 1.4376114251018388, + "learning_rate": 1.9986371151210146e-05, + "loss": 0.7668592929840088, + "step": 451 + }, + { + "epoch": 0.13218306769995614, + "grad_norm": 1.3943197925338484, + "learning_rate": 1.9986117655349003e-05, + "loss": 0.7222825288772583, + "step": 452 + }, + { + "epoch": 0.13247550811522152, + "grad_norm": 1.2939952744587226, + "learning_rate": 1.9985861825293577e-05, + "loss": 0.7301540374755859, + "step": 453 + }, + { + "epoch": 0.1327679485304869, + "grad_norm": 1.174339392511722, + "learning_rate": 1.998560366110366e-05, + "loss": 0.6517907381057739, + "step": 454 + }, + { + "epoch": 0.1330603889457523, + "grad_norm": 1.5763167634786863, + "learning_rate": 1.99853431628396e-05, + "loss": 0.6889342069625854, + "step": 455 + }, + { + "epoch": 0.1333528293610177, + "grad_norm": 1.525770213874127, + "learning_rate": 1.9985080330562293e-05, + "loss": 
0.6804303526878357, + "step": 456 + }, + { + "epoch": 0.13364526977628308, + "grad_norm": 1.3944930335298842, + "learning_rate": 1.9984815164333163e-05, + "loss": 0.7699184417724609, + "step": 457 + }, + { + "epoch": 0.13393771019154846, + "grad_norm": 1.4886205672815649, + "learning_rate": 1.99845476642142e-05, + "loss": 0.7470533847808838, + "step": 458 + }, + { + "epoch": 0.13423015060681387, + "grad_norm": 1.251305257809984, + "learning_rate": 1.9984277830267927e-05, + "loss": 0.6689419746398926, + "step": 459 + }, + { + "epoch": 0.13452259102207925, + "grad_norm": 1.5088252817247363, + "learning_rate": 1.998400566255742e-05, + "loss": 0.6395387649536133, + "step": 460 + }, + { + "epoch": 0.13481503143734463, + "grad_norm": 1.3414013526988133, + "learning_rate": 1.9983731161146288e-05, + "loss": 0.7785208225250244, + "step": 461 + }, + { + "epoch": 0.13510747185261002, + "grad_norm": 1.2995640327613904, + "learning_rate": 1.9983454326098703e-05, + "loss": 0.6864018440246582, + "step": 462 + }, + { + "epoch": 0.13539991226787543, + "grad_norm": 1.424075352019454, + "learning_rate": 1.9983175157479366e-05, + "loss": 0.7201317548751831, + "step": 463 + }, + { + "epoch": 0.1356923526831408, + "grad_norm": 1.4977322356937255, + "learning_rate": 1.9982893655353534e-05, + "loss": 0.7128555774688721, + "step": 464 + }, + { + "epoch": 0.1359847930984062, + "grad_norm": 1.2421635772982216, + "learning_rate": 1.998260981978701e-05, + "loss": 0.7252457141876221, + "step": 465 + }, + { + "epoch": 0.1362772335136716, + "grad_norm": 1.472555101507684, + "learning_rate": 1.9982323650846137e-05, + "loss": 0.7453348636627197, + "step": 466 + }, + { + "epoch": 0.13656967392893699, + "grad_norm": 1.153602031844393, + "learning_rate": 1.9982035148597804e-05, + "loss": 0.6643078923225403, + "step": 467 + }, + { + "epoch": 0.13686211434420237, + "grad_norm": 1.280273878296217, + "learning_rate": 1.9981744313109445e-05, + "loss": 0.7249360084533691, + "step": 468 + }, + { + "epoch": 
0.13715455475946775, + "grad_norm": 1.2363385614561972, + "learning_rate": 1.9981451144449042e-05, + "loss": 0.8179303407669067, + "step": 469 + }, + { + "epoch": 0.13744699517473316, + "grad_norm": 1.1335812448130365, + "learning_rate": 1.9981155642685125e-05, + "loss": 0.6763637661933899, + "step": 470 + }, + { + "epoch": 0.13773943558999854, + "grad_norm": 1.4603088026603306, + "learning_rate": 1.998085780788676e-05, + "loss": 0.6684300303459167, + "step": 471 + }, + { + "epoch": 0.13803187600526393, + "grad_norm": 1.2670786265894947, + "learning_rate": 1.9980557640123566e-05, + "loss": 0.7251675128936768, + "step": 472 + }, + { + "epoch": 0.1383243164205293, + "grad_norm": 1.5269819113708596, + "learning_rate": 1.998025513946571e-05, + "loss": 0.7146456241607666, + "step": 473 + }, + { + "epoch": 0.13861675683579472, + "grad_norm": 1.2263952606430522, + "learning_rate": 1.9979950305983895e-05, + "loss": 0.7067978382110596, + "step": 474 + }, + { + "epoch": 0.1389091972510601, + "grad_norm": 1.2396761565289731, + "learning_rate": 1.9979643139749373e-05, + "loss": 0.7017637491226196, + "step": 475 + }, + { + "epoch": 0.13920163766632548, + "grad_norm": 1.397663972134979, + "learning_rate": 1.9979333640833947e-05, + "loss": 0.7511367201805115, + "step": 476 + }, + { + "epoch": 0.13949407808159087, + "grad_norm": 1.5675722536579784, + "learning_rate": 1.997902180930996e-05, + "loss": 0.8129127025604248, + "step": 477 + }, + { + "epoch": 0.13978651849685628, + "grad_norm": 1.3801608404871573, + "learning_rate": 1.9978707645250293e-05, + "loss": 0.7760868072509766, + "step": 478 + }, + { + "epoch": 0.14007895891212166, + "grad_norm": 1.2722362515735255, + "learning_rate": 1.9978391148728388e-05, + "loss": 0.5190733671188354, + "step": 479 + }, + { + "epoch": 0.14037139932738704, + "grad_norm": 1.4267690174722667, + "learning_rate": 1.9978072319818222e-05, + "loss": 0.759798526763916, + "step": 480 + }, + { + "epoch": 0.14066383974265242, + "grad_norm": 
1.3594087764036291, + "learning_rate": 1.997775115859432e-05, + "loss": 0.5750235319137573, + "step": 481 + }, + { + "epoch": 0.14095628015791783, + "grad_norm": 1.5288357817907694, + "learning_rate": 1.9977427665131748e-05, + "loss": 0.6837687492370605, + "step": 482 + }, + { + "epoch": 0.14124872057318322, + "grad_norm": 1.4085455647433316, + "learning_rate": 1.9977101839506123e-05, + "loss": 0.8774302005767822, + "step": 483 + }, + { + "epoch": 0.1415411609884486, + "grad_norm": 1.3951237263634118, + "learning_rate": 1.9976773681793605e-05, + "loss": 0.6447024345397949, + "step": 484 + }, + { + "epoch": 0.14183360140371398, + "grad_norm": 1.3077152366881364, + "learning_rate": 1.99764431920709e-05, + "loss": 0.6212965250015259, + "step": 485 + }, + { + "epoch": 0.1421260418189794, + "grad_norm": 1.7246179492768339, + "learning_rate": 1.9976110370415257e-05, + "loss": 0.7606823444366455, + "step": 486 + }, + { + "epoch": 0.14241848223424478, + "grad_norm": 1.6009360634049956, + "learning_rate": 1.9975775216904468e-05, + "loss": 0.792106032371521, + "step": 487 + }, + { + "epoch": 0.14271092264951016, + "grad_norm": 1.526072177508378, + "learning_rate": 1.997543773161688e-05, + "loss": 0.828373372554779, + "step": 488 + }, + { + "epoch": 0.14300336306477554, + "grad_norm": 1.2193329399673667, + "learning_rate": 1.997509791463137e-05, + "loss": 0.7148743867874146, + "step": 489 + }, + { + "epoch": 0.14329580348004095, + "grad_norm": 1.617921839516307, + "learning_rate": 1.9974755766027372e-05, + "loss": 0.6566554307937622, + "step": 490 + }, + { + "epoch": 0.14358824389530633, + "grad_norm": 1.2041404679997165, + "learning_rate": 1.9974411285884865e-05, + "loss": 0.7833706140518188, + "step": 491 + }, + { + "epoch": 0.14388068431057172, + "grad_norm": 1.3715764541616051, + "learning_rate": 1.997406447428436e-05, + "loss": 0.7661226987838745, + "step": 492 + }, + { + "epoch": 0.1441731247258371, + "grad_norm": 1.2510873907811162, + "learning_rate": 
1.9973715331306935e-05, + "loss": 0.5403884649276733, + "step": 493 + }, + { + "epoch": 0.1444655651411025, + "grad_norm": 1.417853529635827, + "learning_rate": 1.9973363857034183e-05, + "loss": 0.7744722366333008, + "step": 494 + }, + { + "epoch": 0.1447580055563679, + "grad_norm": 1.7245567814035911, + "learning_rate": 1.9973010051548274e-05, + "loss": 0.9036808013916016, + "step": 495 + }, + { + "epoch": 0.14505044597163327, + "grad_norm": 1.2752769917707012, + "learning_rate": 1.9972653914931902e-05, + "loss": 0.6952388286590576, + "step": 496 + }, + { + "epoch": 0.14534288638689866, + "grad_norm": 1.5454177465030166, + "learning_rate": 1.9972295447268312e-05, + "loss": 0.7818677425384521, + "step": 497 + }, + { + "epoch": 0.14563532680216407, + "grad_norm": 1.2104336195623258, + "learning_rate": 1.9971934648641294e-05, + "loss": 0.8197327256202698, + "step": 498 + }, + { + "epoch": 0.14592776721742945, + "grad_norm": 1.1376920899270277, + "learning_rate": 1.997157151913518e-05, + "loss": 0.5898807644844055, + "step": 499 + }, + { + "epoch": 0.14622020763269483, + "grad_norm": 1.6480348319290024, + "learning_rate": 1.9971206058834857e-05, + "loss": 0.7980005741119385, + "step": 500 + }, + { + "epoch": 0.14651264804796021, + "grad_norm": 1.2480430258500308, + "learning_rate": 1.997083826782574e-05, + "loss": 0.7161837816238403, + "step": 501 + }, + { + "epoch": 0.14680508846322562, + "grad_norm": 1.436852590534495, + "learning_rate": 1.99704681461938e-05, + "loss": 0.7657293081283569, + "step": 502 + }, + { + "epoch": 0.147097528878491, + "grad_norm": 1.256627894457605, + "learning_rate": 1.9970095694025553e-05, + "loss": 0.6638028621673584, + "step": 503 + }, + { + "epoch": 0.1473899692937564, + "grad_norm": 1.344090583049545, + "learning_rate": 1.996972091140806e-05, + "loss": 0.8759262561798096, + "step": 504 + }, + { + "epoch": 0.1476824097090218, + "grad_norm": 1.1099770302505587, + "learning_rate": 1.9969343798428916e-05, + "loss": 0.6686065196990967, + 
"step": 505 + }, + { + "epoch": 0.14797485012428718, + "grad_norm": 1.5672815870081807, + "learning_rate": 1.9968964355176276e-05, + "loss": 0.7900313138961792, + "step": 506 + }, + { + "epoch": 0.14826729053955257, + "grad_norm": 1.3116088800480374, + "learning_rate": 1.996858258173883e-05, + "loss": 0.699286937713623, + "step": 507 + }, + { + "epoch": 0.14855973095481795, + "grad_norm": 1.149004701425465, + "learning_rate": 1.9968198478205817e-05, + "loss": 0.6613560914993286, + "step": 508 + }, + { + "epoch": 0.14885217137008336, + "grad_norm": 1.471579106109443, + "learning_rate": 1.9967812044667014e-05, + "loss": 0.8586459755897522, + "step": 509 + }, + { + "epoch": 0.14914461178534874, + "grad_norm": 1.5307049334622256, + "learning_rate": 1.9967423281212754e-05, + "loss": 0.6620850563049316, + "step": 510 + }, + { + "epoch": 0.14943705220061412, + "grad_norm": 1.6192191406380994, + "learning_rate": 1.9967032187933905e-05, + "loss": 0.7991048097610474, + "step": 511 + }, + { + "epoch": 0.1497294926158795, + "grad_norm": 1.2792732447271702, + "learning_rate": 1.9966638764921882e-05, + "loss": 0.7301167845726013, + "step": 512 + }, + { + "epoch": 0.15002193303114492, + "grad_norm": 1.244527824938295, + "learning_rate": 1.9966243012268645e-05, + "loss": 0.6470698118209839, + "step": 513 + }, + { + "epoch": 0.1503143734464103, + "grad_norm": 1.3436689137677134, + "learning_rate": 1.99658449300667e-05, + "loss": 0.5766996145248413, + "step": 514 + }, + { + "epoch": 0.15060681386167568, + "grad_norm": 1.2104018154852028, + "learning_rate": 1.9965444518409098e-05, + "loss": 0.6365845203399658, + "step": 515 + }, + { + "epoch": 0.15089925427694106, + "grad_norm": 1.6995742833660814, + "learning_rate": 1.9965041777389426e-05, + "loss": 0.6945745944976807, + "step": 516 + }, + { + "epoch": 0.15119169469220647, + "grad_norm": 1.6841525179657149, + "learning_rate": 1.996463670710183e-05, + "loss": 0.802032470703125, + "step": 517 + }, + { + "epoch": 0.15148413510747186, + 
"grad_norm": 1.4666130226044234, + "learning_rate": 1.996422930764099e-05, + "loss": 0.7429964542388916, + "step": 518 + }, + { + "epoch": 0.15177657552273724, + "grad_norm": 1.5508181233008433, + "learning_rate": 1.9963819579102134e-05, + "loss": 0.6462180614471436, + "step": 519 + }, + { + "epoch": 0.15206901593800262, + "grad_norm": 1.3226128228565077, + "learning_rate": 1.996340752158103e-05, + "loss": 0.888412594795227, + "step": 520 + }, + { + "epoch": 0.15236145635326803, + "grad_norm": 1.386680099002057, + "learning_rate": 1.9962993135173996e-05, + "loss": 0.6734700798988342, + "step": 521 + }, + { + "epoch": 0.15265389676853341, + "grad_norm": 1.385050142293082, + "learning_rate": 1.9962576419977894e-05, + "loss": 0.6951336860656738, + "step": 522 + }, + { + "epoch": 0.1529463371837988, + "grad_norm": 1.26022036147928, + "learning_rate": 1.9962157376090126e-05, + "loss": 0.7130852341651917, + "step": 523 + }, + { + "epoch": 0.15323877759906418, + "grad_norm": 1.4353500802059385, + "learning_rate": 1.9961736003608646e-05, + "loss": 0.8322055339813232, + "step": 524 + }, + { + "epoch": 0.1535312180143296, + "grad_norm": 1.2563635075596429, + "learning_rate": 1.996131230263194e-05, + "loss": 0.7031791806221008, + "step": 525 + }, + { + "epoch": 0.15382365842959497, + "grad_norm": 1.3606474846075662, + "learning_rate": 1.9960886273259052e-05, + "loss": 0.8268769979476929, + "step": 526 + }, + { + "epoch": 0.15411609884486036, + "grad_norm": 1.048782156231717, + "learning_rate": 1.9960457915589557e-05, + "loss": 0.6843237280845642, + "step": 527 + }, + { + "epoch": 0.15440853926012574, + "grad_norm": 1.29845256190474, + "learning_rate": 1.9960027229723585e-05, + "loss": 0.8267906904220581, + "step": 528 + }, + { + "epoch": 0.15470097967539115, + "grad_norm": 1.502232175088585, + "learning_rate": 1.9959594215761807e-05, + "loss": 0.8259629011154175, + "step": 529 + }, + { + "epoch": 0.15499342009065653, + "grad_norm": 1.3618507954167858, + "learning_rate": 
1.9959158873805435e-05, + "loss": 0.654765248298645, + "step": 530 + }, + { + "epoch": 0.1552858605059219, + "grad_norm": 1.3762650099604372, + "learning_rate": 1.9958721203956233e-05, + "loss": 0.7841149568557739, + "step": 531 + }, + { + "epoch": 0.1555783009211873, + "grad_norm": 1.131527995151024, + "learning_rate": 1.9958281206316497e-05, + "loss": 0.7364583015441895, + "step": 532 + }, + { + "epoch": 0.1558707413364527, + "grad_norm": 1.2428392866727909, + "learning_rate": 1.9957838880989076e-05, + "loss": 0.7985796928405762, + "step": 533 + }, + { + "epoch": 0.1561631817517181, + "grad_norm": 1.7674168807742325, + "learning_rate": 1.9957394228077363e-05, + "loss": 0.8432350754737854, + "step": 534 + }, + { + "epoch": 0.15645562216698347, + "grad_norm": 1.409652061557183, + "learning_rate": 1.995694724768529e-05, + "loss": 0.713615894317627, + "step": 535 + }, + { + "epoch": 0.15674806258224885, + "grad_norm": 1.3406073565001748, + "learning_rate": 1.9956497939917336e-05, + "loss": 0.6472936868667603, + "step": 536 + }, + { + "epoch": 0.15704050299751426, + "grad_norm": 1.4828550722777096, + "learning_rate": 1.9956046304878528e-05, + "loss": 0.7963594198226929, + "step": 537 + }, + { + "epoch": 0.15733294341277965, + "grad_norm": 1.3875627998599316, + "learning_rate": 1.9955592342674427e-05, + "loss": 0.8043302893638611, + "step": 538 + }, + { + "epoch": 0.15762538382804503, + "grad_norm": 1.3187786308741334, + "learning_rate": 1.995513605341115e-05, + "loss": 0.6277294754981995, + "step": 539 + }, + { + "epoch": 0.1579178242433104, + "grad_norm": 1.2577326193858611, + "learning_rate": 1.9954677437195345e-05, + "loss": 0.569086492061615, + "step": 540 + }, + { + "epoch": 0.15821026465857582, + "grad_norm": 1.4002846512494251, + "learning_rate": 1.9954216494134217e-05, + "loss": 0.7694308757781982, + "step": 541 + }, + { + "epoch": 0.1585027050738412, + "grad_norm": 1.2602961243105442, + "learning_rate": 1.9953753224335504e-05, + "loss": 0.7782721519470215, + 
"step": 542 + }, + { + "epoch": 0.1587951454891066, + "grad_norm": 1.4115021596903525, + "learning_rate": 1.9953287627907498e-05, + "loss": 0.6231539249420166, + "step": 543 + }, + { + "epoch": 0.159087585904372, + "grad_norm": 1.6469299670076099, + "learning_rate": 1.9952819704959022e-05, + "loss": 0.6431725025177002, + "step": 544 + }, + { + "epoch": 0.15938002631963738, + "grad_norm": 1.3041234892791729, + "learning_rate": 1.9952349455599455e-05, + "loss": 0.7062366008758545, + "step": 545 + }, + { + "epoch": 0.15967246673490276, + "grad_norm": 1.3521393089140767, + "learning_rate": 1.9951876879938716e-05, + "loss": 0.5376520156860352, + "step": 546 + }, + { + "epoch": 0.15996490715016815, + "grad_norm": 1.385911158215194, + "learning_rate": 1.9951401978087267e-05, + "loss": 0.7693386077880859, + "step": 547 + }, + { + "epoch": 0.16025734756543356, + "grad_norm": 1.7168051396485104, + "learning_rate": 1.9950924750156107e-05, + "loss": 0.6735765337944031, + "step": 548 + }, + { + "epoch": 0.16054978798069894, + "grad_norm": 1.1186135901816567, + "learning_rate": 1.995044519625679e-05, + "loss": 0.5333552360534668, + "step": 549 + }, + { + "epoch": 0.16084222839596432, + "grad_norm": 1.2149038323564916, + "learning_rate": 1.994996331650141e-05, + "loss": 0.6694493293762207, + "step": 550 + }, + { + "epoch": 0.1611346688112297, + "grad_norm": 1.3895443962170193, + "learning_rate": 1.9949479111002596e-05, + "loss": 0.6056857109069824, + "step": 551 + }, + { + "epoch": 0.1614271092264951, + "grad_norm": 1.4990214566868623, + "learning_rate": 1.9948992579873538e-05, + "loss": 0.7174896001815796, + "step": 552 + }, + { + "epoch": 0.1617195496417605, + "grad_norm": 1.4417886999069138, + "learning_rate": 1.9948503723227954e-05, + "loss": 0.9150595664978027, + "step": 553 + }, + { + "epoch": 0.16201199005702588, + "grad_norm": 1.475120009674046, + "learning_rate": 1.9948012541180116e-05, + "loss": 0.7418098449707031, + "step": 554 + }, + { + "epoch": 0.16230443047229126, 
+ "grad_norm": 1.3802668140870205, + "learning_rate": 1.9947519033844828e-05, + "loss": 0.6937648057937622, + "step": 555 + }, + { + "epoch": 0.16259687088755667, + "grad_norm": 1.198267913228467, + "learning_rate": 1.9947023201337448e-05, + "loss": 0.628747820854187, + "step": 556 + }, + { + "epoch": 0.16288931130282205, + "grad_norm": 1.3166666693196283, + "learning_rate": 1.9946525043773875e-05, + "loss": 0.6252326965332031, + "step": 557 + }, + { + "epoch": 0.16318175171808744, + "grad_norm": 1.4085830915284543, + "learning_rate": 1.9946024561270547e-05, + "loss": 0.6243278980255127, + "step": 558 + }, + { + "epoch": 0.16347419213335282, + "grad_norm": 1.515326552036181, + "learning_rate": 1.994552175394445e-05, + "loss": 0.7613602876663208, + "step": 559 + }, + { + "epoch": 0.16376663254861823, + "grad_norm": 1.4167210376939137, + "learning_rate": 1.9945016621913115e-05, + "loss": 0.7680152654647827, + "step": 560 + }, + { + "epoch": 0.1640590729638836, + "grad_norm": 1.4413485992010024, + "learning_rate": 1.9944509165294614e-05, + "loss": 0.6926383972167969, + "step": 561 + }, + { + "epoch": 0.164351513379149, + "grad_norm": 1.3901402403092062, + "learning_rate": 1.9943999384207556e-05, + "loss": 0.6822172403335571, + "step": 562 + }, + { + "epoch": 0.16464395379441438, + "grad_norm": 1.1253426305557543, + "learning_rate": 1.99434872787711e-05, + "loss": 0.6533722281455994, + "step": 563 + }, + { + "epoch": 0.1649363942096798, + "grad_norm": 1.3434183662540475, + "learning_rate": 1.9942972849104955e-05, + "loss": 0.6754113435745239, + "step": 564 + }, + { + "epoch": 0.16522883462494517, + "grad_norm": 1.3906070154993262, + "learning_rate": 1.9942456095329357e-05, + "loss": 0.5585163235664368, + "step": 565 + }, + { + "epoch": 0.16552127504021055, + "grad_norm": 1.18702583603665, + "learning_rate": 1.99419370175651e-05, + "loss": 0.6268453598022461, + "step": 566 + }, + { + "epoch": 0.16581371545547594, + "grad_norm": 1.3564219134919553, + "learning_rate": 
1.994141561593351e-05, + "loss": 0.6508245468139648, + "step": 567 + }, + { + "epoch": 0.16610615587074135, + "grad_norm": 1.353057425024783, + "learning_rate": 1.9940891890556468e-05, + "loss": 0.7337379455566406, + "step": 568 + }, + { + "epoch": 0.16639859628600673, + "grad_norm": 1.3764723902611744, + "learning_rate": 1.9940365841556385e-05, + "loss": 0.7888853549957275, + "step": 569 + }, + { + "epoch": 0.1666910367012721, + "grad_norm": 1.5384301744775797, + "learning_rate": 1.993983746905623e-05, + "loss": 0.777199923992157, + "step": 570 + }, + { + "epoch": 0.1669834771165375, + "grad_norm": 1.5194907821323576, + "learning_rate": 1.9939306773179498e-05, + "loss": 0.761531412601471, + "step": 571 + }, + { + "epoch": 0.1672759175318029, + "grad_norm": 1.5063040441270878, + "learning_rate": 1.993877375405024e-05, + "loss": 0.7060664296150208, + "step": 572 + }, + { + "epoch": 0.16756835794706829, + "grad_norm": 1.472994627130685, + "learning_rate": 1.9938238411793045e-05, + "loss": 0.6797431707382202, + "step": 573 + }, + { + "epoch": 0.16786079836233367, + "grad_norm": 1.3131930617818641, + "learning_rate": 1.9937700746533048e-05, + "loss": 0.7202910780906677, + "step": 574 + }, + { + "epoch": 0.16815323877759905, + "grad_norm": 1.198711592546953, + "learning_rate": 1.9937160758395923e-05, + "loss": 0.7241546511650085, + "step": 575 + }, + { + "epoch": 0.16844567919286446, + "grad_norm": 1.3694786109804489, + "learning_rate": 1.993661844750789e-05, + "loss": 0.7055338621139526, + "step": 576 + }, + { + "epoch": 0.16873811960812984, + "grad_norm": 1.4237978283864139, + "learning_rate": 1.993607381399571e-05, + "loss": 0.6973986625671387, + "step": 577 + }, + { + "epoch": 0.16903056002339523, + "grad_norm": 1.1715457050926792, + "learning_rate": 1.993552685798669e-05, + "loss": 0.693436861038208, + "step": 578 + }, + { + "epoch": 0.1693230004386606, + "grad_norm": 1.5585764488361307, + "learning_rate": 1.9934977579608676e-05, + "loss": 0.6687765121459961, + 
"step": 579 + }, + { + "epoch": 0.16961544085392602, + "grad_norm": 1.3798925262407884, + "learning_rate": 1.9934425978990057e-05, + "loss": 0.7776578068733215, + "step": 580 + }, + { + "epoch": 0.1699078812691914, + "grad_norm": 1.3168335454892666, + "learning_rate": 1.9933872056259768e-05, + "loss": 0.6914045810699463, + "step": 581 + }, + { + "epoch": 0.17020032168445678, + "grad_norm": 1.4649859185166105, + "learning_rate": 1.9933315811547283e-05, + "loss": 0.8005306720733643, + "step": 582 + }, + { + "epoch": 0.1704927620997222, + "grad_norm": 1.3952257625848015, + "learning_rate": 1.9932757244982625e-05, + "loss": 0.6936507225036621, + "step": 583 + }, + { + "epoch": 0.17078520251498758, + "grad_norm": 1.157795409448355, + "learning_rate": 1.9932196356696353e-05, + "loss": 0.6915504932403564, + "step": 584 + }, + { + "epoch": 0.17107764293025296, + "grad_norm": 1.4153568154846778, + "learning_rate": 1.9931633146819573e-05, + "loss": 0.7583723664283752, + "step": 585 + }, + { + "epoch": 0.17137008334551834, + "grad_norm": 1.2959976429359619, + "learning_rate": 1.9931067615483927e-05, + "loss": 0.7097266912460327, + "step": 586 + }, + { + "epoch": 0.17166252376078375, + "grad_norm": 1.5238633829769868, + "learning_rate": 1.9930499762821608e-05, + "loss": 0.7586667537689209, + "step": 587 + }, + { + "epoch": 0.17195496417604914, + "grad_norm": 1.3505202775838374, + "learning_rate": 1.9929929588965352e-05, + "loss": 0.7043411731719971, + "step": 588 + }, + { + "epoch": 0.17224740459131452, + "grad_norm": 1.3150009626714483, + "learning_rate": 1.9929357094048425e-05, + "loss": 0.8502261638641357, + "step": 589 + }, + { + "epoch": 0.1725398450065799, + "grad_norm": 1.3901300269374877, + "learning_rate": 1.992878227820465e-05, + "loss": 0.7196993827819824, + "step": 590 + }, + { + "epoch": 0.1728322854218453, + "grad_norm": 1.5475395216492736, + "learning_rate": 1.9928205141568388e-05, + "loss": 0.6783720850944519, + "step": 591 + }, + { + "epoch": 
0.1731247258371107, + "grad_norm": 1.1911883688546063, + "learning_rate": 1.9927625684274534e-05, + "loss": 0.7128307819366455, + "step": 592 + }, + { + "epoch": 0.17341716625237608, + "grad_norm": 1.226507853409212, + "learning_rate": 1.9927043906458538e-05, + "loss": 0.7289423942565918, + "step": 593 + }, + { + "epoch": 0.17370960666764146, + "grad_norm": 1.298942183876381, + "learning_rate": 1.992645980825639e-05, + "loss": 0.6306120157241821, + "step": 594 + }, + { + "epoch": 0.17400204708290687, + "grad_norm": 1.2456494719411173, + "learning_rate": 1.9925873389804614e-05, + "loss": 0.7910655736923218, + "step": 595 + }, + { + "epoch": 0.17429448749817225, + "grad_norm": 1.267940212117298, + "learning_rate": 1.9925284651240282e-05, + "loss": 0.6075282096862793, + "step": 596 + }, + { + "epoch": 0.17458692791343763, + "grad_norm": 1.251937615037275, + "learning_rate": 1.992469359270101e-05, + "loss": 0.6270443201065063, + "step": 597 + }, + { + "epoch": 0.17487936832870302, + "grad_norm": 1.3200413033724028, + "learning_rate": 1.9924100214324955e-05, + "loss": 0.6487830877304077, + "step": 598 + }, + { + "epoch": 0.17517180874396843, + "grad_norm": 1.45237431858529, + "learning_rate": 1.9923504516250814e-05, + "loss": 0.5986843705177307, + "step": 599 + }, + { + "epoch": 0.1754642491592338, + "grad_norm": 1.2191897136056242, + "learning_rate": 1.992290649861783e-05, + "loss": 0.7734183073043823, + "step": 600 + }, + { + "epoch": 0.1757566895744992, + "grad_norm": 1.167414919229407, + "learning_rate": 1.9922306161565782e-05, + "loss": 0.5784964561462402, + "step": 601 + }, + { + "epoch": 0.17604912998976457, + "grad_norm": 1.501564665297397, + "learning_rate": 1.9921703505234995e-05, + "loss": 0.8034321069717407, + "step": 602 + }, + { + "epoch": 0.17634157040502998, + "grad_norm": 1.314622713247698, + "learning_rate": 1.992109852976634e-05, + "loss": 0.8153722882270813, + "step": 603 + }, + { + "epoch": 0.17663401082029537, + "grad_norm": 1.877065501880657, + 
"learning_rate": 1.992049123530123e-05, + "loss": 0.7293002605438232, + "step": 604 + }, + { + "epoch": 0.17692645123556075, + "grad_norm": 1.514670729590329, + "learning_rate": 1.9919881621981606e-05, + "loss": 0.7108439207077026, + "step": 605 + }, + { + "epoch": 0.17721889165082613, + "grad_norm": 1.4748189889445555, + "learning_rate": 1.9919269689949968e-05, + "loss": 0.7581946849822998, + "step": 606 + }, + { + "epoch": 0.17751133206609154, + "grad_norm": 1.2337358872247315, + "learning_rate": 1.991865543934935e-05, + "loss": 0.6821258068084717, + "step": 607 + }, + { + "epoch": 0.17780377248135693, + "grad_norm": 1.2791852908008183, + "learning_rate": 1.991803887032333e-05, + "loss": 0.7116109728813171, + "step": 608 + }, + { + "epoch": 0.1780962128966223, + "grad_norm": 1.2208883706731903, + "learning_rate": 1.9917419983016025e-05, + "loss": 0.6680186986923218, + "step": 609 + }, + { + "epoch": 0.1783886533118877, + "grad_norm": 1.3494621179320938, + "learning_rate": 1.99167987775721e-05, + "loss": 0.6763704419136047, + "step": 610 + }, + { + "epoch": 0.1786810937271531, + "grad_norm": 1.4133729383070797, + "learning_rate": 1.9916175254136755e-05, + "loss": 0.756158709526062, + "step": 611 + }, + { + "epoch": 0.17897353414241848, + "grad_norm": 1.4652489049885558, + "learning_rate": 1.9915549412855734e-05, + "loss": 0.600861132144928, + "step": 612 + }, + { + "epoch": 0.17926597455768387, + "grad_norm": 1.4731466609399737, + "learning_rate": 1.991492125387533e-05, + "loss": 0.6927047967910767, + "step": 613 + }, + { + "epoch": 0.17955841497294925, + "grad_norm": 1.6937006516406405, + "learning_rate": 1.9914290777342362e-05, + "loss": 0.6908516883850098, + "step": 614 + }, + { + "epoch": 0.17985085538821466, + "grad_norm": 1.4155029526585772, + "learning_rate": 1.9913657983404206e-05, + "loss": 0.7968926429748535, + "step": 615 + }, + { + "epoch": 0.18014329580348004, + "grad_norm": 1.1016955037712495, + "learning_rate": 1.9913022872208773e-05, + "loss": 
0.6035164594650269, + "step": 616 + }, + { + "epoch": 0.18043573621874542, + "grad_norm": 1.4061380717551752, + "learning_rate": 1.9912385443904518e-05, + "loss": 0.6733090877532959, + "step": 617 + }, + { + "epoch": 0.1807281766340108, + "grad_norm": 2.2181842231696645, + "learning_rate": 1.9911745698640426e-05, + "loss": 0.6968391537666321, + "step": 618 + }, + { + "epoch": 0.18102061704927622, + "grad_norm": 1.2136657361400474, + "learning_rate": 1.991110363656605e-05, + "loss": 0.7126309871673584, + "step": 619 + }, + { + "epoch": 0.1813130574645416, + "grad_norm": 1.5461052617008268, + "learning_rate": 1.9910459257831455e-05, + "loss": 0.8604997396469116, + "step": 620 + }, + { + "epoch": 0.18160549787980698, + "grad_norm": 1.4378853015325992, + "learning_rate": 1.9909812562587266e-05, + "loss": 0.674797534942627, + "step": 621 + }, + { + "epoch": 0.1818979382950724, + "grad_norm": 1.4538548213207452, + "learning_rate": 1.9909163550984644e-05, + "loss": 0.7439107894897461, + "step": 622 + }, + { + "epoch": 0.18219037871033777, + "grad_norm": 1.4410118469577065, + "learning_rate": 1.9908512223175293e-05, + "loss": 0.7137601971626282, + "step": 623 + }, + { + "epoch": 0.18248281912560316, + "grad_norm": 1.286772355171783, + "learning_rate": 1.9907858579311448e-05, + "loss": 0.6395502090454102, + "step": 624 + }, + { + "epoch": 0.18277525954086854, + "grad_norm": 1.7411485569290241, + "learning_rate": 1.9907202619545905e-05, + "loss": 0.6747852563858032, + "step": 625 + }, + { + "epoch": 0.18306769995613395, + "grad_norm": 1.3891342500470065, + "learning_rate": 1.9906544344031986e-05, + "loss": 0.6995632648468018, + "step": 626 + }, + { + "epoch": 0.18336014037139933, + "grad_norm": 1.3916150531596103, + "learning_rate": 1.9905883752923557e-05, + "loss": 0.7006711363792419, + "step": 627 + }, + { + "epoch": 0.18365258078666472, + "grad_norm": 1.189158109720048, + "learning_rate": 1.990522084637503e-05, + "loss": 0.660778820514679, + "step": 628 + }, + { + 
"epoch": 0.1839450212019301, + "grad_norm": 1.258003733155152, + "learning_rate": 1.9904555624541362e-05, + "loss": 0.5826665163040161, + "step": 629 + }, + { + "epoch": 0.1842374616171955, + "grad_norm": 1.5565251427155322, + "learning_rate": 1.990388808757803e-05, + "loss": 0.8064266443252563, + "step": 630 + }, + { + "epoch": 0.1845299020324609, + "grad_norm": 1.3066621609893527, + "learning_rate": 1.9903218235641078e-05, + "loss": 0.6856451034545898, + "step": 631 + }, + { + "epoch": 0.18482234244772627, + "grad_norm": 1.325447510265949, + "learning_rate": 1.9902546068887076e-05, + "loss": 0.6423801183700562, + "step": 632 + }, + { + "epoch": 0.18511478286299166, + "grad_norm": 1.252931011950935, + "learning_rate": 1.9901871587473135e-05, + "loss": 0.6903005242347717, + "step": 633 + }, + { + "epoch": 0.18540722327825707, + "grad_norm": 1.2981623515351661, + "learning_rate": 1.9901194791556916e-05, + "loss": 0.636742115020752, + "step": 634 + }, + { + "epoch": 0.18569966369352245, + "grad_norm": 1.154196245030106, + "learning_rate": 1.9900515681296614e-05, + "loss": 0.6541105508804321, + "step": 635 + }, + { + "epoch": 0.18599210410878783, + "grad_norm": 1.2463484642096474, + "learning_rate": 1.9899834256850973e-05, + "loss": 0.7026485204696655, + "step": 636 + }, + { + "epoch": 0.1862845445240532, + "grad_norm": 1.2626549460002545, + "learning_rate": 1.989915051837926e-05, + "loss": 0.6232702732086182, + "step": 637 + }, + { + "epoch": 0.18657698493931862, + "grad_norm": 1.222405284140282, + "learning_rate": 1.9898464466041306e-05, + "loss": 0.5971217155456543, + "step": 638 + }, + { + "epoch": 0.186869425354584, + "grad_norm": 1.228365693552395, + "learning_rate": 1.9897776099997463e-05, + "loss": 0.7942230701446533, + "step": 639 + }, + { + "epoch": 0.1871618657698494, + "grad_norm": 1.4547764939553913, + "learning_rate": 1.9897085420408637e-05, + "loss": 0.6578072309494019, + "step": 640 + }, + { + "epoch": 0.18745430618511477, + "grad_norm": 
1.3118111344764942, + "learning_rate": 1.989639242743627e-05, + "loss": 0.6928422451019287, + "step": 641 + }, + { + "epoch": 0.18774674660038018, + "grad_norm": 1.4232777703090678, + "learning_rate": 1.9895697121242346e-05, + "loss": 0.7656213641166687, + "step": 642 + }, + { + "epoch": 0.18803918701564556, + "grad_norm": 1.3841907158773847, + "learning_rate": 1.9894999501989383e-05, + "loss": 0.6540038585662842, + "step": 643 + }, + { + "epoch": 0.18833162743091095, + "grad_norm": 1.5637672668766274, + "learning_rate": 1.989429956984045e-05, + "loss": 0.707741379737854, + "step": 644 + }, + { + "epoch": 0.18862406784617633, + "grad_norm": 1.2389494128425964, + "learning_rate": 1.9893597324959156e-05, + "loss": 0.6191326379776001, + "step": 645 + }, + { + "epoch": 0.18891650826144174, + "grad_norm": 1.2174290538744046, + "learning_rate": 1.9892892767509634e-05, + "loss": 0.616736114025116, + "step": 646 + }, + { + "epoch": 0.18920894867670712, + "grad_norm": 1.4366227278982104, + "learning_rate": 1.989218589765658e-05, + "loss": 0.803301215171814, + "step": 647 + }, + { + "epoch": 0.1895013890919725, + "grad_norm": 1.2775653707157333, + "learning_rate": 1.989147671556522e-05, + "loss": 0.6528021097183228, + "step": 648 + }, + { + "epoch": 0.1897938295072379, + "grad_norm": 1.5463247112798635, + "learning_rate": 1.9890765221401314e-05, + "loss": 0.6966919898986816, + "step": 649 + }, + { + "epoch": 0.1900862699225033, + "grad_norm": 1.2768484224289256, + "learning_rate": 1.9890051415331178e-05, + "loss": 0.7223595380783081, + "step": 650 + }, + { + "epoch": 0.19037871033776868, + "grad_norm": 1.404271714764208, + "learning_rate": 1.9889335297521656e-05, + "loss": 0.6727452278137207, + "step": 651 + }, + { + "epoch": 0.19067115075303406, + "grad_norm": 1.5662163632688932, + "learning_rate": 1.988861686814014e-05, + "loss": 0.7008258104324341, + "step": 652 + }, + { + "epoch": 0.19096359116829945, + "grad_norm": 1.3756400508505757, + "learning_rate": 
1.988789612735455e-05, + "loss": 0.7624703049659729, + "step": 653 + }, + { + "epoch": 0.19125603158356486, + "grad_norm": 1.4133612106119275, + "learning_rate": 1.988717307533336e-05, + "loss": 0.6813088655471802, + "step": 654 + }, + { + "epoch": 0.19154847199883024, + "grad_norm": 1.1919173127519105, + "learning_rate": 1.988644771224558e-05, + "loss": 0.5401284694671631, + "step": 655 + }, + { + "epoch": 0.19184091241409562, + "grad_norm": 1.4613018451006843, + "learning_rate": 1.9885720038260756e-05, + "loss": 0.6805379986763, + "step": 656 + }, + { + "epoch": 0.19213335282936103, + "grad_norm": 1.5412845974712732, + "learning_rate": 1.9884990053548982e-05, + "loss": 0.6449974775314331, + "step": 657 + }, + { + "epoch": 0.19242579324462641, + "grad_norm": 1.3481077932409014, + "learning_rate": 1.988425775828088e-05, + "loss": 0.6940032839775085, + "step": 658 + }, + { + "epoch": 0.1927182336598918, + "grad_norm": 1.3088210596354761, + "learning_rate": 1.9883523152627626e-05, + "loss": 0.7089565396308899, + "step": 659 + }, + { + "epoch": 0.19301067407515718, + "grad_norm": 1.3865316758332553, + "learning_rate": 1.9882786236760932e-05, + "loss": 0.7508438229560852, + "step": 660 + }, + { + "epoch": 0.1933031144904226, + "grad_norm": 1.6156320166139564, + "learning_rate": 1.988204701085304e-05, + "loss": 0.6828616261482239, + "step": 661 + }, + { + "epoch": 0.19359555490568797, + "grad_norm": 1.2372815991073003, + "learning_rate": 1.9881305475076744e-05, + "loss": 0.6652963161468506, + "step": 662 + }, + { + "epoch": 0.19388799532095335, + "grad_norm": 1.2410743539313074, + "learning_rate": 1.988056162960537e-05, + "loss": 0.6859447360038757, + "step": 663 + }, + { + "epoch": 0.19418043573621874, + "grad_norm": 1.4440746421071415, + "learning_rate": 1.9879815474612794e-05, + "loss": 0.693805992603302, + "step": 664 + }, + { + "epoch": 0.19447287615148415, + "grad_norm": 1.359257774367856, + "learning_rate": 1.987906701027342e-05, + "loss": 0.7028747200965881, + 
"step": 665 + }, + { + "epoch": 0.19476531656674953, + "grad_norm": 1.2833261279779522, + "learning_rate": 1.9878316236762195e-05, + "loss": 0.7492112517356873, + "step": 666 + }, + { + "epoch": 0.1950577569820149, + "grad_norm": 1.2065346249489062, + "learning_rate": 1.9877563154254613e-05, + "loss": 0.5394963026046753, + "step": 667 + }, + { + "epoch": 0.1953501973972803, + "grad_norm": 1.1848542596539768, + "learning_rate": 1.98768077629267e-05, + "loss": 0.5185493230819702, + "step": 668 + }, + { + "epoch": 0.1956426378125457, + "grad_norm": 1.2600065416138704, + "learning_rate": 1.9876050062955027e-05, + "loss": 0.7279829382896423, + "step": 669 + }, + { + "epoch": 0.1959350782278111, + "grad_norm": 1.3533145550923509, + "learning_rate": 1.9875290054516692e-05, + "loss": 0.7437206506729126, + "step": 670 + }, + { + "epoch": 0.19622751864307647, + "grad_norm": 1.6022192807514979, + "learning_rate": 1.9874527737789358e-05, + "loss": 0.7294617891311646, + "step": 671 + }, + { + "epoch": 0.19651995905834185, + "grad_norm": 1.3433918645025815, + "learning_rate": 1.9873763112951198e-05, + "loss": 0.7710307240486145, + "step": 672 + }, + { + "epoch": 0.19681239947360726, + "grad_norm": 1.3797998364213817, + "learning_rate": 1.9872996180180947e-05, + "loss": 0.690025806427002, + "step": 673 + }, + { + "epoch": 0.19710483988887265, + "grad_norm": 1.2826936342217614, + "learning_rate": 1.9872226939657867e-05, + "loss": 0.6690589189529419, + "step": 674 + }, + { + "epoch": 0.19739728030413803, + "grad_norm": 1.31971712284742, + "learning_rate": 1.9871455391561764e-05, + "loss": 0.7587239742279053, + "step": 675 + }, + { + "epoch": 0.1976897207194034, + "grad_norm": 1.2583882254944232, + "learning_rate": 1.987068153607298e-05, + "loss": 0.8048006296157837, + "step": 676 + }, + { + "epoch": 0.19798216113466882, + "grad_norm": 1.4904938665104162, + "learning_rate": 1.9869905373372402e-05, + "loss": 0.721023678779602, + "step": 677 + }, + { + "epoch": 0.1982746015499342, + 
"grad_norm": 1.2975987405043754, + "learning_rate": 1.9869126903641457e-05, + "loss": 0.646798849105835, + "step": 678 + }, + { + "epoch": 0.1985670419651996, + "grad_norm": 1.2591898865565592, + "learning_rate": 1.9868346127062098e-05, + "loss": 0.597393274307251, + "step": 679 + }, + { + "epoch": 0.19885948238046497, + "grad_norm": 1.2773189541737207, + "learning_rate": 1.9867563043816836e-05, + "loss": 0.8619129657745361, + "step": 680 + }, + { + "epoch": 0.19915192279573038, + "grad_norm": 1.2343587826225086, + "learning_rate": 1.986677765408871e-05, + "loss": 0.5391764640808105, + "step": 681 + }, + { + "epoch": 0.19944436321099576, + "grad_norm": 1.360221019641669, + "learning_rate": 1.9865989958061297e-05, + "loss": 0.8185729384422302, + "step": 682 + }, + { + "epoch": 0.19973680362626114, + "grad_norm": 1.3798220626145994, + "learning_rate": 1.9865199955918712e-05, + "loss": 0.6629397869110107, + "step": 683 + }, + { + "epoch": 0.20002924404152653, + "grad_norm": 1.2700323386046573, + "learning_rate": 1.9864407647845626e-05, + "loss": 0.6752325296401978, + "step": 684 + }, + { + "epoch": 0.20032168445679194, + "grad_norm": 1.4583632577866723, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.8509782552719116, + "step": 685 + }, + { + "epoch": 0.20061412487205732, + "grad_norm": 1.2832087066986109, + "learning_rate": 1.986281611464925e-05, + "loss": 0.5573478937149048, + "step": 686 + }, + { + "epoch": 0.2009065652873227, + "grad_norm": 1.4672386586086157, + "learning_rate": 1.9862016889897976e-05, + "loss": 0.8152032494544983, + "step": 687 + }, + { + "epoch": 0.20119900570258809, + "grad_norm": 1.2878245307564982, + "learning_rate": 1.9861215359960217e-05, + "loss": 0.6346902847290039, + "step": 688 + }, + { + "epoch": 0.2014914461178535, + "grad_norm": 1.3877152633732261, + "learning_rate": 1.986041152502332e-05, + "loss": 0.6608721017837524, + "step": 689 + }, + { + "epoch": 0.20178388653311888, + "grad_norm": 1.5061562575575014, + "learning_rate": 
1.9859605385275188e-05, + "loss": 0.7753713130950928, + "step": 690 + }, + { + "epoch": 0.20207632694838426, + "grad_norm": 1.2917361787707549, + "learning_rate": 1.9858796940904238e-05, + "loss": 0.6747434139251709, + "step": 691 + }, + { + "epoch": 0.20236876736364964, + "grad_norm": 1.4853341728710303, + "learning_rate": 1.9857986192099446e-05, + "loss": 0.7263737320899963, + "step": 692 + }, + { + "epoch": 0.20266120777891505, + "grad_norm": 1.2072706917482865, + "learning_rate": 1.9857173139050324e-05, + "loss": 0.7910827994346619, + "step": 693 + }, + { + "epoch": 0.20295364819418044, + "grad_norm": 1.479189890111576, + "learning_rate": 1.9856357781946913e-05, + "loss": 0.7245683670043945, + "step": 694 + }, + { + "epoch": 0.20324608860944582, + "grad_norm": 1.146324196354459, + "learning_rate": 1.9855540120979794e-05, + "loss": 0.7440140247344971, + "step": 695 + }, + { + "epoch": 0.20353852902471123, + "grad_norm": 1.823699641073059, + "learning_rate": 1.9854720156340096e-05, + "loss": 0.7485358715057373, + "step": 696 + }, + { + "epoch": 0.2038309694399766, + "grad_norm": 1.3927934028554216, + "learning_rate": 1.985389788821948e-05, + "loss": 0.7658560872077942, + "step": 697 + }, + { + "epoch": 0.204123409855242, + "grad_norm": 1.5269096149843602, + "learning_rate": 1.9853073316810144e-05, + "loss": 0.7366135120391846, + "step": 698 + }, + { + "epoch": 0.20441585027050738, + "grad_norm": 1.2008198015347107, + "learning_rate": 1.985224644230483e-05, + "loss": 0.622355580329895, + "step": 699 + }, + { + "epoch": 0.2047082906857728, + "grad_norm": 1.1924050316279482, + "learning_rate": 1.985141726489681e-05, + "loss": 0.6123125553131104, + "step": 700 + }, + { + "epoch": 0.20500073110103817, + "grad_norm": 1.3537888634275872, + "learning_rate": 1.9850585784779907e-05, + "loss": 0.6768301725387573, + "step": 701 + }, + { + "epoch": 0.20529317151630355, + "grad_norm": 1.2390814549745153, + "learning_rate": 1.9849752002148465e-05, + "loss": 0.6562466621398926, 
+ "step": 702 + }, + { + "epoch": 0.20558561193156893, + "grad_norm": 1.5562868949340583, + "learning_rate": 1.984891591719738e-05, + "loss": 0.7818280458450317, + "step": 703 + }, + { + "epoch": 0.20587805234683434, + "grad_norm": 1.3407102317592055, + "learning_rate": 1.9848077530122083e-05, + "loss": 0.7144001722335815, + "step": 704 + }, + { + "epoch": 0.20617049276209973, + "grad_norm": 1.1671039191657233, + "learning_rate": 1.9847236841118537e-05, + "loss": 0.700564980506897, + "step": 705 + }, + { + "epoch": 0.2064629331773651, + "grad_norm": 1.3051666135645792, + "learning_rate": 1.984639385038326e-05, + "loss": 0.5933517217636108, + "step": 706 + }, + { + "epoch": 0.2067553735926305, + "grad_norm": 1.2749925819283578, + "learning_rate": 1.9845548558113278e-05, + "loss": 0.6174886226654053, + "step": 707 + }, + { + "epoch": 0.2070478140078959, + "grad_norm": 1.3159599421199524, + "learning_rate": 1.9844700964506188e-05, + "loss": 0.7241572141647339, + "step": 708 + }, + { + "epoch": 0.20734025442316129, + "grad_norm": 1.227834334214839, + "learning_rate": 1.9843851069760103e-05, + "loss": 0.6620675325393677, + "step": 709 + }, + { + "epoch": 0.20763269483842667, + "grad_norm": 1.3263327729601424, + "learning_rate": 1.9842998874073682e-05, + "loss": 0.6115273237228394, + "step": 710 + }, + { + "epoch": 0.20792513525369205, + "grad_norm": 1.2961824988419117, + "learning_rate": 1.984214437764612e-05, + "loss": 0.6871848106384277, + "step": 711 + }, + { + "epoch": 0.20821757566895746, + "grad_norm": 1.3134080639211354, + "learning_rate": 1.9841287580677152e-05, + "loss": 0.6887271404266357, + "step": 712 + }, + { + "epoch": 0.20851001608422284, + "grad_norm": 1.4994035488495783, + "learning_rate": 1.9840428483367046e-05, + "loss": 0.8519056439399719, + "step": 713 + }, + { + "epoch": 0.20880245649948823, + "grad_norm": 1.1754556134484295, + "learning_rate": 1.9839567085916617e-05, + "loss": 0.8168978691101074, + "step": 714 + }, + { + "epoch": 
0.2090948969147536, + "grad_norm": 1.3651960767502735, + "learning_rate": 1.98387033885272e-05, + "loss": 0.6565415859222412, + "step": 715 + }, + { + "epoch": 0.20938733733001902, + "grad_norm": 1.3008644261492222, + "learning_rate": 1.9837837391400697e-05, + "loss": 0.7305471897125244, + "step": 716 + }, + { + "epoch": 0.2096797777452844, + "grad_norm": 1.4799180289336367, + "learning_rate": 1.9836969094739512e-05, + "loss": 0.7676819562911987, + "step": 717 + }, + { + "epoch": 0.20997221816054978, + "grad_norm": 1.8463650009400876, + "learning_rate": 1.983609849874661e-05, + "loss": 0.6519052982330322, + "step": 718 + }, + { + "epoch": 0.21026465857581517, + "grad_norm": 1.2876599445155823, + "learning_rate": 1.9835225603625488e-05, + "loss": 0.6298089623451233, + "step": 719 + }, + { + "epoch": 0.21055709899108058, + "grad_norm": 1.3906710149258825, + "learning_rate": 1.9834350409580184e-05, + "loss": 0.6384454369544983, + "step": 720 + }, + { + "epoch": 0.21084953940634596, + "grad_norm": 1.1568343654967514, + "learning_rate": 1.9833472916815264e-05, + "loss": 0.6335986852645874, + "step": 721 + }, + { + "epoch": 0.21114197982161134, + "grad_norm": 1.3831022749264381, + "learning_rate": 1.983259312553584e-05, + "loss": 0.6587867736816406, + "step": 722 + }, + { + "epoch": 0.21143442023687672, + "grad_norm": 1.4202837808347009, + "learning_rate": 1.9831711035947552e-05, + "loss": 0.6884294748306274, + "step": 723 + }, + { + "epoch": 0.21172686065214213, + "grad_norm": 1.3257507653834097, + "learning_rate": 1.983082664825659e-05, + "loss": 0.7094298601150513, + "step": 724 + }, + { + "epoch": 0.21201930106740752, + "grad_norm": 1.2528953355997736, + "learning_rate": 1.982993996266967e-05, + "loss": 0.736876368522644, + "step": 725 + }, + { + "epoch": 0.2123117414826729, + "grad_norm": 1.3690939580337487, + "learning_rate": 1.9829050979394052e-05, + "loss": 0.7802199125289917, + "step": 726 + }, + { + "epoch": 0.21260418189793828, + "grad_norm": 
1.1986325257536081, + "learning_rate": 1.9828159698637527e-05, + "loss": 0.602590799331665, + "step": 727 + }, + { + "epoch": 0.2128966223132037, + "grad_norm": 1.2705657575851783, + "learning_rate": 1.982726612060843e-05, + "loss": 0.6855295896530151, + "step": 728 + }, + { + "epoch": 0.21318906272846908, + "grad_norm": 1.3075577627317818, + "learning_rate": 1.982637024551563e-05, + "loss": 0.7174949645996094, + "step": 729 + }, + { + "epoch": 0.21348150314373446, + "grad_norm": 1.404568014095412, + "learning_rate": 1.9825472073568527e-05, + "loss": 0.7002695798873901, + "step": 730 + }, + { + "epoch": 0.21377394355899984, + "grad_norm": 1.3606210741478622, + "learning_rate": 1.982457160497707e-05, + "loss": 0.7256268262863159, + "step": 731 + }, + { + "epoch": 0.21406638397426525, + "grad_norm": 1.6598974008247112, + "learning_rate": 1.9823668839951732e-05, + "loss": 0.8223557472229004, + "step": 732 + }, + { + "epoch": 0.21435882438953063, + "grad_norm": 1.361285088499868, + "learning_rate": 1.982276377870353e-05, + "loss": 0.760543942451477, + "step": 733 + }, + { + "epoch": 0.21465126480479602, + "grad_norm": 1.1189262427603888, + "learning_rate": 1.982185642144402e-05, + "loss": 0.5587141513824463, + "step": 734 + }, + { + "epoch": 0.21494370522006143, + "grad_norm": 1.5077440828298982, + "learning_rate": 1.9820946768385295e-05, + "loss": 0.5775829553604126, + "step": 735 + }, + { + "epoch": 0.2152361456353268, + "grad_norm": 1.2761529870001347, + "learning_rate": 1.982003481973997e-05, + "loss": 0.6654443144798279, + "step": 736 + }, + { + "epoch": 0.2155285860505922, + "grad_norm": 1.5826837327135188, + "learning_rate": 1.9819120575721212e-05, + "loss": 0.7963466048240662, + "step": 737 + }, + { + "epoch": 0.21582102646585757, + "grad_norm": 1.3788031698645051, + "learning_rate": 1.981820403654272e-05, + "loss": 0.6748678684234619, + "step": 738 + }, + { + "epoch": 0.21611346688112298, + "grad_norm": 1.4155297807006182, + "learning_rate": 
1.9817285202418733e-05, + "loss": 0.7041783928871155, + "step": 739 + }, + { + "epoch": 0.21640590729638837, + "grad_norm": 1.5390789301713295, + "learning_rate": 1.981636407356402e-05, + "loss": 0.8008041381835938, + "step": 740 + }, + { + "epoch": 0.21669834771165375, + "grad_norm": 1.4349473190399622, + "learning_rate": 1.9815440650193887e-05, + "loss": 0.6873682141304016, + "step": 741 + }, + { + "epoch": 0.21699078812691913, + "grad_norm": 1.4041288075629241, + "learning_rate": 1.981451493252418e-05, + "loss": 0.6316831111907959, + "step": 742 + }, + { + "epoch": 0.21728322854218454, + "grad_norm": 1.3377112960270812, + "learning_rate": 1.9813586920771283e-05, + "loss": 0.6481543779373169, + "step": 743 + }, + { + "epoch": 0.21757566895744992, + "grad_norm": 1.2613104485847573, + "learning_rate": 1.9812656615152112e-05, + "loss": 0.6642731428146362, + "step": 744 + }, + { + "epoch": 0.2178681093727153, + "grad_norm": 1.4870873028073741, + "learning_rate": 1.9811724015884115e-05, + "loss": 0.6769483089447021, + "step": 745 + }, + { + "epoch": 0.2181605497879807, + "grad_norm": 1.4050593471281791, + "learning_rate": 1.981078912318529e-05, + "loss": 0.6397525072097778, + "step": 746 + }, + { + "epoch": 0.2184529902032461, + "grad_norm": 1.170420294448055, + "learning_rate": 1.9809851937274154e-05, + "loss": 0.4963756203651428, + "step": 747 + }, + { + "epoch": 0.21874543061851148, + "grad_norm": 1.6049508757911466, + "learning_rate": 1.9808912458369774e-05, + "loss": 0.7352936267852783, + "step": 748 + }, + { + "epoch": 0.21903787103377687, + "grad_norm": 1.3947943752325116, + "learning_rate": 1.980797068669175e-05, + "loss": 0.7177609205245972, + "step": 749 + }, + { + "epoch": 0.21933031144904225, + "grad_norm": 1.2819324457206713, + "learning_rate": 1.980702662246021e-05, + "loss": 0.76703941822052, + "step": 750 + }, + { + "epoch": 0.21962275186430766, + "grad_norm": 1.4885423867402507, + "learning_rate": 1.980608026589582e-05, + "loss": 0.8591324090957642, + 
"step": 751 + }, + { + "epoch": 0.21991519227957304, + "grad_norm": 1.1920075550965599, + "learning_rate": 1.9805131617219792e-05, + "loss": 0.6216185092926025, + "step": 752 + }, + { + "epoch": 0.22020763269483842, + "grad_norm": 1.359972752643247, + "learning_rate": 1.9804180676653867e-05, + "loss": 0.6067323684692383, + "step": 753 + }, + { + "epoch": 0.2205000731101038, + "grad_norm": 1.329886038437426, + "learning_rate": 1.9803227444420316e-05, + "loss": 0.5832521319389343, + "step": 754 + }, + { + "epoch": 0.22079251352536922, + "grad_norm": 1.3701144460168073, + "learning_rate": 1.9802271920741957e-05, + "loss": 0.6181083917617798, + "step": 755 + }, + { + "epoch": 0.2210849539406346, + "grad_norm": 1.6323941211416428, + "learning_rate": 1.9801314105842135e-05, + "loss": 0.614393949508667, + "step": 756 + }, + { + "epoch": 0.22137739435589998, + "grad_norm": 1.4783150089736257, + "learning_rate": 1.980035399994473e-05, + "loss": 0.7598476409912109, + "step": 757 + }, + { + "epoch": 0.22166983477116536, + "grad_norm": 1.3445249209174277, + "learning_rate": 1.979939160327417e-05, + "loss": 0.7185830473899841, + "step": 758 + }, + { + "epoch": 0.22196227518643077, + "grad_norm": 1.2604381133839313, + "learning_rate": 1.9798426916055403e-05, + "loss": 0.6672362089157104, + "step": 759 + }, + { + "epoch": 0.22225471560169616, + "grad_norm": 1.323605486489286, + "learning_rate": 1.9797459938513918e-05, + "loss": 0.60948646068573, + "step": 760 + }, + { + "epoch": 0.22254715601696154, + "grad_norm": 1.376081699980774, + "learning_rate": 1.979649067087574e-05, + "loss": 0.6073893308639526, + "step": 761 + }, + { + "epoch": 0.22283959643222692, + "grad_norm": 2.11374968768554, + "learning_rate": 1.9795519113367434e-05, + "loss": 0.7521525025367737, + "step": 762 + }, + { + "epoch": 0.22313203684749233, + "grad_norm": 1.3631196959673009, + "learning_rate": 1.979454526621609e-05, + "loss": 0.7281486988067627, + "step": 763 + }, + { + "epoch": 0.22342447726275771, + 
"grad_norm": 1.3466801989985047, + "learning_rate": 1.9793569129649345e-05, + "loss": 0.5628652572631836, + "step": 764 + }, + { + "epoch": 0.2237169176780231, + "grad_norm": 1.7030188389110175, + "learning_rate": 1.9792590703895364e-05, + "loss": 0.9115084409713745, + "step": 765 + }, + { + "epoch": 0.22400935809328848, + "grad_norm": 1.1906430527809846, + "learning_rate": 1.9791609989182843e-05, + "loss": 0.5793902277946472, + "step": 766 + }, + { + "epoch": 0.2243017985085539, + "grad_norm": 1.319680929079464, + "learning_rate": 1.979062698574102e-05, + "loss": 0.5811150074005127, + "step": 767 + }, + { + "epoch": 0.22459423892381927, + "grad_norm": 1.8337754364313175, + "learning_rate": 1.978964169379967e-05, + "loss": 0.7450643181800842, + "step": 768 + }, + { + "epoch": 0.22488667933908466, + "grad_norm": 1.2696945630714354, + "learning_rate": 1.9788654113589093e-05, + "loss": 0.6617515087127686, + "step": 769 + }, + { + "epoch": 0.22517911975435004, + "grad_norm": 1.1685310150494228, + "learning_rate": 1.9787664245340137e-05, + "loss": 0.6240406036376953, + "step": 770 + }, + { + "epoch": 0.22547156016961545, + "grad_norm": 1.450209328719988, + "learning_rate": 1.978667208928417e-05, + "loss": 0.694688081741333, + "step": 771 + }, + { + "epoch": 0.22576400058488083, + "grad_norm": 1.274649499261431, + "learning_rate": 1.9785677645653107e-05, + "loss": 0.6855190396308899, + "step": 772 + }, + { + "epoch": 0.2260564410001462, + "grad_norm": 1.5531275718881066, + "learning_rate": 1.978468091467939e-05, + "loss": 0.8132567405700684, + "step": 773 + }, + { + "epoch": 0.22634888141541162, + "grad_norm": 1.2819374084058084, + "learning_rate": 1.9783681896596006e-05, + "loss": 0.7011039853096008, + "step": 774 + }, + { + "epoch": 0.226641321830677, + "grad_norm": 1.2317633693628418, + "learning_rate": 1.9782680591636462e-05, + "loss": 0.5754199028015137, + "step": 775 + }, + { + "epoch": 0.2269337622459424, + "grad_norm": 1.3342396229289735, + "learning_rate": 
1.9781677000034807e-05, + "loss": 0.7518784403800964, + "step": 776 + }, + { + "epoch": 0.22722620266120777, + "grad_norm": 1.4619385156109748, + "learning_rate": 1.978067112202563e-05, + "loss": 0.6802738904953003, + "step": 777 + }, + { + "epoch": 0.22751864307647318, + "grad_norm": 1.2836639966818497, + "learning_rate": 1.9779662957844046e-05, + "loss": 0.7667055726051331, + "step": 778 + }, + { + "epoch": 0.22781108349173856, + "grad_norm": 1.3402387686228199, + "learning_rate": 1.9778652507725704e-05, + "loss": 0.7590975165367126, + "step": 779 + }, + { + "epoch": 0.22810352390700395, + "grad_norm": 1.5322182562597366, + "learning_rate": 1.9777639771906795e-05, + "loss": 0.8009685277938843, + "step": 780 + }, + { + "epoch": 0.22839596432226933, + "grad_norm": 1.2184372022517955, + "learning_rate": 1.977662475062404e-05, + "loss": 0.6094385981559753, + "step": 781 + }, + { + "epoch": 0.22868840473753474, + "grad_norm": 1.2258891813878965, + "learning_rate": 1.977560744411469e-05, + "loss": 0.5919946432113647, + "step": 782 + }, + { + "epoch": 0.22898084515280012, + "grad_norm": 1.3994922066796667, + "learning_rate": 1.9774587852616537e-05, + "loss": 0.7616838216781616, + "step": 783 + }, + { + "epoch": 0.2292732855680655, + "grad_norm": 1.0864449553171927, + "learning_rate": 1.9773565976367903e-05, + "loss": 0.5107603073120117, + "step": 784 + }, + { + "epoch": 0.2295657259833309, + "grad_norm": 1.3785741559157736, + "learning_rate": 1.9772541815607645e-05, + "loss": 0.6819792985916138, + "step": 785 + }, + { + "epoch": 0.2298581663985963, + "grad_norm": 1.3095462010721952, + "learning_rate": 1.977151537057516e-05, + "loss": 0.748264729976654, + "step": 786 + }, + { + "epoch": 0.23015060681386168, + "grad_norm": 1.511078591377817, + "learning_rate": 1.977048664151037e-05, + "loss": 0.7341534495353699, + "step": 787 + }, + { + "epoch": 0.23044304722912706, + "grad_norm": 1.3481462417331131, + "learning_rate": 1.976945562865373e-05, + "loss": 0.569247841835022, + 
"step": 788 + }, + { + "epoch": 0.23073548764439245, + "grad_norm": 1.4792545387125078, + "learning_rate": 1.9768422332246233e-05, + "loss": 0.7003188133239746, + "step": 789 + }, + { + "epoch": 0.23102792805965786, + "grad_norm": 1.222254549739519, + "learning_rate": 1.9767386752529415e-05, + "loss": 0.6484041810035706, + "step": 790 + }, + { + "epoch": 0.23132036847492324, + "grad_norm": 1.2921197831934208, + "learning_rate": 1.9766348889745324e-05, + "loss": 0.6635721921920776, + "step": 791 + }, + { + "epoch": 0.23161280889018862, + "grad_norm": 1.3606759597173597, + "learning_rate": 1.9765308744136568e-05, + "loss": 0.5855914354324341, + "step": 792 + }, + { + "epoch": 0.231905249305454, + "grad_norm": 1.3590534475124305, + "learning_rate": 1.976426631594626e-05, + "loss": 0.7606059312820435, + "step": 793 + }, + { + "epoch": 0.2321976897207194, + "grad_norm": 1.399907486961256, + "learning_rate": 1.976322160541807e-05, + "loss": 0.7080718278884888, + "step": 794 + }, + { + "epoch": 0.2324901301359848, + "grad_norm": 1.6372996876909576, + "learning_rate": 1.9762174612796195e-05, + "loss": 0.8838162422180176, + "step": 795 + }, + { + "epoch": 0.23278257055125018, + "grad_norm": 1.1906217629409164, + "learning_rate": 1.9761125338325357e-05, + "loss": 0.5776950120925903, + "step": 796 + }, + { + "epoch": 0.23307501096651556, + "grad_norm": 1.4075761903811832, + "learning_rate": 1.9760073782250817e-05, + "loss": 0.7455854415893555, + "step": 797 + }, + { + "epoch": 0.23336745138178097, + "grad_norm": 1.4778525028622385, + "learning_rate": 1.9759019944818375e-05, + "loss": 0.7160001993179321, + "step": 798 + }, + { + "epoch": 0.23365989179704635, + "grad_norm": 1.2680712563874137, + "learning_rate": 1.9757963826274357e-05, + "loss": 0.6282311081886292, + "step": 799 + }, + { + "epoch": 0.23395233221231174, + "grad_norm": 1.3617325718771658, + "learning_rate": 1.9756905426865626e-05, + "loss": 0.6479916572570801, + "step": 800 + }, + { + "epoch": 
0.23424477262757712, + "grad_norm": 1.2789508587545713, + "learning_rate": 1.9755844746839573e-05, + "loss": 0.6519639492034912, + "step": 801 + }, + { + "epoch": 0.23453721304284253, + "grad_norm": 1.4374021901805083, + "learning_rate": 1.9754781786444122e-05, + "loss": 0.5591464638710022, + "step": 802 + }, + { + "epoch": 0.2348296534581079, + "grad_norm": 1.6094479116430809, + "learning_rate": 1.9753716545927745e-05, + "loss": 0.6378511190414429, + "step": 803 + }, + { + "epoch": 0.2351220938733733, + "grad_norm": 1.593476138868701, + "learning_rate": 1.9752649025539424e-05, + "loss": 0.7932485342025757, + "step": 804 + }, + { + "epoch": 0.23541453428863868, + "grad_norm": 1.327032855057245, + "learning_rate": 1.9751579225528694e-05, + "loss": 0.7344592809677124, + "step": 805 + }, + { + "epoch": 0.2357069747039041, + "grad_norm": 1.5060138108990804, + "learning_rate": 1.975050714614561e-05, + "loss": 0.7879096269607544, + "step": 806 + }, + { + "epoch": 0.23599941511916947, + "grad_norm": 1.31391427286964, + "learning_rate": 1.9749432787640764e-05, + "loss": 0.6428436040878296, + "step": 807 + }, + { + "epoch": 0.23629185553443485, + "grad_norm": 1.1924129057081494, + "learning_rate": 1.9748356150265283e-05, + "loss": 0.7018194198608398, + "step": 808 + }, + { + "epoch": 0.23658429594970024, + "grad_norm": 1.3487665777693398, + "learning_rate": 1.974727723427082e-05, + "loss": 0.7696131467819214, + "step": 809 + }, + { + "epoch": 0.23687673636496565, + "grad_norm": 1.2806200429683234, + "learning_rate": 1.974619603990957e-05, + "loss": 0.6429424285888672, + "step": 810 + }, + { + "epoch": 0.23716917678023103, + "grad_norm": 1.4197164517856635, + "learning_rate": 1.9745112567434254e-05, + "loss": 0.7205626964569092, + "step": 811 + }, + { + "epoch": 0.2374616171954964, + "grad_norm": 1.206628595880062, + "learning_rate": 1.9744026817098122e-05, + "loss": 0.7018989324569702, + "step": 812 + }, + { + "epoch": 0.23775405761076182, + "grad_norm": 1.4562632106002198, 
+ "learning_rate": 1.974293878915497e-05, + "loss": 0.6861958503723145, + "step": 813 + }, + { + "epoch": 0.2380464980260272, + "grad_norm": 1.8277672251442496, + "learning_rate": 1.9741848483859117e-05, + "loss": 0.687503457069397, + "step": 814 + }, + { + "epoch": 0.23833893844129259, + "grad_norm": 1.6702364448324796, + "learning_rate": 1.9740755901465408e-05, + "loss": 0.7808526754379272, + "step": 815 + }, + { + "epoch": 0.23863137885655797, + "grad_norm": 1.4777579354772585, + "learning_rate": 1.973966104222923e-05, + "loss": 0.7387286424636841, + "step": 816 + }, + { + "epoch": 0.23892381927182338, + "grad_norm": 1.2761337726208828, + "learning_rate": 1.9738563906406508e-05, + "loss": 0.6262110471725464, + "step": 817 + }, + { + "epoch": 0.23921625968708876, + "grad_norm": 1.2308979686961945, + "learning_rate": 1.973746449425368e-05, + "loss": 0.6618830561637878, + "step": 818 + }, + { + "epoch": 0.23950870010235414, + "grad_norm": 1.3525742869997646, + "learning_rate": 1.9736362806027732e-05, + "loss": 0.5866184234619141, + "step": 819 + }, + { + "epoch": 0.23980114051761953, + "grad_norm": 1.1916120410649227, + "learning_rate": 1.9735258841986175e-05, + "loss": 0.6413314342498779, + "step": 820 + }, + { + "epoch": 0.24009358093288494, + "grad_norm": 1.3855684564301443, + "learning_rate": 1.9734152602387054e-05, + "loss": 0.6125906109809875, + "step": 821 + }, + { + "epoch": 0.24038602134815032, + "grad_norm": 1.3708182915073268, + "learning_rate": 1.973304408748895e-05, + "loss": 0.6128122806549072, + "step": 822 + }, + { + "epoch": 0.2406784617634157, + "grad_norm": 1.4552398411515748, + "learning_rate": 1.973193329755097e-05, + "loss": 0.7763051986694336, + "step": 823 + }, + { + "epoch": 0.24097090217868108, + "grad_norm": 1.406068384249821, + "learning_rate": 1.9730820232832747e-05, + "loss": 0.7187550067901611, + "step": 824 + }, + { + "epoch": 0.2412633425939465, + "grad_norm": 1.4089612736012989, + "learning_rate": 1.972970489359446e-05, + "loss": 
0.6564748287200928, + "step": 825 + }, + { + "epoch": 0.24155578300921188, + "grad_norm": 1.2962838731212396, + "learning_rate": 1.9728587280096815e-05, + "loss": 0.6573271751403809, + "step": 826 + }, + { + "epoch": 0.24184822342447726, + "grad_norm": 1.606482466732529, + "learning_rate": 1.9727467392601042e-05, + "loss": 0.8032153844833374, + "step": 827 + }, + { + "epoch": 0.24214066383974264, + "grad_norm": 1.344534982986645, + "learning_rate": 1.972634523136891e-05, + "loss": 0.6781449913978577, + "step": 828 + }, + { + "epoch": 0.24243310425500805, + "grad_norm": 1.3970734980370678, + "learning_rate": 1.972522079666272e-05, + "loss": 0.580757737159729, + "step": 829 + }, + { + "epoch": 0.24272554467027344, + "grad_norm": 1.4569992070347761, + "learning_rate": 1.97240940887453e-05, + "loss": 0.626894474029541, + "step": 830 + }, + { + "epoch": 0.24301798508553882, + "grad_norm": 1.4885978649776115, + "learning_rate": 1.9722965107880005e-05, + "loss": 0.8188163042068481, + "step": 831 + }, + { + "epoch": 0.2433104255008042, + "grad_norm": 1.4514623765445114, + "learning_rate": 1.9721833854330734e-05, + "loss": 0.6943579912185669, + "step": 832 + }, + { + "epoch": 0.2436028659160696, + "grad_norm": 1.3452906489662066, + "learning_rate": 1.972070032836191e-05, + "loss": 0.6177504658699036, + "step": 833 + }, + { + "epoch": 0.243895306331335, + "grad_norm": 1.3249219466208975, + "learning_rate": 1.971956453023849e-05, + "loss": 0.683998703956604, + "step": 834 + }, + { + "epoch": 0.24418774674660038, + "grad_norm": 1.3523687150823345, + "learning_rate": 1.9718426460225952e-05, + "loss": 0.77602219581604, + "step": 835 + }, + { + "epoch": 0.24448018716186576, + "grad_norm": 1.0190390519787025, + "learning_rate": 1.971728611859032e-05, + "loss": 0.4930742383003235, + "step": 836 + }, + { + "epoch": 0.24477262757713117, + "grad_norm": 1.057766741950331, + "learning_rate": 1.971614350559814e-05, + "loss": 0.634628415107727, + "step": 837 + }, + { + "epoch": 
0.24506506799239655, + "grad_norm": 1.4273024070967653, + "learning_rate": 1.971499862151649e-05, + "loss": 0.6439167857170105, + "step": 838 + }, + { + "epoch": 0.24535750840766193, + "grad_norm": 1.1385728991135244, + "learning_rate": 1.9713851466612982e-05, + "loss": 0.701258659362793, + "step": 839 + }, + { + "epoch": 0.24564994882292732, + "grad_norm": 1.4590112387376561, + "learning_rate": 1.9712702041155753e-05, + "loss": 0.6488544344902039, + "step": 840 + }, + { + "epoch": 0.24594238923819273, + "grad_norm": 1.3405708553224296, + "learning_rate": 1.9711550345413476e-05, + "loss": 0.6962910890579224, + "step": 841 + }, + { + "epoch": 0.2462348296534581, + "grad_norm": 1.1939053963741824, + "learning_rate": 1.9710396379655355e-05, + "loss": 0.6617723703384399, + "step": 842 + }, + { + "epoch": 0.2465272700687235, + "grad_norm": 1.2279058278823862, + "learning_rate": 1.970924014415112e-05, + "loss": 0.7152801752090454, + "step": 843 + }, + { + "epoch": 0.24681971048398887, + "grad_norm": 1.2796222731345095, + "learning_rate": 1.9708081639171035e-05, + "loss": 0.6712393760681152, + "step": 844 + }, + { + "epoch": 0.24711215089925428, + "grad_norm": 1.3941735155074029, + "learning_rate": 1.970692086498589e-05, + "loss": 0.8413758277893066, + "step": 845 + }, + { + "epoch": 0.24740459131451967, + "grad_norm": 1.423836225011119, + "learning_rate": 1.9705757821867015e-05, + "loss": 0.6460679769515991, + "step": 846 + }, + { + "epoch": 0.24769703172978505, + "grad_norm": 1.3704721229511874, + "learning_rate": 1.970459251008626e-05, + "loss": 0.759244441986084, + "step": 847 + }, + { + "epoch": 0.24798947214505043, + "grad_norm": 1.2356631241001201, + "learning_rate": 1.970342492991601e-05, + "loss": 0.8148110508918762, + "step": 848 + }, + { + "epoch": 0.24828191256031584, + "grad_norm": 1.2587770996787473, + "learning_rate": 1.970225508162918e-05, + "loss": 0.6620084047317505, + "step": 849 + }, + { + "epoch": 0.24857435297558123, + "grad_norm": 1.451838551232366, 
+ "learning_rate": 1.9701082965499217e-05, + "loss": 0.7090305089950562, + "step": 850 + }, + { + "epoch": 0.2488667933908466, + "grad_norm": 1.2074340737341804, + "learning_rate": 1.9699908581800094e-05, + "loss": 0.6846730709075928, + "step": 851 + }, + { + "epoch": 0.24915923380611202, + "grad_norm": 1.0752757256209107, + "learning_rate": 1.9698731930806315e-05, + "loss": 0.5183212757110596, + "step": 852 + }, + { + "epoch": 0.2494516742213774, + "grad_norm": 1.4176078828661092, + "learning_rate": 1.9697553012792915e-05, + "loss": 0.6913097500801086, + "step": 853 + }, + { + "epoch": 0.24974411463664278, + "grad_norm": 1.4996885245263052, + "learning_rate": 1.9696371828035466e-05, + "loss": 0.7896280884742737, + "step": 854 + }, + { + "epoch": 0.2500365550519082, + "grad_norm": 1.4718644942105623, + "learning_rate": 1.9695188376810055e-05, + "loss": 0.947577714920044, + "step": 855 + }, + { + "epoch": 0.2503289954671736, + "grad_norm": 1.3825164821538705, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.7772419452667236, + "step": 856 + }, + { + "epoch": 0.25062143588243896, + "grad_norm": 1.3624521016930335, + "learning_rate": 1.9692814676062376e-05, + "loss": 0.6255912780761719, + "step": 857 + }, + { + "epoch": 0.25091387629770434, + "grad_norm": 1.3319834146029552, + "learning_rate": 1.969162442709495e-05, + "loss": 0.6572105884552002, + "step": 858 + }, + { + "epoch": 0.2512063167129697, + "grad_norm": 1.3718275193420901, + "learning_rate": 1.969043191276924e-05, + "loss": 0.6387436389923096, + "step": 859 + }, + { + "epoch": 0.2514987571282351, + "grad_norm": 1.1976239787141296, + "learning_rate": 1.968923713336399e-05, + "loss": 0.9180483222007751, + "step": 860 + }, + { + "epoch": 0.2517911975435005, + "grad_norm": 1.211847411431562, + "learning_rate": 1.9688040089158473e-05, + "loss": 0.6830536127090454, + "step": 861 + }, + { + "epoch": 0.2520836379587659, + "grad_norm": 1.6904119232689327, + "learning_rate": 1.9686840780432487e-05, + "loss": 
0.9061588644981384, + "step": 862 + }, + { + "epoch": 0.2523760783740313, + "grad_norm": 1.157670921080695, + "learning_rate": 1.9685639207466365e-05, + "loss": 0.558010458946228, + "step": 863 + }, + { + "epoch": 0.2526685187892967, + "grad_norm": 1.1825470022948923, + "learning_rate": 1.968443537054097e-05, + "loss": 0.6788249611854553, + "step": 864 + }, + { + "epoch": 0.2529609592045621, + "grad_norm": 1.2105730438992965, + "learning_rate": 1.968322926993769e-05, + "loss": 0.576469898223877, + "step": 865 + }, + { + "epoch": 0.25325339961982746, + "grad_norm": 1.2982512656817862, + "learning_rate": 1.9682020905938438e-05, + "loss": 0.6994123458862305, + "step": 866 + }, + { + "epoch": 0.25354584003509284, + "grad_norm": 1.206872992638966, + "learning_rate": 1.9680810278825672e-05, + "loss": 0.6929521560668945, + "step": 867 + }, + { + "epoch": 0.2538382804503582, + "grad_norm": 1.273656030058159, + "learning_rate": 1.9679597388882363e-05, + "loss": 0.7596743106842041, + "step": 868 + }, + { + "epoch": 0.2541307208656236, + "grad_norm": 1.4805809886864818, + "learning_rate": 1.9678382236392013e-05, + "loss": 0.7925904989242554, + "step": 869 + }, + { + "epoch": 0.25442316128088904, + "grad_norm": 1.3335550122348163, + "learning_rate": 1.9677164821638666e-05, + "loss": 0.722467839717865, + "step": 870 + }, + { + "epoch": 0.2547156016961544, + "grad_norm": 1.3131624182400288, + "learning_rate": 1.9675945144906882e-05, + "loss": 0.7165451049804688, + "step": 871 + }, + { + "epoch": 0.2550080421114198, + "grad_norm": 1.1797512350865442, + "learning_rate": 1.9674723206481746e-05, + "loss": 0.5897061824798584, + "step": 872 + }, + { + "epoch": 0.2553004825266852, + "grad_norm": 1.2365962649439657, + "learning_rate": 1.9673499006648885e-05, + "loss": 0.6634531021118164, + "step": 873 + }, + { + "epoch": 0.2555929229419506, + "grad_norm": 1.3214235822507945, + "learning_rate": 1.9672272545694445e-05, + "loss": 0.7237584590911865, + "step": 874 + }, + { + "epoch": 
0.25588536335721596, + "grad_norm": 1.4848759223566366, + "learning_rate": 1.967104382390511e-05, + "loss": 0.6382388472557068, + "step": 875 + }, + { + "epoch": 0.25617780377248134, + "grad_norm": 1.31447030866248, + "learning_rate": 1.966981284156808e-05, + "loss": 0.6788768768310547, + "step": 876 + }, + { + "epoch": 0.2564702441877467, + "grad_norm": 1.3072783419197107, + "learning_rate": 1.966857959897109e-05, + "loss": 0.6347095966339111, + "step": 877 + }, + { + "epoch": 0.25676268460301216, + "grad_norm": 1.4344629064681063, + "learning_rate": 1.9667344096402406e-05, + "loss": 0.8896903991699219, + "step": 878 + }, + { + "epoch": 0.25705512501827754, + "grad_norm": 1.3876445939749689, + "learning_rate": 1.966610633415082e-05, + "loss": 0.71473228931427, + "step": 879 + }, + { + "epoch": 0.2573475654335429, + "grad_norm": 1.3302375445053003, + "learning_rate": 1.9664866312505646e-05, + "loss": 0.7311601638793945, + "step": 880 + }, + { + "epoch": 0.2576400058488083, + "grad_norm": 1.2472942559074918, + "learning_rate": 1.9663624031756737e-05, + "loss": 0.6186199188232422, + "step": 881 + }, + { + "epoch": 0.2579324462640737, + "grad_norm": 1.4896774549089442, + "learning_rate": 1.9662379492194467e-05, + "loss": 0.8059204816818237, + "step": 882 + }, + { + "epoch": 0.25822488667933907, + "grad_norm": 1.4468929069066396, + "learning_rate": 1.9661132694109736e-05, + "loss": 0.6065236330032349, + "step": 883 + }, + { + "epoch": 0.25851732709460445, + "grad_norm": 1.182060018600662, + "learning_rate": 1.965988363779398e-05, + "loss": 0.6491106152534485, + "step": 884 + }, + { + "epoch": 0.25880976750986984, + "grad_norm": 1.197300798410388, + "learning_rate": 1.9658632323539158e-05, + "loss": 0.526267945766449, + "step": 885 + }, + { + "epoch": 0.2591022079251353, + "grad_norm": 1.5008074138248908, + "learning_rate": 1.9657378751637755e-05, + "loss": 0.812760591506958, + "step": 886 + }, + { + "epoch": 0.25939464834040066, + "grad_norm": 1.807239371921464, + 
"learning_rate": 1.9656122922382786e-05, + "loss": 0.7957908511161804, + "step": 887 + }, + { + "epoch": 0.25968708875566604, + "grad_norm": 1.3552357306732934, + "learning_rate": 1.9654864836067796e-05, + "loss": 0.7426323890686035, + "step": 888 + }, + { + "epoch": 0.2599795291709314, + "grad_norm": 1.3206271267013228, + "learning_rate": 1.9653604492986852e-05, + "loss": 0.602961540222168, + "step": 889 + }, + { + "epoch": 0.2602719695861968, + "grad_norm": 1.7789592821205134, + "learning_rate": 1.965234189343455e-05, + "loss": 0.8706510066986084, + "step": 890 + }, + { + "epoch": 0.2605644100014622, + "grad_norm": 1.3042391493572836, + "learning_rate": 1.965107703770602e-05, + "loss": 0.6245810985565186, + "step": 891 + }, + { + "epoch": 0.26085685041672757, + "grad_norm": 1.3389608750174764, + "learning_rate": 1.964980992609691e-05, + "loss": 0.7455421686172485, + "step": 892 + }, + { + "epoch": 0.261149290831993, + "grad_norm": 1.3769047718413097, + "learning_rate": 1.9648540558903404e-05, + "loss": 0.6917043328285217, + "step": 893 + }, + { + "epoch": 0.2614417312472584, + "grad_norm": 1.5543661242785587, + "learning_rate": 1.9647268936422204e-05, + "loss": 0.6488040685653687, + "step": 894 + }, + { + "epoch": 0.2617341716625238, + "grad_norm": 1.4168880936407573, + "learning_rate": 1.964599505895055e-05, + "loss": 0.7416148781776428, + "step": 895 + }, + { + "epoch": 0.26202661207778916, + "grad_norm": 1.2398123962846468, + "learning_rate": 1.9644718926786196e-05, + "loss": 0.7012773156166077, + "step": 896 + }, + { + "epoch": 0.26231905249305454, + "grad_norm": 1.4024640685787384, + "learning_rate": 1.9643440540227438e-05, + "loss": 0.8644432425498962, + "step": 897 + }, + { + "epoch": 0.2626114929083199, + "grad_norm": 1.2155057674795815, + "learning_rate": 1.9642159899573084e-05, + "loss": 0.614842414855957, + "step": 898 + }, + { + "epoch": 0.2629039333235853, + "grad_norm": 1.406064497865486, + "learning_rate": 1.964087700512248e-05, + "loss": 
0.7794508337974548, + "step": 899 + }, + { + "epoch": 0.2631963737388507, + "grad_norm": 1.3041032890013364, + "learning_rate": 1.9639591857175492e-05, + "loss": 0.49217259883880615, + "step": 900 + }, + { + "epoch": 0.2634888141541161, + "grad_norm": 2.5300379427879656, + "learning_rate": 1.9638304456032516e-05, + "loss": 0.6319605708122253, + "step": 901 + }, + { + "epoch": 0.2637812545693815, + "grad_norm": 1.2937854520821135, + "learning_rate": 1.9637014801994478e-05, + "loss": 0.6066744327545166, + "step": 902 + }, + { + "epoch": 0.2640736949846469, + "grad_norm": 1.3364560601793205, + "learning_rate": 1.9635722895362824e-05, + "loss": 0.7529127597808838, + "step": 903 + }, + { + "epoch": 0.26436613539991227, + "grad_norm": 1.1766314649269587, + "learning_rate": 1.9634428736439524e-05, + "loss": 0.6026389598846436, + "step": 904 + }, + { + "epoch": 0.26465857581517765, + "grad_norm": 1.1341480559887087, + "learning_rate": 1.9633132325527092e-05, + "loss": 0.6227229237556458, + "step": 905 + }, + { + "epoch": 0.26495101623044304, + "grad_norm": 1.0934147682033295, + "learning_rate": 1.9631833662928548e-05, + "loss": 0.5959285497665405, + "step": 906 + }, + { + "epoch": 0.2652434566457084, + "grad_norm": 1.5332323248713289, + "learning_rate": 1.9630532748947445e-05, + "loss": 0.8104684352874756, + "step": 907 + }, + { + "epoch": 0.2655358970609738, + "grad_norm": 1.4286964634802555, + "learning_rate": 1.962922958388787e-05, + "loss": 0.6722325682640076, + "step": 908 + }, + { + "epoch": 0.26582833747623924, + "grad_norm": 1.3146328085881052, + "learning_rate": 1.962792416805442e-05, + "loss": 0.5996029376983643, + "step": 909 + }, + { + "epoch": 0.2661207778915046, + "grad_norm": 1.2576705371159294, + "learning_rate": 1.962661650175224e-05, + "loss": 0.7214776873588562, + "step": 910 + }, + { + "epoch": 0.26641321830677, + "grad_norm": 1.3644451050997106, + "learning_rate": 1.9625306585286986e-05, + "loss": 0.6833420991897583, + "step": 911 + }, + { + "epoch": 
0.2667056587220354, + "grad_norm": 1.3539788924921423, + "learning_rate": 1.9623994418964834e-05, + "loss": 0.5571368336677551, + "step": 912 + }, + { + "epoch": 0.26699809913730077, + "grad_norm": 1.3710487138213245, + "learning_rate": 1.9622680003092503e-05, + "loss": 0.6748533248901367, + "step": 913 + }, + { + "epoch": 0.26729053955256615, + "grad_norm": 1.3715994474814863, + "learning_rate": 1.9621363337977232e-05, + "loss": 0.6681679487228394, + "step": 914 + }, + { + "epoch": 0.26758297996783154, + "grad_norm": 1.482670676536411, + "learning_rate": 1.9620044423926775e-05, + "loss": 0.6839786767959595, + "step": 915 + }, + { + "epoch": 0.2678754203830969, + "grad_norm": 1.4250296018843953, + "learning_rate": 1.961872326124943e-05, + "loss": 0.7481753826141357, + "step": 916 + }, + { + "epoch": 0.26816786079836236, + "grad_norm": 1.2167024955211783, + "learning_rate": 1.9617399850254e-05, + "loss": 0.6044093370437622, + "step": 917 + }, + { + "epoch": 0.26846030121362774, + "grad_norm": 1.284073365031053, + "learning_rate": 1.9616074191249833e-05, + "loss": 0.6399786472320557, + "step": 918 + }, + { + "epoch": 0.2687527416288931, + "grad_norm": 1.4810486497659208, + "learning_rate": 1.961474628454679e-05, + "loss": 0.6769053339958191, + "step": 919 + }, + { + "epoch": 0.2690451820441585, + "grad_norm": 1.3650368498715015, + "learning_rate": 1.961341613045526e-05, + "loss": 0.7508189678192139, + "step": 920 + }, + { + "epoch": 0.2693376224594239, + "grad_norm": 1.3260194970823536, + "learning_rate": 1.9612083729286164e-05, + "loss": 0.728675365447998, + "step": 921 + }, + { + "epoch": 0.26963006287468927, + "grad_norm": 1.241243201070507, + "learning_rate": 1.9610749081350934e-05, + "loss": 0.6886277794837952, + "step": 922 + }, + { + "epoch": 0.26992250328995465, + "grad_norm": 1.272552251820391, + "learning_rate": 1.9609412186961542e-05, + "loss": 0.6756877899169922, + "step": 923 + }, + { + "epoch": 0.27021494370522003, + "grad_norm": 1.3464083414999921, + 
"learning_rate": 1.960807304643048e-05, + "loss": 0.6761744022369385, + "step": 924 + }, + { + "epoch": 0.2705073841204855, + "grad_norm": 1.3141872927798783, + "learning_rate": 1.9606731660070758e-05, + "loss": 0.6475736498832703, + "step": 925 + }, + { + "epoch": 0.27079982453575085, + "grad_norm": 1.2576667239396297, + "learning_rate": 1.9605388028195922e-05, + "loss": 0.6169984936714172, + "step": 926 + }, + { + "epoch": 0.27109226495101624, + "grad_norm": 1.36667119537221, + "learning_rate": 1.9604042151120035e-05, + "loss": 0.6411685943603516, + "step": 927 + }, + { + "epoch": 0.2713847053662816, + "grad_norm": 1.203794827188605, + "learning_rate": 1.960269402915769e-05, + "loss": 0.6802625060081482, + "step": 928 + }, + { + "epoch": 0.271677145781547, + "grad_norm": 1.1204382547238934, + "learning_rate": 1.9601343662624e-05, + "loss": 0.6321320533752441, + "step": 929 + }, + { + "epoch": 0.2719695861968124, + "grad_norm": 1.1836254946940896, + "learning_rate": 1.959999105183461e-05, + "loss": 0.6242578029632568, + "step": 930 + }, + { + "epoch": 0.27226202661207777, + "grad_norm": 1.3574626937776866, + "learning_rate": 1.9598636197105672e-05, + "loss": 0.8106271624565125, + "step": 931 + }, + { + "epoch": 0.2725544670273432, + "grad_norm": 1.3336233570386715, + "learning_rate": 1.9597279098753893e-05, + "loss": 0.6810879707336426, + "step": 932 + }, + { + "epoch": 0.2728469074426086, + "grad_norm": 1.4182604377271, + "learning_rate": 1.959591975709647e-05, + "loss": 0.6121781468391418, + "step": 933 + }, + { + "epoch": 0.27313934785787397, + "grad_norm": 1.3855646528211634, + "learning_rate": 1.9594558172451153e-05, + "loss": 0.7347930669784546, + "step": 934 + }, + { + "epoch": 0.27343178827313935, + "grad_norm": 1.7726573891466724, + "learning_rate": 1.9593194345136196e-05, + "loss": 0.8280940651893616, + "step": 935 + }, + { + "epoch": 0.27372422868840474, + "grad_norm": 1.7069126445705718, + "learning_rate": 1.959182827547039e-05, + "loss": 
0.8171218633651733, + "step": 936 + }, + { + "epoch": 0.2740166691036701, + "grad_norm": 1.5519639216005559, + "learning_rate": 1.9590459963773043e-05, + "loss": 0.7350337505340576, + "step": 937 + }, + { + "epoch": 0.2743091095189355, + "grad_norm": 1.2380635233009907, + "learning_rate": 1.9589089410363992e-05, + "loss": 0.5648026466369629, + "step": 938 + }, + { + "epoch": 0.2746015499342009, + "grad_norm": 1.2184482229154892, + "learning_rate": 1.9587716615563592e-05, + "loss": 0.630626916885376, + "step": 939 + }, + { + "epoch": 0.2748939903494663, + "grad_norm": 1.247434869071023, + "learning_rate": 1.9586341579692728e-05, + "loss": 0.658649206161499, + "step": 940 + }, + { + "epoch": 0.2751864307647317, + "grad_norm": 1.3583264773002954, + "learning_rate": 1.9584964303072804e-05, + "loss": 0.6938339471817017, + "step": 941 + }, + { + "epoch": 0.2754788711799971, + "grad_norm": 1.2844871691004516, + "learning_rate": 1.9583584786025755e-05, + "loss": 0.7124238014221191, + "step": 942 + }, + { + "epoch": 0.27577131159526247, + "grad_norm": 1.295461976555009, + "learning_rate": 1.9582203028874027e-05, + "loss": 0.5879669189453125, + "step": 943 + }, + { + "epoch": 0.27606375201052785, + "grad_norm": 1.3092326597229536, + "learning_rate": 1.9580819031940605e-05, + "loss": 0.6169895529747009, + "step": 944 + }, + { + "epoch": 0.27635619242579323, + "grad_norm": 1.3408083006486937, + "learning_rate": 1.9579432795548986e-05, + "loss": 0.6367429494857788, + "step": 945 + }, + { + "epoch": 0.2766486328410586, + "grad_norm": 1.294470969807804, + "learning_rate": 1.9578044320023195e-05, + "loss": 0.6198331117630005, + "step": 946 + }, + { + "epoch": 0.276941073256324, + "grad_norm": 1.2934388501492589, + "learning_rate": 1.9576653605687782e-05, + "loss": 0.6731230616569519, + "step": 947 + }, + { + "epoch": 0.27723351367158944, + "grad_norm": 1.3743119206413423, + "learning_rate": 1.957526065286781e-05, + "loss": 0.7185516953468323, + "step": 948 + }, + { + "epoch": 
0.2775259540868548, + "grad_norm": 1.5124791251983178, + "learning_rate": 1.9573865461888882e-05, + "loss": 0.7362357378005981, + "step": 949 + }, + { + "epoch": 0.2778183945021202, + "grad_norm": 1.481999625276378, + "learning_rate": 1.9572468033077113e-05, + "loss": 0.7051525712013245, + "step": 950 + }, + { + "epoch": 0.2781108349173856, + "grad_norm": 1.3167000079730038, + "learning_rate": 1.9571068366759143e-05, + "loss": 0.6267420053482056, + "step": 951 + }, + { + "epoch": 0.27840327533265097, + "grad_norm": 1.4667668035632615, + "learning_rate": 1.9569666463262136e-05, + "loss": 0.649080753326416, + "step": 952 + }, + { + "epoch": 0.27869571574791635, + "grad_norm": 1.1940294879505342, + "learning_rate": 1.9568262322913777e-05, + "loss": 0.5700061321258545, + "step": 953 + }, + { + "epoch": 0.27898815616318173, + "grad_norm": 1.21562106075719, + "learning_rate": 1.9566855946042274e-05, + "loss": 0.6121870875358582, + "step": 954 + }, + { + "epoch": 0.2792805965784471, + "grad_norm": 1.3828404656512372, + "learning_rate": 1.9565447332976362e-05, + "loss": 0.8294541239738464, + "step": 955 + }, + { + "epoch": 0.27957303699371255, + "grad_norm": 1.2953263908127255, + "learning_rate": 1.9564036484045295e-05, + "loss": 0.6979323625564575, + "step": 956 + }, + { + "epoch": 0.27986547740897794, + "grad_norm": 1.4787353970640398, + "learning_rate": 1.9562623399578853e-05, + "loss": 0.6847009658813477, + "step": 957 + }, + { + "epoch": 0.2801579178242433, + "grad_norm": 1.174633661295302, + "learning_rate": 1.956120807990733e-05, + "loss": 0.6821733713150024, + "step": 958 + }, + { + "epoch": 0.2804503582395087, + "grad_norm": 1.2766608312969014, + "learning_rate": 1.955979052536155e-05, + "loss": 0.6943963766098022, + "step": 959 + }, + { + "epoch": 0.2807427986547741, + "grad_norm": 1.6283703947702834, + "learning_rate": 1.955837073627286e-05, + "loss": 0.5841893553733826, + "step": 960 + }, + { + "epoch": 0.28103523907003947, + "grad_norm": 1.4526296199919857, + 
"learning_rate": 1.955694871297313e-05, + "loss": 0.7196778059005737, + "step": 961 + }, + { + "epoch": 0.28132767948530485, + "grad_norm": 1.3568922084457422, + "learning_rate": 1.9555524455794743e-05, + "loss": 0.697501540184021, + "step": 962 + }, + { + "epoch": 0.28162011990057023, + "grad_norm": 1.3269336256780513, + "learning_rate": 1.9554097965070612e-05, + "loss": 0.7265810966491699, + "step": 963 + }, + { + "epoch": 0.28191256031583567, + "grad_norm": 1.1794879937673313, + "learning_rate": 1.955266924113417e-05, + "loss": 0.5766021013259888, + "step": 964 + }, + { + "epoch": 0.28220500073110105, + "grad_norm": 1.1486001787824904, + "learning_rate": 1.955123828431938e-05, + "loss": 0.6885402202606201, + "step": 965 + }, + { + "epoch": 0.28249744114636643, + "grad_norm": 1.4093622546586522, + "learning_rate": 1.954980509496071e-05, + "loss": 0.719329297542572, + "step": 966 + }, + { + "epoch": 0.2827898815616318, + "grad_norm": 1.1657877260705576, + "learning_rate": 1.954836967339316e-05, + "loss": 0.5621368885040283, + "step": 967 + }, + { + "epoch": 0.2830823219768972, + "grad_norm": 1.4684107409650433, + "learning_rate": 1.954693201995226e-05, + "loss": 0.6323715448379517, + "step": 968 + }, + { + "epoch": 0.2833747623921626, + "grad_norm": 1.1727530946898588, + "learning_rate": 1.954549213497404e-05, + "loss": 0.6265028119087219, + "step": 969 + }, + { + "epoch": 0.28366720280742797, + "grad_norm": 1.2740242277637046, + "learning_rate": 1.9544050018795076e-05, + "loss": 0.6234713792800903, + "step": 970 + }, + { + "epoch": 0.2839596432226934, + "grad_norm": 1.2342517719802, + "learning_rate": 1.9542605671752447e-05, + "loss": 0.6505804657936096, + "step": 971 + }, + { + "epoch": 0.2842520836379588, + "grad_norm": 1.408353713096739, + "learning_rate": 1.954115909418376e-05, + "loss": 0.7756558656692505, + "step": 972 + }, + { + "epoch": 0.28454452405322417, + "grad_norm": 1.4275947350210108, + "learning_rate": 1.953971028642715e-05, + "loss": 
0.767257034778595, + "step": 973 + }, + { + "epoch": 0.28483696446848955, + "grad_norm": 1.5164327383088176, + "learning_rate": 1.9538259248821265e-05, + "loss": 0.6702018976211548, + "step": 974 + }, + { + "epoch": 0.28512940488375493, + "grad_norm": 1.5385088670888984, + "learning_rate": 1.953680598170527e-05, + "loss": 0.7072827816009521, + "step": 975 + }, + { + "epoch": 0.2854218452990203, + "grad_norm": 1.4449259987675327, + "learning_rate": 1.953535048541886e-05, + "loss": 0.6343571543693542, + "step": 976 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 1.2668558478543779, + "learning_rate": 1.953389276030225e-05, + "loss": 0.6361520290374756, + "step": 977 + }, + { + "epoch": 0.2860067261295511, + "grad_norm": 1.144363699587152, + "learning_rate": 1.9532432806696178e-05, + "loss": 0.6757364273071289, + "step": 978 + }, + { + "epoch": 0.2862991665448165, + "grad_norm": 1.2373799950730142, + "learning_rate": 1.9530970624941896e-05, + "loss": 0.6311759948730469, + "step": 979 + }, + { + "epoch": 0.2865916069600819, + "grad_norm": 1.3327233434420644, + "learning_rate": 1.9529506215381176e-05, + "loss": 0.6207036972045898, + "step": 980 + }, + { + "epoch": 0.2868840473753473, + "grad_norm": 1.182706201187961, + "learning_rate": 1.952803957835632e-05, + "loss": 0.5154495239257812, + "step": 981 + }, + { + "epoch": 0.28717648779061267, + "grad_norm": 1.4885508278374788, + "learning_rate": 1.9526570714210146e-05, + "loss": 0.797666072845459, + "step": 982 + }, + { + "epoch": 0.28746892820587805, + "grad_norm": 1.5013519512468485, + "learning_rate": 1.9525099623285983e-05, + "loss": 0.659400224685669, + "step": 983 + }, + { + "epoch": 0.28776136862114343, + "grad_norm": 1.565667149921291, + "learning_rate": 1.9523626305927706e-05, + "loss": 0.7638698816299438, + "step": 984 + }, + { + "epoch": 0.2880538090364088, + "grad_norm": 1.282540952352899, + "learning_rate": 1.952215076247968e-05, + "loss": 0.6656497120857239, + "step": 985 + }, + { + "epoch": 
0.2883462494516742, + "grad_norm": 1.6004320535828411, + "learning_rate": 1.9520672993286807e-05, + "loss": 0.7701614499092102, + "step": 986 + }, + { + "epoch": 0.28863868986693964, + "grad_norm": 1.4907110687279852, + "learning_rate": 1.951919299869451e-05, + "loss": 0.6710221767425537, + "step": 987 + }, + { + "epoch": 0.288931130282205, + "grad_norm": 1.3912460639172692, + "learning_rate": 1.951771077904873e-05, + "loss": 0.6307191848754883, + "step": 988 + }, + { + "epoch": 0.2892235706974704, + "grad_norm": 1.5585350101159294, + "learning_rate": 1.951622633469592e-05, + "loss": 0.8226636648178101, + "step": 989 + }, + { + "epoch": 0.2895160111127358, + "grad_norm": 1.3925257650330547, + "learning_rate": 1.9514739665983065e-05, + "loss": 0.6286089420318604, + "step": 990 + }, + { + "epoch": 0.28980845152800117, + "grad_norm": 1.3766260212895336, + "learning_rate": 1.9513250773257667e-05, + "loss": 0.8167316317558289, + "step": 991 + }, + { + "epoch": 0.29010089194326655, + "grad_norm": 1.3082034964893225, + "learning_rate": 1.9511759656867738e-05, + "loss": 0.6840806603431702, + "step": 992 + }, + { + "epoch": 0.29039333235853193, + "grad_norm": 4.707433700267527, + "learning_rate": 1.9510266317161823e-05, + "loss": 0.5731699466705322, + "step": 993 + }, + { + "epoch": 0.2906857727737973, + "grad_norm": 1.179743170686313, + "learning_rate": 1.950877075448898e-05, + "loss": 0.696578860282898, + "step": 994 + }, + { + "epoch": 0.29097821318906275, + "grad_norm": 1.28092562469002, + "learning_rate": 1.9507272969198787e-05, + "loss": 0.7194398641586304, + "step": 995 + }, + { + "epoch": 0.29127065360432813, + "grad_norm": 1.7406610068492592, + "learning_rate": 1.9505772961641342e-05, + "loss": 0.7041016817092896, + "step": 996 + }, + { + "epoch": 0.2915630940195935, + "grad_norm": 1.2586308004321554, + "learning_rate": 1.9504270732167267e-05, + "loss": 0.7073841691017151, + "step": 997 + }, + { + "epoch": 0.2918555344348589, + "grad_norm": 1.204085782896564, + 
"learning_rate": 1.9502766281127693e-05, + "loss": 0.5097789764404297, + "step": 998 + }, + { + "epoch": 0.2921479748501243, + "grad_norm": 1.1340482101200409, + "learning_rate": 1.9501259608874276e-05, + "loss": 0.6522337198257446, + "step": 999 + }, + { + "epoch": 0.29244041526538966, + "grad_norm": 1.2639457143948831, + "learning_rate": 1.9499750715759197e-05, + "loss": 0.8276036381721497, + "step": 1000 + }, + { + "epoch": 0.29273285568065505, + "grad_norm": 1.3336888124261281, + "learning_rate": 1.9498239602135145e-05, + "loss": 0.7701225876808167, + "step": 1001 + }, + { + "epoch": 0.29302529609592043, + "grad_norm": 1.4216994028606598, + "learning_rate": 1.949672626835534e-05, + "loss": 0.6112316846847534, + "step": 1002 + }, + { + "epoch": 0.29331773651118587, + "grad_norm": 1.5055133598944146, + "learning_rate": 1.9495210714773506e-05, + "loss": 0.7196093201637268, + "step": 1003 + }, + { + "epoch": 0.29361017692645125, + "grad_norm": 1.3102459642638802, + "learning_rate": 1.9493692941743903e-05, + "loss": 0.708210825920105, + "step": 1004 + }, + { + "epoch": 0.29390261734171663, + "grad_norm": 1.1576562552023075, + "learning_rate": 1.9492172949621298e-05, + "loss": 0.6156430244445801, + "step": 1005 + }, + { + "epoch": 0.294195057756982, + "grad_norm": 1.3177580817558727, + "learning_rate": 1.9490650738760977e-05, + "loss": 0.6125216484069824, + "step": 1006 + }, + { + "epoch": 0.2944874981722474, + "grad_norm": 1.5792615772910776, + "learning_rate": 1.9489126309518752e-05, + "loss": 0.5691695213317871, + "step": 1007 + }, + { + "epoch": 0.2947799385875128, + "grad_norm": 1.2458453862912673, + "learning_rate": 1.9487599662250945e-05, + "loss": 0.6733062267303467, + "step": 1008 + }, + { + "epoch": 0.29507237900277816, + "grad_norm": 1.3579820847813902, + "learning_rate": 1.94860707973144e-05, + "loss": 0.6069025993347168, + "step": 1009 + }, + { + "epoch": 0.2953648194180436, + "grad_norm": 1.3771790647505693, + "learning_rate": 1.9484539715066488e-05, + 
"loss": 0.6191028356552124, + "step": 1010 + }, + { + "epoch": 0.295657259833309, + "grad_norm": 1.3927395620788336, + "learning_rate": 1.9483006415865082e-05, + "loss": 0.7423045635223389, + "step": 1011 + }, + { + "epoch": 0.29594970024857437, + "grad_norm": 1.584259935283413, + "learning_rate": 1.9481470900068585e-05, + "loss": 0.854878306388855, + "step": 1012 + }, + { + "epoch": 0.29624214066383975, + "grad_norm": 1.3274147652805814, + "learning_rate": 1.9479933168035914e-05, + "loss": 0.6950500011444092, + "step": 1013 + }, + { + "epoch": 0.29653458107910513, + "grad_norm": 1.2664754529699496, + "learning_rate": 1.9478393220126503e-05, + "loss": 0.6944484710693359, + "step": 1014 + }, + { + "epoch": 0.2968270214943705, + "grad_norm": 1.3385070796010239, + "learning_rate": 1.9476851056700303e-05, + "loss": 0.7120212316513062, + "step": 1015 + }, + { + "epoch": 0.2971194619096359, + "grad_norm": 1.2818173555684258, + "learning_rate": 1.9475306678117792e-05, + "loss": 0.6271052956581116, + "step": 1016 + }, + { + "epoch": 0.2974119023249013, + "grad_norm": 1.386949235285712, + "learning_rate": 1.9473760084739958e-05, + "loss": 0.6398453712463379, + "step": 1017 + }, + { + "epoch": 0.2977043427401667, + "grad_norm": 1.440440679973054, + "learning_rate": 1.94722112769283e-05, + "loss": 0.5563585758209229, + "step": 1018 + }, + { + "epoch": 0.2979967831554321, + "grad_norm": 1.2637928746894573, + "learning_rate": 1.947066025504485e-05, + "loss": 0.7895959615707397, + "step": 1019 + }, + { + "epoch": 0.2982892235706975, + "grad_norm": 1.2684661754258477, + "learning_rate": 1.9469107019452148e-05, + "loss": 0.6304349303245544, + "step": 1020 + }, + { + "epoch": 0.29858166398596286, + "grad_norm": 1.4493096125993807, + "learning_rate": 1.9467551570513257e-05, + "loss": 0.6915549039840698, + "step": 1021 + }, + { + "epoch": 0.29887410440122825, + "grad_norm": 1.2593652754748748, + "learning_rate": 1.9465993908591748e-05, + "loss": 0.6257511377334595, + "step": 1022 + 
}, + { + "epoch": 0.29916654481649363, + "grad_norm": 1.4075585450481771, + "learning_rate": 1.9464434034051716e-05, + "loss": 0.6409085988998413, + "step": 1023 + }, + { + "epoch": 0.299458985231759, + "grad_norm": 1.358442522813864, + "learning_rate": 1.9462871947257772e-05, + "loss": 0.7281351089477539, + "step": 1024 + }, + { + "epoch": 0.2997514256470244, + "grad_norm": 1.441690145181621, + "learning_rate": 1.9461307648575047e-05, + "loss": 0.8016781806945801, + "step": 1025 + }, + { + "epoch": 0.30004386606228983, + "grad_norm": 1.2844064559637345, + "learning_rate": 1.9459741138369186e-05, + "loss": 0.5883209705352783, + "step": 1026 + }, + { + "epoch": 0.3003363064775552, + "grad_norm": 1.674320224055934, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.6414197683334351, + "step": 1027 + }, + { + "epoch": 0.3006287468928206, + "grad_norm": 1.465437904752509, + "learning_rate": 1.9456601484853218e-05, + "loss": 0.7076515555381775, + "step": 1028 + }, + { + "epoch": 0.300921187308086, + "grad_norm": 1.4091861442316225, + "learning_rate": 1.9455028342276984e-05, + "loss": 0.8102637529373169, + "step": 1029 + }, + { + "epoch": 0.30121362772335136, + "grad_norm": 1.3935099692215975, + "learning_rate": 1.9453452989645362e-05, + "loss": 0.6954574584960938, + "step": 1030 + }, + { + "epoch": 0.30150606813861675, + "grad_norm": 1.1912974865854908, + "learning_rate": 1.9451875427326585e-05, + "loss": 0.6647125482559204, + "step": 1031 + }, + { + "epoch": 0.3017985085538821, + "grad_norm": 1.2637381593470247, + "learning_rate": 1.9450295655689392e-05, + "loss": 0.5501933097839355, + "step": 1032 + }, + { + "epoch": 0.3020909489691475, + "grad_norm": 1.1642394496276798, + "learning_rate": 1.944871367510305e-05, + "loss": 0.6561415195465088, + "step": 1033 + }, + { + "epoch": 0.30238338938441295, + "grad_norm": 1.2818557575199787, + "learning_rate": 1.9447129485937335e-05, + "loss": 0.6768229007720947, + "step": 1034 + }, + { + "epoch": 0.30267582979967833, + 
"grad_norm": 1.229414584528048, + "learning_rate": 1.9445543088562543e-05, + "loss": 0.5693868398666382, + "step": 1035 + }, + { + "epoch": 0.3029682702149437, + "grad_norm": 1.197937800783061, + "learning_rate": 1.9443954483349485e-05, + "loss": 0.6165708303451538, + "step": 1036 + }, + { + "epoch": 0.3032607106302091, + "grad_norm": 1.0808504567320436, + "learning_rate": 1.944236367066948e-05, + "loss": 0.6116082668304443, + "step": 1037 + }, + { + "epoch": 0.3035531510454745, + "grad_norm": 1.2481100676234638, + "learning_rate": 1.9440770650894384e-05, + "loss": 0.7027714848518372, + "step": 1038 + }, + { + "epoch": 0.30384559146073986, + "grad_norm": 1.3613368127158991, + "learning_rate": 1.943917542439655e-05, + "loss": 0.7339189052581787, + "step": 1039 + }, + { + "epoch": 0.30413803187600524, + "grad_norm": 1.322856585416547, + "learning_rate": 1.943757799154885e-05, + "loss": 0.7975895404815674, + "step": 1040 + }, + { + "epoch": 0.3044304722912706, + "grad_norm": 1.2603507441667385, + "learning_rate": 1.9435978352724673e-05, + "loss": 0.6421841382980347, + "step": 1041 + }, + { + "epoch": 0.30472291270653606, + "grad_norm": 1.3017046883641064, + "learning_rate": 1.943437650829793e-05, + "loss": 0.6731791496276855, + "step": 1042 + }, + { + "epoch": 0.30501535312180145, + "grad_norm": 1.224211690521448, + "learning_rate": 1.943277245864304e-05, + "loss": 0.7008551359176636, + "step": 1043 + }, + { + "epoch": 0.30530779353706683, + "grad_norm": 1.2549197569852149, + "learning_rate": 1.943116620413494e-05, + "loss": 0.6777141094207764, + "step": 1044 + }, + { + "epoch": 0.3056002339523322, + "grad_norm": 1.258074600817151, + "learning_rate": 1.9429557745149084e-05, + "loss": 0.7649033069610596, + "step": 1045 + }, + { + "epoch": 0.3058926743675976, + "grad_norm": 1.2626508350830759, + "learning_rate": 1.9427947082061432e-05, + "loss": 0.6460477709770203, + "step": 1046 + }, + { + "epoch": 0.306185114782863, + "grad_norm": 1.3748035809258794, + 
"learning_rate": 1.942633421524848e-05, + "loss": 0.5939697623252869, + "step": 1047 + }, + { + "epoch": 0.30647755519812836, + "grad_norm": 1.3696807292374817, + "learning_rate": 1.9424719145087216e-05, + "loss": 0.606407880783081, + "step": 1048 + }, + { + "epoch": 0.3067699956133938, + "grad_norm": 1.2114201905625201, + "learning_rate": 1.9423101871955153e-05, + "loss": 0.5515298843383789, + "step": 1049 + }, + { + "epoch": 0.3070624360286592, + "grad_norm": 1.4449996700249255, + "learning_rate": 1.942148239623032e-05, + "loss": 0.7397217154502869, + "step": 1050 + }, + { + "epoch": 0.30735487644392456, + "grad_norm": 1.708533630902304, + "learning_rate": 1.9419860718291265e-05, + "loss": 0.6397782564163208, + "step": 1051 + }, + { + "epoch": 0.30764731685918995, + "grad_norm": 1.1946031757535738, + "learning_rate": 1.9418236838517036e-05, + "loss": 0.589732825756073, + "step": 1052 + }, + { + "epoch": 0.30793975727445533, + "grad_norm": 1.4196894685331136, + "learning_rate": 1.941661075728721e-05, + "loss": 0.7968351244926453, + "step": 1053 + }, + { + "epoch": 0.3082321976897207, + "grad_norm": 1.35500416476017, + "learning_rate": 1.9414982474981877e-05, + "loss": 0.5740514397621155, + "step": 1054 + }, + { + "epoch": 0.3085246381049861, + "grad_norm": 1.314001411398827, + "learning_rate": 1.9413351991981632e-05, + "loss": 0.656599760055542, + "step": 1055 + }, + { + "epoch": 0.3088170785202515, + "grad_norm": 1.2592244001939052, + "learning_rate": 1.9411719308667593e-05, + "loss": 0.5638262033462524, + "step": 1056 + }, + { + "epoch": 0.3091095189355169, + "grad_norm": 1.3510783569743914, + "learning_rate": 1.9410084425421392e-05, + "loss": 0.6391294002532959, + "step": 1057 + }, + { + "epoch": 0.3094019593507823, + "grad_norm": 1.300451628146748, + "learning_rate": 1.9408447342625167e-05, + "loss": 0.7109906077384949, + "step": 1058 + }, + { + "epoch": 0.3096943997660477, + "grad_norm": 1.35271058872007, + "learning_rate": 1.9406808060661583e-05, + "loss": 
0.6922626495361328, + "step": 1059 + }, + { + "epoch": 0.30998684018131306, + "grad_norm": 1.3729160813047252, + "learning_rate": 1.9405166579913808e-05, + "loss": 0.6708151698112488, + "step": 1060 + }, + { + "epoch": 0.31027928059657844, + "grad_norm": 1.3049592711968918, + "learning_rate": 1.940352290076553e-05, + "loss": 0.6259905099868774, + "step": 1061 + }, + { + "epoch": 0.3105717210118438, + "grad_norm": 1.3047971530530311, + "learning_rate": 1.940187702360095e-05, + "loss": 0.6590703725814819, + "step": 1062 + }, + { + "epoch": 0.3108641614271092, + "grad_norm": 1.5136066296614852, + "learning_rate": 1.9400228948804777e-05, + "loss": 0.7371482849121094, + "step": 1063 + }, + { + "epoch": 0.3111566018423746, + "grad_norm": 1.3637094061000257, + "learning_rate": 1.9398578676762243e-05, + "loss": 0.6954984664916992, + "step": 1064 + }, + { + "epoch": 0.31144904225764003, + "grad_norm": 1.197618668709007, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.604501485824585, + "step": 1065 + }, + { + "epoch": 0.3117414826729054, + "grad_norm": 1.4637648544146704, + "learning_rate": 1.939527154248156e-05, + "loss": 0.7580305337905884, + "step": 1066 + }, + { + "epoch": 0.3120339230881708, + "grad_norm": 1.2774221611024956, + "learning_rate": 1.9393614681016443e-05, + "loss": 0.5996969938278198, + "step": 1067 + }, + { + "epoch": 0.3123263635034362, + "grad_norm": 1.2247945329694363, + "learning_rate": 1.9391955623851e-05, + "loss": 0.5939687490463257, + "step": 1068 + }, + { + "epoch": 0.31261880391870156, + "grad_norm": 1.2833481425507127, + "learning_rate": 1.939029437137304e-05, + "loss": 0.6194947957992554, + "step": 1069 + }, + { + "epoch": 0.31291124433396694, + "grad_norm": 1.406800587144287, + "learning_rate": 1.9388630923970862e-05, + "loss": 0.7419420480728149, + "step": 1070 + }, + { + "epoch": 0.3132036847492323, + "grad_norm": 1.4290715744520364, + "learning_rate": 1.938696528203329e-05, + "loss": 0.6950613856315613, + "step": 1071 + }, + { + 
"epoch": 0.3134961251644977, + "grad_norm": 1.542135386244918, + "learning_rate": 1.9385297445949657e-05, + "loss": 0.7376282215118408, + "step": 1072 + }, + { + "epoch": 0.31378856557976315, + "grad_norm": 1.4197281288148755, + "learning_rate": 1.938362741610981e-05, + "loss": 0.800892174243927, + "step": 1073 + }, + { + "epoch": 0.31408100599502853, + "grad_norm": 1.3238571566647774, + "learning_rate": 1.938195519290411e-05, + "loss": 0.5747013688087463, + "step": 1074 + }, + { + "epoch": 0.3143734464102939, + "grad_norm": 1.3986418419585354, + "learning_rate": 1.9380280776723422e-05, + "loss": 0.7341697216033936, + "step": 1075 + }, + { + "epoch": 0.3146658868255593, + "grad_norm": 1.2703636347468634, + "learning_rate": 1.9378604167959138e-05, + "loss": 0.6229791641235352, + "step": 1076 + }, + { + "epoch": 0.3149583272408247, + "grad_norm": 1.2978515497126824, + "learning_rate": 1.937692536700315e-05, + "loss": 0.7266645431518555, + "step": 1077 + }, + { + "epoch": 0.31525076765609006, + "grad_norm": 1.1885775086050685, + "learning_rate": 1.937524437424787e-05, + "loss": 0.6163127422332764, + "step": 1078 + }, + { + "epoch": 0.31554320807135544, + "grad_norm": 1.3182925237610392, + "learning_rate": 1.9373561190086225e-05, + "loss": 0.6609925031661987, + "step": 1079 + }, + { + "epoch": 0.3158356484866208, + "grad_norm": 1.305005533703013, + "learning_rate": 1.937187581491164e-05, + "loss": 0.7157741785049438, + "step": 1080 + }, + { + "epoch": 0.31612808890188626, + "grad_norm": 1.286980397276467, + "learning_rate": 1.937018824911807e-05, + "loss": 0.6486212015151978, + "step": 1081 + }, + { + "epoch": 0.31642052931715164, + "grad_norm": 1.3383942599839993, + "learning_rate": 1.9368498493099963e-05, + "loss": 0.6931928396224976, + "step": 1082 + }, + { + "epoch": 0.316712969732417, + "grad_norm": 1.3241410530363713, + "learning_rate": 1.9366806547252295e-05, + "loss": 0.9291354417800903, + "step": 1083 + }, + { + "epoch": 0.3170054101476824, + "grad_norm": 
1.247174604159187, + "learning_rate": 1.936511241197055e-05, + "loss": 0.6276642084121704, + "step": 1084 + }, + { + "epoch": 0.3172978505629478, + "grad_norm": 1.5622111951472988, + "learning_rate": 1.936341608765072e-05, + "loss": 0.7836581468582153, + "step": 1085 + }, + { + "epoch": 0.3175902909782132, + "grad_norm": 1.3508415072117352, + "learning_rate": 1.9361717574689308e-05, + "loss": 0.6785084009170532, + "step": 1086 + }, + { + "epoch": 0.31788273139347856, + "grad_norm": 1.2099617708073434, + "learning_rate": 1.936001687348333e-05, + "loss": 0.5715218782424927, + "step": 1087 + }, + { + "epoch": 0.318175171808744, + "grad_norm": 1.4697249093694587, + "learning_rate": 1.9358313984430324e-05, + "loss": 0.8417775630950928, + "step": 1088 + }, + { + "epoch": 0.3184676122240094, + "grad_norm": 1.2670961493626953, + "learning_rate": 1.935660890792832e-05, + "loss": 0.598076343536377, + "step": 1089 + }, + { + "epoch": 0.31876005263927476, + "grad_norm": 1.1923598198798329, + "learning_rate": 1.9354901644375876e-05, + "loss": 0.5830154418945312, + "step": 1090 + }, + { + "epoch": 0.31905249305454014, + "grad_norm": 1.307163759376097, + "learning_rate": 1.935319219417205e-05, + "loss": 0.5746437311172485, + "step": 1091 + }, + { + "epoch": 0.3193449334698055, + "grad_norm": 1.1091668518622428, + "learning_rate": 1.9351480557716414e-05, + "loss": 0.5520191788673401, + "step": 1092 + }, + { + "epoch": 0.3196373738850709, + "grad_norm": 1.218411442512637, + "learning_rate": 1.9349766735409058e-05, + "loss": 0.5847123861312866, + "step": 1093 + }, + { + "epoch": 0.3199298143003363, + "grad_norm": 1.568693203893066, + "learning_rate": 1.9348050727650577e-05, + "loss": 0.7390924692153931, + "step": 1094 + }, + { + "epoch": 0.3202222547156017, + "grad_norm": 1.4427842898109178, + "learning_rate": 1.9346332534842074e-05, + "loss": 0.5812145471572876, + "step": 1095 + }, + { + "epoch": 0.3205146951308671, + "grad_norm": 1.615769461575852, + "learning_rate": 
1.9344612157385166e-05, + "loss": 0.6958816647529602, + "step": 1096 + }, + { + "epoch": 0.3208071355461325, + "grad_norm": 1.3481467288956208, + "learning_rate": 1.9342889595681986e-05, + "loss": 0.5618177652359009, + "step": 1097 + }, + { + "epoch": 0.3210995759613979, + "grad_norm": 1.2846515235734224, + "learning_rate": 1.9341164850135163e-05, + "loss": 0.6099411845207214, + "step": 1098 + }, + { + "epoch": 0.32139201637666326, + "grad_norm": 1.1242331249756639, + "learning_rate": 1.9339437921147854e-05, + "loss": 0.6772094964981079, + "step": 1099 + }, + { + "epoch": 0.32168445679192864, + "grad_norm": 1.4006184046576602, + "learning_rate": 1.9337708809123718e-05, + "loss": 0.6916643381118774, + "step": 1100 + }, + { + "epoch": 0.321976897207194, + "grad_norm": 1.199381767960838, + "learning_rate": 1.933597751446692e-05, + "loss": 0.5716762542724609, + "step": 1101 + }, + { + "epoch": 0.3222693376224594, + "grad_norm": 1.511781401125701, + "learning_rate": 1.9334244037582143e-05, + "loss": 0.68224036693573, + "step": 1102 + }, + { + "epoch": 0.3225617780377248, + "grad_norm": 1.3199204633429549, + "learning_rate": 1.933250837887457e-05, + "loss": 0.6888231635093689, + "step": 1103 + }, + { + "epoch": 0.3228542184529902, + "grad_norm": 1.4809797608653643, + "learning_rate": 1.933077053874991e-05, + "loss": 0.6469036340713501, + "step": 1104 + }, + { + "epoch": 0.3231466588682556, + "grad_norm": 1.5099365665086963, + "learning_rate": 1.932903051761437e-05, + "loss": 0.6202501058578491, + "step": 1105 + }, + { + "epoch": 0.323439099283521, + "grad_norm": 1.454362918518285, + "learning_rate": 1.932728831587467e-05, + "loss": 0.6041314601898193, + "step": 1106 + }, + { + "epoch": 0.3237315396987864, + "grad_norm": 1.3479422917529533, + "learning_rate": 1.9325543933938034e-05, + "loss": 0.7081667184829712, + "step": 1107 + }, + { + "epoch": 0.32402398011405176, + "grad_norm": 1.477559211803618, + "learning_rate": 1.9323797372212204e-05, + "loss": 0.7743494510650635, 
+ "step": 1108 + }, + { + "epoch": 0.32431642052931714, + "grad_norm": 1.3188148010775738, + "learning_rate": 1.9322048631105428e-05, + "loss": 0.6122584342956543, + "step": 1109 + }, + { + "epoch": 0.3246088609445825, + "grad_norm": 1.279178726850882, + "learning_rate": 1.932029771102646e-05, + "loss": 0.6106122732162476, + "step": 1110 + }, + { + "epoch": 0.3249013013598479, + "grad_norm": 1.1897376224269591, + "learning_rate": 1.9318544612384572e-05, + "loss": 0.5082784295082092, + "step": 1111 + }, + { + "epoch": 0.32519374177511334, + "grad_norm": 1.3081590787355515, + "learning_rate": 1.9316789335589542e-05, + "loss": 0.6845188140869141, + "step": 1112 + }, + { + "epoch": 0.3254861821903787, + "grad_norm": 1.343292960468675, + "learning_rate": 1.9315031881051653e-05, + "loss": 0.5972481966018677, + "step": 1113 + }, + { + "epoch": 0.3257786226056441, + "grad_norm": 1.372744387816622, + "learning_rate": 1.931327224918169e-05, + "loss": 0.6312427520751953, + "step": 1114 + }, + { + "epoch": 0.3260710630209095, + "grad_norm": 1.6334469145871557, + "learning_rate": 1.9311510440390973e-05, + "loss": 0.7904551029205322, + "step": 1115 + }, + { + "epoch": 0.3263635034361749, + "grad_norm": 1.4496533611968336, + "learning_rate": 1.9309746455091302e-05, + "loss": 0.6513646841049194, + "step": 1116 + }, + { + "epoch": 0.32665594385144026, + "grad_norm": 1.2559107839078971, + "learning_rate": 1.9307980293694997e-05, + "loss": 0.5349715948104858, + "step": 1117 + }, + { + "epoch": 0.32694838426670564, + "grad_norm": 1.122130050588245, + "learning_rate": 1.93062119566149e-05, + "loss": 0.5815087556838989, + "step": 1118 + }, + { + "epoch": 0.327240824681971, + "grad_norm": 1.3638823451289013, + "learning_rate": 1.9304441444264335e-05, + "loss": 0.6380286812782288, + "step": 1119 + }, + { + "epoch": 0.32753326509723646, + "grad_norm": 1.2646470578382853, + "learning_rate": 1.9302668757057157e-05, + "loss": 0.7222728729248047, + "step": 1120 + }, + { + "epoch": 
0.32782570551250184, + "grad_norm": 1.1611022114208025, + "learning_rate": 1.9300893895407715e-05, + "loss": 0.6262868642807007, + "step": 1121 + }, + { + "epoch": 0.3281181459277672, + "grad_norm": 1.522127361747668, + "learning_rate": 1.929911685973088e-05, + "loss": 0.6387197971343994, + "step": 1122 + }, + { + "epoch": 0.3284105863430326, + "grad_norm": 1.3879760662124887, + "learning_rate": 1.9297337650442015e-05, + "loss": 0.77378249168396, + "step": 1123 + }, + { + "epoch": 0.328703026758298, + "grad_norm": 1.3651260322738243, + "learning_rate": 1.9295556267957004e-05, + "loss": 0.7589142322540283, + "step": 1124 + }, + { + "epoch": 0.32899546717356337, + "grad_norm": 1.481283036614999, + "learning_rate": 1.9293772712692233e-05, + "loss": 0.7153090238571167, + "step": 1125 + }, + { + "epoch": 0.32928790758882875, + "grad_norm": 1.2572705841763243, + "learning_rate": 1.9291986985064595e-05, + "loss": 0.5738104581832886, + "step": 1126 + }, + { + "epoch": 0.3295803480040942, + "grad_norm": 1.2803221849130417, + "learning_rate": 1.92901990854915e-05, + "loss": 0.6530819535255432, + "step": 1127 + }, + { + "epoch": 0.3298727884193596, + "grad_norm": 1.221270033991816, + "learning_rate": 1.9288409014390854e-05, + "loss": 0.59107506275177, + "step": 1128 + }, + { + "epoch": 0.33016522883462496, + "grad_norm": 1.4658806562930384, + "learning_rate": 1.9286616772181072e-05, + "loss": 0.5798863172531128, + "step": 1129 + }, + { + "epoch": 0.33045766924989034, + "grad_norm": 1.689951660615568, + "learning_rate": 1.9284822359281085e-05, + "loss": 0.6957223415374756, + "step": 1130 + }, + { + "epoch": 0.3307501096651557, + "grad_norm": 1.3614959188818774, + "learning_rate": 1.9283025776110326e-05, + "loss": 0.6933379173278809, + "step": 1131 + }, + { + "epoch": 0.3310425500804211, + "grad_norm": 1.2712490538707164, + "learning_rate": 1.928122702308873e-05, + "loss": 0.527482271194458, + "step": 1132 + }, + { + "epoch": 0.3313349904956865, + "grad_norm": 
1.2166131933862214, + "learning_rate": 1.927942610063675e-05, + "loss": 0.7244399785995483, + "step": 1133 + }, + { + "epoch": 0.33162743091095187, + "grad_norm": 1.4636848406157517, + "learning_rate": 1.9277623009175338e-05, + "loss": 0.7881563901901245, + "step": 1134 + }, + { + "epoch": 0.3319198713262173, + "grad_norm": 1.415089568819196, + "learning_rate": 1.9275817749125956e-05, + "loss": 0.7523232698440552, + "step": 1135 + }, + { + "epoch": 0.3322123117414827, + "grad_norm": 1.5537538186729503, + "learning_rate": 1.9274010320910575e-05, + "loss": 0.7226657867431641, + "step": 1136 + }, + { + "epoch": 0.3325047521567481, + "grad_norm": 1.4230293062648038, + "learning_rate": 1.9272200724951666e-05, + "loss": 0.6461686491966248, + "step": 1137 + }, + { + "epoch": 0.33279719257201346, + "grad_norm": 1.1785466753796996, + "learning_rate": 1.9270388961672214e-05, + "loss": 0.6343599557876587, + "step": 1138 + }, + { + "epoch": 0.33308963298727884, + "grad_norm": 1.2762072218920462, + "learning_rate": 1.926857503149571e-05, + "loss": 0.5510993599891663, + "step": 1139 + }, + { + "epoch": 0.3333820734025442, + "grad_norm": 1.3887196408907312, + "learning_rate": 1.9266758934846142e-05, + "loss": 0.6022439002990723, + "step": 1140 + }, + { + "epoch": 0.3336745138178096, + "grad_norm": 1.2716403438701216, + "learning_rate": 1.9264940672148018e-05, + "loss": 0.708207368850708, + "step": 1141 + }, + { + "epoch": 0.333966954233075, + "grad_norm": 1.3137035916667523, + "learning_rate": 1.9263120243826345e-05, + "loss": 0.566935122013092, + "step": 1142 + }, + { + "epoch": 0.3342593946483404, + "grad_norm": 1.634118861015607, + "learning_rate": 1.9261297650306635e-05, + "loss": 0.6848355531692505, + "step": 1143 + }, + { + "epoch": 0.3345518350636058, + "grad_norm": 1.3058715943169161, + "learning_rate": 1.9259472892014907e-05, + "loss": 0.7335090637207031, + "step": 1144 + }, + { + "epoch": 0.3348442754788712, + "grad_norm": 1.425387946547354, + "learning_rate": 
1.925764596937769e-05, + "loss": 0.7323876023292542, + "step": 1145 + }, + { + "epoch": 0.33513671589413657, + "grad_norm": 1.377083094919456, + "learning_rate": 1.9255816882822017e-05, + "loss": 0.5564731955528259, + "step": 1146 + }, + { + "epoch": 0.33542915630940195, + "grad_norm": 1.5323554162589257, + "learning_rate": 1.925398563277542e-05, + "loss": 0.7699049711227417, + "step": 1147 + }, + { + "epoch": 0.33572159672466734, + "grad_norm": 1.4580663324783634, + "learning_rate": 1.925215221966595e-05, + "loss": 0.688602089881897, + "step": 1148 + }, + { + "epoch": 0.3360140371399327, + "grad_norm": 1.4706838569192882, + "learning_rate": 1.9250316643922153e-05, + "loss": 0.7103208899497986, + "step": 1149 + }, + { + "epoch": 0.3363064775551981, + "grad_norm": 1.2723502109555263, + "learning_rate": 1.9248478905973078e-05, + "loss": 0.6313603520393372, + "step": 1150 + }, + { + "epoch": 0.33659891797046354, + "grad_norm": 1.4985289931464978, + "learning_rate": 1.9246639006248294e-05, + "loss": 0.8420118093490601, + "step": 1151 + }, + { + "epoch": 0.3368913583857289, + "grad_norm": 1.4358130705661303, + "learning_rate": 1.9244796945177864e-05, + "loss": 0.6566640734672546, + "step": 1152 + }, + { + "epoch": 0.3371837988009943, + "grad_norm": 1.3408154011751006, + "learning_rate": 1.9242952723192357e-05, + "loss": 0.6455206274986267, + "step": 1153 + }, + { + "epoch": 0.3374762392162597, + "grad_norm": 1.3469873034007918, + "learning_rate": 1.924110634072285e-05, + "loss": 0.7348071336746216, + "step": 1154 + }, + { + "epoch": 0.33776867963152507, + "grad_norm": 1.7471975705727423, + "learning_rate": 1.9239257798200918e-05, + "loss": 0.7187973260879517, + "step": 1155 + }, + { + "epoch": 0.33806112004679045, + "grad_norm": 1.2712100505239146, + "learning_rate": 1.9237407096058655e-05, + "loss": 0.683181643486023, + "step": 1156 + }, + { + "epoch": 0.33835356046205584, + "grad_norm": 1.2445760635583791, + "learning_rate": 1.9235554234728646e-05, + "loss": 
0.7296931743621826, + "step": 1157 + }, + { + "epoch": 0.3386460008773212, + "grad_norm": 1.1890040509691011, + "learning_rate": 1.923369921464399e-05, + "loss": 0.6656480431556702, + "step": 1158 + }, + { + "epoch": 0.33893844129258666, + "grad_norm": 1.3248976863888173, + "learning_rate": 1.923184203623828e-05, + "loss": 0.6284874677658081, + "step": 1159 + }, + { + "epoch": 0.33923088170785204, + "grad_norm": 1.29677623825286, + "learning_rate": 1.922998269994563e-05, + "loss": 0.7065030336380005, + "step": 1160 + }, + { + "epoch": 0.3395233221231174, + "grad_norm": 1.3728212504218815, + "learning_rate": 1.9228121206200637e-05, + "loss": 0.7077580690383911, + "step": 1161 + }, + { + "epoch": 0.3398157625383828, + "grad_norm": 1.4855746260471363, + "learning_rate": 1.9226257555438428e-05, + "loss": 0.6012637615203857, + "step": 1162 + }, + { + "epoch": 0.3401082029536482, + "grad_norm": 1.304745585816947, + "learning_rate": 1.9224391748094607e-05, + "loss": 0.8166115283966064, + "step": 1163 + }, + { + "epoch": 0.34040064336891357, + "grad_norm": 1.502407347484804, + "learning_rate": 1.92225237846053e-05, + "loss": 0.6066576242446899, + "step": 1164 + }, + { + "epoch": 0.34069308378417895, + "grad_norm": 1.2739635711375565, + "learning_rate": 1.922065366540713e-05, + "loss": 0.7226361632347107, + "step": 1165 + }, + { + "epoch": 0.3409855241994444, + "grad_norm": 1.6597939275709621, + "learning_rate": 1.9218781390937233e-05, + "loss": 0.7786005139350891, + "step": 1166 + }, + { + "epoch": 0.3412779646147098, + "grad_norm": 1.4812475412632635, + "learning_rate": 1.9216906961633234e-05, + "loss": 0.6534268856048584, + "step": 1167 + }, + { + "epoch": 0.34157040502997515, + "grad_norm": 1.2208380860350694, + "learning_rate": 1.9215030377933274e-05, + "loss": 0.6048434376716614, + "step": 1168 + }, + { + "epoch": 0.34186284544524054, + "grad_norm": 1.356773734579803, + "learning_rate": 1.921315164027599e-05, + "loss": 0.8321201205253601, + "step": 1169 + }, + { + 
"epoch": 0.3421552858605059, + "grad_norm": 1.5327102242092, + "learning_rate": 1.9211270749100527e-05, + "loss": 0.7142379283905029, + "step": 1170 + }, + { + "epoch": 0.3424477262757713, + "grad_norm": 1.205344060714777, + "learning_rate": 1.9209387704846535e-05, + "loss": 0.6262812614440918, + "step": 1171 + }, + { + "epoch": 0.3427401666910367, + "grad_norm": 1.4345003452190939, + "learning_rate": 1.920750250795416e-05, + "loss": 0.7242386341094971, + "step": 1172 + }, + { + "epoch": 0.34303260710630207, + "grad_norm": 1.3077522067723235, + "learning_rate": 1.9205615158864054e-05, + "loss": 0.6064128875732422, + "step": 1173 + }, + { + "epoch": 0.3433250475215675, + "grad_norm": 1.3868970600691566, + "learning_rate": 1.9203725658017374e-05, + "loss": 0.6720623970031738, + "step": 1174 + }, + { + "epoch": 0.3436174879368329, + "grad_norm": 1.3281016407079367, + "learning_rate": 1.9201834005855785e-05, + "loss": 0.745712161064148, + "step": 1175 + }, + { + "epoch": 0.34390992835209827, + "grad_norm": 1.3336156391355163, + "learning_rate": 1.9199940202821445e-05, + "loss": 0.6387969255447388, + "step": 1176 + }, + { + "epoch": 0.34420236876736365, + "grad_norm": 1.3104807608053473, + "learning_rate": 1.9198044249357018e-05, + "loss": 0.6634984612464905, + "step": 1177 + }, + { + "epoch": 0.34449480918262904, + "grad_norm": 1.0968916991502757, + "learning_rate": 1.919614614590567e-05, + "loss": 0.4732145667076111, + "step": 1178 + }, + { + "epoch": 0.3447872495978944, + "grad_norm": 1.4315145497505135, + "learning_rate": 1.9194245892911077e-05, + "loss": 0.6621897220611572, + "step": 1179 + }, + { + "epoch": 0.3450796900131598, + "grad_norm": 1.3492357768294603, + "learning_rate": 1.9192343490817412e-05, + "loss": 0.5691112279891968, + "step": 1180 + }, + { + "epoch": 0.3453721304284252, + "grad_norm": 1.5009979829344267, + "learning_rate": 1.919043894006934e-05, + "loss": 0.6326683759689331, + "step": 1181 + }, + { + "epoch": 0.3456645708436906, + "grad_norm": 
1.3965991672121214, + "learning_rate": 1.9188532241112047e-05, + "loss": 0.6068567037582397, + "step": 1182 + }, + { + "epoch": 0.345957011258956, + "grad_norm": 1.5425955582670972, + "learning_rate": 1.918662339439121e-05, + "loss": 0.707065761089325, + "step": 1183 + }, + { + "epoch": 0.3462494516742214, + "grad_norm": 1.4506511105853803, + "learning_rate": 1.9184712400353008e-05, + "loss": 0.7821887135505676, + "step": 1184 + }, + { + "epoch": 0.34654189208948677, + "grad_norm": 1.3192596730278041, + "learning_rate": 1.918279925944413e-05, + "loss": 0.6759425401687622, + "step": 1185 + }, + { + "epoch": 0.34683433250475215, + "grad_norm": 1.2819153702423505, + "learning_rate": 1.9180883972111756e-05, + "loss": 0.5660048127174377, + "step": 1186 + }, + { + "epoch": 0.34712677292001753, + "grad_norm": 1.392117573401842, + "learning_rate": 1.9178966538803574e-05, + "loss": 0.708798885345459, + "step": 1187 + }, + { + "epoch": 0.3474192133352829, + "grad_norm": 1.0828895012382165, + "learning_rate": 1.9177046959967774e-05, + "loss": 0.603208065032959, + "step": 1188 + }, + { + "epoch": 0.3477116537505483, + "grad_norm": 1.2856052178527815, + "learning_rate": 1.9175125236053043e-05, + "loss": 0.8259323835372925, + "step": 1189 + }, + { + "epoch": 0.34800409416581374, + "grad_norm": 1.2349901090123199, + "learning_rate": 1.9173201367508572e-05, + "loss": 0.573014497756958, + "step": 1190 + }, + { + "epoch": 0.3482965345810791, + "grad_norm": 1.49130421629148, + "learning_rate": 1.9171275354784062e-05, + "loss": 0.8202974200248718, + "step": 1191 + }, + { + "epoch": 0.3485889749963445, + "grad_norm": 1.313328733803151, + "learning_rate": 1.9169347198329693e-05, + "loss": 0.5352192521095276, + "step": 1192 + }, + { + "epoch": 0.3488814154116099, + "grad_norm": 1.4707600848748155, + "learning_rate": 1.916741689859617e-05, + "loss": 0.7303881645202637, + "step": 1193 + }, + { + "epoch": 0.34917385582687527, + "grad_norm": 1.136402601726834, + "learning_rate": 
1.9165484456034683e-05, + "loss": 0.670224666595459, + "step": 1194 + }, + { + "epoch": 0.34946629624214065, + "grad_norm": 1.213410956274994, + "learning_rate": 1.9163549871096934e-05, + "loss": 0.7311158776283264, + "step": 1195 + }, + { + "epoch": 0.34975873665740603, + "grad_norm": 1.3163563045896416, + "learning_rate": 1.9161613144235117e-05, + "loss": 0.6346032619476318, + "step": 1196 + }, + { + "epoch": 0.3500511770726714, + "grad_norm": 1.3538502473866518, + "learning_rate": 1.9159674275901932e-05, + "loss": 0.66914302110672, + "step": 1197 + }, + { + "epoch": 0.35034361748793685, + "grad_norm": 1.408804907617288, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.6775194406509399, + "step": 1198 + }, + { + "epoch": 0.35063605790320224, + "grad_norm": 1.3219370751555166, + "learning_rate": 1.915579011663475e-05, + "loss": 0.6887085437774658, + "step": 1199 + }, + { + "epoch": 0.3509284983184676, + "grad_norm": 1.4120877262018603, + "learning_rate": 1.9153844826608652e-05, + "loss": 0.7474929690361023, + "step": 1200 + }, + { + "epoch": 0.351220938733733, + "grad_norm": 1.3551417524104399, + "learning_rate": 1.915189739692698e-05, + "loss": 0.5665907859802246, + "step": 1201 + }, + { + "epoch": 0.3515133791489984, + "grad_norm": 1.4582334765772325, + "learning_rate": 1.9149947828044938e-05, + "loss": 0.6044580340385437, + "step": 1202 + }, + { + "epoch": 0.35180581956426377, + "grad_norm": 1.1481279810019642, + "learning_rate": 1.914799612041822e-05, + "loss": 0.6590601205825806, + "step": 1203 + }, + { + "epoch": 0.35209825997952915, + "grad_norm": 1.1796025597233206, + "learning_rate": 1.9146042274503033e-05, + "loss": 0.5204451084136963, + "step": 1204 + }, + { + "epoch": 0.3523907003947946, + "grad_norm": 1.3267878452954167, + "learning_rate": 1.9144086290756077e-05, + "loss": 0.6036473512649536, + "step": 1205 + }, + { + "epoch": 0.35268314081005997, + "grad_norm": 1.438922587418907, + "learning_rate": 1.914212816963454e-05, + "loss": 
0.5652757883071899, + "step": 1206 + }, + { + "epoch": 0.35297558122532535, + "grad_norm": 1.3041918712359999, + "learning_rate": 1.9140167911596133e-05, + "loss": 0.707310676574707, + "step": 1207 + }, + { + "epoch": 0.35326802164059073, + "grad_norm": 1.4881761799215045, + "learning_rate": 1.9138205517099048e-05, + "loss": 0.8539729714393616, + "step": 1208 + }, + { + "epoch": 0.3535604620558561, + "grad_norm": 1.6882139973772572, + "learning_rate": 1.9136240986601986e-05, + "loss": 0.6502546072006226, + "step": 1209 + }, + { + "epoch": 0.3538529024711215, + "grad_norm": 1.3852727639404194, + "learning_rate": 1.9134274320564145e-05, + "loss": 0.7279889583587646, + "step": 1210 + }, + { + "epoch": 0.3541453428863869, + "grad_norm": 1.380784482123245, + "learning_rate": 1.9132305519445215e-05, + "loss": 0.6916895508766174, + "step": 1211 + }, + { + "epoch": 0.35443778330165226, + "grad_norm": 1.3426787464995344, + "learning_rate": 1.9130334583705395e-05, + "loss": 0.6941961050033569, + "step": 1212 + }, + { + "epoch": 0.3547302237169177, + "grad_norm": 1.211958694677935, + "learning_rate": 1.912836151380538e-05, + "loss": 0.6686822175979614, + "step": 1213 + }, + { + "epoch": 0.3550226641321831, + "grad_norm": 1.4335891633323221, + "learning_rate": 1.912638631020636e-05, + "loss": 0.818913459777832, + "step": 1214 + }, + { + "epoch": 0.35531510454744847, + "grad_norm": 1.3902796641746433, + "learning_rate": 1.9124408973370034e-05, + "loss": 0.6461240649223328, + "step": 1215 + }, + { + "epoch": 0.35560754496271385, + "grad_norm": 1.3521177557458626, + "learning_rate": 1.9122429503758586e-05, + "loss": 0.6982225179672241, + "step": 1216 + }, + { + "epoch": 0.35589998537797923, + "grad_norm": 1.2726818955529642, + "learning_rate": 1.9120447901834708e-05, + "loss": 0.6319124698638916, + "step": 1217 + }, + { + "epoch": 0.3561924257932446, + "grad_norm": 1.4379853975185637, + "learning_rate": 1.9118464168061584e-05, + "loss": 0.7092441320419312, + "step": 1218 + }, + { 
+ "epoch": 0.35648486620851, + "grad_norm": 1.4989677994022448, + "learning_rate": 1.9116478302902904e-05, + "loss": 0.7696874141693115, + "step": 1219 + }, + { + "epoch": 0.3567773066237754, + "grad_norm": 1.348418923049424, + "learning_rate": 1.9114490306822846e-05, + "loss": 0.6944275498390198, + "step": 1220 + }, + { + "epoch": 0.3570697470390408, + "grad_norm": 1.4736146352332777, + "learning_rate": 1.9112500180286098e-05, + "loss": 0.6179015636444092, + "step": 1221 + }, + { + "epoch": 0.3573621874543062, + "grad_norm": 1.4832162039625727, + "learning_rate": 1.911050792375784e-05, + "loss": 0.6964149475097656, + "step": 1222 + }, + { + "epoch": 0.3576546278695716, + "grad_norm": 1.314680516503926, + "learning_rate": 1.9108513537703746e-05, + "loss": 0.6923096776008606, + "step": 1223 + }, + { + "epoch": 0.35794706828483697, + "grad_norm": 1.3108509564109556, + "learning_rate": 1.9106517022589993e-05, + "loss": 0.5205660462379456, + "step": 1224 + }, + { + "epoch": 0.35823950870010235, + "grad_norm": 1.604738205722927, + "learning_rate": 1.910451837888325e-05, + "loss": 0.7488006353378296, + "step": 1225 + }, + { + "epoch": 0.35853194911536773, + "grad_norm": 1.1847506052614252, + "learning_rate": 1.91025176070507e-05, + "loss": 0.5414390563964844, + "step": 1226 + }, + { + "epoch": 0.3588243895306331, + "grad_norm": 1.2745914596652235, + "learning_rate": 1.910051470756e-05, + "loss": 0.6891577839851379, + "step": 1227 + }, + { + "epoch": 0.3591168299458985, + "grad_norm": 1.3018823092824294, + "learning_rate": 1.9098509680879318e-05, + "loss": 0.6496376991271973, + "step": 1228 + }, + { + "epoch": 0.35940927036116394, + "grad_norm": 1.386313672695145, + "learning_rate": 1.909650252747732e-05, + "loss": 0.758609414100647, + "step": 1229 + }, + { + "epoch": 0.3597017107764293, + "grad_norm": 1.544442120518355, + "learning_rate": 1.9094493247823164e-05, + "loss": 0.7509145736694336, + "step": 1230 + }, + { + "epoch": 0.3599941511916947, + "grad_norm": 
1.2125512669659357, + "learning_rate": 1.9092481842386506e-05, + "loss": 0.7432405352592468, + "step": 1231 + }, + { + "epoch": 0.3602865916069601, + "grad_norm": 1.4492900887661606, + "learning_rate": 1.90904683116375e-05, + "loss": 0.7208698391914368, + "step": 1232 + }, + { + "epoch": 0.36057903202222547, + "grad_norm": 1.4245050002638069, + "learning_rate": 1.9088452656046798e-05, + "loss": 0.638593852519989, + "step": 1233 + }, + { + "epoch": 0.36087147243749085, + "grad_norm": 1.4428449097608804, + "learning_rate": 1.9086434876085548e-05, + "loss": 0.6663007736206055, + "step": 1234 + }, + { + "epoch": 0.36116391285275623, + "grad_norm": 1.4112526680406456, + "learning_rate": 1.908441497222539e-05, + "loss": 0.7132781744003296, + "step": 1235 + }, + { + "epoch": 0.3614563532680216, + "grad_norm": 1.3024532647304885, + "learning_rate": 1.9082392944938467e-05, + "loss": 0.6545308828353882, + "step": 1236 + }, + { + "epoch": 0.36174879368328705, + "grad_norm": 1.1385624733680002, + "learning_rate": 1.908036879469741e-05, + "loss": 0.7525626420974731, + "step": 1237 + }, + { + "epoch": 0.36204123409855243, + "grad_norm": 1.4286424106237192, + "learning_rate": 1.9078342521975365e-05, + "loss": 0.7336804866790771, + "step": 1238 + }, + { + "epoch": 0.3623336745138178, + "grad_norm": 1.3025101490885231, + "learning_rate": 1.907631412724595e-05, + "loss": 0.5822359323501587, + "step": 1239 + }, + { + "epoch": 0.3626261149290832, + "grad_norm": 1.1928464678887247, + "learning_rate": 1.907428361098329e-05, + "loss": 0.6110040545463562, + "step": 1240 + }, + { + "epoch": 0.3629185553443486, + "grad_norm": 1.3329812952112776, + "learning_rate": 1.9072250973662008e-05, + "loss": 0.5363205671310425, + "step": 1241 + }, + { + "epoch": 0.36321099575961396, + "grad_norm": 1.298737392722519, + "learning_rate": 1.9070216215757225e-05, + "loss": 0.6804911494255066, + "step": 1242 + }, + { + "epoch": 0.36350343617487935, + "grad_norm": 1.306574555012534, + "learning_rate": 
1.906817933774455e-05, + "loss": 0.5670056343078613, + "step": 1243 + }, + { + "epoch": 0.3637958765901448, + "grad_norm": 1.6342501045897717, + "learning_rate": 1.9066140340100086e-05, + "loss": 0.6839423775672913, + "step": 1244 + }, + { + "epoch": 0.36408831700541017, + "grad_norm": 1.3102468000864722, + "learning_rate": 1.906409922330044e-05, + "loss": 0.6512447595596313, + "step": 1245 + }, + { + "epoch": 0.36438075742067555, + "grad_norm": 1.3767881480650324, + "learning_rate": 1.9062055987822713e-05, + "loss": 0.6602088212966919, + "step": 1246 + }, + { + "epoch": 0.36467319783594093, + "grad_norm": 1.3684046563228518, + "learning_rate": 1.9060010634144502e-05, + "loss": 0.6859074831008911, + "step": 1247 + }, + { + "epoch": 0.3649656382512063, + "grad_norm": 1.4029132597681886, + "learning_rate": 1.9057963162743888e-05, + "loss": 0.6871531009674072, + "step": 1248 + }, + { + "epoch": 0.3652580786664717, + "grad_norm": 1.2778457575589584, + "learning_rate": 1.9055913574099454e-05, + "loss": 0.7396048307418823, + "step": 1249 + }, + { + "epoch": 0.3655505190817371, + "grad_norm": 2.313748947770577, + "learning_rate": 1.9053861868690283e-05, + "loss": 0.7013602256774902, + "step": 1250 + }, + { + "epoch": 0.36584295949700246, + "grad_norm": 1.2854553849472183, + "learning_rate": 1.905180804699595e-05, + "loss": 0.6355527639389038, + "step": 1251 + }, + { + "epoch": 0.3661353999122679, + "grad_norm": 1.1923686434429392, + "learning_rate": 1.9049752109496526e-05, + "loss": 0.6869304180145264, + "step": 1252 + }, + { + "epoch": 0.3664278403275333, + "grad_norm": 1.2404032301108463, + "learning_rate": 1.9047694056672566e-05, + "loss": 0.5267671346664429, + "step": 1253 + }, + { + "epoch": 0.36672028074279867, + "grad_norm": 1.2479293372256655, + "learning_rate": 1.9045633889005134e-05, + "loss": 0.6586635112762451, + "step": 1254 + }, + { + "epoch": 0.36701272115806405, + "grad_norm": 1.2783901733768512, + "learning_rate": 1.9043571606975776e-05, + "loss": 
0.6743361949920654, + "step": 1255 + }, + { + "epoch": 0.36730516157332943, + "grad_norm": 1.267912865737822, + "learning_rate": 1.9041507211066543e-05, + "loss": 0.5779668688774109, + "step": 1256 + }, + { + "epoch": 0.3675976019885948, + "grad_norm": 1.240910914837657, + "learning_rate": 1.9039440701759972e-05, + "loss": 0.693313479423523, + "step": 1257 + }, + { + "epoch": 0.3678900424038602, + "grad_norm": 1.2581810913293596, + "learning_rate": 1.9037372079539096e-05, + "loss": 0.6314960718154907, + "step": 1258 + }, + { + "epoch": 0.3681824828191256, + "grad_norm": 1.4026915606466803, + "learning_rate": 1.9035301344887445e-05, + "loss": 0.6483266949653625, + "step": 1259 + }, + { + "epoch": 0.368474923234391, + "grad_norm": 1.1963714897771014, + "learning_rate": 1.903322849828904e-05, + "loss": 0.5896739959716797, + "step": 1260 + }, + { + "epoch": 0.3687673636496564, + "grad_norm": 1.3246139419549132, + "learning_rate": 1.9031153540228398e-05, + "loss": 0.6760983467102051, + "step": 1261 + }, + { + "epoch": 0.3690598040649218, + "grad_norm": 1.409129098147532, + "learning_rate": 1.9029076471190525e-05, + "loss": 0.7453440427780151, + "step": 1262 + }, + { + "epoch": 0.36935224448018716, + "grad_norm": 1.4768395375517958, + "learning_rate": 1.9026997291660926e-05, + "loss": 0.7382408380508423, + "step": 1263 + }, + { + "epoch": 0.36964468489545255, + "grad_norm": 1.3416426687197567, + "learning_rate": 1.9024916002125594e-05, + "loss": 0.6420471668243408, + "step": 1264 + }, + { + "epoch": 0.36993712531071793, + "grad_norm": 1.406350116015231, + "learning_rate": 1.9022832603071017e-05, + "loss": 0.6436389684677124, + "step": 1265 + }, + { + "epoch": 0.3702295657259833, + "grad_norm": 1.3047843220477244, + "learning_rate": 1.9020747094984182e-05, + "loss": 0.689171314239502, + "step": 1266 + }, + { + "epoch": 0.3705220061412487, + "grad_norm": 1.2640328794263636, + "learning_rate": 1.9018659478352556e-05, + "loss": 0.6704196333885193, + "step": 1267 + }, + { + 
"epoch": 0.37081444655651413, + "grad_norm": 2.0690106215423536, + "learning_rate": 1.9016569753664118e-05, + "loss": 0.6598329544067383, + "step": 1268 + }, + { + "epoch": 0.3711068869717795, + "grad_norm": 1.8262603065561684, + "learning_rate": 1.901447792140732e-05, + "loss": 0.7353986501693726, + "step": 1269 + }, + { + "epoch": 0.3713993273870449, + "grad_norm": 1.4285098808767827, + "learning_rate": 1.9012383982071112e-05, + "loss": 0.666167140007019, + "step": 1270 + }, + { + "epoch": 0.3716917678023103, + "grad_norm": 1.2598465904930443, + "learning_rate": 1.9010287936144948e-05, + "loss": 0.6097015738487244, + "step": 1271 + }, + { + "epoch": 0.37198420821757566, + "grad_norm": 1.107025542737965, + "learning_rate": 1.9008189784118764e-05, + "loss": 0.6352437138557434, + "step": 1272 + }, + { + "epoch": 0.37227664863284105, + "grad_norm": 1.5662430122293758, + "learning_rate": 1.9006089526482982e-05, + "loss": 0.6686104536056519, + "step": 1273 + }, + { + "epoch": 0.3725690890481064, + "grad_norm": 1.1719719158143125, + "learning_rate": 1.9003987163728535e-05, + "loss": 0.6504377126693726, + "step": 1274 + }, + { + "epoch": 0.3728615294633718, + "grad_norm": 1.2550627286183815, + "learning_rate": 1.9001882696346835e-05, + "loss": 0.5834585428237915, + "step": 1275 + }, + { + "epoch": 0.37315396987863725, + "grad_norm": 1.5420452194055032, + "learning_rate": 1.8999776124829788e-05, + "loss": 0.665432870388031, + "step": 1276 + }, + { + "epoch": 0.37344641029390263, + "grad_norm": 1.4022956370096276, + "learning_rate": 1.899766744966979e-05, + "loss": 0.659697949886322, + "step": 1277 + }, + { + "epoch": 0.373738850709168, + "grad_norm": 1.194404836566078, + "learning_rate": 1.899555667135973e-05, + "loss": 0.5703476071357727, + "step": 1278 + }, + { + "epoch": 0.3740312911244334, + "grad_norm": 1.5482464090140011, + "learning_rate": 1.8993443790392994e-05, + "loss": 0.809308648109436, + "step": 1279 + }, + { + "epoch": 0.3743237315396988, + "grad_norm": 
1.293354946450912, + "learning_rate": 1.8991328807263455e-05, + "loss": 0.7120508551597595, + "step": 1280 + }, + { + "epoch": 0.37461617195496416, + "grad_norm": 1.2261555314771986, + "learning_rate": 1.898921172246547e-05, + "loss": 0.625985860824585, + "step": 1281 + }, + { + "epoch": 0.37490861237022954, + "grad_norm": 1.3045001966325798, + "learning_rate": 1.898709253649391e-05, + "loss": 0.637261152267456, + "step": 1282 + }, + { + "epoch": 0.375201052785495, + "grad_norm": 1.3550273094265433, + "learning_rate": 1.89849712498441e-05, + "loss": 0.7420133352279663, + "step": 1283 + }, + { + "epoch": 0.37549349320076036, + "grad_norm": 1.2854448504545577, + "learning_rate": 1.8982847863011898e-05, + "loss": 0.6230417490005493, + "step": 1284 + }, + { + "epoch": 0.37578593361602575, + "grad_norm": 1.2127007776565961, + "learning_rate": 1.8980722376493622e-05, + "loss": 0.6896604299545288, + "step": 1285 + }, + { + "epoch": 0.37607837403129113, + "grad_norm": 1.3900367736992565, + "learning_rate": 1.8978594790786092e-05, + "loss": 0.5767710208892822, + "step": 1286 + }, + { + "epoch": 0.3763708144465565, + "grad_norm": 1.3829044432724817, + "learning_rate": 1.8976465106386625e-05, + "loss": 0.6945392489433289, + "step": 1287 + }, + { + "epoch": 0.3766632548618219, + "grad_norm": 1.215943914903153, + "learning_rate": 1.8974333323793014e-05, + "loss": 0.7208314538002014, + "step": 1288 + }, + { + "epoch": 0.3769556952770873, + "grad_norm": 1.5769052361743978, + "learning_rate": 1.8972199443503556e-05, + "loss": 0.7201139330863953, + "step": 1289 + }, + { + "epoch": 0.37724813569235266, + "grad_norm": 1.3366567930451483, + "learning_rate": 1.8970063466017028e-05, + "loss": 0.6791107654571533, + "step": 1290 + }, + { + "epoch": 0.3775405761076181, + "grad_norm": 1.2566261030582595, + "learning_rate": 1.89679253918327e-05, + "loss": 0.6535364389419556, + "step": 1291 + }, + { + "epoch": 0.3778330165228835, + "grad_norm": 1.3498011568256927, + "learning_rate": 
1.8965785221450343e-05, + "loss": 0.5910370349884033, + "step": 1292 + }, + { + "epoch": 0.37812545693814886, + "grad_norm": 1.273886266732073, + "learning_rate": 1.8963642955370203e-05, + "loss": 0.7025415897369385, + "step": 1293 + }, + { + "epoch": 0.37841789735341425, + "grad_norm": 1.3743129752593892, + "learning_rate": 1.8961498594093018e-05, + "loss": 0.8007702827453613, + "step": 1294 + }, + { + "epoch": 0.37871033776867963, + "grad_norm": 1.3170193178053329, + "learning_rate": 1.895935213812003e-05, + "loss": 0.6947172284126282, + "step": 1295 + }, + { + "epoch": 0.379002778183945, + "grad_norm": 1.1178211676030798, + "learning_rate": 1.895720358795295e-05, + "loss": 0.7024818658828735, + "step": 1296 + }, + { + "epoch": 0.3792952185992104, + "grad_norm": 1.3447819598276562, + "learning_rate": 1.895505294409399e-05, + "loss": 0.8202607035636902, + "step": 1297 + }, + { + "epoch": 0.3795876590144758, + "grad_norm": 1.3114070841017331, + "learning_rate": 1.8952900207045853e-05, + "loss": 0.8001795411109924, + "step": 1298 + }, + { + "epoch": 0.3798800994297412, + "grad_norm": 1.4499936638579116, + "learning_rate": 1.895074537731173e-05, + "loss": 0.8068668842315674, + "step": 1299 + }, + { + "epoch": 0.3801725398450066, + "grad_norm": 1.5847094151692727, + "learning_rate": 1.8948588455395294e-05, + "loss": 0.7685220241546631, + "step": 1300 + }, + { + "epoch": 0.380464980260272, + "grad_norm": 1.5870604224504243, + "learning_rate": 1.8946429441800715e-05, + "loss": 0.695665717124939, + "step": 1301 + }, + { + "epoch": 0.38075742067553736, + "grad_norm": 1.2451750201018865, + "learning_rate": 1.894426833703265e-05, + "loss": 0.6073132753372192, + "step": 1302 + }, + { + "epoch": 0.38104986109080274, + "grad_norm": 1.3373381283666117, + "learning_rate": 1.894210514159624e-05, + "loss": 0.6334577798843384, + "step": 1303 + }, + { + "epoch": 0.3813423015060681, + "grad_norm": 1.519675902061051, + "learning_rate": 1.8939939855997125e-05, + "loss": 
0.6448806524276733, + "step": 1304 + }, + { + "epoch": 0.3816347419213335, + "grad_norm": 1.438744535892561, + "learning_rate": 1.8937772480741427e-05, + "loss": 0.7587993144989014, + "step": 1305 + }, + { + "epoch": 0.3819271823365989, + "grad_norm": 1.3270277700231368, + "learning_rate": 1.8935603016335752e-05, + "loss": 0.6924787759780884, + "step": 1306 + }, + { + "epoch": 0.38221962275186433, + "grad_norm": 1.4268553989545638, + "learning_rate": 1.8933431463287197e-05, + "loss": 0.678055465221405, + "step": 1307 + }, + { + "epoch": 0.3825120631671297, + "grad_norm": 1.5391207675187488, + "learning_rate": 1.8931257822103357e-05, + "loss": 0.7519007325172424, + "step": 1308 + }, + { + "epoch": 0.3828045035823951, + "grad_norm": 1.3654471111870499, + "learning_rate": 1.8929082093292306e-05, + "loss": 0.6905468702316284, + "step": 1309 + }, + { + "epoch": 0.3830969439976605, + "grad_norm": 1.2938870443591295, + "learning_rate": 1.8926904277362603e-05, + "loss": 0.6718122363090515, + "step": 1310 + }, + { + "epoch": 0.38338938441292586, + "grad_norm": 1.2471147738993698, + "learning_rate": 1.89247243748233e-05, + "loss": 0.6903961896896362, + "step": 1311 + }, + { + "epoch": 0.38368182482819124, + "grad_norm": 1.2542516264352948, + "learning_rate": 1.8922542386183942e-05, + "loss": 0.6947582960128784, + "step": 1312 + }, + { + "epoch": 0.3839742652434566, + "grad_norm": 1.188134072228004, + "learning_rate": 1.8920358311954548e-05, + "loss": 0.5850759148597717, + "step": 1313 + }, + { + "epoch": 0.38426670565872206, + "grad_norm": 1.3420186334522382, + "learning_rate": 1.891817215264564e-05, + "loss": 0.6512178778648376, + "step": 1314 + }, + { + "epoch": 0.38455914607398745, + "grad_norm": 1.217758250797112, + "learning_rate": 1.891598390876821e-05, + "loss": 0.5910850167274475, + "step": 1315 + }, + { + "epoch": 0.38485158648925283, + "grad_norm": 1.5593571397265127, + "learning_rate": 1.891379358083375e-05, + "loss": 0.7113536596298218, + "step": 1316 + }, + { + 
"epoch": 0.3851440269045182, + "grad_norm": 1.293224731928583, + "learning_rate": 1.891160116935424e-05, + "loss": 0.705318808555603, + "step": 1317 + }, + { + "epoch": 0.3854364673197836, + "grad_norm": 1.3559272013315313, + "learning_rate": 1.890940667484214e-05, + "loss": 0.7524716258049011, + "step": 1318 + }, + { + "epoch": 0.385728907735049, + "grad_norm": 1.4516012043532711, + "learning_rate": 1.89072100978104e-05, + "loss": 0.6130248308181763, + "step": 1319 + }, + { + "epoch": 0.38602134815031436, + "grad_norm": 1.4354959704098622, + "learning_rate": 1.8905011438772455e-05, + "loss": 0.6535071134567261, + "step": 1320 + }, + { + "epoch": 0.38631378856557974, + "grad_norm": 1.3663991139031981, + "learning_rate": 1.890281069824223e-05, + "loss": 0.7027082443237305, + "step": 1321 + }, + { + "epoch": 0.3866062289808452, + "grad_norm": 1.4293858623040305, + "learning_rate": 1.8900607876734133e-05, + "loss": 0.7055719494819641, + "step": 1322 + }, + { + "epoch": 0.38689866939611056, + "grad_norm": 1.3719150653410752, + "learning_rate": 1.8898402974763063e-05, + "loss": 0.7403384447097778, + "step": 1323 + }, + { + "epoch": 0.38719110981137594, + "grad_norm": 1.4816215708629428, + "learning_rate": 1.88961959928444e-05, + "loss": 0.6771470308303833, + "step": 1324 + }, + { + "epoch": 0.3874835502266413, + "grad_norm": 1.552809609148836, + "learning_rate": 1.8893986931494015e-05, + "loss": 0.7258767485618591, + "step": 1325 + }, + { + "epoch": 0.3877759906419067, + "grad_norm": 1.4168779145398758, + "learning_rate": 1.889177579122826e-05, + "loss": 0.7587069869041443, + "step": 1326 + }, + { + "epoch": 0.3880684310571721, + "grad_norm": 1.0432394702021985, + "learning_rate": 1.888956257256398e-05, + "loss": 0.5434668660163879, + "step": 1327 + }, + { + "epoch": 0.3883608714724375, + "grad_norm": 1.2927507112469059, + "learning_rate": 1.8887347276018496e-05, + "loss": 0.5311154127120972, + "step": 1328 + }, + { + "epoch": 0.38865331188770286, + "grad_norm": 
1.2533915775325788, + "learning_rate": 1.888512990210962e-05, + "loss": 0.5651747584342957, + "step": 1329 + }, + { + "epoch": 0.3889457523029683, + "grad_norm": 1.1863162008873491, + "learning_rate": 1.8882910451355654e-05, + "loss": 0.628046989440918, + "step": 1330 + }, + { + "epoch": 0.3892381927182337, + "grad_norm": 1.362511771688155, + "learning_rate": 1.888068892427538e-05, + "loss": 0.644639253616333, + "step": 1331 + }, + { + "epoch": 0.38953063313349906, + "grad_norm": 1.2081658901416763, + "learning_rate": 1.887846532138806e-05, + "loss": 0.6290382742881775, + "step": 1332 + }, + { + "epoch": 0.38982307354876444, + "grad_norm": 1.319310840364395, + "learning_rate": 1.8876239643213456e-05, + "loss": 0.6881425380706787, + "step": 1333 + }, + { + "epoch": 0.3901155139640298, + "grad_norm": 1.398816649776473, + "learning_rate": 1.8874011890271807e-05, + "loss": 0.645643949508667, + "step": 1334 + }, + { + "epoch": 0.3904079543792952, + "grad_norm": 1.3694554603281324, + "learning_rate": 1.887178206308383e-05, + "loss": 0.6965867280960083, + "step": 1335 + }, + { + "epoch": 0.3907003947945606, + "grad_norm": 1.1683610353079796, + "learning_rate": 1.886955016217074e-05, + "loss": 0.7326550483703613, + "step": 1336 + }, + { + "epoch": 0.390992835209826, + "grad_norm": 1.1781300264884254, + "learning_rate": 1.886731618805422e-05, + "loss": 0.6570208668708801, + "step": 1337 + }, + { + "epoch": 0.3912852756250914, + "grad_norm": 1.3563073747469718, + "learning_rate": 1.886508014125646e-05, + "loss": 0.7391610145568848, + "step": 1338 + }, + { + "epoch": 0.3915777160403568, + "grad_norm": 1.2946978380119605, + "learning_rate": 1.8862842022300124e-05, + "loss": 0.651665985584259, + "step": 1339 + }, + { + "epoch": 0.3918701564556222, + "grad_norm": 1.383137909559831, + "learning_rate": 1.8860601831708346e-05, + "loss": 0.695915699005127, + "step": 1340 + }, + { + "epoch": 0.39216259687088756, + "grad_norm": 1.4373340263094165, + "learning_rate": 
1.885835957000476e-05, + "loss": 0.7209347486495972, + "step": 1341 + }, + { + "epoch": 0.39245503728615294, + "grad_norm": 1.3210736597005808, + "learning_rate": 1.885611523771349e-05, + "loss": 0.6083317995071411, + "step": 1342 + }, + { + "epoch": 0.3927474777014183, + "grad_norm": 1.1271235346878163, + "learning_rate": 1.8853868835359127e-05, + "loss": 0.5544713735580444, + "step": 1343 + }, + { + "epoch": 0.3930399181166837, + "grad_norm": 1.5309462253741093, + "learning_rate": 1.8851620363466756e-05, + "loss": 0.6333836317062378, + "step": 1344 + }, + { + "epoch": 0.3933323585319491, + "grad_norm": 1.2454908963117732, + "learning_rate": 1.8849369822561943e-05, + "loss": 0.554995059967041, + "step": 1345 + }, + { + "epoch": 0.3936247989472145, + "grad_norm": 1.1941759610212306, + "learning_rate": 1.884711721317074e-05, + "loss": 0.6696420907974243, + "step": 1346 + }, + { + "epoch": 0.3939172393624799, + "grad_norm": 1.3685512221597254, + "learning_rate": 1.8844862535819682e-05, + "loss": 0.6031695604324341, + "step": 1347 + }, + { + "epoch": 0.3942096797777453, + "grad_norm": 1.2956806020723108, + "learning_rate": 1.884260579103578e-05, + "loss": 0.686814546585083, + "step": 1348 + }, + { + "epoch": 0.3945021201930107, + "grad_norm": 1.3341047094387086, + "learning_rate": 1.884034697934654e-05, + "loss": 0.8113317489624023, + "step": 1349 + }, + { + "epoch": 0.39479456060827606, + "grad_norm": 1.2860694131453334, + "learning_rate": 1.8838086101279946e-05, + "loss": 0.5645952820777893, + "step": 1350 + }, + { + "epoch": 0.39508700102354144, + "grad_norm": 1.2016385972759884, + "learning_rate": 1.883582315736446e-05, + "loss": 0.6431643962860107, + "step": 1351 + }, + { + "epoch": 0.3953794414388068, + "grad_norm": 1.5868415992731069, + "learning_rate": 1.8833558148129034e-05, + "loss": 0.7691985368728638, + "step": 1352 + }, + { + "epoch": 0.39567188185407226, + "grad_norm": 1.2630640231572245, + "learning_rate": 1.88312910741031e-05, + "loss": 
0.5951793789863586, + "step": 1353 + }, + { + "epoch": 0.39596432226933764, + "grad_norm": 1.4300765958854422, + "learning_rate": 1.8829021935816572e-05, + "loss": 0.671844482421875, + "step": 1354 + }, + { + "epoch": 0.396256762684603, + "grad_norm": 1.1270834377200167, + "learning_rate": 1.8826750733799845e-05, + "loss": 0.5290843844413757, + "step": 1355 + }, + { + "epoch": 0.3965492030998684, + "grad_norm": 1.5593842090138308, + "learning_rate": 1.8824477468583806e-05, + "loss": 0.6492103934288025, + "step": 1356 + }, + { + "epoch": 0.3968416435151338, + "grad_norm": 1.5289372619537642, + "learning_rate": 1.882220214069981e-05, + "loss": 0.6111055016517639, + "step": 1357 + }, + { + "epoch": 0.3971340839303992, + "grad_norm": 1.5921417426189186, + "learning_rate": 1.8819924750679702e-05, + "loss": 0.8123398423194885, + "step": 1358 + }, + { + "epoch": 0.39742652434566456, + "grad_norm": 1.1798948792158594, + "learning_rate": 1.8817645299055815e-05, + "loss": 0.6118077039718628, + "step": 1359 + }, + { + "epoch": 0.39771896476092994, + "grad_norm": 1.3941391170101545, + "learning_rate": 1.8815363786360948e-05, + "loss": 0.6916248798370361, + "step": 1360 + }, + { + "epoch": 0.3980114051761954, + "grad_norm": 1.2139550861968382, + "learning_rate": 1.8813080213128394e-05, + "loss": 0.5586028099060059, + "step": 1361 + }, + { + "epoch": 0.39830384559146076, + "grad_norm": 1.4269154081314215, + "learning_rate": 1.8810794579891925e-05, + "loss": 0.6132841110229492, + "step": 1362 + }, + { + "epoch": 0.39859628600672614, + "grad_norm": 1.6891257084549798, + "learning_rate": 1.8808506887185793e-05, + "loss": 0.7329133749008179, + "step": 1363 + }, + { + "epoch": 0.3988887264219915, + "grad_norm": 1.2704606915664418, + "learning_rate": 1.8806217135544736e-05, + "loss": 0.5975138545036316, + "step": 1364 + }, + { + "epoch": 0.3991811668372569, + "grad_norm": 1.315662529163245, + "learning_rate": 1.8803925325503963e-05, + "loss": 0.5790295600891113, + "step": 1365 + }, + 
{ + "epoch": 0.3994736072525223, + "grad_norm": 1.2020939373145192, + "learning_rate": 1.8801631457599173e-05, + "loss": 0.6506124138832092, + "step": 1366 + }, + { + "epoch": 0.39976604766778767, + "grad_norm": 1.1981976421950613, + "learning_rate": 1.8799335532366547e-05, + "loss": 0.5577528476715088, + "step": 1367 + }, + { + "epoch": 0.40005848808305305, + "grad_norm": 1.4216027512167424, + "learning_rate": 1.879703755034274e-05, + "loss": 0.675471305847168, + "step": 1368 + }, + { + "epoch": 0.4003509284983185, + "grad_norm": 1.2857175045016282, + "learning_rate": 1.879473751206489e-05, + "loss": 0.5826357007026672, + "step": 1369 + }, + { + "epoch": 0.4006433689135839, + "grad_norm": 1.31090591162355, + "learning_rate": 1.8792435418070623e-05, + "loss": 0.5146772265434265, + "step": 1370 + }, + { + "epoch": 0.40093580932884926, + "grad_norm": 1.224194477069696, + "learning_rate": 1.879013126889804e-05, + "loss": 0.6049208641052246, + "step": 1371 + }, + { + "epoch": 0.40122824974411464, + "grad_norm": 1.4047657351006413, + "learning_rate": 1.878782506508571e-05, + "loss": 0.7058207392692566, + "step": 1372 + }, + { + "epoch": 0.40152069015938, + "grad_norm": 1.3782924104285919, + "learning_rate": 1.8785516807172704e-05, + "loss": 0.6281940937042236, + "step": 1373 + }, + { + "epoch": 0.4018131305746454, + "grad_norm": 1.5710053658084482, + "learning_rate": 1.878320649569856e-05, + "loss": 0.6318703889846802, + "step": 1374 + }, + { + "epoch": 0.4021055709899108, + "grad_norm": 1.369183615763356, + "learning_rate": 1.87808941312033e-05, + "loss": 0.6595311164855957, + "step": 1375 + }, + { + "epoch": 0.40239801140517617, + "grad_norm": 1.2850685362090555, + "learning_rate": 1.8778579714227433e-05, + "loss": 0.6964930295944214, + "step": 1376 + }, + { + "epoch": 0.4026904518204416, + "grad_norm": 1.338060741631637, + "learning_rate": 1.8776263245311926e-05, + "loss": 0.6093966364860535, + "step": 1377 + }, + { + "epoch": 0.402982892235707, + "grad_norm": 
1.2438362189425571, + "learning_rate": 1.8773944724998248e-05, + "loss": 0.5337893962860107, + "step": 1378 + }, + { + "epoch": 0.4032753326509724, + "grad_norm": 1.3665212711176857, + "learning_rate": 1.8771624153828336e-05, + "loss": 0.5899128317832947, + "step": 1379 + }, + { + "epoch": 0.40356777306623776, + "grad_norm": 1.41983053503157, + "learning_rate": 1.876930153234461e-05, + "loss": 0.7054699659347534, + "step": 1380 + }, + { + "epoch": 0.40386021348150314, + "grad_norm": 1.4240217447880834, + "learning_rate": 1.876697686108997e-05, + "loss": 0.6910602450370789, + "step": 1381 + }, + { + "epoch": 0.4041526538967685, + "grad_norm": 1.383183489617276, + "learning_rate": 1.876465014060779e-05, + "loss": 0.605659008026123, + "step": 1382 + }, + { + "epoch": 0.4044450943120339, + "grad_norm": 1.4798727853043008, + "learning_rate": 1.8762321371441934e-05, + "loss": 0.7159937620162964, + "step": 1383 + }, + { + "epoch": 0.4047375347272993, + "grad_norm": 1.394589338486404, + "learning_rate": 1.8759990554136733e-05, + "loss": 0.7568333148956299, + "step": 1384 + }, + { + "epoch": 0.4050299751425647, + "grad_norm": 1.613153659624872, + "learning_rate": 1.8757657689236998e-05, + "loss": 0.7117356657981873, + "step": 1385 + }, + { + "epoch": 0.4053224155578301, + "grad_norm": 1.4078248670298101, + "learning_rate": 1.8755322777288027e-05, + "loss": 0.6429109573364258, + "step": 1386 + }, + { + "epoch": 0.4056148559730955, + "grad_norm": 1.571804966063755, + "learning_rate": 1.875298581883559e-05, + "loss": 0.643811821937561, + "step": 1387 + }, + { + "epoch": 0.40590729638836087, + "grad_norm": 1.439883937827134, + "learning_rate": 1.875064681442594e-05, + "loss": 0.7143295407295227, + "step": 1388 + }, + { + "epoch": 0.40619973680362625, + "grad_norm": 1.5655377581350667, + "learning_rate": 1.8748305764605798e-05, + "loss": 0.732312023639679, + "step": 1389 + }, + { + "epoch": 0.40649217721889164, + "grad_norm": 1.2430462426936875, + "learning_rate": 
1.8745962669922375e-05, + "loss": 0.6878848075866699, + "step": 1390 + }, + { + "epoch": 0.406784617634157, + "grad_norm": 1.40828278888595, + "learning_rate": 1.8743617530923356e-05, + "loss": 0.6716262698173523, + "step": 1391 + }, + { + "epoch": 0.40707705804942246, + "grad_norm": 1.346254534859124, + "learning_rate": 1.87412703481569e-05, + "loss": 0.4990834593772888, + "step": 1392 + }, + { + "epoch": 0.40736949846468784, + "grad_norm": 1.4289189141042684, + "learning_rate": 1.8738921122171647e-05, + "loss": 0.6541857719421387, + "step": 1393 + }, + { + "epoch": 0.4076619388799532, + "grad_norm": 1.4587922420879296, + "learning_rate": 1.8736569853516715e-05, + "loss": 0.6310811042785645, + "step": 1394 + }, + { + "epoch": 0.4079543792952186, + "grad_norm": 1.5328348699666439, + "learning_rate": 1.8734216542741702e-05, + "loss": 0.6335423588752747, + "step": 1395 + }, + { + "epoch": 0.408246819710484, + "grad_norm": 1.4540701020587141, + "learning_rate": 1.873186119039667e-05, + "loss": 0.6315034627914429, + "step": 1396 + }, + { + "epoch": 0.40853926012574937, + "grad_norm": 1.2591166704430221, + "learning_rate": 1.872950379703218e-05, + "loss": 0.6822362542152405, + "step": 1397 + }, + { + "epoch": 0.40883170054101475, + "grad_norm": 1.3128671260601936, + "learning_rate": 1.8727144363199257e-05, + "loss": 0.699965238571167, + "step": 1398 + }, + { + "epoch": 0.40912414095628014, + "grad_norm": 1.376512615463435, + "learning_rate": 1.8724782889449397e-05, + "loss": 0.6769841313362122, + "step": 1399 + }, + { + "epoch": 0.4094165813715456, + "grad_norm": 1.242831468646962, + "learning_rate": 1.8722419376334584e-05, + "loss": 0.5219473838806152, + "step": 1400 + }, + { + "epoch": 0.40970902178681096, + "grad_norm": 1.1119386554431685, + "learning_rate": 1.872005382440728e-05, + "loss": 0.6091574430465698, + "step": 1401 + }, + { + "epoch": 0.41000146220207634, + "grad_norm": 1.3005524040148213, + "learning_rate": 1.8717686234220406e-05, + "loss": 
0.7589390277862549, + "step": 1402 + }, + { + "epoch": 0.4102939026173417, + "grad_norm": 1.2931582987016021, + "learning_rate": 1.8715316606327384e-05, + "loss": 0.7042895555496216, + "step": 1403 + }, + { + "epoch": 0.4105863430326071, + "grad_norm": 1.2509877991876854, + "learning_rate": 1.8712944941282095e-05, + "loss": 0.6490949988365173, + "step": 1404 + }, + { + "epoch": 0.4108787834478725, + "grad_norm": 1.3726951776657805, + "learning_rate": 1.87105712396389e-05, + "loss": 0.6614132523536682, + "step": 1405 + }, + { + "epoch": 0.41117122386313787, + "grad_norm": 1.3416970895813871, + "learning_rate": 1.8708195501952637e-05, + "loss": 0.666157603263855, + "step": 1406 + }, + { + "epoch": 0.41146366427840325, + "grad_norm": 1.5455429688837699, + "learning_rate": 1.8705817728778626e-05, + "loss": 0.7347884178161621, + "step": 1407 + }, + { + "epoch": 0.4117561046936687, + "grad_norm": 1.6323767587093516, + "learning_rate": 1.8703437920672652e-05, + "loss": 0.8129836320877075, + "step": 1408 + }, + { + "epoch": 0.4120485451089341, + "grad_norm": 1.2690047775005027, + "learning_rate": 1.870105607819098e-05, + "loss": 0.645210862159729, + "step": 1409 + }, + { + "epoch": 0.41234098552419945, + "grad_norm": 1.367879279910813, + "learning_rate": 1.8698672201890355e-05, + "loss": 0.6716916561126709, + "step": 1410 + }, + { + "epoch": 0.41263342593946484, + "grad_norm": 1.66933384894401, + "learning_rate": 1.869628629232799e-05, + "loss": 0.8190855383872986, + "step": 1411 + }, + { + "epoch": 0.4129258663547302, + "grad_norm": 1.3900061091611966, + "learning_rate": 1.8693898350061582e-05, + "loss": 0.7618075609207153, + "step": 1412 + }, + { + "epoch": 0.4132183067699956, + "grad_norm": 1.3486290329442485, + "learning_rate": 1.869150837564929e-05, + "loss": 0.719980001449585, + "step": 1413 + }, + { + "epoch": 0.413510747185261, + "grad_norm": 1.2278158990840933, + "learning_rate": 1.8689116369649763e-05, + "loss": 0.6601548194885254, + "step": 1414 + }, + { + 
"epoch": 0.41380318760052637, + "grad_norm": 1.4157736896401232, + "learning_rate": 1.8686722332622112e-05, + "loss": 0.5991787314414978, + "step": 1415 + }, + { + "epoch": 0.4140956280157918, + "grad_norm": 1.2916299361998576, + "learning_rate": 1.8684326265125935e-05, + "loss": 0.6089641451835632, + "step": 1416 + }, + { + "epoch": 0.4143880684310572, + "grad_norm": 1.4857622706167455, + "learning_rate": 1.8681928167721297e-05, + "loss": 0.8143327236175537, + "step": 1417 + }, + { + "epoch": 0.41468050884632257, + "grad_norm": 1.1137129272750816, + "learning_rate": 1.8679528040968733e-05, + "loss": 0.6127045154571533, + "step": 1418 + }, + { + "epoch": 0.41497294926158795, + "grad_norm": 1.2684856043432204, + "learning_rate": 1.8677125885429262e-05, + "loss": 0.659069299697876, + "step": 1419 + }, + { + "epoch": 0.41526538967685334, + "grad_norm": 1.3122733176612695, + "learning_rate": 1.8674721701664377e-05, + "loss": 0.7277505397796631, + "step": 1420 + }, + { + "epoch": 0.4155578300921187, + "grad_norm": 1.2107555045955465, + "learning_rate": 1.8672315490236034e-05, + "loss": 0.6128710508346558, + "step": 1421 + }, + { + "epoch": 0.4158502705073841, + "grad_norm": 1.7155790773588848, + "learning_rate": 1.866990725170667e-05, + "loss": 0.7439340949058533, + "step": 1422 + }, + { + "epoch": 0.4161427109226495, + "grad_norm": 1.1423179387443951, + "learning_rate": 1.8667496986639206e-05, + "loss": 0.5855459570884705, + "step": 1423 + }, + { + "epoch": 0.4164351513379149, + "grad_norm": 1.3335637335552337, + "learning_rate": 1.866508469559702e-05, + "loss": 0.6865170001983643, + "step": 1424 + }, + { + "epoch": 0.4167275917531803, + "grad_norm": 1.3191415692644766, + "learning_rate": 1.866267037914397e-05, + "loss": 0.6648446917533875, + "step": 1425 + }, + { + "epoch": 0.4170200321684457, + "grad_norm": 1.5198580088053322, + "learning_rate": 1.866025403784439e-05, + "loss": 0.6919275522232056, + "step": 1426 + }, + { + "epoch": 0.41731247258371107, + "grad_norm": 
1.3309988770277923, + "learning_rate": 1.865783567226308e-05, + "loss": 0.7270313501358032, + "step": 1427 + }, + { + "epoch": 0.41760491299897645, + "grad_norm": 1.2814450794742573, + "learning_rate": 1.8655415282965327e-05, + "loss": 0.5938387513160706, + "step": 1428 + }, + { + "epoch": 0.41789735341424183, + "grad_norm": 1.378984312222445, + "learning_rate": 1.8652992870516872e-05, + "loss": 0.6517149209976196, + "step": 1429 + }, + { + "epoch": 0.4181897938295072, + "grad_norm": 1.1862439123900306, + "learning_rate": 1.8650568435483948e-05, + "loss": 0.6688356399536133, + "step": 1430 + }, + { + "epoch": 0.41848223424477266, + "grad_norm": 1.153419964025717, + "learning_rate": 1.864814197843325e-05, + "loss": 0.5300855040550232, + "step": 1431 + }, + { + "epoch": 0.41877467466003804, + "grad_norm": 1.270766161064103, + "learning_rate": 1.8645713499931943e-05, + "loss": 0.6404704451560974, + "step": 1432 + }, + { + "epoch": 0.4190671150753034, + "grad_norm": 1.5430855845367462, + "learning_rate": 1.8643283000547673e-05, + "loss": 0.6758813858032227, + "step": 1433 + }, + { + "epoch": 0.4193595554905688, + "grad_norm": 1.3783205387944717, + "learning_rate": 1.8640850480848552e-05, + "loss": 0.6328250169754028, + "step": 1434 + }, + { + "epoch": 0.4196519959058342, + "grad_norm": 1.26459826517306, + "learning_rate": 1.863841594140317e-05, + "loss": 0.6747157573699951, + "step": 1435 + }, + { + "epoch": 0.41994443632109957, + "grad_norm": 1.3504769695047412, + "learning_rate": 1.8635979382780584e-05, + "loss": 0.5314475893974304, + "step": 1436 + }, + { + "epoch": 0.42023687673636495, + "grad_norm": 1.345420561697831, + "learning_rate": 1.863354080555033e-05, + "loss": 0.478320837020874, + "step": 1437 + }, + { + "epoch": 0.42052931715163033, + "grad_norm": 1.7039787917499718, + "learning_rate": 1.86311002102824e-05, + "loss": 0.7389972805976868, + "step": 1438 + }, + { + "epoch": 0.42082175756689577, + "grad_norm": 1.5752835852867741, + "learning_rate": 
1.8628657597547273e-05, + "loss": 0.5449938178062439, + "step": 1439 + }, + { + "epoch": 0.42111419798216115, + "grad_norm": 1.417913338852298, + "learning_rate": 1.8626212967915897e-05, + "loss": 0.6752811670303345, + "step": 1440 + }, + { + "epoch": 0.42140663839742654, + "grad_norm": 1.3268814915367182, + "learning_rate": 1.862376632195969e-05, + "loss": 0.7750412821769714, + "step": 1441 + }, + { + "epoch": 0.4216990788126919, + "grad_norm": 1.4150998717703018, + "learning_rate": 1.8621317660250535e-05, + "loss": 0.5967680215835571, + "step": 1442 + }, + { + "epoch": 0.4219915192279573, + "grad_norm": 1.2836272802739963, + "learning_rate": 1.86188669833608e-05, + "loss": 0.6781327724456787, + "step": 1443 + }, + { + "epoch": 0.4222839596432227, + "grad_norm": 1.421988457915262, + "learning_rate": 1.8616414291863307e-05, + "loss": 0.7539681196212769, + "step": 1444 + }, + { + "epoch": 0.42257640005848807, + "grad_norm": 1.5265432564271315, + "learning_rate": 1.8613959586331364e-05, + "loss": 0.6976957321166992, + "step": 1445 + }, + { + "epoch": 0.42286884047375345, + "grad_norm": 1.3365892238255053, + "learning_rate": 1.861150286733874e-05, + "loss": 0.6616528034210205, + "step": 1446 + }, + { + "epoch": 0.4231612808890189, + "grad_norm": 1.4482994306877846, + "learning_rate": 1.860904413545968e-05, + "loss": 0.6407957077026367, + "step": 1447 + }, + { + "epoch": 0.42345372130428427, + "grad_norm": 1.4193133822561126, + "learning_rate": 1.86065833912689e-05, + "loss": 0.5918550491333008, + "step": 1448 + }, + { + "epoch": 0.42374616171954965, + "grad_norm": 1.421765780188314, + "learning_rate": 1.8604120635341574e-05, + "loss": 0.6142056584358215, + "step": 1449 + }, + { + "epoch": 0.42403860213481503, + "grad_norm": 1.4371201128611453, + "learning_rate": 1.8601655868253368e-05, + "loss": 0.6359597444534302, + "step": 1450 + }, + { + "epoch": 0.4243310425500804, + "grad_norm": 1.2914617625794835, + "learning_rate": 1.8599189090580402e-05, + "loss": 
0.7149467468261719, + "step": 1451 + }, + { + "epoch": 0.4246234829653458, + "grad_norm": 1.2900964447275098, + "learning_rate": 1.8596720302899272e-05, + "loss": 0.6015822887420654, + "step": 1452 + }, + { + "epoch": 0.4249159233806112, + "grad_norm": 1.1866564154864978, + "learning_rate": 1.8594249505787035e-05, + "loss": 0.6389881372451782, + "step": 1453 + }, + { + "epoch": 0.42520836379587656, + "grad_norm": 1.381321058965008, + "learning_rate": 1.8591776699821235e-05, + "loss": 0.7479783892631531, + "step": 1454 + }, + { + "epoch": 0.425500804211142, + "grad_norm": 1.2271977568055246, + "learning_rate": 1.8589301885579866e-05, + "loss": 0.6574498414993286, + "step": 1455 + }, + { + "epoch": 0.4257932446264074, + "grad_norm": 1.3187836865578064, + "learning_rate": 1.858682506364141e-05, + "loss": 0.6314088702201843, + "step": 1456 + }, + { + "epoch": 0.42608568504167277, + "grad_norm": 1.4747450600155867, + "learning_rate": 1.85843462345848e-05, + "loss": 0.605385959148407, + "step": 1457 + }, + { + "epoch": 0.42637812545693815, + "grad_norm": 1.280849948973879, + "learning_rate": 1.8581865398989452e-05, + "loss": 0.6355551481246948, + "step": 1458 + }, + { + "epoch": 0.42667056587220353, + "grad_norm": 1.3012840164028812, + "learning_rate": 1.8579382557435247e-05, + "loss": 0.6303017139434814, + "step": 1459 + }, + { + "epoch": 0.4269630062874689, + "grad_norm": 1.2629380280411955, + "learning_rate": 1.8576897710502532e-05, + "loss": 0.5916526317596436, + "step": 1460 + }, + { + "epoch": 0.4272554467027343, + "grad_norm": 1.2467440963341316, + "learning_rate": 1.8574410858772126e-05, + "loss": 0.5709279179573059, + "step": 1461 + }, + { + "epoch": 0.4275478871179997, + "grad_norm": 1.2909430743502928, + "learning_rate": 1.8571922002825317e-05, + "loss": 0.571231484413147, + "step": 1462 + }, + { + "epoch": 0.4278403275332651, + "grad_norm": 1.310017395907512, + "learning_rate": 1.8569431143243856e-05, + "loss": 0.6352202892303467, + "step": 1463 + }, + { + 
"epoch": 0.4281327679485305, + "grad_norm": 1.316165374470179, + "learning_rate": 1.8566938280609965e-05, + "loss": 0.553265392780304, + "step": 1464 + }, + { + "epoch": 0.4284252083637959, + "grad_norm": 1.1127868543655046, + "learning_rate": 1.8564443415506343e-05, + "loss": 0.4913727045059204, + "step": 1465 + }, + { + "epoch": 0.42871764877906127, + "grad_norm": 1.4457215110099157, + "learning_rate": 1.8561946548516143e-05, + "loss": 0.542539119720459, + "step": 1466 + }, + { + "epoch": 0.42901008919432665, + "grad_norm": 1.5261496853017646, + "learning_rate": 1.8559447680222994e-05, + "loss": 0.719292163848877, + "step": 1467 + }, + { + "epoch": 0.42930252960959203, + "grad_norm": 1.4842625427656275, + "learning_rate": 1.8556946811210993e-05, + "loss": 0.8443170785903931, + "step": 1468 + }, + { + "epoch": 0.4295949700248574, + "grad_norm": 1.4024545882927506, + "learning_rate": 1.8554443942064705e-05, + "loss": 0.7899821996688843, + "step": 1469 + }, + { + "epoch": 0.42988741044012285, + "grad_norm": 1.3637198474337424, + "learning_rate": 1.8551939073369155e-05, + "loss": 0.617426872253418, + "step": 1470 + }, + { + "epoch": 0.43017985085538823, + "grad_norm": 1.284473833943433, + "learning_rate": 1.8549432205709842e-05, + "loss": 0.5573505163192749, + "step": 1471 + }, + { + "epoch": 0.4304722912706536, + "grad_norm": 1.2050796372555104, + "learning_rate": 1.8546923339672734e-05, + "loss": 0.5571975111961365, + "step": 1472 + }, + { + "epoch": 0.430764731685919, + "grad_norm": 1.2452948917501594, + "learning_rate": 1.854441247584426e-05, + "loss": 0.6411981582641602, + "step": 1473 + }, + { + "epoch": 0.4310571721011844, + "grad_norm": 1.4342124934143161, + "learning_rate": 1.8541899614811323e-05, + "loss": 0.4766804277896881, + "step": 1474 + }, + { + "epoch": 0.43134961251644977, + "grad_norm": 1.5114551227786939, + "learning_rate": 1.8539384757161285e-05, + "loss": 0.7479405403137207, + "step": 1475 + }, + { + "epoch": 0.43164205293171515, + "grad_norm": 
1.3476436799817348, + "learning_rate": 1.8536867903481983e-05, + "loss": 0.6848211288452148, + "step": 1476 + }, + { + "epoch": 0.43193449334698053, + "grad_norm": 1.2973665530504777, + "learning_rate": 1.8534349054361708e-05, + "loss": 0.7413634061813354, + "step": 1477 + }, + { + "epoch": 0.43222693376224597, + "grad_norm": 1.1870657052305638, + "learning_rate": 1.8531828210389236e-05, + "loss": 0.5880843997001648, + "step": 1478 + }, + { + "epoch": 0.43251937417751135, + "grad_norm": 1.16075786792099, + "learning_rate": 1.852930537215379e-05, + "loss": 0.5885627269744873, + "step": 1479 + }, + { + "epoch": 0.43281181459277673, + "grad_norm": 1.3270242768891243, + "learning_rate": 1.8526780540245077e-05, + "loss": 0.706636905670166, + "step": 1480 + }, + { + "epoch": 0.4331042550080421, + "grad_norm": 1.3793959384028218, + "learning_rate": 1.8524253715253255e-05, + "loss": 0.6521843075752258, + "step": 1481 + }, + { + "epoch": 0.4333966954233075, + "grad_norm": 1.3825746336646279, + "learning_rate": 1.8521724897768955e-05, + "loss": 0.6231021881103516, + "step": 1482 + }, + { + "epoch": 0.4336891358385729, + "grad_norm": 1.4460679872410762, + "learning_rate": 1.851919408838327e-05, + "loss": 0.6859451532363892, + "step": 1483 + }, + { + "epoch": 0.43398157625383826, + "grad_norm": 1.2507527028404273, + "learning_rate": 1.851666128768777e-05, + "loss": 0.7948323488235474, + "step": 1484 + }, + { + "epoch": 0.43427401666910365, + "grad_norm": 1.3631419376990976, + "learning_rate": 1.8514126496274473e-05, + "loss": 0.7815203070640564, + "step": 1485 + }, + { + "epoch": 0.4345664570843691, + "grad_norm": 1.2904619284943133, + "learning_rate": 1.8511589714735875e-05, + "loss": 0.6941452622413635, + "step": 1486 + }, + { + "epoch": 0.43485889749963447, + "grad_norm": 1.41567858231915, + "learning_rate": 1.850905094366493e-05, + "loss": 0.5500549674034119, + "step": 1487 + }, + { + "epoch": 0.43515133791489985, + "grad_norm": 1.2918667262960315, + "learning_rate": 
1.8506510183655066e-05, + "loss": 0.6616400480270386, + "step": 1488 + }, + { + "epoch": 0.43544377833016523, + "grad_norm": 1.2491627898498192, + "learning_rate": 1.8503967435300166e-05, + "loss": 0.6920043230056763, + "step": 1489 + }, + { + "epoch": 0.4357362187454306, + "grad_norm": 1.215912086863742, + "learning_rate": 1.8501422699194584e-05, + "loss": 0.6080813407897949, + "step": 1490 + }, + { + "epoch": 0.436028659160696, + "grad_norm": 1.2215283867587456, + "learning_rate": 1.8498875975933135e-05, + "loss": 0.576184868812561, + "step": 1491 + }, + { + "epoch": 0.4363210995759614, + "grad_norm": 1.3544983329172053, + "learning_rate": 1.84963272661111e-05, + "loss": 0.6647310256958008, + "step": 1492 + }, + { + "epoch": 0.43661353999122676, + "grad_norm": 1.5126248587795905, + "learning_rate": 1.8493776570324224e-05, + "loss": 0.6738306283950806, + "step": 1493 + }, + { + "epoch": 0.4369059804064922, + "grad_norm": 1.306695091605799, + "learning_rate": 1.849122388916872e-05, + "loss": 0.681056022644043, + "step": 1494 + }, + { + "epoch": 0.4371984208217576, + "grad_norm": 1.2802492616875505, + "learning_rate": 1.848866922324126e-05, + "loss": 0.7844547033309937, + "step": 1495 + }, + { + "epoch": 0.43749086123702297, + "grad_norm": 1.278338668380481, + "learning_rate": 1.8486112573138977e-05, + "loss": 0.6478928327560425, + "step": 1496 + }, + { + "epoch": 0.43778330165228835, + "grad_norm": 1.1565510309984284, + "learning_rate": 1.8483553939459477e-05, + "loss": 0.6035341024398804, + "step": 1497 + }, + { + "epoch": 0.43807574206755373, + "grad_norm": 1.5407821231530743, + "learning_rate": 1.8480993322800826e-05, + "loss": 0.6664912700653076, + "step": 1498 + }, + { + "epoch": 0.4383681824828191, + "grad_norm": 1.2757017491830842, + "learning_rate": 1.847843072376155e-05, + "loss": 0.7171953916549683, + "step": 1499 + }, + { + "epoch": 0.4386606228980845, + "grad_norm": 1.6930649567828897, + "learning_rate": 1.8475866142940646e-05, + "loss": 
0.8400344848632812, + "step": 1500 + }, + { + "epoch": 0.4389530633133499, + "grad_norm": 1.4411024776302432, + "learning_rate": 1.8473299580937563e-05, + "loss": 0.5119056701660156, + "step": 1501 + }, + { + "epoch": 0.4392455037286153, + "grad_norm": 1.2781692932924433, + "learning_rate": 1.847073103835222e-05, + "loss": 0.5864866375923157, + "step": 1502 + }, + { + "epoch": 0.4395379441438807, + "grad_norm": 1.1391351003013295, + "learning_rate": 1.8468160515785e-05, + "loss": 0.6389576196670532, + "step": 1503 + }, + { + "epoch": 0.4398303845591461, + "grad_norm": 1.3447539998849671, + "learning_rate": 1.846558801383675e-05, + "loss": 0.6745110750198364, + "step": 1504 + }, + { + "epoch": 0.44012282497441146, + "grad_norm": 1.4359844129069297, + "learning_rate": 1.846301353310877e-05, + "loss": 0.6207559704780579, + "step": 1505 + }, + { + "epoch": 0.44041526538967685, + "grad_norm": 1.4143769366285628, + "learning_rate": 1.8460437074202832e-05, + "loss": 0.6818139553070068, + "step": 1506 + }, + { + "epoch": 0.44070770580494223, + "grad_norm": 1.4877202307925406, + "learning_rate": 1.845785863772117e-05, + "loss": 0.652062714099884, + "step": 1507 + }, + { + "epoch": 0.4410001462202076, + "grad_norm": 1.340284980688535, + "learning_rate": 1.8455278224266476e-05, + "loss": 0.6842166185379028, + "step": 1508 + }, + { + "epoch": 0.44129258663547305, + "grad_norm": 1.3899905625699573, + "learning_rate": 1.8452695834441904e-05, + "loss": 0.6459342837333679, + "step": 1509 + }, + { + "epoch": 0.44158502705073843, + "grad_norm": 1.3677235686172902, + "learning_rate": 1.8450111468851078e-05, + "loss": 0.6036739349365234, + "step": 1510 + }, + { + "epoch": 0.4418774674660038, + "grad_norm": 1.401326082704981, + "learning_rate": 1.844752512809807e-05, + "loss": 0.7530199289321899, + "step": 1511 + }, + { + "epoch": 0.4421699078812692, + "grad_norm": 1.249585374389202, + "learning_rate": 1.8444936812787428e-05, + "loss": 0.6098290085792542, + "step": 1512 + }, + { + 
"epoch": 0.4424623482965346, + "grad_norm": 1.6252323705163014, + "learning_rate": 1.844234652352415e-05, + "loss": 0.7142464518547058, + "step": 1513 + }, + { + "epoch": 0.44275478871179996, + "grad_norm": 1.3215155589821708, + "learning_rate": 1.8439754260913703e-05, + "loss": 0.4895970821380615, + "step": 1514 + }, + { + "epoch": 0.44304722912706535, + "grad_norm": 1.2855871920553614, + "learning_rate": 1.8437160025562012e-05, + "loss": 0.6166520118713379, + "step": 1515 + }, + { + "epoch": 0.4433396695423307, + "grad_norm": 1.3621423468696194, + "learning_rate": 1.8434563818075462e-05, + "loss": 0.6020585894584656, + "step": 1516 + }, + { + "epoch": 0.44363210995759617, + "grad_norm": 1.3215872914676274, + "learning_rate": 1.8431965639060904e-05, + "loss": 0.6879030466079712, + "step": 1517 + }, + { + "epoch": 0.44392455037286155, + "grad_norm": 1.2000763930073624, + "learning_rate": 1.8429365489125644e-05, + "loss": 0.5753897428512573, + "step": 1518 + }, + { + "epoch": 0.44421699078812693, + "grad_norm": 1.2916902596192155, + "learning_rate": 1.8426763368877455e-05, + "loss": 0.5165301561355591, + "step": 1519 + }, + { + "epoch": 0.4445094312033923, + "grad_norm": 1.630208225804633, + "learning_rate": 1.842415927892456e-05, + "loss": 0.6377310752868652, + "step": 1520 + }, + { + "epoch": 0.4448018716186577, + "grad_norm": 1.4221002668397775, + "learning_rate": 1.842155321987566e-05, + "loss": 0.7429912090301514, + "step": 1521 + }, + { + "epoch": 0.4450943120339231, + "grad_norm": 1.5079395076396265, + "learning_rate": 1.8418945192339892e-05, + "loss": 0.6177542209625244, + "step": 1522 + }, + { + "epoch": 0.44538675244918846, + "grad_norm": 1.2784904022569494, + "learning_rate": 1.8416335196926877e-05, + "loss": 0.662541389465332, + "step": 1523 + }, + { + "epoch": 0.44567919286445384, + "grad_norm": 1.2782173083325044, + "learning_rate": 1.841372323424668e-05, + "loss": 0.6026759743690491, + "step": 1524 + }, + { + "epoch": 0.4459716332797193, + 
"grad_norm": 1.5759742604234355, + "learning_rate": 1.8411109304909837e-05, + "loss": 0.7902384400367737, + "step": 1525 + }, + { + "epoch": 0.44626407369498466, + "grad_norm": 1.4904175669631523, + "learning_rate": 1.840849340952733e-05, + "loss": 0.6588590145111084, + "step": 1526 + }, + { + "epoch": 0.44655651411025005, + "grad_norm": 1.1682358413615135, + "learning_rate": 1.8405875548710614e-05, + "loss": 0.49133825302124023, + "step": 1527 + }, + { + "epoch": 0.44684895452551543, + "grad_norm": 1.4464174570347765, + "learning_rate": 1.8403255723071597e-05, + "loss": 0.6644654273986816, + "step": 1528 + }, + { + "epoch": 0.4471413949407808, + "grad_norm": 1.2325053536943291, + "learning_rate": 1.8400633933222647e-05, + "loss": 0.6257454752922058, + "step": 1529 + }, + { + "epoch": 0.4474338353560462, + "grad_norm": 1.4100106920950097, + "learning_rate": 1.8398010179776597e-05, + "loss": 0.6671919226646423, + "step": 1530 + }, + { + "epoch": 0.4477262757713116, + "grad_norm": 1.1625081058782702, + "learning_rate": 1.839538446334672e-05, + "loss": 0.6001447439193726, + "step": 1531 + }, + { + "epoch": 0.44801871618657696, + "grad_norm": 1.6509081383772402, + "learning_rate": 1.8392756784546775e-05, + "loss": 0.8103213310241699, + "step": 1532 + }, + { + "epoch": 0.4483111566018424, + "grad_norm": 1.1675484766628168, + "learning_rate": 1.839012714399096e-05, + "loss": 0.7010835409164429, + "step": 1533 + }, + { + "epoch": 0.4486035970171078, + "grad_norm": 1.0773967688725017, + "learning_rate": 1.8387495542293935e-05, + "loss": 0.5709215402603149, + "step": 1534 + }, + { + "epoch": 0.44889603743237316, + "grad_norm": 1.3558935245332375, + "learning_rate": 1.8384861980070826e-05, + "loss": 0.6410949230194092, + "step": 1535 + }, + { + "epoch": 0.44918847784763855, + "grad_norm": 1.358963272892771, + "learning_rate": 1.838222645793721e-05, + "loss": 0.8036839962005615, + "step": 1536 + }, + { + "epoch": 0.44948091826290393, + "grad_norm": 1.1470889977158967, + 
"learning_rate": 1.8379588976509123e-05, + "loss": 0.49213099479675293, + "step": 1537 + }, + { + "epoch": 0.4497733586781693, + "grad_norm": 1.5829843161961048, + "learning_rate": 1.8376949536403063e-05, + "loss": 0.7111018896102905, + "step": 1538 + }, + { + "epoch": 0.4500657990934347, + "grad_norm": 1.313995907545699, + "learning_rate": 1.837430813823598e-05, + "loss": 0.8506999015808105, + "step": 1539 + }, + { + "epoch": 0.4503582395087001, + "grad_norm": 1.2175571229137518, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.7369798421859741, + "step": 1540 + }, + { + "epoch": 0.4506506799239655, + "grad_norm": 1.3435168892785054, + "learning_rate": 1.8369019470188855e-05, + "loss": 0.5982831120491028, + "step": 1541 + }, + { + "epoch": 0.4509431203392309, + "grad_norm": 1.2303590063922416, + "learning_rate": 1.8366372201545002e-05, + "loss": 0.6129144430160522, + "step": 1542 + }, + { + "epoch": 0.4512355607544963, + "grad_norm": 1.5191607059455674, + "learning_rate": 1.8363722977312512e-05, + "loss": 0.7142921686172485, + "step": 1543 + }, + { + "epoch": 0.45152800116976166, + "grad_norm": 1.1545455601160404, + "learning_rate": 1.8361071798110635e-05, + "loss": 0.515651524066925, + "step": 1544 + }, + { + "epoch": 0.45182044158502704, + "grad_norm": 1.3144713138844157, + "learning_rate": 1.8358418664559058e-05, + "loss": 0.5544168949127197, + "step": 1545 + }, + { + "epoch": 0.4521128820002924, + "grad_norm": 1.2540637765053078, + "learning_rate": 1.8355763577277938e-05, + "loss": 0.6801918745040894, + "step": 1546 + }, + { + "epoch": 0.4524053224155578, + "grad_norm": 1.3664850716479517, + "learning_rate": 1.835310653688789e-05, + "loss": 0.683785080909729, + "step": 1547 + }, + { + "epoch": 0.45269776283082325, + "grad_norm": 1.363558169999723, + "learning_rate": 1.835044754400997e-05, + "loss": 0.5689892172813416, + "step": 1548 + }, + { + "epoch": 0.45299020324608863, + "grad_norm": 1.1621305276584806, + "learning_rate": 1.8347786599265713e-05, + 
"loss": 0.5260726809501648, + "step": 1549 + }, + { + "epoch": 0.453282643661354, + "grad_norm": 1.2201116845769602, + "learning_rate": 1.834512370327709e-05, + "loss": 0.6792432069778442, + "step": 1550 + }, + { + "epoch": 0.4535750840766194, + "grad_norm": 1.198643016289117, + "learning_rate": 1.8342458856666545e-05, + "loss": 0.6336524486541748, + "step": 1551 + }, + { + "epoch": 0.4538675244918848, + "grad_norm": 1.3472994421503108, + "learning_rate": 1.8339792060056965e-05, + "loss": 0.5929614901542664, + "step": 1552 + }, + { + "epoch": 0.45415996490715016, + "grad_norm": 1.2599505430948363, + "learning_rate": 1.8337123314071696e-05, + "loss": 0.6683382391929626, + "step": 1553 + }, + { + "epoch": 0.45445240532241554, + "grad_norm": 1.2860246628200298, + "learning_rate": 1.833445261933454e-05, + "loss": 0.6256811618804932, + "step": 1554 + }, + { + "epoch": 0.4547448457376809, + "grad_norm": 1.3499468606960694, + "learning_rate": 1.8331779976469765e-05, + "loss": 0.5974653959274292, + "step": 1555 + }, + { + "epoch": 0.45503728615294636, + "grad_norm": 1.2078321854850618, + "learning_rate": 1.8329105386102074e-05, + "loss": 0.5471535325050354, + "step": 1556 + }, + { + "epoch": 0.45532972656821175, + "grad_norm": 1.284169615938693, + "learning_rate": 1.832642884885664e-05, + "loss": 0.5751267075538635, + "step": 1557 + }, + { + "epoch": 0.45562216698347713, + "grad_norm": 1.3234326952626145, + "learning_rate": 1.8323750365359092e-05, + "loss": 0.7003380060195923, + "step": 1558 + }, + { + "epoch": 0.4559146073987425, + "grad_norm": 1.3333099062603002, + "learning_rate": 1.8321069936235503e-05, + "loss": 0.6351351737976074, + "step": 1559 + }, + { + "epoch": 0.4562070478140079, + "grad_norm": 1.4452410048586575, + "learning_rate": 1.8318387562112407e-05, + "loss": 0.6083345413208008, + "step": 1560 + }, + { + "epoch": 0.4564994882292733, + "grad_norm": 1.230127453588353, + "learning_rate": 1.83157032436168e-05, + "loss": 0.589935302734375, + "step": 1561 + }, 
+ { + "epoch": 0.45679192864453866, + "grad_norm": 1.3491229847821233, + "learning_rate": 1.8313016981376116e-05, + "loss": 0.7648014426231384, + "step": 1562 + }, + { + "epoch": 0.45708436905980404, + "grad_norm": 1.2461686063365083, + "learning_rate": 1.831032877601826e-05, + "loss": 0.7309973239898682, + "step": 1563 + }, + { + "epoch": 0.4573768094750695, + "grad_norm": 1.4691097869713072, + "learning_rate": 1.8307638628171575e-05, + "loss": 0.7231593728065491, + "step": 1564 + }, + { + "epoch": 0.45766924989033486, + "grad_norm": 1.4770239307253334, + "learning_rate": 1.8304946538464876e-05, + "loss": 0.7321262359619141, + "step": 1565 + }, + { + "epoch": 0.45796169030560024, + "grad_norm": 1.1157038717428966, + "learning_rate": 1.830225250752742e-05, + "loss": 0.5866271257400513, + "step": 1566 + }, + { + "epoch": 0.4582541307208656, + "grad_norm": 1.4899327841327124, + "learning_rate": 1.8299556535988917e-05, + "loss": 0.7146202325820923, + "step": 1567 + }, + { + "epoch": 0.458546571136131, + "grad_norm": 1.0989226716242009, + "learning_rate": 1.8296858624479536e-05, + "loss": 0.4600168466567993, + "step": 1568 + }, + { + "epoch": 0.4588390115513964, + "grad_norm": 1.5647421342147445, + "learning_rate": 1.8294158773629896e-05, + "loss": 0.5710705518722534, + "step": 1569 + }, + { + "epoch": 0.4591314519666618, + "grad_norm": 1.4737029572986353, + "learning_rate": 1.8291456984071073e-05, + "loss": 0.7075216770172119, + "step": 1570 + }, + { + "epoch": 0.45942389238192716, + "grad_norm": 1.2087048615463696, + "learning_rate": 1.828875325643459e-05, + "loss": 0.5262739062309265, + "step": 1571 + }, + { + "epoch": 0.4597163327971926, + "grad_norm": 1.2732843462549814, + "learning_rate": 1.8286047591352436e-05, + "loss": 0.724657416343689, + "step": 1572 + }, + { + "epoch": 0.460008773212458, + "grad_norm": 1.2778614004914874, + "learning_rate": 1.8283339989457033e-05, + "loss": 0.6047587394714355, + "step": 1573 + }, + { + "epoch": 0.46030121362772336, + 
"grad_norm": 1.481028950467352, + "learning_rate": 1.828063045138127e-05, + "loss": 0.6647980213165283, + "step": 1574 + }, + { + "epoch": 0.46059365404298874, + "grad_norm": 1.3031844151965102, + "learning_rate": 1.827791897775849e-05, + "loss": 0.6081969738006592, + "step": 1575 + }, + { + "epoch": 0.4608860944582541, + "grad_norm": 1.2574668609577524, + "learning_rate": 1.827520556922248e-05, + "loss": 0.6815003156661987, + "step": 1576 + }, + { + "epoch": 0.4611785348735195, + "grad_norm": 1.25588669780601, + "learning_rate": 1.8272490226407476e-05, + "loss": 0.5571715235710144, + "step": 1577 + }, + { + "epoch": 0.4614709752887849, + "grad_norm": 1.241115553107667, + "learning_rate": 1.8269772949948185e-05, + "loss": 0.7562757730484009, + "step": 1578 + }, + { + "epoch": 0.4617634157040503, + "grad_norm": 1.3753582703744767, + "learning_rate": 1.8267053740479745e-05, + "loss": 0.6330382227897644, + "step": 1579 + }, + { + "epoch": 0.4620558561193157, + "grad_norm": 1.5331426598457012, + "learning_rate": 1.826433259863776e-05, + "loss": 0.7696597576141357, + "step": 1580 + }, + { + "epoch": 0.4623482965345811, + "grad_norm": 1.3594821877317964, + "learning_rate": 1.8261609525058275e-05, + "loss": 0.6953772306442261, + "step": 1581 + }, + { + "epoch": 0.4626407369498465, + "grad_norm": 1.3957443557298115, + "learning_rate": 1.8258884520377797e-05, + "loss": 0.5856037735939026, + "step": 1582 + }, + { + "epoch": 0.46293317736511186, + "grad_norm": 1.3245931479550002, + "learning_rate": 1.8256157585233277e-05, + "loss": 0.5988172888755798, + "step": 1583 + }, + { + "epoch": 0.46322561778037724, + "grad_norm": 1.3153037118046438, + "learning_rate": 1.8253428720262117e-05, + "loss": 0.6320241689682007, + "step": 1584 + }, + { + "epoch": 0.4635180581956426, + "grad_norm": 1.1680775814478943, + "learning_rate": 1.8250697926102182e-05, + "loss": 0.5758935213088989, + "step": 1585 + }, + { + "epoch": 0.463810498610908, + "grad_norm": 1.4295465315991271, + 
"learning_rate": 1.8247965203391763e-05, + "loss": 0.7104986906051636, + "step": 1586 + }, + { + "epoch": 0.46410293902617344, + "grad_norm": 1.4739846709331708, + "learning_rate": 1.8245230552769634e-05, + "loss": 0.6322015523910522, + "step": 1587 + }, + { + "epoch": 0.4643953794414388, + "grad_norm": 1.4263760736603013, + "learning_rate": 1.824249397487499e-05, + "loss": 0.5881235003471375, + "step": 1588 + }, + { + "epoch": 0.4646878198567042, + "grad_norm": 1.5652864190332019, + "learning_rate": 1.8239755470347497e-05, + "loss": 0.8097240924835205, + "step": 1589 + }, + { + "epoch": 0.4649802602719696, + "grad_norm": 1.4192861983980027, + "learning_rate": 1.823701503982726e-05, + "loss": 0.6538649201393127, + "step": 1590 + }, + { + "epoch": 0.465272700687235, + "grad_norm": 1.2329012857349442, + "learning_rate": 1.8234272683954842e-05, + "loss": 0.5868922472000122, + "step": 1591 + }, + { + "epoch": 0.46556514110250036, + "grad_norm": 1.3076575357637654, + "learning_rate": 1.8231528403371248e-05, + "loss": 0.6747265458106995, + "step": 1592 + }, + { + "epoch": 0.46585758151776574, + "grad_norm": 1.2961728564371904, + "learning_rate": 1.8228782198717936e-05, + "loss": 0.6519996523857117, + "step": 1593 + }, + { + "epoch": 0.4661500219330311, + "grad_norm": 1.2212124627082057, + "learning_rate": 1.822603407063682e-05, + "loss": 0.7268975973129272, + "step": 1594 + }, + { + "epoch": 0.46644246234829656, + "grad_norm": 1.1603454255193932, + "learning_rate": 1.8223284019770252e-05, + "loss": 0.6554980278015137, + "step": 1595 + }, + { + "epoch": 0.46673490276356194, + "grad_norm": 1.350233636463654, + "learning_rate": 1.8220532046761047e-05, + "loss": 0.7014105319976807, + "step": 1596 + }, + { + "epoch": 0.4670273431788273, + "grad_norm": 1.4228663397014873, + "learning_rate": 1.821777815225245e-05, + "loss": 0.5766602158546448, + "step": 1597 + }, + { + "epoch": 0.4673197835940927, + "grad_norm": 1.304159292005938, + "learning_rate": 1.8215022336888182e-05, + 
"loss": 0.5106521844863892, + "step": 1598 + }, + { + "epoch": 0.4676122240093581, + "grad_norm": 1.419250792414019, + "learning_rate": 1.821226460131239e-05, + "loss": 0.801375150680542, + "step": 1599 + }, + { + "epoch": 0.4679046644246235, + "grad_norm": 1.2845833863087142, + "learning_rate": 1.8209504946169677e-05, + "loss": 0.6189062595367432, + "step": 1600 + }, + { + "epoch": 0.46819710483988886, + "grad_norm": 1.2468379881228138, + "learning_rate": 1.8206743372105098e-05, + "loss": 0.6719359159469604, + "step": 1601 + }, + { + "epoch": 0.46848954525515424, + "grad_norm": 1.4660205035921348, + "learning_rate": 1.8203979879764153e-05, + "loss": 0.7437123656272888, + "step": 1602 + }, + { + "epoch": 0.4687819856704197, + "grad_norm": 1.3639008290802046, + "learning_rate": 1.8201214469792793e-05, + "loss": 0.7273217439651489, + "step": 1603 + }, + { + "epoch": 0.46907442608568506, + "grad_norm": 1.332814377531963, + "learning_rate": 1.8198447142837416e-05, + "loss": 0.6467087268829346, + "step": 1604 + }, + { + "epoch": 0.46936686650095044, + "grad_norm": 1.1167815102053054, + "learning_rate": 1.8195677899544866e-05, + "loss": 0.5764428973197937, + "step": 1605 + }, + { + "epoch": 0.4696593069162158, + "grad_norm": 1.4761144768835275, + "learning_rate": 1.8192906740562437e-05, + "loss": 0.5969977378845215, + "step": 1606 + }, + { + "epoch": 0.4699517473314812, + "grad_norm": 1.3424638711815577, + "learning_rate": 1.819013366653787e-05, + "loss": 0.7237746119499207, + "step": 1607 + }, + { + "epoch": 0.4702441877467466, + "grad_norm": 1.4494789457227795, + "learning_rate": 1.8187358678119355e-05, + "loss": 0.6289568543434143, + "step": 1608 + }, + { + "epoch": 0.47053662816201197, + "grad_norm": 1.1494676131886132, + "learning_rate": 1.8184581775955533e-05, + "loss": 0.5773013234138489, + "step": 1609 + }, + { + "epoch": 0.47082906857727735, + "grad_norm": 1.3055308518970814, + "learning_rate": 1.818180296069548e-05, + "loss": 0.5940284729003906, + "step": 1610 
+ }, + { + "epoch": 0.4711215089925428, + "grad_norm": 1.506634303312927, + "learning_rate": 1.8179022232988735e-05, + "loss": 0.7051881551742554, + "step": 1611 + }, + { + "epoch": 0.4714139494078082, + "grad_norm": 1.2817274142705404, + "learning_rate": 1.8176239593485267e-05, + "loss": 0.6427813768386841, + "step": 1612 + }, + { + "epoch": 0.47170638982307356, + "grad_norm": 1.3150009445137423, + "learning_rate": 1.817345504283551e-05, + "loss": 0.7041782736778259, + "step": 1613 + }, + { + "epoch": 0.47199883023833894, + "grad_norm": 1.1960422316530261, + "learning_rate": 1.817066858169033e-05, + "loss": 0.6568688154220581, + "step": 1614 + }, + { + "epoch": 0.4722912706536043, + "grad_norm": 1.1082706297141673, + "learning_rate": 1.816788021070105e-05, + "loss": 0.4784452021121979, + "step": 1615 + }, + { + "epoch": 0.4725837110688697, + "grad_norm": 1.403652579196444, + "learning_rate": 1.816508993051943e-05, + "loss": 0.6012705564498901, + "step": 1616 + }, + { + "epoch": 0.4728761514841351, + "grad_norm": 1.441258763214559, + "learning_rate": 1.8162297741797685e-05, + "loss": 0.6414428949356079, + "step": 1617 + }, + { + "epoch": 0.47316859189940047, + "grad_norm": 1.4131643644174843, + "learning_rate": 1.815950364518847e-05, + "loss": 0.6446187496185303, + "step": 1618 + }, + { + "epoch": 0.4734610323146659, + "grad_norm": 1.2552495046018781, + "learning_rate": 1.8156707641344885e-05, + "loss": 0.5153034329414368, + "step": 1619 + }, + { + "epoch": 0.4737534727299313, + "grad_norm": 1.5159052607593526, + "learning_rate": 1.8153909730920485e-05, + "loss": 0.7209463715553284, + "step": 1620 + }, + { + "epoch": 0.4740459131451967, + "grad_norm": 1.2933785450044248, + "learning_rate": 1.8151109914569267e-05, + "loss": 0.5990744829177856, + "step": 1621 + }, + { + "epoch": 0.47433835356046206, + "grad_norm": 1.3033668993107679, + "learning_rate": 1.814830819294566e-05, + "loss": 0.5706672668457031, + "step": 1622 + }, + { + "epoch": 0.47463079397572744, + 
"grad_norm": 1.1946317041445573, + "learning_rate": 1.814550456670456e-05, + "loss": 0.538548469543457, + "step": 1623 + }, + { + "epoch": 0.4749232343909928, + "grad_norm": 1.3282078081285205, + "learning_rate": 1.8142699036501288e-05, + "loss": 0.6450623273849487, + "step": 1624 + }, + { + "epoch": 0.4752156748062582, + "grad_norm": 1.336508209824809, + "learning_rate": 1.813989160299163e-05, + "loss": 0.6537624597549438, + "step": 1625 + }, + { + "epoch": 0.47550811522152364, + "grad_norm": 1.2777879020397362, + "learning_rate": 1.8137082266831794e-05, + "loss": 0.7126362323760986, + "step": 1626 + }, + { + "epoch": 0.475800555636789, + "grad_norm": 1.4542616967071014, + "learning_rate": 1.813427102867846e-05, + "loss": 0.6686921119689941, + "step": 1627 + }, + { + "epoch": 0.4760929960520544, + "grad_norm": 1.4231643377055359, + "learning_rate": 1.8131457889188723e-05, + "loss": 0.5925619602203369, + "step": 1628 + }, + { + "epoch": 0.4763854364673198, + "grad_norm": 1.2702390975554385, + "learning_rate": 1.8128642849020147e-05, + "loss": 0.7251017689704895, + "step": 1629 + }, + { + "epoch": 0.47667787688258517, + "grad_norm": 1.5675645867645378, + "learning_rate": 1.8125825908830733e-05, + "loss": 0.7524283528327942, + "step": 1630 + }, + { + "epoch": 0.47697031729785055, + "grad_norm": 1.2843975237623166, + "learning_rate": 1.8123007069278914e-05, + "loss": 0.7593197226524353, + "step": 1631 + }, + { + "epoch": 0.47726275771311594, + "grad_norm": 1.2304771008785658, + "learning_rate": 1.812018633102358e-05, + "loss": 0.43353578448295593, + "step": 1632 + }, + { + "epoch": 0.4775551981283813, + "grad_norm": 1.1488804965894268, + "learning_rate": 1.8117363694724063e-05, + "loss": 0.6254708766937256, + "step": 1633 + }, + { + "epoch": 0.47784763854364676, + "grad_norm": 1.2467231401784862, + "learning_rate": 1.811453916104014e-05, + "loss": 0.5970091223716736, + "step": 1634 + }, + { + "epoch": 0.47814007895891214, + "grad_norm": 1.2798152763028137, + 
"learning_rate": 1.8111712730632024e-05, + "loss": 0.6299331188201904, + "step": 1635 + }, + { + "epoch": 0.4784325193741775, + "grad_norm": 1.4325282365212126, + "learning_rate": 1.810888440416038e-05, + "loss": 0.7461789846420288, + "step": 1636 + }, + { + "epoch": 0.4787249597894429, + "grad_norm": 1.2539146793136515, + "learning_rate": 1.8106054182286305e-05, + "loss": 0.5053290724754333, + "step": 1637 + }, + { + "epoch": 0.4790174002047083, + "grad_norm": 1.2809048918941985, + "learning_rate": 1.810322206567135e-05, + "loss": 0.6853327751159668, + "step": 1638 + }, + { + "epoch": 0.47930984061997367, + "grad_norm": 1.4027979186429358, + "learning_rate": 1.8100388054977508e-05, + "loss": 0.5337134599685669, + "step": 1639 + }, + { + "epoch": 0.47960228103523905, + "grad_norm": 1.357622845311743, + "learning_rate": 1.809755215086721e-05, + "loss": 0.7082560062408447, + "step": 1640 + }, + { + "epoch": 0.47989472145050444, + "grad_norm": 1.3590974916852807, + "learning_rate": 1.8094714354003325e-05, + "loss": 0.680424153804779, + "step": 1641 + }, + { + "epoch": 0.4801871618657699, + "grad_norm": 1.1398057291819046, + "learning_rate": 1.8091874665049183e-05, + "loss": 0.5235139727592468, + "step": 1642 + }, + { + "epoch": 0.48047960228103526, + "grad_norm": 1.3822416905178454, + "learning_rate": 1.8089033084668535e-05, + "loss": 0.7843992114067078, + "step": 1643 + }, + { + "epoch": 0.48077204269630064, + "grad_norm": 1.4941957252025324, + "learning_rate": 1.8086189613525587e-05, + "loss": 0.6736497282981873, + "step": 1644 + }, + { + "epoch": 0.481064483111566, + "grad_norm": 1.3326594399820286, + "learning_rate": 1.808334425228498e-05, + "loss": 0.6898948550224304, + "step": 1645 + }, + { + "epoch": 0.4813569235268314, + "grad_norm": 1.3419429940100798, + "learning_rate": 1.80804970016118e-05, + "loss": 0.6719726324081421, + "step": 1646 + }, + { + "epoch": 0.4816493639420968, + "grad_norm": 1.316270232362313, + "learning_rate": 1.807764786217158e-05, + 
"loss": 0.6904356479644775, + "step": 1647 + }, + { + "epoch": 0.48194180435736217, + "grad_norm": 1.3009257254922486, + "learning_rate": 1.8074796834630285e-05, + "loss": 0.5956645011901855, + "step": 1648 + }, + { + "epoch": 0.48223424477262755, + "grad_norm": 1.162557710559535, + "learning_rate": 1.8071943919654323e-05, + "loss": 0.5676499009132385, + "step": 1649 + }, + { + "epoch": 0.482526685187893, + "grad_norm": 1.3145895725362904, + "learning_rate": 1.8069089117910547e-05, + "loss": 0.6006937026977539, + "step": 1650 + }, + { + "epoch": 0.48281912560315837, + "grad_norm": 1.3694341047830378, + "learning_rate": 1.806623243006625e-05, + "loss": 0.6241977214813232, + "step": 1651 + }, + { + "epoch": 0.48311156601842375, + "grad_norm": 1.4152304986784254, + "learning_rate": 1.806337385678917e-05, + "loss": 0.7359870672225952, + "step": 1652 + }, + { + "epoch": 0.48340400643368914, + "grad_norm": 1.155725074088707, + "learning_rate": 1.806051339874748e-05, + "loss": 0.6113119125366211, + "step": 1653 + }, + { + "epoch": 0.4836964468489545, + "grad_norm": 1.3288798785197886, + "learning_rate": 1.8057651056609784e-05, + "loss": 0.642951488494873, + "step": 1654 + }, + { + "epoch": 0.4839888872642199, + "grad_norm": 1.3081605749498326, + "learning_rate": 1.8054786831045147e-05, + "loss": 0.7020113468170166, + "step": 1655 + }, + { + "epoch": 0.4842813276794853, + "grad_norm": 1.355302216036822, + "learning_rate": 1.8051920722723063e-05, + "loss": 0.678231418132782, + "step": 1656 + }, + { + "epoch": 0.48457376809475067, + "grad_norm": 1.2407750790627203, + "learning_rate": 1.8049052732313466e-05, + "loss": 0.604765772819519, + "step": 1657 + }, + { + "epoch": 0.4848662085100161, + "grad_norm": 1.501775861517808, + "learning_rate": 1.8046182860486735e-05, + "loss": 0.6812270879745483, + "step": 1658 + }, + { + "epoch": 0.4851586489252815, + "grad_norm": 1.329019452940817, + "learning_rate": 1.8043311107913675e-05, + "loss": 0.6284930109977722, + "step": 1659 + }, + 
{ + "epoch": 0.48545108934054687, + "grad_norm": 1.4460160298748268, + "learning_rate": 1.8040437475265554e-05, + "loss": 0.665177583694458, + "step": 1660 + }, + { + "epoch": 0.48574352975581225, + "grad_norm": 1.365611165893268, + "learning_rate": 1.8037561963214058e-05, + "loss": 0.7628738284111023, + "step": 1661 + }, + { + "epoch": 0.48603597017107764, + "grad_norm": 1.4917601408905583, + "learning_rate": 1.8034684572431322e-05, + "loss": 0.6372654438018799, + "step": 1662 + }, + { + "epoch": 0.486328410586343, + "grad_norm": 1.2986927468884095, + "learning_rate": 1.803180530358992e-05, + "loss": 0.5915756225585938, + "step": 1663 + }, + { + "epoch": 0.4866208510016084, + "grad_norm": 1.3509164579114188, + "learning_rate": 1.802892415736286e-05, + "loss": 0.6821908950805664, + "step": 1664 + }, + { + "epoch": 0.48691329141687384, + "grad_norm": 1.3857679722145793, + "learning_rate": 1.80260411344236e-05, + "loss": 0.6418279409408569, + "step": 1665 + }, + { + "epoch": 0.4872057318321392, + "grad_norm": 1.154306591574384, + "learning_rate": 1.802315623544602e-05, + "loss": 0.5582526922225952, + "step": 1666 + }, + { + "epoch": 0.4874981722474046, + "grad_norm": 1.3431793608397968, + "learning_rate": 1.8020269461104448e-05, + "loss": 0.7145007848739624, + "step": 1667 + }, + { + "epoch": 0.48779061266267, + "grad_norm": 1.2110741699326812, + "learning_rate": 1.8017380812073658e-05, + "loss": 0.5415871739387512, + "step": 1668 + }, + { + "epoch": 0.48808305307793537, + "grad_norm": 1.488356994545647, + "learning_rate": 1.801449028902885e-05, + "loss": 0.728327751159668, + "step": 1669 + }, + { + "epoch": 0.48837549349320075, + "grad_norm": 1.3273378299589804, + "learning_rate": 1.8011597892645665e-05, + "loss": 0.6469160914421082, + "step": 1670 + }, + { + "epoch": 0.48866793390846613, + "grad_norm": 1.3096259850876997, + "learning_rate": 1.8008703623600185e-05, + "loss": 0.7107353210449219, + "step": 1671 + }, + { + "epoch": 0.4889603743237315, + "grad_norm": 
1.4201847213896843, + "learning_rate": 1.8005807482568926e-05, + "loss": 0.6918982267379761, + "step": 1672 + }, + { + "epoch": 0.48925281473899696, + "grad_norm": 1.4096024584844806, + "learning_rate": 1.800290947022884e-05, + "loss": 0.661738932132721, + "step": 1673 + }, + { + "epoch": 0.48954525515426234, + "grad_norm": 1.4938181766281158, + "learning_rate": 1.800000958725733e-05, + "loss": 0.6816283464431763, + "step": 1674 + }, + { + "epoch": 0.4898376955695277, + "grad_norm": 1.348689926804817, + "learning_rate": 1.7997107834332217e-05, + "loss": 0.6988941431045532, + "step": 1675 + }, + { + "epoch": 0.4901301359847931, + "grad_norm": 1.5696470599370025, + "learning_rate": 1.799420421213177e-05, + "loss": 0.7997519969940186, + "step": 1676 + }, + { + "epoch": 0.4904225764000585, + "grad_norm": 1.3512394042939826, + "learning_rate": 1.7991298721334697e-05, + "loss": 0.6552794575691223, + "step": 1677 + }, + { + "epoch": 0.49071501681532387, + "grad_norm": 1.2446219807906005, + "learning_rate": 1.7988391362620135e-05, + "loss": 0.6144021153450012, + "step": 1678 + }, + { + "epoch": 0.49100745723058925, + "grad_norm": 1.2086851376188177, + "learning_rate": 1.798548213666766e-05, + "loss": 0.5036276578903198, + "step": 1679 + }, + { + "epoch": 0.49129989764585463, + "grad_norm": 1.1620444251602322, + "learning_rate": 1.7982571044157288e-05, + "loss": 0.5152162313461304, + "step": 1680 + }, + { + "epoch": 0.49159233806112007, + "grad_norm": 1.4266855366652862, + "learning_rate": 1.797965808576947e-05, + "loss": 0.7249797582626343, + "step": 1681 + }, + { + "epoch": 0.49188477847638545, + "grad_norm": 1.138885414798186, + "learning_rate": 1.7976743262185094e-05, + "loss": 0.5769079923629761, + "step": 1682 + }, + { + "epoch": 0.49217721889165084, + "grad_norm": 1.2523240509929359, + "learning_rate": 1.797382657408548e-05, + "loss": 0.7017331123352051, + "step": 1683 + }, + { + "epoch": 0.4924696593069162, + "grad_norm": 1.3095438640742119, + "learning_rate": 
1.797090802215238e-05, + "loss": 0.788599967956543, + "step": 1684 + }, + { + "epoch": 0.4927620997221816, + "grad_norm": 1.3652642181905799, + "learning_rate": 1.7967987607067997e-05, + "loss": 0.5716612935066223, + "step": 1685 + }, + { + "epoch": 0.493054540137447, + "grad_norm": 1.396592202891807, + "learning_rate": 1.796506532951496e-05, + "loss": 0.6808345913887024, + "step": 1686 + }, + { + "epoch": 0.49334698055271237, + "grad_norm": 1.421363062787346, + "learning_rate": 1.7962141190176326e-05, + "loss": 0.6540817022323608, + "step": 1687 + }, + { + "epoch": 0.49363942096797775, + "grad_norm": 1.3162774070898267, + "learning_rate": 1.7959215189735604e-05, + "loss": 0.6522870063781738, + "step": 1688 + }, + { + "epoch": 0.4939318613832432, + "grad_norm": 1.2120992084575881, + "learning_rate": 1.7956287328876724e-05, + "loss": 0.5217882990837097, + "step": 1689 + }, + { + "epoch": 0.49422430179850857, + "grad_norm": 1.1456971313507769, + "learning_rate": 1.795335760828405e-05, + "loss": 0.6985372304916382, + "step": 1690 + }, + { + "epoch": 0.49451674221377395, + "grad_norm": 1.6308222645679713, + "learning_rate": 1.7950426028642397e-05, + "loss": 0.7199063301086426, + "step": 1691 + }, + { + "epoch": 0.49480918262903933, + "grad_norm": 1.2503132677681021, + "learning_rate": 1.7947492590636998e-05, + "loss": 0.5810575485229492, + "step": 1692 + }, + { + "epoch": 0.4951016230443047, + "grad_norm": 1.5393913616038981, + "learning_rate": 1.7944557294953528e-05, + "loss": 0.7443726658821106, + "step": 1693 + }, + { + "epoch": 0.4953940634595701, + "grad_norm": 1.4257690332105803, + "learning_rate": 1.7941620142278092e-05, + "loss": 0.6774560213088989, + "step": 1694 + }, + { + "epoch": 0.4956865038748355, + "grad_norm": 1.4876883296800856, + "learning_rate": 1.793868113329724e-05, + "loss": 0.6983137726783752, + "step": 1695 + }, + { + "epoch": 0.49597894429010086, + "grad_norm": 1.500775887710686, + "learning_rate": 1.793574026869793e-05, + "loss": 
0.6481274366378784, + "step": 1696 + }, + { + "epoch": 0.4962713847053663, + "grad_norm": 1.5261372345633493, + "learning_rate": 1.793279754916759e-05, + "loss": 0.6489002704620361, + "step": 1697 + }, + { + "epoch": 0.4965638251206317, + "grad_norm": 1.200851338265551, + "learning_rate": 1.7929852975394056e-05, + "loss": 0.7054505348205566, + "step": 1698 + }, + { + "epoch": 0.49685626553589707, + "grad_norm": 1.1948769153228862, + "learning_rate": 1.79269065480656e-05, + "loss": 0.5257681608200073, + "step": 1699 + }, + { + "epoch": 0.49714870595116245, + "grad_norm": 1.2760885846913066, + "learning_rate": 1.7923958267870936e-05, + "loss": 0.8625251054763794, + "step": 1700 + }, + { + "epoch": 0.49744114636642783, + "grad_norm": 1.223950331700182, + "learning_rate": 1.7921008135499205e-05, + "loss": 0.6736147999763489, + "step": 1701 + }, + { + "epoch": 0.4977335867816932, + "grad_norm": 1.351351583663473, + "learning_rate": 1.7918056151639985e-05, + "loss": 0.5079643130302429, + "step": 1702 + }, + { + "epoch": 0.4980260271969586, + "grad_norm": 1.2324398794203584, + "learning_rate": 1.791510231698328e-05, + "loss": 0.597242534160614, + "step": 1703 + }, + { + "epoch": 0.49831846761222404, + "grad_norm": 1.3776511171825507, + "learning_rate": 1.791214663221953e-05, + "loss": 0.6695376038551331, + "step": 1704 + }, + { + "epoch": 0.4986109080274894, + "grad_norm": 1.2400454845090276, + "learning_rate": 1.7909189098039616e-05, + "loss": 0.6411684155464172, + "step": 1705 + }, + { + "epoch": 0.4989033484427548, + "grad_norm": 1.3917271277458743, + "learning_rate": 1.790622971513484e-05, + "loss": 0.6671754121780396, + "step": 1706 + }, + { + "epoch": 0.4991957888580202, + "grad_norm": 1.1384272276613905, + "learning_rate": 1.7903268484196936e-05, + "loss": 0.5312573909759521, + "step": 1707 + }, + { + "epoch": 0.49948822927328557, + "grad_norm": 1.3626241120949947, + "learning_rate": 1.7900305405918076e-05, + "loss": 0.643236517906189, + "step": 1708 + }, + { + 
"epoch": 0.49978066968855095, + "grad_norm": 1.4093385837144417, + "learning_rate": 1.7897340480990863e-05, + "loss": 0.7942951321601868, + "step": 1709 + }, + { + "epoch": 0.5000731101038164, + "grad_norm": 1.3198251548980515, + "learning_rate": 1.789437371010833e-05, + "loss": 0.701362133026123, + "step": 1710 + }, + { + "epoch": 0.5003655505190817, + "grad_norm": 1.3304955567316399, + "learning_rate": 1.789140509396394e-05, + "loss": 0.6993157863616943, + "step": 1711 + }, + { + "epoch": 0.5006579909343472, + "grad_norm": 1.0719148279657758, + "learning_rate": 1.788843463325159e-05, + "loss": 0.568405270576477, + "step": 1712 + }, + { + "epoch": 0.5009504313496125, + "grad_norm": 0.976150644308567, + "learning_rate": 1.7885462328665605e-05, + "loss": 0.4948374032974243, + "step": 1713 + }, + { + "epoch": 0.5012428717648779, + "grad_norm": 1.4692514127239873, + "learning_rate": 1.7882488180900743e-05, + "loss": 0.6679480671882629, + "step": 1714 + }, + { + "epoch": 0.5015353121801432, + "grad_norm": 1.5018221461401142, + "learning_rate": 1.78795121906522e-05, + "loss": 0.706131100654602, + "step": 1715 + }, + { + "epoch": 0.5018277525954087, + "grad_norm": 1.207740414795638, + "learning_rate": 1.787653435861559e-05, + "loss": 0.6691830158233643, + "step": 1716 + }, + { + "epoch": 0.5021201930106741, + "grad_norm": 1.163150990025552, + "learning_rate": 1.787355468548696e-05, + "loss": 0.5624213218688965, + "step": 1717 + }, + { + "epoch": 0.5024126334259394, + "grad_norm": 1.3394004970303723, + "learning_rate": 1.78705731719628e-05, + "loss": 0.4589618444442749, + "step": 1718 + }, + { + "epoch": 0.5027050738412049, + "grad_norm": 1.384883869852314, + "learning_rate": 1.7867589818740012e-05, + "loss": 0.571403980255127, + "step": 1719 + }, + { + "epoch": 0.5029975142564702, + "grad_norm": 1.0668853872947273, + "learning_rate": 1.786460462651594e-05, + "loss": 0.5395561456680298, + "step": 1720 + }, + { + "epoch": 0.5032899546717357, + "grad_norm": 
1.243223907233259, + "learning_rate": 1.7861617595988355e-05, + "loss": 0.6166945695877075, + "step": 1721 + }, + { + "epoch": 0.503582395087001, + "grad_norm": 1.4857752879775032, + "learning_rate": 1.7858628727855458e-05, + "loss": 0.6812523603439331, + "step": 1722 + }, + { + "epoch": 0.5038748355022664, + "grad_norm": 1.2390654420633957, + "learning_rate": 1.7855638022815872e-05, + "loss": 0.6602752208709717, + "step": 1723 + }, + { + "epoch": 0.5041672759175319, + "grad_norm": 1.0873682718880517, + "learning_rate": 1.7852645481568665e-05, + "loss": 0.49925822019577026, + "step": 1724 + }, + { + "epoch": 0.5044597163327972, + "grad_norm": 1.3265310908908576, + "learning_rate": 1.784965110481332e-05, + "loss": 0.5557682514190674, + "step": 1725 + }, + { + "epoch": 0.5047521567480626, + "grad_norm": 1.2775644185514514, + "learning_rate": 1.7846654893249756e-05, + "loss": 0.6576372981071472, + "step": 1726 + }, + { + "epoch": 0.505044597163328, + "grad_norm": 2.047704943438843, + "learning_rate": 1.7843656847578317e-05, + "loss": 0.5266367197036743, + "step": 1727 + }, + { + "epoch": 0.5053370375785934, + "grad_norm": 1.6086224094226402, + "learning_rate": 1.7840656968499782e-05, + "loss": 0.7368261218070984, + "step": 1728 + }, + { + "epoch": 0.5056294779938587, + "grad_norm": 1.2755318597370908, + "learning_rate": 1.7837655256715355e-05, + "loss": 0.6583619117736816, + "step": 1729 + }, + { + "epoch": 0.5059219184091241, + "grad_norm": 1.4196511617190575, + "learning_rate": 1.7834651712926662e-05, + "loss": 0.7323073148727417, + "step": 1730 + }, + { + "epoch": 0.5062143588243895, + "grad_norm": 1.540686270234863, + "learning_rate": 1.783164633783577e-05, + "loss": 0.6059812307357788, + "step": 1731 + }, + { + "epoch": 0.5065067992396549, + "grad_norm": 1.451028079648097, + "learning_rate": 1.782863913214516e-05, + "loss": 0.5992608070373535, + "step": 1732 + }, + { + "epoch": 0.5067992396549204, + "grad_norm": 1.3452146161553644, + "learning_rate": 
1.7825630096557754e-05, + "loss": 0.5729147791862488, + "step": 1733 + }, + { + "epoch": 0.5070916800701857, + "grad_norm": 1.4383912240083958, + "learning_rate": 1.782261923177689e-05, + "loss": 0.6708269119262695, + "step": 1734 + }, + { + "epoch": 0.5073841204854511, + "grad_norm": 1.0922943221428454, + "learning_rate": 1.7819606538506347e-05, + "loss": 0.5377235412597656, + "step": 1735 + }, + { + "epoch": 0.5076765609007164, + "grad_norm": 1.3060450837457043, + "learning_rate": 1.781659201745032e-05, + "loss": 0.6899171471595764, + "step": 1736 + }, + { + "epoch": 0.5079690013159819, + "grad_norm": 1.2574262616785272, + "learning_rate": 1.7813575669313434e-05, + "loss": 0.6712576150894165, + "step": 1737 + }, + { + "epoch": 0.5082614417312472, + "grad_norm": 1.3797290531865334, + "learning_rate": 1.781055749480074e-05, + "loss": 0.6989667415618896, + "step": 1738 + }, + { + "epoch": 0.5085538821465126, + "grad_norm": 1.4976341004458755, + "learning_rate": 1.7807537494617723e-05, + "loss": 0.6103490591049194, + "step": 1739 + }, + { + "epoch": 0.5088463225617781, + "grad_norm": 1.2059878229475702, + "learning_rate": 1.7804515669470287e-05, + "loss": 0.4882289171218872, + "step": 1740 + }, + { + "epoch": 0.5091387629770434, + "grad_norm": 1.3963253268337052, + "learning_rate": 1.7801492020064764e-05, + "loss": 0.7244713306427002, + "step": 1741 + }, + { + "epoch": 0.5094312033923089, + "grad_norm": 1.2588544303384788, + "learning_rate": 1.7798466547107918e-05, + "loss": 0.6055952310562134, + "step": 1742 + }, + { + "epoch": 0.5097236438075742, + "grad_norm": 1.3449125705801426, + "learning_rate": 1.779543925130693e-05, + "loss": 0.5893995761871338, + "step": 1743 + }, + { + "epoch": 0.5100160842228396, + "grad_norm": 1.4169541262971606, + "learning_rate": 1.7792410133369413e-05, + "loss": 0.6154330968856812, + "step": 1744 + }, + { + "epoch": 0.5103085246381049, + "grad_norm": 1.294650393818464, + "learning_rate": 1.778937919400341e-05, + "loss": 
0.6227806806564331, + "step": 1745 + }, + { + "epoch": 0.5106009650533704, + "grad_norm": 1.563882907776874, + "learning_rate": 1.7786346433917376e-05, + "loss": 0.6192313432693481, + "step": 1746 + }, + { + "epoch": 0.5108934054686358, + "grad_norm": 1.324638073205218, + "learning_rate": 1.7783311853820205e-05, + "loss": 0.6175359487533569, + "step": 1747 + }, + { + "epoch": 0.5111858458839011, + "grad_norm": 1.17912928754983, + "learning_rate": 1.7780275454421218e-05, + "loss": 0.5588991641998291, + "step": 1748 + }, + { + "epoch": 0.5114782862991666, + "grad_norm": 1.0201894222615457, + "learning_rate": 1.777723723643014e-05, + "loss": 0.637115478515625, + "step": 1749 + }, + { + "epoch": 0.5117707267144319, + "grad_norm": 1.5101308062255179, + "learning_rate": 1.777419720055715e-05, + "loss": 0.6762860417366028, + "step": 1750 + }, + { + "epoch": 0.5120631671296973, + "grad_norm": 1.5211239881114056, + "learning_rate": 1.7771155347512828e-05, + "loss": 0.6980293989181519, + "step": 1751 + }, + { + "epoch": 0.5123556075449627, + "grad_norm": 1.3145597239587745, + "learning_rate": 1.7768111678008194e-05, + "loss": 0.6587250232696533, + "step": 1752 + }, + { + "epoch": 0.5126480479602281, + "grad_norm": 1.4750219793579704, + "learning_rate": 1.776506619275469e-05, + "loss": 0.6571120619773865, + "step": 1753 + }, + { + "epoch": 0.5129404883754934, + "grad_norm": 1.705487520120489, + "learning_rate": 1.7762018892464172e-05, + "loss": 0.8127633333206177, + "step": 1754 + }, + { + "epoch": 0.5132329287907589, + "grad_norm": 1.4136977790679228, + "learning_rate": 1.7758969777848935e-05, + "loss": 0.6585550308227539, + "step": 1755 + }, + { + "epoch": 0.5135253692060243, + "grad_norm": 1.5019600327645424, + "learning_rate": 1.7755918849621686e-05, + "loss": 0.6347511410713196, + "step": 1756 + }, + { + "epoch": 0.5138178096212896, + "grad_norm": 1.4489353235186164, + "learning_rate": 1.775286610849556e-05, + "loss": 0.5918457508087158, + "step": 1757 + }, + { + 
"epoch": 0.5141102500365551, + "grad_norm": 1.2541802522573693, + "learning_rate": 1.774981155518412e-05, + "loss": 0.7042769193649292, + "step": 1758 + }, + { + "epoch": 0.5144026904518204, + "grad_norm": 1.4327318826910254, + "learning_rate": 1.7746755190401353e-05, + "loss": 0.8014250993728638, + "step": 1759 + }, + { + "epoch": 0.5146951308670858, + "grad_norm": 1.339232110324459, + "learning_rate": 1.774369701486166e-05, + "loss": 0.6703939437866211, + "step": 1760 + }, + { + "epoch": 0.5149875712823512, + "grad_norm": 1.1710558248660605, + "learning_rate": 1.774063702927987e-05, + "loss": 0.6189682483673096, + "step": 1761 + }, + { + "epoch": 0.5152800116976166, + "grad_norm": 1.4110546220906648, + "learning_rate": 1.7737575234371238e-05, + "loss": 0.5386991500854492, + "step": 1762 + }, + { + "epoch": 0.515572452112882, + "grad_norm": 1.4204019461155708, + "learning_rate": 1.773451163085144e-05, + "loss": 0.6389357447624207, + "step": 1763 + }, + { + "epoch": 0.5158648925281474, + "grad_norm": 1.1798787279597898, + "learning_rate": 1.7731446219436577e-05, + "loss": 0.7247746586799622, + "step": 1764 + }, + { + "epoch": 0.5161573329434128, + "grad_norm": 1.2114702713778023, + "learning_rate": 1.7728379000843164e-05, + "loss": 0.5538983941078186, + "step": 1765 + }, + { + "epoch": 0.5164497733586781, + "grad_norm": 1.155329008927324, + "learning_rate": 1.7725309975788155e-05, + "loss": 0.6003320813179016, + "step": 1766 + }, + { + "epoch": 0.5167422137739436, + "grad_norm": 1.4065479816352848, + "learning_rate": 1.7722239144988908e-05, + "loss": 0.603177011013031, + "step": 1767 + }, + { + "epoch": 0.5170346541892089, + "grad_norm": 1.1699743536266287, + "learning_rate": 1.771916650916321e-05, + "loss": 0.6071338653564453, + "step": 1768 + }, + { + "epoch": 0.5173270946044743, + "grad_norm": 1.4268603398797357, + "learning_rate": 1.7716092069029275e-05, + "loss": 0.6148535013198853, + "step": 1769 + }, + { + "epoch": 0.5176195350197397, + "grad_norm": 
1.3460628970570976, + "learning_rate": 1.7713015825305735e-05, + "loss": 0.6236969828605652, + "step": 1770 + }, + { + "epoch": 0.5179119754350051, + "grad_norm": 1.4613715991480511, + "learning_rate": 1.770993777871164e-05, + "loss": 0.5439775586128235, + "step": 1771 + }, + { + "epoch": 0.5182044158502705, + "grad_norm": 1.3246469866549868, + "learning_rate": 1.770685792996647e-05, + "loss": 0.6498249769210815, + "step": 1772 + }, + { + "epoch": 0.5184968562655359, + "grad_norm": 1.307598965769502, + "learning_rate": 1.7703776279790113e-05, + "loss": 0.5838749408721924, + "step": 1773 + }, + { + "epoch": 0.5187892966808013, + "grad_norm": 1.44861400348765, + "learning_rate": 1.770069282890289e-05, + "loss": 0.6467812657356262, + "step": 1774 + }, + { + "epoch": 0.5190817370960666, + "grad_norm": 1.3332181124442455, + "learning_rate": 1.7697607578025543e-05, + "loss": 0.5878627896308899, + "step": 1775 + }, + { + "epoch": 0.5193741775113321, + "grad_norm": 1.2905348700615993, + "learning_rate": 1.7694520527879223e-05, + "loss": 0.6252161264419556, + "step": 1776 + }, + { + "epoch": 0.5196666179265974, + "grad_norm": 1.2071686484495499, + "learning_rate": 1.7691431679185518e-05, + "loss": 0.6098401546478271, + "step": 1777 + }, + { + "epoch": 0.5199590583418628, + "grad_norm": 1.4529959736387221, + "learning_rate": 1.7688341032666415e-05, + "loss": 0.7401748299598694, + "step": 1778 + }, + { + "epoch": 0.5202514987571283, + "grad_norm": 1.278188059333223, + "learning_rate": 1.768524858904435e-05, + "loss": 0.5398571491241455, + "step": 1779 + }, + { + "epoch": 0.5205439391723936, + "grad_norm": 1.211971903081478, + "learning_rate": 1.768215434904215e-05, + "loss": 0.5565935969352722, + "step": 1780 + }, + { + "epoch": 0.520836379587659, + "grad_norm": 1.3982258941889667, + "learning_rate": 1.7679058313383078e-05, + "loss": 0.5510461926460266, + "step": 1781 + }, + { + "epoch": 0.5211288200029244, + "grad_norm": 1.5839871959956162, + "learning_rate": 
1.7675960482790818e-05, + "loss": 0.670242428779602, + "step": 1782 + }, + { + "epoch": 0.5214212604181898, + "grad_norm": 1.309838763427276, + "learning_rate": 1.7672860857989463e-05, + "loss": 0.6556246280670166, + "step": 1783 + }, + { + "epoch": 0.5217137008334551, + "grad_norm": 1.3555406156984307, + "learning_rate": 1.7669759439703537e-05, + "loss": 0.7133421897888184, + "step": 1784 + }, + { + "epoch": 0.5220061412487206, + "grad_norm": 1.340410804208978, + "learning_rate": 1.766665622865797e-05, + "loss": 0.5520647168159485, + "step": 1785 + }, + { + "epoch": 0.522298581663986, + "grad_norm": 1.2754706768801123, + "learning_rate": 1.766355122557813e-05, + "loss": 0.6906430125236511, + "step": 1786 + }, + { + "epoch": 0.5225910220792513, + "grad_norm": 1.331418831759662, + "learning_rate": 1.766044443118978e-05, + "loss": 0.6847748756408691, + "step": 1787 + }, + { + "epoch": 0.5228834624945168, + "grad_norm": 1.6656678493050783, + "learning_rate": 1.7657335846219125e-05, + "loss": 0.6690354347229004, + "step": 1788 + }, + { + "epoch": 0.5231759029097821, + "grad_norm": 1.5097667681145126, + "learning_rate": 1.765422547139277e-05, + "loss": 0.6508032083511353, + "step": 1789 + }, + { + "epoch": 0.5234683433250475, + "grad_norm": 1.3545274700404182, + "learning_rate": 1.7651113307437754e-05, + "loss": 0.7686585187911987, + "step": 1790 + }, + { + "epoch": 0.5237607837403129, + "grad_norm": 1.5694388106807053, + "learning_rate": 1.764799935508152e-05, + "loss": 0.7669490575790405, + "step": 1791 + }, + { + "epoch": 0.5240532241555783, + "grad_norm": 1.3694245126086426, + "learning_rate": 1.7644883615051936e-05, + "loss": 0.6630266308784485, + "step": 1792 + }, + { + "epoch": 0.5243456645708436, + "grad_norm": 1.350854180871217, + "learning_rate": 1.764176608807729e-05, + "loss": 0.6054951548576355, + "step": 1793 + }, + { + "epoch": 0.5246381049861091, + "grad_norm": 1.3573271710882402, + "learning_rate": 1.7638646774886282e-05, + "loss": 0.6519330739974976, + 
"step": 1794 + }, + { + "epoch": 0.5249305454013745, + "grad_norm": 1.3013890836364408, + "learning_rate": 1.7635525676208034e-05, + "loss": 0.6797915101051331, + "step": 1795 + }, + { + "epoch": 0.5252229858166398, + "grad_norm": 1.4138018427804997, + "learning_rate": 1.7632402792772084e-05, + "loss": 0.7296736240386963, + "step": 1796 + }, + { + "epoch": 0.5255154262319053, + "grad_norm": 1.4894816204298726, + "learning_rate": 1.7629278125308388e-05, + "loss": 0.6371006965637207, + "step": 1797 + }, + { + "epoch": 0.5258078666471706, + "grad_norm": 1.1913157227609021, + "learning_rate": 1.762615167454732e-05, + "loss": 0.5315746068954468, + "step": 1798 + }, + { + "epoch": 0.526100307062436, + "grad_norm": 1.115665172593258, + "learning_rate": 1.762302344121966e-05, + "loss": 0.5285685062408447, + "step": 1799 + }, + { + "epoch": 0.5263927474777014, + "grad_norm": 1.269936179033053, + "learning_rate": 1.7619893426056622e-05, + "loss": 0.623146653175354, + "step": 1800 + }, + { + "epoch": 0.5266851878929668, + "grad_norm": 1.3314922698636598, + "learning_rate": 1.7616761629789824e-05, + "loss": 0.5433363318443298, + "step": 1801 + }, + { + "epoch": 0.5269776283082322, + "grad_norm": 1.422200045831386, + "learning_rate": 1.7613628053151307e-05, + "loss": 0.5035480260848999, + "step": 1802 + }, + { + "epoch": 0.5272700687234976, + "grad_norm": 1.3947936859584276, + "learning_rate": 1.7610492696873523e-05, + "loss": 0.678544819355011, + "step": 1803 + }, + { + "epoch": 0.527562509138763, + "grad_norm": 1.2973841494755158, + "learning_rate": 1.7607355561689347e-05, + "loss": 0.6237714290618896, + "step": 1804 + }, + { + "epoch": 0.5278549495540283, + "grad_norm": 1.8411758190439966, + "learning_rate": 1.760421664833206e-05, + "loss": 0.6943943500518799, + "step": 1805 + }, + { + "epoch": 0.5281473899692938, + "grad_norm": 1.1545458109151105, + "learning_rate": 1.7601075957535366e-05, + "loss": 0.5477268695831299, + "step": 1806 + }, + { + "epoch": 0.5284398303845591, 
+ "grad_norm": 1.5589440207416567, + "learning_rate": 1.759793349003338e-05, + "loss": 0.6627641320228577, + "step": 1807 + }, + { + "epoch": 0.5287322707998245, + "grad_norm": 1.169894530317387, + "learning_rate": 1.7594789246560638e-05, + "loss": 0.5394496917724609, + "step": 1808 + }, + { + "epoch": 0.5290247112150899, + "grad_norm": 1.5989109343746286, + "learning_rate": 1.759164322785209e-05, + "loss": 0.7824013233184814, + "step": 1809 + }, + { + "epoch": 0.5293171516303553, + "grad_norm": 1.5859531867022811, + "learning_rate": 1.7588495434643094e-05, + "loss": 0.6959671974182129, + "step": 1810 + }, + { + "epoch": 0.5296095920456207, + "grad_norm": 1.256097179377318, + "learning_rate": 1.7585345867669427e-05, + "loss": 0.7036902904510498, + "step": 1811 + }, + { + "epoch": 0.5299020324608861, + "grad_norm": 1.2520265115718123, + "learning_rate": 1.7582194527667285e-05, + "loss": 0.6700775623321533, + "step": 1812 + }, + { + "epoch": 0.5301944728761515, + "grad_norm": 1.4077714911889505, + "learning_rate": 1.7579041415373273e-05, + "loss": 0.648280918598175, + "step": 1813 + }, + { + "epoch": 0.5304869132914168, + "grad_norm": 1.3424741441047479, + "learning_rate": 1.757588653152441e-05, + "loss": 0.688485324382782, + "step": 1814 + }, + { + "epoch": 0.5307793537066823, + "grad_norm": 1.4718330240816029, + "learning_rate": 1.757272987685813e-05, + "loss": 0.6743370890617371, + "step": 1815 + }, + { + "epoch": 0.5310717941219476, + "grad_norm": 1.2524252340987996, + "learning_rate": 1.7569571452112288e-05, + "loss": 0.5597015619277954, + "step": 1816 + }, + { + "epoch": 0.531364234537213, + "grad_norm": 1.0387462800714626, + "learning_rate": 1.756641125802514e-05, + "loss": 0.48607051372528076, + "step": 1817 + }, + { + "epoch": 0.5316566749524785, + "grad_norm": 1.3375496888713005, + "learning_rate": 1.7563249295335366e-05, + "loss": 0.6712289452552795, + "step": 1818 + }, + { + "epoch": 0.5319491153677438, + "grad_norm": 1.4037646661677698, + 
"learning_rate": 1.7560085564782057e-05, + "loss": 0.5937772989273071, + "step": 1819 + }, + { + "epoch": 0.5322415557830092, + "grad_norm": 1.5529497860681427, + "learning_rate": 1.7556920067104714e-05, + "loss": 0.7416468262672424, + "step": 1820 + }, + { + "epoch": 0.5325339961982746, + "grad_norm": 1.1975217725231788, + "learning_rate": 1.7553752803043247e-05, + "loss": 0.6302096247673035, + "step": 1821 + }, + { + "epoch": 0.53282643661354, + "grad_norm": 1.268842982106158, + "learning_rate": 1.7550583773337992e-05, + "loss": 0.5576045513153076, + "step": 1822 + }, + { + "epoch": 0.5331188770288053, + "grad_norm": 1.3076658324014316, + "learning_rate": 1.7547412978729688e-05, + "loss": 0.5436257123947144, + "step": 1823 + }, + { + "epoch": 0.5334113174440708, + "grad_norm": 1.2387778464918946, + "learning_rate": 1.754424041995949e-05, + "loss": 0.5674831867218018, + "step": 1824 + }, + { + "epoch": 0.5337037578593362, + "grad_norm": 1.3729116406743342, + "learning_rate": 1.7541066097768965e-05, + "loss": 0.7254515290260315, + "step": 1825 + }, + { + "epoch": 0.5339961982746015, + "grad_norm": 1.1721694105309242, + "learning_rate": 1.7537890012900088e-05, + "loss": 0.5706701278686523, + "step": 1826 + }, + { + "epoch": 0.534288638689867, + "grad_norm": 1.4929452380767032, + "learning_rate": 1.7534712166095253e-05, + "loss": 0.6801357269287109, + "step": 1827 + }, + { + "epoch": 0.5345810791051323, + "grad_norm": 1.115878861059579, + "learning_rate": 1.753153255809726e-05, + "loss": 0.6851463317871094, + "step": 1828 + }, + { + "epoch": 0.5348735195203977, + "grad_norm": 1.3277835192492438, + "learning_rate": 1.7528351189649324e-05, + "loss": 0.6475861072540283, + "step": 1829 + }, + { + "epoch": 0.5351659599356631, + "grad_norm": 1.462925601634232, + "learning_rate": 1.752516806149507e-05, + "loss": 0.6953648924827576, + "step": 1830 + }, + { + "epoch": 0.5354584003509285, + "grad_norm": 1.5314952476377168, + "learning_rate": 1.7521983174378537e-05, + "loss": 
0.5128777623176575, + "step": 1831 + }, + { + "epoch": 0.5357508407661938, + "grad_norm": 1.3754167803768682, + "learning_rate": 1.751879652904417e-05, + "loss": 0.5780255198478699, + "step": 1832 + }, + { + "epoch": 0.5360432811814593, + "grad_norm": 1.1326334157819233, + "learning_rate": 1.751560812623683e-05, + "loss": 0.581814169883728, + "step": 1833 + }, + { + "epoch": 0.5363357215967247, + "grad_norm": 1.2244339664502468, + "learning_rate": 1.7512417966701788e-05, + "loss": 0.5609169006347656, + "step": 1834 + }, + { + "epoch": 0.53662816201199, + "grad_norm": 1.2348222464159622, + "learning_rate": 1.7509226051184716e-05, + "loss": 0.6029868125915527, + "step": 1835 + }, + { + "epoch": 0.5369206024272555, + "grad_norm": 1.5575658935823142, + "learning_rate": 1.7506032380431718e-05, + "loss": 0.6749545335769653, + "step": 1836 + }, + { + "epoch": 0.5372130428425208, + "grad_norm": 1.4261868258477342, + "learning_rate": 1.750283695518929e-05, + "loss": 0.7710991501808167, + "step": 1837 + }, + { + "epoch": 0.5375054832577862, + "grad_norm": 1.2797893583505542, + "learning_rate": 1.7499639776204334e-05, + "loss": 0.6330907940864563, + "step": 1838 + }, + { + "epoch": 0.5377979236730516, + "grad_norm": 1.3697405221939354, + "learning_rate": 1.7496440844224186e-05, + "loss": 0.655827522277832, + "step": 1839 + }, + { + "epoch": 0.538090364088317, + "grad_norm": 1.3640883815652403, + "learning_rate": 1.7493240159996565e-05, + "loss": 0.723412275314331, + "step": 1840 + }, + { + "epoch": 0.5383828045035824, + "grad_norm": 1.273855459734962, + "learning_rate": 1.7490037724269618e-05, + "loss": 0.5504157543182373, + "step": 1841 + }, + { + "epoch": 0.5386752449188478, + "grad_norm": 1.3867652356352673, + "learning_rate": 1.7486833537791895e-05, + "loss": 0.6258282661437988, + "step": 1842 + }, + { + "epoch": 0.5389676853341132, + "grad_norm": 1.3063024833172743, + "learning_rate": 1.748362760131235e-05, + "loss": 0.7044231295585632, + "step": 1843 + }, + { + "epoch": 
0.5392601257493785, + "grad_norm": 1.329844005030904, + "learning_rate": 1.7480419915580357e-05, + "loss": 0.5979568362236023, + "step": 1844 + }, + { + "epoch": 0.539552566164644, + "grad_norm": 1.2396904419147898, + "learning_rate": 1.7477210481345686e-05, + "loss": 0.558562159538269, + "step": 1845 + }, + { + "epoch": 0.5398450065799093, + "grad_norm": 1.5914882070233294, + "learning_rate": 1.747399929935853e-05, + "loss": 0.5965149402618408, + "step": 1846 + }, + { + "epoch": 0.5401374469951747, + "grad_norm": 1.2286076413347484, + "learning_rate": 1.7470786370369483e-05, + "loss": 0.6202878355979919, + "step": 1847 + }, + { + "epoch": 0.5404298874104401, + "grad_norm": 1.4696847585462156, + "learning_rate": 1.746757169512954e-05, + "loss": 0.652141273021698, + "step": 1848 + }, + { + "epoch": 0.5407223278257055, + "grad_norm": 1.3491880900702233, + "learning_rate": 1.746435527439012e-05, + "loss": 0.5713402628898621, + "step": 1849 + }, + { + "epoch": 0.541014768240971, + "grad_norm": 1.1036198614058235, + "learning_rate": 1.7461137108903042e-05, + "loss": 0.49776554107666016, + "step": 1850 + }, + { + "epoch": 0.5413072086562363, + "grad_norm": 1.3593053008733638, + "learning_rate": 1.7457917199420525e-05, + "loss": 0.7047991752624512, + "step": 1851 + }, + { + "epoch": 0.5415996490715017, + "grad_norm": 1.249302868601747, + "learning_rate": 1.7454695546695207e-05, + "loss": 0.7019875049591064, + "step": 1852 + }, + { + "epoch": 0.541892089486767, + "grad_norm": 1.1395410254023401, + "learning_rate": 1.745147215148013e-05, + "loss": 0.5448435544967651, + "step": 1853 + }, + { + "epoch": 0.5421845299020325, + "grad_norm": 1.3392616230054089, + "learning_rate": 1.7448247014528745e-05, + "loss": 0.6042202711105347, + "step": 1854 + }, + { + "epoch": 0.5424769703172978, + "grad_norm": 1.6632726033150385, + "learning_rate": 1.744502013659491e-05, + "loss": 0.8448539972305298, + "step": 1855 + }, + { + "epoch": 0.5427694107325632, + "grad_norm": 1.5168637416823716, 
+ "learning_rate": 1.7441791518432877e-05, + "loss": 0.6541755795478821, + "step": 1856 + }, + { + "epoch": 0.5430618511478287, + "grad_norm": 1.3214742528031191, + "learning_rate": 1.7438561160797326e-05, + "loss": 0.6700184345245361, + "step": 1857 + }, + { + "epoch": 0.543354291563094, + "grad_norm": 1.5975598198717695, + "learning_rate": 1.7435329064443335e-05, + "loss": 0.6407896280288696, + "step": 1858 + }, + { + "epoch": 0.5436467319783594, + "grad_norm": 1.1007084555597737, + "learning_rate": 1.7432095230126382e-05, + "loss": 0.5380120277404785, + "step": 1859 + }, + { + "epoch": 0.5439391723936248, + "grad_norm": 1.4184366915429367, + "learning_rate": 1.7428859658602353e-05, + "loss": 0.6561373472213745, + "step": 1860 + }, + { + "epoch": 0.5442316128088902, + "grad_norm": 1.7211281199225186, + "learning_rate": 1.7425622350627545e-05, + "loss": 0.724541962146759, + "step": 1861 + }, + { + "epoch": 0.5445240532241555, + "grad_norm": 1.3361773700031112, + "learning_rate": 1.7422383306958666e-05, + "loss": 0.6258946657180786, + "step": 1862 + }, + { + "epoch": 0.544816493639421, + "grad_norm": 1.4343211647036773, + "learning_rate": 1.7419142528352815e-05, + "loss": 0.560769259929657, + "step": 1863 + }, + { + "epoch": 0.5451089340546864, + "grad_norm": 1.3199774156859019, + "learning_rate": 1.741590001556751e-05, + "loss": 0.7782202363014221, + "step": 1864 + }, + { + "epoch": 0.5454013744699517, + "grad_norm": 1.1330260111547463, + "learning_rate": 1.7412655769360663e-05, + "loss": 0.5956888198852539, + "step": 1865 + }, + { + "epoch": 0.5456938148852172, + "grad_norm": 1.2304180375361309, + "learning_rate": 1.7409409790490602e-05, + "loss": 0.6251999139785767, + "step": 1866 + }, + { + "epoch": 0.5459862553004825, + "grad_norm": 1.201828702533108, + "learning_rate": 1.740616207971605e-05, + "loss": 0.5864061713218689, + "step": 1867 + }, + { + "epoch": 0.5462786957157479, + "grad_norm": 1.1335552643310969, + "learning_rate": 1.7402912637796146e-05, + 
"loss": 0.6241225004196167, + "step": 1868 + }, + { + "epoch": 0.5465711361310133, + "grad_norm": 1.4457655679285375, + "learning_rate": 1.739966146549042e-05, + "loss": 0.7190053462982178, + "step": 1869 + }, + { + "epoch": 0.5468635765462787, + "grad_norm": 1.3107442552185273, + "learning_rate": 1.739640856355882e-05, + "loss": 0.6771985292434692, + "step": 1870 + }, + { + "epoch": 0.547156016961544, + "grad_norm": 1.3163112428890422, + "learning_rate": 1.7393153932761687e-05, + "loss": 0.5480636954307556, + "step": 1871 + }, + { + "epoch": 0.5474484573768095, + "grad_norm": 1.5272520029044583, + "learning_rate": 1.7389897573859773e-05, + "loss": 0.7362977862358093, + "step": 1872 + }, + { + "epoch": 0.5477408977920749, + "grad_norm": 1.3701377425052599, + "learning_rate": 1.7386639487614232e-05, + "loss": 0.6483198404312134, + "step": 1873 + }, + { + "epoch": 0.5480333382073402, + "grad_norm": 1.137215399363759, + "learning_rate": 1.7383379674786622e-05, + "loss": 0.479977548122406, + "step": 1874 + }, + { + "epoch": 0.5483257786226057, + "grad_norm": 1.2815568792711947, + "learning_rate": 1.738011813613891e-05, + "loss": 0.6824718117713928, + "step": 1875 + }, + { + "epoch": 0.548618219037871, + "grad_norm": 1.4252738341228008, + "learning_rate": 1.737685487243345e-05, + "loss": 0.564873218536377, + "step": 1876 + }, + { + "epoch": 0.5489106594531364, + "grad_norm": 1.208162951014484, + "learning_rate": 1.7373589884433015e-05, + "loss": 0.5748772621154785, + "step": 1877 + }, + { + "epoch": 0.5492030998684018, + "grad_norm": 1.329038884364766, + "learning_rate": 1.7370323172900778e-05, + "loss": 0.6403437852859497, + "step": 1878 + }, + { + "epoch": 0.5494955402836672, + "grad_norm": 1.7288162586927747, + "learning_rate": 1.7367054738600312e-05, + "loss": 0.8253078460693359, + "step": 1879 + }, + { + "epoch": 0.5497879806989326, + "grad_norm": 1.204164217164209, + "learning_rate": 1.7363784582295596e-05, + "loss": 0.6823058128356934, + "step": 1880 + }, + { + 
"epoch": 0.550080421114198, + "grad_norm": 1.0289811643005782, + "learning_rate": 1.7360512704751003e-05, + "loss": 0.48659563064575195, + "step": 1881 + }, + { + "epoch": 0.5503728615294634, + "grad_norm": 1.5395158772607802, + "learning_rate": 1.735723910673132e-05, + "loss": 0.6380710601806641, + "step": 1882 + }, + { + "epoch": 0.5506653019447287, + "grad_norm": 1.512121712639047, + "learning_rate": 1.7353963789001723e-05, + "loss": 0.6956683397293091, + "step": 1883 + }, + { + "epoch": 0.5509577423599942, + "grad_norm": 1.2992852551955654, + "learning_rate": 1.735068675232781e-05, + "loss": 0.5751473903656006, + "step": 1884 + }, + { + "epoch": 0.5512501827752595, + "grad_norm": 1.3297689987083825, + "learning_rate": 1.734740799747556e-05, + "loss": 0.7265490293502808, + "step": 1885 + }, + { + "epoch": 0.5515426231905249, + "grad_norm": 1.3317519459591716, + "learning_rate": 1.734412752521136e-05, + "loss": 0.7419843673706055, + "step": 1886 + }, + { + "epoch": 0.5518350636057903, + "grad_norm": 1.3385317545855182, + "learning_rate": 1.734084533630201e-05, + "loss": 0.7381073236465454, + "step": 1887 + }, + { + "epoch": 0.5521275040210557, + "grad_norm": 1.4687535531628584, + "learning_rate": 1.7337561431514692e-05, + "loss": 0.6542054414749146, + "step": 1888 + }, + { + "epoch": 0.5524199444363211, + "grad_norm": 1.3144027889366288, + "learning_rate": 1.7334275811617e-05, + "loss": 0.6283866167068481, + "step": 1889 + }, + { + "epoch": 0.5527123848515865, + "grad_norm": 1.34879443340803, + "learning_rate": 1.7330988477376935e-05, + "loss": 0.6171330809593201, + "step": 1890 + }, + { + "epoch": 0.5530048252668519, + "grad_norm": 1.309075407888037, + "learning_rate": 1.7327699429562887e-05, + "loss": 0.5181430578231812, + "step": 1891 + }, + { + "epoch": 0.5532972656821172, + "grad_norm": 1.4382455208413174, + "learning_rate": 1.7324408668943645e-05, + "loss": 0.7337771058082581, + "step": 1892 + }, + { + "epoch": 0.5535897060973827, + "grad_norm": 
1.3677542553778577, + "learning_rate": 1.7321116196288413e-05, + "loss": 0.5193721055984497, + "step": 1893 + }, + { + "epoch": 0.553882146512648, + "grad_norm": 1.2912522952038028, + "learning_rate": 1.731782201236678e-05, + "loss": 0.7743211388587952, + "step": 1894 + }, + { + "epoch": 0.5541745869279134, + "grad_norm": 1.5457463678190766, + "learning_rate": 1.731452611794875e-05, + "loss": 0.8244242072105408, + "step": 1895 + }, + { + "epoch": 0.5544670273431789, + "grad_norm": 1.3153817051947132, + "learning_rate": 1.7311228513804712e-05, + "loss": 0.6276153326034546, + "step": 1896 + }, + { + "epoch": 0.5547594677584442, + "grad_norm": 1.4741498614217154, + "learning_rate": 1.7307929200705463e-05, + "loss": 0.7919771671295166, + "step": 1897 + }, + { + "epoch": 0.5550519081737096, + "grad_norm": 1.527110359994231, + "learning_rate": 1.7304628179422192e-05, + "loss": 0.6187459230422974, + "step": 1898 + }, + { + "epoch": 0.555344348588975, + "grad_norm": 1.1766146767977552, + "learning_rate": 1.7301325450726497e-05, + "loss": 0.6190363764762878, + "step": 1899 + }, + { + "epoch": 0.5556367890042404, + "grad_norm": 1.209178127119406, + "learning_rate": 1.7298021015390375e-05, + "loss": 0.5537956953048706, + "step": 1900 + }, + { + "epoch": 0.5559292294195057, + "grad_norm": 1.434637926231007, + "learning_rate": 1.729471487418621e-05, + "loss": 0.7164788246154785, + "step": 1901 + }, + { + "epoch": 0.5562216698347712, + "grad_norm": 1.2878374944552806, + "learning_rate": 1.7291407027886796e-05, + "loss": 0.6101689338684082, + "step": 1902 + }, + { + "epoch": 0.5565141102500366, + "grad_norm": 1.4102535348815881, + "learning_rate": 1.7288097477265322e-05, + "loss": 0.7112093567848206, + "step": 1903 + }, + { + "epoch": 0.5568065506653019, + "grad_norm": 1.698804519808014, + "learning_rate": 1.7284786223095376e-05, + "loss": 0.7807149291038513, + "step": 1904 + }, + { + "epoch": 0.5570989910805674, + "grad_norm": 1.3150296925108194, + "learning_rate": 
1.7281473266150942e-05, + "loss": 0.5723121166229248, + "step": 1905 + }, + { + "epoch": 0.5573914314958327, + "grad_norm": 1.4287078485940368, + "learning_rate": 1.7278158607206402e-05, + "loss": 0.6901307106018066, + "step": 1906 + }, + { + "epoch": 0.5576838719110981, + "grad_norm": 1.3895105915390893, + "learning_rate": 1.7274842247036547e-05, + "loss": 0.8247314095497131, + "step": 1907 + }, + { + "epoch": 0.5579763123263635, + "grad_norm": 1.2902939634670878, + "learning_rate": 1.727152418641654e-05, + "loss": 0.758405327796936, + "step": 1908 + }, + { + "epoch": 0.5582687527416289, + "grad_norm": 1.1507745861737273, + "learning_rate": 1.7268204426121967e-05, + "loss": 0.6448276042938232, + "step": 1909 + }, + { + "epoch": 0.5585611931568942, + "grad_norm": 1.4597983603763345, + "learning_rate": 1.7264882966928803e-05, + "loss": 0.6846790313720703, + "step": 1910 + }, + { + "epoch": 0.5588536335721597, + "grad_norm": 1.494960410585431, + "learning_rate": 1.726155980961342e-05, + "loss": 0.6427637338638306, + "step": 1911 + }, + { + "epoch": 0.5591460739874251, + "grad_norm": 1.6049335332675108, + "learning_rate": 1.7258234954952578e-05, + "loss": 0.7105496525764465, + "step": 1912 + }, + { + "epoch": 0.5594385144026904, + "grad_norm": 1.247874236176648, + "learning_rate": 1.7254908403723446e-05, + "loss": 0.6307404041290283, + "step": 1913 + }, + { + "epoch": 0.5597309548179559, + "grad_norm": 1.3460021193743466, + "learning_rate": 1.7251580156703587e-05, + "loss": 0.7194197177886963, + "step": 1914 + }, + { + "epoch": 0.5600233952332212, + "grad_norm": 1.4541814827650097, + "learning_rate": 1.7248250214670955e-05, + "loss": 0.676772952079773, + "step": 1915 + }, + { + "epoch": 0.5603158356484866, + "grad_norm": 1.4231220185819522, + "learning_rate": 1.724491857840391e-05, + "loss": 0.6047924160957336, + "step": 1916 + }, + { + "epoch": 0.560608276063752, + "grad_norm": 1.4639689581400968, + "learning_rate": 1.7241585248681192e-05, + "loss": 
0.7412474155426025, + "step": 1917 + }, + { + "epoch": 0.5609007164790174, + "grad_norm": 1.3634846491128696, + "learning_rate": 1.7238250226281952e-05, + "loss": 0.6337922215461731, + "step": 1918 + }, + { + "epoch": 0.5611931568942828, + "grad_norm": 1.325394488194612, + "learning_rate": 1.7234913511985733e-05, + "loss": 0.7192416787147522, + "step": 1919 + }, + { + "epoch": 0.5614855973095482, + "grad_norm": 1.5807591545293311, + "learning_rate": 1.723157510657247e-05, + "loss": 0.6576168537139893, + "step": 1920 + }, + { + "epoch": 0.5617780377248136, + "grad_norm": 1.2677184116479052, + "learning_rate": 1.722823501082249e-05, + "loss": 0.6592451333999634, + "step": 1921 + }, + { + "epoch": 0.5620704781400789, + "grad_norm": 1.3384834377307993, + "learning_rate": 1.722489322551653e-05, + "loss": 0.8042774796485901, + "step": 1922 + }, + { + "epoch": 0.5623629185553444, + "grad_norm": 1.4566017039283872, + "learning_rate": 1.7221549751435706e-05, + "loss": 0.727135181427002, + "step": 1923 + }, + { + "epoch": 0.5626553589706097, + "grad_norm": 1.3099994778880142, + "learning_rate": 1.7218204589361535e-05, + "loss": 0.5641134977340698, + "step": 1924 + }, + { + "epoch": 0.5629477993858751, + "grad_norm": 1.5113194940037022, + "learning_rate": 1.7214857740075924e-05, + "loss": 0.6354084610939026, + "step": 1925 + }, + { + "epoch": 0.5632402398011405, + "grad_norm": 1.3038206210364904, + "learning_rate": 1.7211509204361187e-05, + "loss": 0.6044377088546753, + "step": 1926 + }, + { + "epoch": 0.5635326802164059, + "grad_norm": 1.2045011077136063, + "learning_rate": 1.7208158983000022e-05, + "loss": 0.5519559383392334, + "step": 1927 + }, + { + "epoch": 0.5638251206316713, + "grad_norm": 1.162061868190052, + "learning_rate": 1.7204807076775514e-05, + "loss": 0.4480612277984619, + "step": 1928 + }, + { + "epoch": 0.5641175610469367, + "grad_norm": 1.3899173129631617, + "learning_rate": 1.7201453486471167e-05, + "loss": 0.5929607152938843, + "step": 1929 + }, + { + 
"epoch": 0.5644100014622021, + "grad_norm": 2.021763483016241, + "learning_rate": 1.7198098212870847e-05, + "loss": 0.6863572001457214, + "step": 1930 + }, + { + "epoch": 0.5647024418774674, + "grad_norm": 1.0938398450209694, + "learning_rate": 1.719474125675884e-05, + "loss": 0.5551834106445312, + "step": 1931 + }, + { + "epoch": 0.5649948822927329, + "grad_norm": 1.3644128319132816, + "learning_rate": 1.7191382618919802e-05, + "loss": 0.6113166809082031, + "step": 1932 + }, + { + "epoch": 0.5652873227079982, + "grad_norm": 1.419009993473521, + "learning_rate": 1.7188022300138805e-05, + "loss": 0.7833362817764282, + "step": 1933 + }, + { + "epoch": 0.5655797631232636, + "grad_norm": 1.3899666208681147, + "learning_rate": 1.71846603012013e-05, + "loss": 0.5981882810592651, + "step": 1934 + }, + { + "epoch": 0.5658722035385291, + "grad_norm": 1.3211180154101085, + "learning_rate": 1.7181296622893132e-05, + "loss": 0.6009912490844727, + "step": 1935 + }, + { + "epoch": 0.5661646439537944, + "grad_norm": 1.5201002205446237, + "learning_rate": 1.717793126600054e-05, + "loss": 0.5605272054672241, + "step": 1936 + }, + { + "epoch": 0.5664570843690598, + "grad_norm": 1.9581129231236365, + "learning_rate": 1.717456423131016e-05, + "loss": 0.6310821771621704, + "step": 1937 + }, + { + "epoch": 0.5667495247843252, + "grad_norm": 1.3290964241159713, + "learning_rate": 1.7171195519609013e-05, + "loss": 0.6776266694068909, + "step": 1938 + }, + { + "epoch": 0.5670419651995906, + "grad_norm": 1.5744599660597636, + "learning_rate": 1.7167825131684516e-05, + "loss": 0.6369091868400574, + "step": 1939 + }, + { + "epoch": 0.5673344056148559, + "grad_norm": 1.5708596771950396, + "learning_rate": 1.7164453068324472e-05, + "loss": 0.6241647005081177, + "step": 1940 + }, + { + "epoch": 0.5676268460301214, + "grad_norm": 1.1863544042032323, + "learning_rate": 1.7161079330317086e-05, + "loss": 0.6411961317062378, + "step": 1941 + }, + { + "epoch": 0.5679192864453868, + "grad_norm": 
1.4635134179889109, + "learning_rate": 1.7157703918450942e-05, + "loss": 0.6148936152458191, + "step": 1942 + }, + { + "epoch": 0.5682117268606521, + "grad_norm": 1.3183225060577142, + "learning_rate": 1.7154326833515034e-05, + "loss": 0.5006934404373169, + "step": 1943 + }, + { + "epoch": 0.5685041672759176, + "grad_norm": 1.462356689812602, + "learning_rate": 1.7150948076298722e-05, + "loss": 0.7446701526641846, + "step": 1944 + }, + { + "epoch": 0.5687966076911829, + "grad_norm": 1.2052848826016378, + "learning_rate": 1.7147567647591777e-05, + "loss": 0.6159533262252808, + "step": 1945 + }, + { + "epoch": 0.5690890481064483, + "grad_norm": 1.4298530885651661, + "learning_rate": 1.7144185548184355e-05, + "loss": 0.6437554359436035, + "step": 1946 + }, + { + "epoch": 0.5693814885217137, + "grad_norm": 1.3361469734250542, + "learning_rate": 1.7140801778866995e-05, + "loss": 0.6229397654533386, + "step": 1947 + }, + { + "epoch": 0.5696739289369791, + "grad_norm": 1.4197238006731758, + "learning_rate": 1.7137416340430636e-05, + "loss": 0.5777184963226318, + "step": 1948 + }, + { + "epoch": 0.5699663693522444, + "grad_norm": 1.543436374887725, + "learning_rate": 1.7134029233666603e-05, + "loss": 0.7817827463150024, + "step": 1949 + }, + { + "epoch": 0.5702588097675099, + "grad_norm": 1.3527927450904613, + "learning_rate": 1.713064045936662e-05, + "loss": 0.6784861087799072, + "step": 1950 + }, + { + "epoch": 0.5705512501827753, + "grad_norm": 1.2839254399050724, + "learning_rate": 1.7127250018322777e-05, + "loss": 0.6883150339126587, + "step": 1951 + }, + { + "epoch": 0.5708436905980406, + "grad_norm": 1.093202890209594, + "learning_rate": 1.712385791132758e-05, + "loss": 0.5464504957199097, + "step": 1952 + }, + { + "epoch": 0.5711361310133061, + "grad_norm": 1.2617859237604026, + "learning_rate": 1.7120464139173908e-05, + "loss": 0.5950040817260742, + "step": 1953 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 1.374864335037442, + "learning_rate": 
1.7117068702655034e-05, + "loss": 0.6381576061248779, + "step": 1954 + }, + { + "epoch": 0.5717210118438368, + "grad_norm": 1.2624571465966312, + "learning_rate": 1.7113671602564628e-05, + "loss": 0.6611777544021606, + "step": 1955 + }, + { + "epoch": 0.5720134522591022, + "grad_norm": 1.2625162580462326, + "learning_rate": 1.7110272839696735e-05, + "loss": 0.5057446956634521, + "step": 1956 + }, + { + "epoch": 0.5723058926743676, + "grad_norm": 1.3802970727547992, + "learning_rate": 1.7106872414845798e-05, + "loss": 0.6095671653747559, + "step": 1957 + }, + { + "epoch": 0.572598333089633, + "grad_norm": 1.4171107803407814, + "learning_rate": 1.710347032880664e-05, + "loss": 0.5514808893203735, + "step": 1958 + }, + { + "epoch": 0.5728907735048984, + "grad_norm": 2.1059044775107516, + "learning_rate": 1.7100066582374487e-05, + "loss": 0.6491304039955139, + "step": 1959 + }, + { + "epoch": 0.5731832139201638, + "grad_norm": 1.2887931231971388, + "learning_rate": 1.7096661176344936e-05, + "loss": 0.6759692430496216, + "step": 1960 + }, + { + "epoch": 0.5734756543354291, + "grad_norm": 1.4738884192318065, + "learning_rate": 1.709325411151399e-05, + "loss": 0.5897858142852783, + "step": 1961 + }, + { + "epoch": 0.5737680947506946, + "grad_norm": 1.537196415964603, + "learning_rate": 1.7089845388678015e-05, + "loss": 0.6822922229766846, + "step": 1962 + }, + { + "epoch": 0.5740605351659599, + "grad_norm": 1.2963583337618676, + "learning_rate": 1.7086435008633792e-05, + "loss": 0.7694820165634155, + "step": 1963 + }, + { + "epoch": 0.5743529755812253, + "grad_norm": 1.5109651591265172, + "learning_rate": 1.7083022972178473e-05, + "loss": 0.702151358127594, + "step": 1964 + }, + { + "epoch": 0.5746454159964907, + "grad_norm": 1.564445011536072, + "learning_rate": 1.7079609280109597e-05, + "loss": 0.768844485282898, + "step": 1965 + }, + { + "epoch": 0.5749378564117561, + "grad_norm": 1.4251497195478635, + "learning_rate": 1.7076193933225097e-05, + "loss": 
0.6641331911087036, + "step": 1966 + }, + { + "epoch": 0.5752302968270215, + "grad_norm": 1.3577479649866828, + "learning_rate": 1.707277693232329e-05, + "loss": 0.7176777124404907, + "step": 1967 + }, + { + "epoch": 0.5755227372422869, + "grad_norm": 1.4539026175393464, + "learning_rate": 1.7069358278202877e-05, + "loss": 0.6543929576873779, + "step": 1968 + }, + { + "epoch": 0.5758151776575523, + "grad_norm": 1.422676342883674, + "learning_rate": 1.7065937971662953e-05, + "loss": 0.7501214742660522, + "step": 1969 + }, + { + "epoch": 0.5761076180728176, + "grad_norm": 1.1830543705848042, + "learning_rate": 1.7062516013502984e-05, + "loss": 0.6013212203979492, + "step": 1970 + }, + { + "epoch": 0.5764000584880831, + "grad_norm": 1.489892931502725, + "learning_rate": 1.7059092404522843e-05, + "loss": 0.5920547246932983, + "step": 1971 + }, + { + "epoch": 0.5766924989033484, + "grad_norm": 1.1082983109051399, + "learning_rate": 1.7055667145522767e-05, + "loss": 0.6720744371414185, + "step": 1972 + }, + { + "epoch": 0.5769849393186138, + "grad_norm": 1.3476214386922525, + "learning_rate": 1.70522402373034e-05, + "loss": 0.6938234567642212, + "step": 1973 + }, + { + "epoch": 0.5772773797338793, + "grad_norm": 1.321699429936501, + "learning_rate": 1.704881168066575e-05, + "loss": 0.6430555582046509, + "step": 1974 + }, + { + "epoch": 0.5775698201491446, + "grad_norm": 1.331724408429167, + "learning_rate": 1.7045381476411234e-05, + "loss": 0.7738221883773804, + "step": 1975 + }, + { + "epoch": 0.57786226056441, + "grad_norm": 1.2033511527827634, + "learning_rate": 1.704194962534163e-05, + "loss": 0.5335453748703003, + "step": 1976 + }, + { + "epoch": 0.5781547009796754, + "grad_norm": 1.4123366931040846, + "learning_rate": 1.7038516128259118e-05, + "loss": 0.691404402256012, + "step": 1977 + }, + { + "epoch": 0.5784471413949408, + "grad_norm": 1.6032589522393152, + "learning_rate": 1.7035080985966253e-05, + "loss": 0.7371880412101746, + "step": 1978 + }, + { + "epoch": 
0.5787395818102061, + "grad_norm": 1.356558066648364, + "learning_rate": 1.7031644199265987e-05, + "loss": 0.5661574602127075, + "step": 1979 + }, + { + "epoch": 0.5790320222254716, + "grad_norm": 1.069750621474732, + "learning_rate": 1.702820576896164e-05, + "loss": 0.5823863744735718, + "step": 1980 + }, + { + "epoch": 0.579324462640737, + "grad_norm": 1.608685609966537, + "learning_rate": 1.7024765695856924e-05, + "loss": 0.6228796243667603, + "step": 1981 + }, + { + "epoch": 0.5796169030560023, + "grad_norm": 1.3395261062815815, + "learning_rate": 1.702132398075594e-05, + "loss": 0.5788040161132812, + "step": 1982 + }, + { + "epoch": 0.5799093434712678, + "grad_norm": 1.1540676629937416, + "learning_rate": 1.701788062446317e-05, + "loss": 0.5950253009796143, + "step": 1983 + }, + { + "epoch": 0.5802017838865331, + "grad_norm": 1.2446098890682338, + "learning_rate": 1.7014435627783466e-05, + "loss": 0.5672034025192261, + "step": 1984 + }, + { + "epoch": 0.5804942243017985, + "grad_norm": 1.329055336569987, + "learning_rate": 1.7010988991522085e-05, + "loss": 0.6646316051483154, + "step": 1985 + }, + { + "epoch": 0.5807866647170639, + "grad_norm": 1.2423480846022465, + "learning_rate": 1.7007540716484657e-05, + "loss": 0.6430097818374634, + "step": 1986 + }, + { + "epoch": 0.5810791051323293, + "grad_norm": 1.2889752174339557, + "learning_rate": 1.700409080347719e-05, + "loss": 0.5803329348564148, + "step": 1987 + }, + { + "epoch": 0.5813715455475946, + "grad_norm": 1.613226747300198, + "learning_rate": 1.7000639253306085e-05, + "loss": 0.7526525259017944, + "step": 1988 + }, + { + "epoch": 0.5816639859628601, + "grad_norm": 1.329271357875936, + "learning_rate": 1.6997186066778118e-05, + "loss": 0.6679468750953674, + "step": 1989 + }, + { + "epoch": 0.5819564263781255, + "grad_norm": 1.5773364597040387, + "learning_rate": 1.6993731244700454e-05, + "loss": 0.7233256101608276, + "step": 1990 + }, + { + "epoch": 0.5822488667933908, + "grad_norm": 1.3632345541871926, 
+ "learning_rate": 1.6990274787880633e-05, + "loss": 0.5986290574073792, + "step": 1991 + }, + { + "epoch": 0.5825413072086563, + "grad_norm": 1.3136772281139917, + "learning_rate": 1.6986816697126583e-05, + "loss": 0.6898672580718994, + "step": 1992 + }, + { + "epoch": 0.5828337476239216, + "grad_norm": 1.6057802032529045, + "learning_rate": 1.698335697324661e-05, + "loss": 0.6888613104820251, + "step": 1993 + }, + { + "epoch": 0.583126188039187, + "grad_norm": 1.3469913891844598, + "learning_rate": 1.6979895617049404e-05, + "loss": 0.6002428531646729, + "step": 1994 + }, + { + "epoch": 0.5834186284544524, + "grad_norm": 1.3517104173069454, + "learning_rate": 1.6976432629344036e-05, + "loss": 0.6372438669204712, + "step": 1995 + }, + { + "epoch": 0.5837110688697178, + "grad_norm": 1.0868680846473084, + "learning_rate": 1.6972968010939953e-05, + "loss": 0.529569149017334, + "step": 1996 + }, + { + "epoch": 0.5840035092849832, + "grad_norm": 1.415626330345063, + "learning_rate": 1.6969501762647002e-05, + "loss": 0.5534025430679321, + "step": 1997 + }, + { + "epoch": 0.5842959497002486, + "grad_norm": 1.5855609078257513, + "learning_rate": 1.6966033885275384e-05, + "loss": 0.8105937242507935, + "step": 1998 + }, + { + "epoch": 0.584588390115514, + "grad_norm": 1.4927698791899027, + "learning_rate": 1.6962564379635702e-05, + "loss": 0.7657530903816223, + "step": 1999 + }, + { + "epoch": 0.5848808305307793, + "grad_norm": 1.2186213815751603, + "learning_rate": 1.6959093246538927e-05, + "loss": 0.5941641330718994, + "step": 2000 + }, + { + "epoch": 0.5851732709460448, + "grad_norm": 1.2310851342087676, + "learning_rate": 1.695562048679642e-05, + "loss": 0.6130149364471436, + "step": 2001 + }, + { + "epoch": 0.5854657113613101, + "grad_norm": 1.4904324383349616, + "learning_rate": 1.6952146101219914e-05, + "loss": 0.7078043222427368, + "step": 2002 + }, + { + "epoch": 0.5857581517765755, + "grad_norm": 1.4412882425814895, + "learning_rate": 1.6948670090621528e-05, + 
"loss": 0.6330863237380981, + "step": 2003 + }, + { + "epoch": 0.5860505921918409, + "grad_norm": 1.2835823957491164, + "learning_rate": 1.6945192455813755e-05, + "loss": 0.6631220579147339, + "step": 2004 + }, + { + "epoch": 0.5863430326071063, + "grad_norm": 1.411600977622384, + "learning_rate": 1.6941713197609476e-05, + "loss": 0.6669473648071289, + "step": 2005 + }, + { + "epoch": 0.5866354730223717, + "grad_norm": 1.370088328820007, + "learning_rate": 1.6938232316821938e-05, + "loss": 0.608252763748169, + "step": 2006 + }, + { + "epoch": 0.5869279134376371, + "grad_norm": 1.3777699704962545, + "learning_rate": 1.6934749814264786e-05, + "loss": 0.5979427695274353, + "step": 2007 + }, + { + "epoch": 0.5872203538529025, + "grad_norm": 1.541200433158731, + "learning_rate": 1.6931265690752027e-05, + "loss": 0.5653454661369324, + "step": 2008 + }, + { + "epoch": 0.5875127942681678, + "grad_norm": 1.1212005773159774, + "learning_rate": 1.6927779947098052e-05, + "loss": 0.6399147510528564, + "step": 2009 + }, + { + "epoch": 0.5878052346834333, + "grad_norm": 1.1797468758477498, + "learning_rate": 1.6924292584117642e-05, + "loss": 0.41824793815612793, + "step": 2010 + }, + { + "epoch": 0.5880976750986986, + "grad_norm": 1.917297128854583, + "learning_rate": 1.6920803602625938e-05, + "loss": 0.8881042003631592, + "step": 2011 + }, + { + "epoch": 0.588390115513964, + "grad_norm": 1.331713386917835, + "learning_rate": 1.6917313003438473e-05, + "loss": 0.636030912399292, + "step": 2012 + }, + { + "epoch": 0.5886825559292295, + "grad_norm": 1.4002891525649699, + "learning_rate": 1.6913820787371147e-05, + "loss": 0.6038305759429932, + "step": 2013 + }, + { + "epoch": 0.5889749963444948, + "grad_norm": 1.1435051787090085, + "learning_rate": 1.6910326955240252e-05, + "loss": 0.7073840498924255, + "step": 2014 + }, + { + "epoch": 0.5892674367597602, + "grad_norm": 1.4386346426993692, + "learning_rate": 1.6906831507862446e-05, + "loss": 0.5804994106292725, + "step": 2015 + }, + 
{ + "epoch": 0.5895598771750256, + "grad_norm": 1.5741785374654678, + "learning_rate": 1.6903334446054768e-05, + "loss": 0.8194780349731445, + "step": 2016 + }, + { + "epoch": 0.589852317590291, + "grad_norm": 1.812303850133564, + "learning_rate": 1.689983577063464e-05, + "loss": 0.7348685264587402, + "step": 2017 + }, + { + "epoch": 0.5901447580055563, + "grad_norm": 1.1971589423872142, + "learning_rate": 1.689633548241985e-05, + "loss": 0.5855007171630859, + "step": 2018 + }, + { + "epoch": 0.5904371984208218, + "grad_norm": 1.3707253561652837, + "learning_rate": 1.689283358222857e-05, + "loss": 0.7387616634368896, + "step": 2019 + }, + { + "epoch": 0.5907296388360872, + "grad_norm": 1.1680954205847025, + "learning_rate": 1.688933007087935e-05, + "loss": 0.688759446144104, + "step": 2020 + }, + { + "epoch": 0.5910220792513525, + "grad_norm": 1.5341989172452428, + "learning_rate": 1.6885824949191117e-05, + "loss": 0.7203953266143799, + "step": 2021 + }, + { + "epoch": 0.591314519666618, + "grad_norm": 1.2850552689542662, + "learning_rate": 1.6882318217983165e-05, + "loss": 0.6465663909912109, + "step": 2022 + }, + { + "epoch": 0.5916069600818833, + "grad_norm": 1.1725524993946357, + "learning_rate": 1.6878809878075176e-05, + "loss": 0.6625394821166992, + "step": 2023 + }, + { + "epoch": 0.5918994004971487, + "grad_norm": 1.1518619162929866, + "learning_rate": 1.68752999302872e-05, + "loss": 0.6577074527740479, + "step": 2024 + }, + { + "epoch": 0.5921918409124141, + "grad_norm": 1.2660442226503865, + "learning_rate": 1.6871788375439667e-05, + "loss": 0.50509113073349, + "step": 2025 + }, + { + "epoch": 0.5924842813276795, + "grad_norm": 1.1506732126554624, + "learning_rate": 1.6868275214353387e-05, + "loss": 0.5723974704742432, + "step": 2026 + }, + { + "epoch": 0.5927767217429448, + "grad_norm": 1.5630741195611901, + "learning_rate": 1.6864760447849533e-05, + "loss": 0.6383459568023682, + "step": 2027 + }, + { + "epoch": 0.5930691621582103, + "grad_norm": 
1.5937791400894217, + "learning_rate": 1.6861244076749663e-05, + "loss": 0.5307388305664062, + "step": 2028 + }, + { + "epoch": 0.5933616025734757, + "grad_norm": 1.3756662975981515, + "learning_rate": 1.6857726101875706e-05, + "loss": 0.8009265661239624, + "step": 2029 + }, + { + "epoch": 0.593654042988741, + "grad_norm": 1.3635510886639874, + "learning_rate": 1.685420652404997e-05, + "loss": 0.5505321025848389, + "step": 2030 + }, + { + "epoch": 0.5939464834040065, + "grad_norm": 1.2645625310092812, + "learning_rate": 1.6850685344095134e-05, + "loss": 0.680927038192749, + "step": 2031 + }, + { + "epoch": 0.5942389238192718, + "grad_norm": 1.419624052256642, + "learning_rate": 1.684716256283425e-05, + "loss": 0.7357309460639954, + "step": 2032 + }, + { + "epoch": 0.5945313642345372, + "grad_norm": 1.2277919560967578, + "learning_rate": 1.6843638181090748e-05, + "loss": 0.5896620750427246, + "step": 2033 + }, + { + "epoch": 0.5948238046498026, + "grad_norm": 1.261982037348603, + "learning_rate": 1.6840112199688432e-05, + "loss": 0.5567387342453003, + "step": 2034 + }, + { + "epoch": 0.595116245065068, + "grad_norm": 1.2606984508496513, + "learning_rate": 1.6836584619451478e-05, + "loss": 0.6428712606430054, + "step": 2035 + }, + { + "epoch": 0.5954086854803334, + "grad_norm": 1.3387753764851709, + "learning_rate": 1.6833055441204436e-05, + "loss": 0.7430459260940552, + "step": 2036 + }, + { + "epoch": 0.5957011258955988, + "grad_norm": 1.250181817593343, + "learning_rate": 1.682952466577223e-05, + "loss": 0.5982654690742493, + "step": 2037 + }, + { + "epoch": 0.5959935663108642, + "grad_norm": 1.2721973260460164, + "learning_rate": 1.6825992293980158e-05, + "loss": 0.5807450413703918, + "step": 2038 + }, + { + "epoch": 0.5962860067261295, + "grad_norm": 1.4202543697420538, + "learning_rate": 1.6822458326653888e-05, + "loss": 0.7667814493179321, + "step": 2039 + }, + { + "epoch": 0.596578447141395, + "grad_norm": 1.4555539952275451, + "learning_rate": 
1.6818922764619467e-05, + "loss": 0.8192781805992126, + "step": 2040 + }, + { + "epoch": 0.5968708875566603, + "grad_norm": 1.3146767820144227, + "learning_rate": 1.681538560870331e-05, + "loss": 0.6652504205703735, + "step": 2041 + }, + { + "epoch": 0.5971633279719257, + "grad_norm": 1.4465108366403951, + "learning_rate": 1.6811846859732207e-05, + "loss": 0.6227332353591919, + "step": 2042 + }, + { + "epoch": 0.597455768387191, + "grad_norm": 1.1394575473936808, + "learning_rate": 1.6808306518533315e-05, + "loss": 0.5459558963775635, + "step": 2043 + }, + { + "epoch": 0.5977482088024565, + "grad_norm": 1.3498516241816683, + "learning_rate": 1.6804764585934167e-05, + "loss": 0.5176202058792114, + "step": 2044 + }, + { + "epoch": 0.5980406492177219, + "grad_norm": 1.5025501377940633, + "learning_rate": 1.6801221062762677e-05, + "loss": 0.5818016529083252, + "step": 2045 + }, + { + "epoch": 0.5983330896329873, + "grad_norm": 1.3397658451047565, + "learning_rate": 1.679767594984711e-05, + "loss": 0.622256875038147, + "step": 2046 + }, + { + "epoch": 0.5986255300482527, + "grad_norm": 1.2198859984633783, + "learning_rate": 1.6794129248016124e-05, + "loss": 0.5538911819458008, + "step": 2047 + }, + { + "epoch": 0.598917970463518, + "grad_norm": 1.1939205886096602, + "learning_rate": 1.6790580958098733e-05, + "loss": 0.4934890568256378, + "step": 2048 + }, + { + "epoch": 0.5992104108787835, + "grad_norm": 1.5628453531282531, + "learning_rate": 1.678703108092433e-05, + "loss": 0.6754223108291626, + "step": 2049 + }, + { + "epoch": 0.5995028512940488, + "grad_norm": 1.3047429440272302, + "learning_rate": 1.678347961732268e-05, + "loss": 0.48618268966674805, + "step": 2050 + }, + { + "epoch": 0.5997952917093142, + "grad_norm": 2.239352665042965, + "learning_rate": 1.6779926568123913e-05, + "loss": 0.6844758987426758, + "step": 2051 + }, + { + "epoch": 0.6000877321245797, + "grad_norm": 1.222439693123936, + "learning_rate": 1.677637193415853e-05, + "loss": 
0.5258621573448181, + "step": 2052 + }, + { + "epoch": 0.600380172539845, + "grad_norm": 1.5856950316684058, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.5571128129959106, + "step": 2053 + }, + { + "epoch": 0.6006726129551104, + "grad_norm": 1.514916071293939, + "learning_rate": 1.67692579152518e-05, + "loss": 0.5881344079971313, + "step": 2054 + }, + { + "epoch": 0.6009650533703758, + "grad_norm": 1.5804701546241575, + "learning_rate": 1.6765698531973305e-05, + "loss": 0.7162419557571411, + "step": 2055 + }, + { + "epoch": 0.6012574937856412, + "grad_norm": 1.487082432347586, + "learning_rate": 1.6762137567253917e-05, + "loss": 0.7470849752426147, + "step": 2056 + }, + { + "epoch": 0.6015499342009065, + "grad_norm": 1.4154424289161787, + "learning_rate": 1.6758575021925987e-05, + "loss": 0.6043628454208374, + "step": 2057 + }, + { + "epoch": 0.601842374616172, + "grad_norm": 1.4033964052969388, + "learning_rate": 1.6755010896822237e-05, + "loss": 0.6574143171310425, + "step": 2058 + }, + { + "epoch": 0.6021348150314374, + "grad_norm": 1.3508419478610747, + "learning_rate": 1.675144519277576e-05, + "loss": 0.605838418006897, + "step": 2059 + }, + { + "epoch": 0.6024272554467027, + "grad_norm": 1.2890691190480261, + "learning_rate": 1.6747877910620022e-05, + "loss": 0.5859218239784241, + "step": 2060 + }, + { + "epoch": 0.6027196958619682, + "grad_norm": 1.3985794655421304, + "learning_rate": 1.674430905118885e-05, + "loss": 0.7272971868515015, + "step": 2061 + }, + { + "epoch": 0.6030121362772335, + "grad_norm": 1.2870566467248659, + "learning_rate": 1.674073861531644e-05, + "loss": 0.606023907661438, + "step": 2062 + }, + { + "epoch": 0.6033045766924989, + "grad_norm": 1.345090429761192, + "learning_rate": 1.6737166603837364e-05, + "loss": 0.6029521822929382, + "step": 2063 + }, + { + "epoch": 0.6035970171077643, + "grad_norm": 1.1860277395685632, + "learning_rate": 1.673359301758656e-05, + "loss": 0.7544999122619629, + "step": 2064 + }, + { + "epoch": 
0.6038894575230297, + "grad_norm": 1.3953376279645262, + "learning_rate": 1.6730017857399327e-05, + "loss": 0.7487601637840271, + "step": 2065 + }, + { + "epoch": 0.604181897938295, + "grad_norm": 1.3904468062872732, + "learning_rate": 1.672644112411134e-05, + "loss": 0.6429200172424316, + "step": 2066 + }, + { + "epoch": 0.6044743383535605, + "grad_norm": 1.4246263416975375, + "learning_rate": 1.6722862818558635e-05, + "loss": 0.7337179183959961, + "step": 2067 + }, + { + "epoch": 0.6047667787688259, + "grad_norm": 1.432290850861675, + "learning_rate": 1.671928294157762e-05, + "loss": 0.6644014120101929, + "step": 2068 + }, + { + "epoch": 0.6050592191840912, + "grad_norm": 1.3048966935224826, + "learning_rate": 1.6715701494005078e-05, + "loss": 0.5987672805786133, + "step": 2069 + }, + { + "epoch": 0.6053516595993567, + "grad_norm": 1.5176113056744007, + "learning_rate": 1.671211847667814e-05, + "loss": 0.5878695845603943, + "step": 2070 + }, + { + "epoch": 0.605644100014622, + "grad_norm": 1.3348485026555847, + "learning_rate": 1.670853389043432e-05, + "loss": 0.540128231048584, + "step": 2071 + }, + { + "epoch": 0.6059365404298874, + "grad_norm": 1.3888450119982874, + "learning_rate": 1.670494773611149e-05, + "loss": 0.667206346988678, + "step": 2072 + }, + { + "epoch": 0.6062289808451528, + "grad_norm": 1.5911825658421195, + "learning_rate": 1.6701360014547896e-05, + "loss": 0.6433641910552979, + "step": 2073 + }, + { + "epoch": 0.6065214212604182, + "grad_norm": 1.447981653333928, + "learning_rate": 1.669777072658214e-05, + "loss": 0.5803529024124146, + "step": 2074 + }, + { + "epoch": 0.6068138616756836, + "grad_norm": 1.125005009009719, + "learning_rate": 1.6694179873053202e-05, + "loss": 0.6203820705413818, + "step": 2075 + }, + { + "epoch": 0.607106302090949, + "grad_norm": 1.3092542979615172, + "learning_rate": 1.669058745480042e-05, + "loss": 0.6194918155670166, + "step": 2076 + }, + { + "epoch": 0.6073987425062144, + "grad_norm": 1.593480689755987, + 
"learning_rate": 1.66869934726635e-05, + "loss": 0.6797547936439514, + "step": 2077 + }, + { + "epoch": 0.6076911829214797, + "grad_norm": 1.3923211889522802, + "learning_rate": 1.6683397927482512e-05, + "loss": 0.6076459884643555, + "step": 2078 + }, + { + "epoch": 0.6079836233367452, + "grad_norm": 1.3874225830336557, + "learning_rate": 1.6679800820097895e-05, + "loss": 0.6958068609237671, + "step": 2079 + }, + { + "epoch": 0.6082760637520105, + "grad_norm": 1.3355509335032223, + "learning_rate": 1.6676202151350453e-05, + "loss": 0.5819929242134094, + "step": 2080 + }, + { + "epoch": 0.6085685041672759, + "grad_norm": 1.3476445996808082, + "learning_rate": 1.6672601922081347e-05, + "loss": 0.7125047445297241, + "step": 2081 + }, + { + "epoch": 0.6088609445825413, + "grad_norm": 1.4432332437479862, + "learning_rate": 1.6669000133132108e-05, + "loss": 0.8046560287475586, + "step": 2082 + }, + { + "epoch": 0.6091533849978067, + "grad_norm": 1.192025927247586, + "learning_rate": 1.666539678534464e-05, + "loss": 0.5468478202819824, + "step": 2083 + }, + { + "epoch": 0.6094458254130721, + "grad_norm": 1.3403719695971306, + "learning_rate": 1.6661791879561204e-05, + "loss": 0.6387852430343628, + "step": 2084 + }, + { + "epoch": 0.6097382658283375, + "grad_norm": 1.3327872578740647, + "learning_rate": 1.6658185416624415e-05, + "loss": 0.643539547920227, + "step": 2085 + }, + { + "epoch": 0.6100307062436029, + "grad_norm": 1.2236148701775094, + "learning_rate": 1.6654577397377266e-05, + "loss": 0.5031965374946594, + "step": 2086 + }, + { + "epoch": 0.6103231466588682, + "grad_norm": 1.507439246425782, + "learning_rate": 1.6650967822663115e-05, + "loss": 0.6690273284912109, + "step": 2087 + }, + { + "epoch": 0.6106155870741337, + "grad_norm": 1.2924449065282086, + "learning_rate": 1.6647356693325672e-05, + "loss": 0.6396887302398682, + "step": 2088 + }, + { + "epoch": 0.610908027489399, + "grad_norm": 1.4444361497865652, + "learning_rate": 1.664374401020902e-05, + "loss": 
0.6306549310684204, + "step": 2089 + }, + { + "epoch": 0.6112004679046644, + "grad_norm": 1.3565777173208147, + "learning_rate": 1.66401297741576e-05, + "loss": 0.5936366319656372, + "step": 2090 + }, + { + "epoch": 0.6114929083199299, + "grad_norm": 1.1669567203268514, + "learning_rate": 1.6636513986016215e-05, + "loss": 0.6153277158737183, + "step": 2091 + }, + { + "epoch": 0.6117853487351952, + "grad_norm": 1.2085146124175858, + "learning_rate": 1.663289664663004e-05, + "loss": 0.6361621618270874, + "step": 2092 + }, + { + "epoch": 0.6120777891504606, + "grad_norm": 1.2163858440552462, + "learning_rate": 1.6629277756844603e-05, + "loss": 0.6511524319648743, + "step": 2093 + }, + { + "epoch": 0.612370229565726, + "grad_norm": 1.2219001757495958, + "learning_rate": 1.6625657317505792e-05, + "loss": 0.5811333656311035, + "step": 2094 + }, + { + "epoch": 0.6126626699809914, + "grad_norm": 1.4531007944498606, + "learning_rate": 1.6622035329459872e-05, + "loss": 0.6935377717018127, + "step": 2095 + }, + { + "epoch": 0.6129551103962567, + "grad_norm": 1.3697721797296887, + "learning_rate": 1.6618411793553455e-05, + "loss": 0.6363199949264526, + "step": 2096 + }, + { + "epoch": 0.6132475508115222, + "grad_norm": 1.6107434013725794, + "learning_rate": 1.6614786710633525e-05, + "loss": 0.7325713634490967, + "step": 2097 + }, + { + "epoch": 0.6135399912267876, + "grad_norm": 1.3944095356365322, + "learning_rate": 1.6611160081547414e-05, + "loss": 0.5739182829856873, + "step": 2098 + }, + { + "epoch": 0.6138324316420529, + "grad_norm": 1.4193388816384238, + "learning_rate": 1.6607531907142835e-05, + "loss": 0.611133873462677, + "step": 2099 + }, + { + "epoch": 0.6141248720573184, + "grad_norm": 1.579788361702439, + "learning_rate": 1.6603902188267842e-05, + "loss": 0.6419532299041748, + "step": 2100 + }, + { + "epoch": 0.6144173124725837, + "grad_norm": 1.482873128334509, + "learning_rate": 1.660027092577087e-05, + "loss": 0.7736743688583374, + "step": 2101 + }, + { + 
"epoch": 0.6147097528878491, + "grad_norm": 1.199857125427724, + "learning_rate": 1.6596638120500696e-05, + "loss": 0.5249119400978088, + "step": 2102 + }, + { + "epoch": 0.6150021933031145, + "grad_norm": 2.505852142425954, + "learning_rate": 1.6593003773306475e-05, + "loss": 0.7145636081695557, + "step": 2103 + }, + { + "epoch": 0.6152946337183799, + "grad_norm": 1.3335089477583737, + "learning_rate": 1.65893678850377e-05, + "loss": 0.5807666182518005, + "step": 2104 + }, + { + "epoch": 0.6155870741336452, + "grad_norm": 1.2437068513912055, + "learning_rate": 1.6585730456544255e-05, + "loss": 0.5049663782119751, + "step": 2105 + }, + { + "epoch": 0.6158795145489107, + "grad_norm": 1.4826397888996732, + "learning_rate": 1.658209148867635e-05, + "loss": 0.6744092702865601, + "step": 2106 + }, + { + "epoch": 0.6161719549641761, + "grad_norm": 1.4821897923446594, + "learning_rate": 1.6578450982284584e-05, + "loss": 0.605404794216156, + "step": 2107 + }, + { + "epoch": 0.6164643953794414, + "grad_norm": 1.1917544416711534, + "learning_rate": 1.6574808938219894e-05, + "loss": 0.6074866056442261, + "step": 2108 + }, + { + "epoch": 0.6167568357947069, + "grad_norm": 1.284543555588908, + "learning_rate": 1.6571165357333594e-05, + "loss": 0.6758207082748413, + "step": 2109 + }, + { + "epoch": 0.6170492762099722, + "grad_norm": 1.580962080275822, + "learning_rate": 1.6567520240477344e-05, + "loss": 0.7669274806976318, + "step": 2110 + }, + { + "epoch": 0.6173417166252376, + "grad_norm": 1.3997913559025885, + "learning_rate": 1.6563873588503173e-05, + "loss": 0.497562050819397, + "step": 2111 + }, + { + "epoch": 0.617634157040503, + "grad_norm": 1.6655652024231358, + "learning_rate": 1.656022540226345e-05, + "loss": 0.6398104429244995, + "step": 2112 + }, + { + "epoch": 0.6179265974557684, + "grad_norm": 1.4155810596985208, + "learning_rate": 1.6556575682610935e-05, + "loss": 0.6739988327026367, + "step": 2113 + }, + { + "epoch": 0.6182190378710338, + "grad_norm": 
1.3164921836609038, + "learning_rate": 1.6552924430398716e-05, + "loss": 0.5710165500640869, + "step": 2114 + }, + { + "epoch": 0.6185114782862992, + "grad_norm": 1.1567442833736337, + "learning_rate": 1.6549271646480253e-05, + "loss": 0.6087738871574402, + "step": 2115 + }, + { + "epoch": 0.6188039187015646, + "grad_norm": 1.1877649418617353, + "learning_rate": 1.6545617331709364e-05, + "loss": 0.5300824642181396, + "step": 2116 + }, + { + "epoch": 0.6190963591168299, + "grad_norm": 1.3759503189909044, + "learning_rate": 1.6541961486940222e-05, + "loss": 0.7384774684906006, + "step": 2117 + }, + { + "epoch": 0.6193887995320954, + "grad_norm": 1.1608035895573054, + "learning_rate": 1.6538304113027356e-05, + "loss": 0.5867838263511658, + "step": 2118 + }, + { + "epoch": 0.6196812399473607, + "grad_norm": 1.4435135524238625, + "learning_rate": 1.653464521082566e-05, + "loss": 0.617068886756897, + "step": 2119 + }, + { + "epoch": 0.6199736803626261, + "grad_norm": 1.2420433862943483, + "learning_rate": 1.6530984781190374e-05, + "loss": 0.7316439151763916, + "step": 2120 + }, + { + "epoch": 0.6202661207778914, + "grad_norm": 1.3153827472233475, + "learning_rate": 1.6527322824977104e-05, + "loss": 0.5469995737075806, + "step": 2121 + }, + { + "epoch": 0.6205585611931569, + "grad_norm": 1.4608354678316708, + "learning_rate": 1.6523659343041815e-05, + "loss": 0.6577411890029907, + "step": 2122 + }, + { + "epoch": 0.6208510016084223, + "grad_norm": 1.5130442860821829, + "learning_rate": 1.6519994336240816e-05, + "loss": 0.7425049543380737, + "step": 2123 + }, + { + "epoch": 0.6211434420236877, + "grad_norm": 1.7408354143028393, + "learning_rate": 1.6516327805430785e-05, + "loss": 0.7894090414047241, + "step": 2124 + }, + { + "epoch": 0.6214358824389531, + "grad_norm": 1.2267269656084083, + "learning_rate": 1.651265975146875e-05, + "loss": 0.5739543437957764, + "step": 2125 + }, + { + "epoch": 0.6217283228542184, + "grad_norm": 1.2973694692382243, + "learning_rate": 
1.6508990175212092e-05, + "loss": 0.6987308263778687, + "step": 2126 + }, + { + "epoch": 0.6220207632694839, + "grad_norm": 1.237403110571432, + "learning_rate": 1.650531907751856e-05, + "loss": 0.5956544280052185, + "step": 2127 + }, + { + "epoch": 0.6223132036847492, + "grad_norm": 1.3646659152675398, + "learning_rate": 1.6501646459246245e-05, + "loss": 0.582348108291626, + "step": 2128 + }, + { + "epoch": 0.6226056441000146, + "grad_norm": 1.327256978138479, + "learning_rate": 1.64979723212536e-05, + "loss": 0.8057917356491089, + "step": 2129 + }, + { + "epoch": 0.6228980845152801, + "grad_norm": 1.1623408864017983, + "learning_rate": 1.6494296664399428e-05, + "loss": 0.6237305402755737, + "step": 2130 + }, + { + "epoch": 0.6231905249305454, + "grad_norm": 1.3152067943219485, + "learning_rate": 1.6490619489542905e-05, + "loss": 0.6445767879486084, + "step": 2131 + }, + { + "epoch": 0.6234829653458108, + "grad_norm": 1.4611569228302668, + "learning_rate": 1.648694079754354e-05, + "loss": 0.6397994160652161, + "step": 2132 + }, + { + "epoch": 0.6237754057610762, + "grad_norm": 1.3955823025243248, + "learning_rate": 1.64832605892612e-05, + "loss": 0.8216533660888672, + "step": 2133 + }, + { + "epoch": 0.6240678461763416, + "grad_norm": 1.3134524569329014, + "learning_rate": 1.6479578865556115e-05, + "loss": 0.6894406080245972, + "step": 2134 + }, + { + "epoch": 0.6243602865916069, + "grad_norm": 1.2940264658828888, + "learning_rate": 1.6475895627288873e-05, + "loss": 0.6608946323394775, + "step": 2135 + }, + { + "epoch": 0.6246527270068724, + "grad_norm": 1.4094544295935185, + "learning_rate": 1.6472210875320397e-05, + "loss": 0.6070076823234558, + "step": 2136 + }, + { + "epoch": 0.6249451674221378, + "grad_norm": 1.4359082412623407, + "learning_rate": 1.6468524610511982e-05, + "loss": 0.7357348799705505, + "step": 2137 + }, + { + "epoch": 0.6252376078374031, + "grad_norm": 1.201965871501085, + "learning_rate": 1.6464836833725267e-05, + "loss": 0.5959880352020264, 
+ "step": 2138 + }, + { + "epoch": 0.6255300482526686, + "grad_norm": 1.3046810888024383, + "learning_rate": 1.646114754582225e-05, + "loss": 0.7812649011611938, + "step": 2139 + }, + { + "epoch": 0.6258224886679339, + "grad_norm": 1.6609760293820528, + "learning_rate": 1.6457456747665282e-05, + "loss": 0.5985091924667358, + "step": 2140 + }, + { + "epoch": 0.6261149290831993, + "grad_norm": 1.5609316045902142, + "learning_rate": 1.645376444011706e-05, + "loss": 0.6610564589500427, + "step": 2141 + }, + { + "epoch": 0.6264073694984647, + "grad_norm": 1.3917319855245425, + "learning_rate": 1.6450070624040636e-05, + "loss": 0.6876299381256104, + "step": 2142 + }, + { + "epoch": 0.6266998099137301, + "grad_norm": 1.3567193814213938, + "learning_rate": 1.6446375300299425e-05, + "loss": 0.6715782284736633, + "step": 2143 + }, + { + "epoch": 0.6269922503289954, + "grad_norm": 1.6061237563072754, + "learning_rate": 1.644267846975718e-05, + "loss": 0.6066923141479492, + "step": 2144 + }, + { + "epoch": 0.6272846907442609, + "grad_norm": 1.2493532553829008, + "learning_rate": 1.6438980133278017e-05, + "loss": 0.5642968416213989, + "step": 2145 + }, + { + "epoch": 0.6275771311595263, + "grad_norm": 1.0703284322753808, + "learning_rate": 1.6435280291726394e-05, + "loss": 0.604590654373169, + "step": 2146 + }, + { + "epoch": 0.6278695715747916, + "grad_norm": 1.3292746736885825, + "learning_rate": 1.643157894596713e-05, + "loss": 0.6313889026641846, + "step": 2147 + }, + { + "epoch": 0.6281620119900571, + "grad_norm": 1.0767305616181233, + "learning_rate": 1.6427876096865394e-05, + "loss": 0.5084092617034912, + "step": 2148 + }, + { + "epoch": 0.6284544524053224, + "grad_norm": 1.250433663172197, + "learning_rate": 1.6424171745286704e-05, + "loss": 0.5191931128501892, + "step": 2149 + }, + { + "epoch": 0.6287468928205878, + "grad_norm": 1.3567625810681667, + "learning_rate": 1.6420465892096924e-05, + "loss": 0.7397615909576416, + "step": 2150 + }, + { + "epoch": 
0.6290393332358531, + "grad_norm": 1.1359315638082286, + "learning_rate": 1.641675853816228e-05, + "loss": 0.622586727142334, + "step": 2151 + }, + { + "epoch": 0.6293317736511186, + "grad_norm": 1.433028642480203, + "learning_rate": 1.6413049684349344e-05, + "loss": 0.7894928455352783, + "step": 2152 + }, + { + "epoch": 0.629624214066384, + "grad_norm": 1.4395392231763253, + "learning_rate": 1.640933933152504e-05, + "loss": 0.5752773284912109, + "step": 2153 + }, + { + "epoch": 0.6299166544816494, + "grad_norm": 1.3952520818076775, + "learning_rate": 1.640562748055663e-05, + "loss": 0.6738473176956177, + "step": 2154 + }, + { + "epoch": 0.6302090948969148, + "grad_norm": 1.2597002399242925, + "learning_rate": 1.6401914132311745e-05, + "loss": 0.5789517164230347, + "step": 2155 + }, + { + "epoch": 0.6305015353121801, + "grad_norm": 1.2840904364476742, + "learning_rate": 1.6398199287658358e-05, + "loss": 0.5925524830818176, + "step": 2156 + }, + { + "epoch": 0.6307939757274456, + "grad_norm": 1.4374336859820211, + "learning_rate": 1.6394482947464784e-05, + "loss": 0.6949414610862732, + "step": 2157 + }, + { + "epoch": 0.6310864161427109, + "grad_norm": 1.3617313094593515, + "learning_rate": 1.6390765112599705e-05, + "loss": 0.7435301542282104, + "step": 2158 + }, + { + "epoch": 0.6313788565579763, + "grad_norm": 1.5109256996682827, + "learning_rate": 1.6387045783932137e-05, + "loss": 0.6931856274604797, + "step": 2159 + }, + { + "epoch": 0.6316712969732416, + "grad_norm": 1.4369843702380298, + "learning_rate": 1.638332496233145e-05, + "loss": 0.7856471538543701, + "step": 2160 + }, + { + "epoch": 0.6319637373885071, + "grad_norm": 1.460850634730034, + "learning_rate": 1.6379602648667362e-05, + "loss": 0.6299946308135986, + "step": 2161 + }, + { + "epoch": 0.6322561778037725, + "grad_norm": 1.5299113211206812, + "learning_rate": 1.6375878843809946e-05, + "loss": 0.6209328174591064, + "step": 2162 + }, + { + "epoch": 0.6325486182190379, + "grad_norm": 
1.4269696757613273, + "learning_rate": 1.6372153548629617e-05, + "loss": 0.6498390436172485, + "step": 2163 + }, + { + "epoch": 0.6328410586343033, + "grad_norm": 2.1028833494160573, + "learning_rate": 1.6368426763997137e-05, + "loss": 0.6757122278213501, + "step": 2164 + }, + { + "epoch": 0.6331334990495686, + "grad_norm": 1.289589419762841, + "learning_rate": 1.6364698490783623e-05, + "loss": 0.5137026906013489, + "step": 2165 + }, + { + "epoch": 0.633425939464834, + "grad_norm": 1.3914324771074273, + "learning_rate": 1.6360968729860536e-05, + "loss": 0.5876519680023193, + "step": 2166 + }, + { + "epoch": 0.6337183798800994, + "grad_norm": 1.2533286000898018, + "learning_rate": 1.6357237482099682e-05, + "loss": 0.5804057717323303, + "step": 2167 + }, + { + "epoch": 0.6340108202953648, + "grad_norm": 1.361440329822907, + "learning_rate": 1.635350474837322e-05, + "loss": 0.6186444759368896, + "step": 2168 + }, + { + "epoch": 0.6343032607106303, + "grad_norm": 1.4479908785794617, + "learning_rate": 1.6349770529553654e-05, + "loss": 0.6358560919761658, + "step": 2169 + }, + { + "epoch": 0.6345957011258956, + "grad_norm": 1.2507636068938528, + "learning_rate": 1.6346034826513834e-05, + "loss": 0.64283686876297, + "step": 2170 + }, + { + "epoch": 0.634888141541161, + "grad_norm": 1.3854516647796151, + "learning_rate": 1.6342297640126955e-05, + "loss": 0.5269169807434082, + "step": 2171 + }, + { + "epoch": 0.6351805819564263, + "grad_norm": 1.3233372829927026, + "learning_rate": 1.6338558971266563e-05, + "loss": 0.5338561534881592, + "step": 2172 + }, + { + "epoch": 0.6354730223716918, + "grad_norm": 1.365606957045604, + "learning_rate": 1.6334818820806555e-05, + "loss": 0.5587184429168701, + "step": 2173 + }, + { + "epoch": 0.6357654627869571, + "grad_norm": 1.2288709810094502, + "learning_rate": 1.633107718962116e-05, + "loss": 0.6468764543533325, + "step": 2174 + }, + { + "epoch": 0.6360579032022226, + "grad_norm": 1.4431243955955453, + "learning_rate": 
1.6327334078584967e-05, + "loss": 0.7305203676223755, + "step": 2175 + }, + { + "epoch": 0.636350343617488, + "grad_norm": 1.3207763162749322, + "learning_rate": 1.6323589488572908e-05, + "loss": 0.6226189136505127, + "step": 2176 + }, + { + "epoch": 0.6366427840327533, + "grad_norm": 1.4828987038724675, + "learning_rate": 1.631984342046025e-05, + "loss": 0.6552053093910217, + "step": 2177 + }, + { + "epoch": 0.6369352244480188, + "grad_norm": 1.6836072588979352, + "learning_rate": 1.6316095875122617e-05, + "loss": 0.8121978044509888, + "step": 2178 + }, + { + "epoch": 0.6372276648632841, + "grad_norm": 1.3359221660901908, + "learning_rate": 1.6312346853435976e-05, + "loss": 0.5826296806335449, + "step": 2179 + }, + { + "epoch": 0.6375201052785495, + "grad_norm": 1.3567795832303162, + "learning_rate": 1.630859635627664e-05, + "loss": 0.5862709283828735, + "step": 2180 + }, + { + "epoch": 0.6378125456938148, + "grad_norm": 1.2132204868801326, + "learning_rate": 1.6304844384521263e-05, + "loss": 0.7081524133682251, + "step": 2181 + }, + { + "epoch": 0.6381049861090803, + "grad_norm": 1.2359384159808198, + "learning_rate": 1.6301090939046843e-05, + "loss": 0.6394449472427368, + "step": 2182 + }, + { + "epoch": 0.6383974265243456, + "grad_norm": 1.25131780401235, + "learning_rate": 1.6297336020730727e-05, + "loss": 0.6184799075126648, + "step": 2183 + }, + { + "epoch": 0.638689866939611, + "grad_norm": 1.3090426226978378, + "learning_rate": 1.6293579630450606e-05, + "loss": 0.6877666711807251, + "step": 2184 + }, + { + "epoch": 0.6389823073548765, + "grad_norm": 1.3648594367613462, + "learning_rate": 1.6289821769084512e-05, + "loss": 0.5596371293067932, + "step": 2185 + }, + { + "epoch": 0.6392747477701418, + "grad_norm": 1.1779148594123119, + "learning_rate": 1.6286062437510823e-05, + "loss": 0.5378291010856628, + "step": 2186 + }, + { + "epoch": 0.6395671881854073, + "grad_norm": 1.2132664638530417, + "learning_rate": 1.6282301636608256e-05, + "loss": 
0.6965627670288086, + "step": 2187 + }, + { + "epoch": 0.6398596286006726, + "grad_norm": 1.3017112466193883, + "learning_rate": 1.6278539367255885e-05, + "loss": 0.5939220190048218, + "step": 2188 + }, + { + "epoch": 0.640152069015938, + "grad_norm": 1.3743138396251577, + "learning_rate": 1.6274775630333104e-05, + "loss": 0.6225341558456421, + "step": 2189 + }, + { + "epoch": 0.6404445094312033, + "grad_norm": 1.103061387587319, + "learning_rate": 1.6271010426719672e-05, + "loss": 0.471333384513855, + "step": 2190 + }, + { + "epoch": 0.6407369498464688, + "grad_norm": 1.3505910885858836, + "learning_rate": 1.626724375729568e-05, + "loss": 0.6066263914108276, + "step": 2191 + }, + { + "epoch": 0.6410293902617342, + "grad_norm": 1.2842885881869934, + "learning_rate": 1.626347562294157e-05, + "loss": 0.6525982618331909, + "step": 2192 + }, + { + "epoch": 0.6413218306769995, + "grad_norm": 1.375624970339684, + "learning_rate": 1.6259706024538113e-05, + "loss": 0.7395817041397095, + "step": 2193 + }, + { + "epoch": 0.641614271092265, + "grad_norm": 1.326045982489242, + "learning_rate": 1.6255934962966432e-05, + "loss": 0.720014214515686, + "step": 2194 + }, + { + "epoch": 0.6419067115075303, + "grad_norm": 1.4102074363113735, + "learning_rate": 1.625216243910799e-05, + "loss": 0.6905295252799988, + "step": 2195 + }, + { + "epoch": 0.6421991519227958, + "grad_norm": 1.3533501829991437, + "learning_rate": 1.6248388453844596e-05, + "loss": 0.6877295970916748, + "step": 2196 + }, + { + "epoch": 0.6424915923380611, + "grad_norm": 1.414790050061214, + "learning_rate": 1.6244613008058386e-05, + "loss": 0.5782181024551392, + "step": 2197 + }, + { + "epoch": 0.6427840327533265, + "grad_norm": 1.2129092557671588, + "learning_rate": 1.6240836102631856e-05, + "loss": 0.5253425240516663, + "step": 2198 + }, + { + "epoch": 0.6430764731685918, + "grad_norm": 1.2461747547364295, + "learning_rate": 1.623705773844783e-05, + "loss": 0.6631319522857666, + "step": 2199 + }, + { + "epoch": 
0.6433689135838573, + "grad_norm": 1.6130890971192966, + "learning_rate": 1.6233277916389482e-05, + "loss": 0.6458526849746704, + "step": 2200 + }, + { + "epoch": 0.6436613539991227, + "grad_norm": 1.5712729506149452, + "learning_rate": 1.622949663734032e-05, + "loss": 0.5723023414611816, + "step": 2201 + }, + { + "epoch": 0.643953794414388, + "grad_norm": 1.4119455791937807, + "learning_rate": 1.6225713902184193e-05, + "loss": 0.6852096319198608, + "step": 2202 + }, + { + "epoch": 0.6442462348296535, + "grad_norm": 1.460558869527006, + "learning_rate": 1.6221929711805297e-05, + "loss": 0.6343507170677185, + "step": 2203 + }, + { + "epoch": 0.6445386752449188, + "grad_norm": 1.217897103510346, + "learning_rate": 1.6218144067088157e-05, + "loss": 0.6378631591796875, + "step": 2204 + }, + { + "epoch": 0.6448311156601843, + "grad_norm": 1.1203441428966674, + "learning_rate": 1.621435696891765e-05, + "loss": 0.6550023555755615, + "step": 2205 + }, + { + "epoch": 0.6451235560754496, + "grad_norm": 1.3522778560223117, + "learning_rate": 1.6210568418178983e-05, + "loss": 0.5555052757263184, + "step": 2206 + }, + { + "epoch": 0.645415996490715, + "grad_norm": 1.330819772406298, + "learning_rate": 1.6206778415757715e-05, + "loss": 0.7171934247016907, + "step": 2207 + }, + { + "epoch": 0.6457084369059805, + "grad_norm": 1.2953726655501339, + "learning_rate": 1.6202986962539726e-05, + "loss": 0.6464889049530029, + "step": 2208 + }, + { + "epoch": 0.6460008773212458, + "grad_norm": 1.5324773487302452, + "learning_rate": 1.619919405941125e-05, + "loss": 0.6316033601760864, + "step": 2209 + }, + { + "epoch": 0.6462933177365112, + "grad_norm": 1.2083095479015487, + "learning_rate": 1.6195399707258855e-05, + "loss": 0.5548732876777649, + "step": 2210 + }, + { + "epoch": 0.6465857581517765, + "grad_norm": 1.088879983740594, + "learning_rate": 1.6191603906969447e-05, + "loss": 0.5055203437805176, + "step": 2211 + }, + { + "epoch": 0.646878198567042, + "grad_norm": 
1.3416079726495937, + "learning_rate": 1.6187806659430268e-05, + "loss": 0.7010073661804199, + "step": 2212 + }, + { + "epoch": 0.6471706389823073, + "grad_norm": 1.39696751963916, + "learning_rate": 1.6184007965528908e-05, + "loss": 0.6188487410545349, + "step": 2213 + }, + { + "epoch": 0.6474630793975727, + "grad_norm": 1.1122504211535682, + "learning_rate": 1.6180207826153284e-05, + "loss": 0.46920153498649597, + "step": 2214 + }, + { + "epoch": 0.6477555198128382, + "grad_norm": 1.1420938414191775, + "learning_rate": 1.617640624219166e-05, + "loss": 0.6811172962188721, + "step": 2215 + }, + { + "epoch": 0.6480479602281035, + "grad_norm": 1.456471656413964, + "learning_rate": 1.617260321453263e-05, + "loss": 0.6425800323486328, + "step": 2216 + }, + { + "epoch": 0.648340400643369, + "grad_norm": 1.5968265799871777, + "learning_rate": 1.6168798744065123e-05, + "loss": 0.7020897269248962, + "step": 2217 + }, + { + "epoch": 0.6486328410586343, + "grad_norm": 1.1227944263783516, + "learning_rate": 1.6164992831678422e-05, + "loss": 0.5872179865837097, + "step": 2218 + }, + { + "epoch": 0.6489252814738997, + "grad_norm": 1.6374275819992907, + "learning_rate": 1.6161185478262127e-05, + "loss": 0.7414118647575378, + "step": 2219 + }, + { + "epoch": 0.649217721889165, + "grad_norm": 1.2707285395428818, + "learning_rate": 1.615737668470619e-05, + "loss": 0.5408385396003723, + "step": 2220 + }, + { + "epoch": 0.6495101623044305, + "grad_norm": 1.2587309097221344, + "learning_rate": 1.6153566451900887e-05, + "loss": 0.6145513653755188, + "step": 2221 + }, + { + "epoch": 0.6498026027196958, + "grad_norm": 1.1746181148032837, + "learning_rate": 1.6149754780736847e-05, + "loss": 0.556422233581543, + "step": 2222 + }, + { + "epoch": 0.6500950431349612, + "grad_norm": 1.4903419319059785, + "learning_rate": 1.614594167210501e-05, + "loss": 0.7155405282974243, + "step": 2223 + }, + { + "epoch": 0.6503874835502267, + "grad_norm": 1.2945043385192228, + "learning_rate": 
1.6142127126896682e-05, + "loss": 0.4988427758216858, + "step": 2224 + }, + { + "epoch": 0.650679923965492, + "grad_norm": 1.3962995233264988, + "learning_rate": 1.6138311146003477e-05, + "loss": 0.6187007427215576, + "step": 2225 + }, + { + "epoch": 0.6509723643807575, + "grad_norm": 1.329312474096709, + "learning_rate": 1.6134493730317364e-05, + "loss": 0.5668798685073853, + "step": 2226 + }, + { + "epoch": 0.6512648047960228, + "grad_norm": 1.2528148742640925, + "learning_rate": 1.6130674880730642e-05, + "loss": 0.6354215145111084, + "step": 2227 + }, + { + "epoch": 0.6515572452112882, + "grad_norm": 1.3738601794334195, + "learning_rate": 1.612685459813594e-05, + "loss": 0.5409573912620544, + "step": 2228 + }, + { + "epoch": 0.6518496856265535, + "grad_norm": 1.24582725943008, + "learning_rate": 1.612303288342623e-05, + "loss": 0.5622435808181763, + "step": 2229 + }, + { + "epoch": 0.652142126041819, + "grad_norm": 1.3303126336426627, + "learning_rate": 1.6119209737494814e-05, + "loss": 0.786159873008728, + "step": 2230 + }, + { + "epoch": 0.6524345664570844, + "grad_norm": 1.3038971892359654, + "learning_rate": 1.611538516123532e-05, + "loss": 0.6359272003173828, + "step": 2231 + }, + { + "epoch": 0.6527270068723497, + "grad_norm": 1.2508619512631416, + "learning_rate": 1.6111559155541732e-05, + "loss": 0.5688974261283875, + "step": 2232 + }, + { + "epoch": 0.6530194472876152, + "grad_norm": 1.1877745994435736, + "learning_rate": 1.610773172130835e-05, + "loss": 0.581497311592102, + "step": 2233 + }, + { + "epoch": 0.6533118877028805, + "grad_norm": 1.6577687870030173, + "learning_rate": 1.6103902859429812e-05, + "loss": 0.674004316329956, + "step": 2234 + }, + { + "epoch": 0.653604328118146, + "grad_norm": 1.4167456148188138, + "learning_rate": 1.6100072570801092e-05, + "loss": 0.6798728108406067, + "step": 2235 + }, + { + "epoch": 0.6538967685334113, + "grad_norm": 1.245467514643811, + "learning_rate": 1.60962408563175e-05, + "loss": 0.5742023587226868, + 
"step": 2236 + }, + { + "epoch": 0.6541892089486767, + "grad_norm": 1.1993067492933944, + "learning_rate": 1.6092407716874674e-05, + "loss": 0.470009446144104, + "step": 2237 + }, + { + "epoch": 0.654481649363942, + "grad_norm": 1.3725626324774514, + "learning_rate": 1.6088573153368586e-05, + "loss": 0.8113270998001099, + "step": 2238 + }, + { + "epoch": 0.6547740897792075, + "grad_norm": 1.4825942391015299, + "learning_rate": 1.6084737166695542e-05, + "loss": 0.7737559676170349, + "step": 2239 + }, + { + "epoch": 0.6550665301944729, + "grad_norm": 1.5932921988768602, + "learning_rate": 1.6080899757752183e-05, + "loss": 0.6499667167663574, + "step": 2240 + }, + { + "epoch": 0.6553589706097382, + "grad_norm": 1.5295213411109583, + "learning_rate": 1.6077060927435476e-05, + "loss": 0.6898500323295593, + "step": 2241 + }, + { + "epoch": 0.6556514110250037, + "grad_norm": 1.264521733401818, + "learning_rate": 1.6073220676642724e-05, + "loss": 0.5933262705802917, + "step": 2242 + }, + { + "epoch": 0.655943851440269, + "grad_norm": 1.6150723182894215, + "learning_rate": 1.606937900627157e-05, + "loss": 0.6566172242164612, + "step": 2243 + }, + { + "epoch": 0.6562362918555344, + "grad_norm": 1.5267009306631556, + "learning_rate": 1.606553591721997e-05, + "loss": 0.6955286264419556, + "step": 2244 + }, + { + "epoch": 0.6565287322707998, + "grad_norm": 1.2904648803296817, + "learning_rate": 1.6061691410386234e-05, + "loss": 0.6905182600021362, + "step": 2245 + }, + { + "epoch": 0.6568211726860652, + "grad_norm": 1.3780634556903595, + "learning_rate": 1.6057845486668984e-05, + "loss": 0.6733677387237549, + "step": 2246 + }, + { + "epoch": 0.6571136131013307, + "grad_norm": 1.2340466884298544, + "learning_rate": 1.6053998146967186e-05, + "loss": 0.5368545055389404, + "step": 2247 + }, + { + "epoch": 0.657406053516596, + "grad_norm": 1.4627351725055429, + "learning_rate": 1.6050149392180125e-05, + "loss": 0.6995619535446167, + "step": 2248 + }, + { + "epoch": 
0.6576984939318614, + "grad_norm": 1.2552392614352392, + "learning_rate": 1.6046299223207432e-05, + "loss": 0.6637085676193237, + "step": 2249 + }, + { + "epoch": 0.6579909343471267, + "grad_norm": 1.3894808498189977, + "learning_rate": 1.6042447640949058e-05, + "loss": 0.5834380388259888, + "step": 2250 + }, + { + "epoch": 0.6582833747623922, + "grad_norm": 1.1700440243092598, + "learning_rate": 1.6038594646305285e-05, + "loss": 0.5735288858413696, + "step": 2251 + }, + { + "epoch": 0.6585758151776575, + "grad_norm": 1.274727070163542, + "learning_rate": 1.6034740240176728e-05, + "loss": 0.6227413415908813, + "step": 2252 + }, + { + "epoch": 0.658868255592923, + "grad_norm": 1.5091805441488135, + "learning_rate": 1.6030884423464336e-05, + "loss": 0.6881246566772461, + "step": 2253 + }, + { + "epoch": 0.6591606960081884, + "grad_norm": 1.3237201049051734, + "learning_rate": 1.6027027197069376e-05, + "loss": 0.6059132814407349, + "step": 2254 + }, + { + "epoch": 0.6594531364234537, + "grad_norm": 1.5070949945133527, + "learning_rate": 1.6023168561893453e-05, + "loss": 0.5829097032546997, + "step": 2255 + }, + { + "epoch": 0.6597455768387191, + "grad_norm": 1.1821076640408643, + "learning_rate": 1.60193085188385e-05, + "loss": 0.5173588991165161, + "step": 2256 + }, + { + "epoch": 0.6600380172539845, + "grad_norm": 1.0404057140160172, + "learning_rate": 1.601544706880678e-05, + "loss": 0.5128534436225891, + "step": 2257 + }, + { + "epoch": 0.6603304576692499, + "grad_norm": 1.4274902732235735, + "learning_rate": 1.601158421270088e-05, + "loss": 0.5472848415374756, + "step": 2258 + }, + { + "epoch": 0.6606228980845152, + "grad_norm": 1.2505155913554076, + "learning_rate": 1.6007719951423725e-05, + "loss": 0.5775434970855713, + "step": 2259 + }, + { + "epoch": 0.6609153384997807, + "grad_norm": 1.2760490287043558, + "learning_rate": 1.6003854285878558e-05, + "loss": 0.5529654622077942, + "step": 2260 + }, + { + "epoch": 0.661207778915046, + "grad_norm": 
1.2950239037035343, + "learning_rate": 1.5999987216968954e-05, + "loss": 0.5295222997665405, + "step": 2261 + }, + { + "epoch": 0.6615002193303114, + "grad_norm": 1.42880093351922, + "learning_rate": 1.5996118745598817e-05, + "loss": 0.6782759428024292, + "step": 2262 + }, + { + "epoch": 0.6617926597455769, + "grad_norm": 1.5123560217291456, + "learning_rate": 1.5992248872672384e-05, + "loss": 0.7698723077774048, + "step": 2263 + }, + { + "epoch": 0.6620851001608422, + "grad_norm": 1.224014553870767, + "learning_rate": 1.5988377599094208e-05, + "loss": 0.5056325793266296, + "step": 2264 + }, + { + "epoch": 0.6623775405761076, + "grad_norm": 1.2811286417806291, + "learning_rate": 1.598450492576918e-05, + "loss": 0.6748740673065186, + "step": 2265 + }, + { + "epoch": 0.662669980991373, + "grad_norm": 1.4413699029522251, + "learning_rate": 1.598063085360251e-05, + "loss": 0.6594111919403076, + "step": 2266 + }, + { + "epoch": 0.6629624214066384, + "grad_norm": 1.490546706478741, + "learning_rate": 1.5976755383499743e-05, + "loss": 0.5942472815513611, + "step": 2267 + }, + { + "epoch": 0.6632548618219037, + "grad_norm": 1.4166382340274284, + "learning_rate": 1.5972878516366742e-05, + "loss": 0.6956725120544434, + "step": 2268 + }, + { + "epoch": 0.6635473022371692, + "grad_norm": 1.5479108671282409, + "learning_rate": 1.5969000253109707e-05, + "loss": 0.6743103265762329, + "step": 2269 + }, + { + "epoch": 0.6638397426524346, + "grad_norm": 1.2415014970437994, + "learning_rate": 1.596512059463515e-05, + "loss": 0.5452187061309814, + "step": 2270 + }, + { + "epoch": 0.6641321830676999, + "grad_norm": 1.305856048148522, + "learning_rate": 1.5961239541849923e-05, + "loss": 0.6064754128456116, + "step": 2271 + }, + { + "epoch": 0.6644246234829654, + "grad_norm": 1.1672873660489786, + "learning_rate": 1.59573570956612e-05, + "loss": 0.5879498720169067, + "step": 2272 + }, + { + "epoch": 0.6647170638982307, + "grad_norm": 1.2464190562799757, + "learning_rate": 
1.595347325697648e-05, + "loss": 0.6610721945762634, + "step": 2273 + }, + { + "epoch": 0.6650095043134961, + "grad_norm": 1.5001752360693776, + "learning_rate": 1.594958802670358e-05, + "loss": 0.6674839854240417, + "step": 2274 + }, + { + "epoch": 0.6653019447287615, + "grad_norm": 1.2669024802691538, + "learning_rate": 1.5945701405750654e-05, + "loss": 0.5189186334609985, + "step": 2275 + }, + { + "epoch": 0.6655943851440269, + "grad_norm": 1.096047033017533, + "learning_rate": 1.5941813395026174e-05, + "loss": 0.5225304365158081, + "step": 2276 + }, + { + "epoch": 0.6658868255592922, + "grad_norm": 1.1982797539630743, + "learning_rate": 1.5937923995438942e-05, + "loss": 0.5426747798919678, + "step": 2277 + }, + { + "epoch": 0.6661792659745577, + "grad_norm": 1.1331316680397499, + "learning_rate": 1.593403320789808e-05, + "loss": 0.6408158540725708, + "step": 2278 + }, + { + "epoch": 0.6664717063898231, + "grad_norm": 1.2777185085969938, + "learning_rate": 1.5930141033313034e-05, + "loss": 0.6213311553001404, + "step": 2279 + }, + { + "epoch": 0.6667641468050884, + "grad_norm": 1.2938845863415658, + "learning_rate": 1.5926247472593575e-05, + "loss": 0.6538233757019043, + "step": 2280 + }, + { + "epoch": 0.6670565872203539, + "grad_norm": 1.4396815547692279, + "learning_rate": 1.5922352526649803e-05, + "loss": 0.6714701056480408, + "step": 2281 + }, + { + "epoch": 0.6673490276356192, + "grad_norm": 1.2875131974555427, + "learning_rate": 1.5918456196392137e-05, + "loss": 0.501068115234375, + "step": 2282 + }, + { + "epoch": 0.6676414680508846, + "grad_norm": 1.483722651200639, + "learning_rate": 1.5914558482731317e-05, + "loss": 0.6551339626312256, + "step": 2283 + }, + { + "epoch": 0.66793390846615, + "grad_norm": 1.575561891265528, + "learning_rate": 1.5910659386578415e-05, + "loss": 0.666611909866333, + "step": 2284 + }, + { + "epoch": 0.6682263488814154, + "grad_norm": 1.3058077151253007, + "learning_rate": 1.590675890884482e-05, + "loss": 0.6612483859062195, 
+ "step": 2285 + }, + { + "epoch": 0.6685187892966808, + "grad_norm": 1.535602248808955, + "learning_rate": 1.590285705044224e-05, + "loss": 0.5299272537231445, + "step": 2286 + }, + { + "epoch": 0.6688112297119462, + "grad_norm": 1.5209550044520355, + "learning_rate": 1.589895381228272e-05, + "loss": 0.6873815655708313, + "step": 2287 + }, + { + "epoch": 0.6691036701272116, + "grad_norm": 1.333463107294571, + "learning_rate": 1.5895049195278608e-05, + "loss": 0.6473613977432251, + "step": 2288 + }, + { + "epoch": 0.6693961105424769, + "grad_norm": 1.4389212790848083, + "learning_rate": 1.589114320034259e-05, + "loss": 0.6600902080535889, + "step": 2289 + }, + { + "epoch": 0.6696885509577424, + "grad_norm": 1.7581559017014303, + "learning_rate": 1.5887235828387667e-05, + "loss": 0.6066039800643921, + "step": 2290 + }, + { + "epoch": 0.6699809913730077, + "grad_norm": 1.2475073124572584, + "learning_rate": 1.5883327080327165e-05, + "loss": 0.5411461591720581, + "step": 2291 + }, + { + "epoch": 0.6702734317882731, + "grad_norm": 1.3264098990068387, + "learning_rate": 1.587941695707473e-05, + "loss": 0.5678138136863708, + "step": 2292 + }, + { + "epoch": 0.6705658722035386, + "grad_norm": 1.2017893940389541, + "learning_rate": 1.5875505459544327e-05, + "loss": 0.6175323724746704, + "step": 2293 + }, + { + "epoch": 0.6708583126188039, + "grad_norm": 1.2255154092981597, + "learning_rate": 1.587159258865025e-05, + "loss": 0.5790976285934448, + "step": 2294 + }, + { + "epoch": 0.6711507530340693, + "grad_norm": 1.4070059880127774, + "learning_rate": 1.58676783453071e-05, + "loss": 0.5891247391700745, + "step": 2295 + }, + { + "epoch": 0.6714431934493347, + "grad_norm": 1.3680740765730994, + "learning_rate": 1.5863762730429817e-05, + "loss": 0.5604299902915955, + "step": 2296 + }, + { + "epoch": 0.6717356338646001, + "grad_norm": 1.156075846793115, + "learning_rate": 1.585984574493365e-05, + "loss": 0.5402317047119141, + "step": 2297 + }, + { + "epoch": 0.6720280742798654, 
+ "grad_norm": 1.2729484704762741, + "learning_rate": 1.5855927389734163e-05, + "loss": 0.5569097995758057, + "step": 2298 + }, + { + "epoch": 0.6723205146951309, + "grad_norm": 1.792109537125727, + "learning_rate": 1.5852007665747255e-05, + "loss": 0.6754734516143799, + "step": 2299 + }, + { + "epoch": 0.6726129551103962, + "grad_norm": 1.2015482502693244, + "learning_rate": 1.584808657388914e-05, + "loss": 0.5555064678192139, + "step": 2300 + }, + { + "epoch": 0.6729053955256616, + "grad_norm": 1.2978798977032824, + "learning_rate": 1.584416411507634e-05, + "loss": 0.5735480785369873, + "step": 2301 + }, + { + "epoch": 0.6731978359409271, + "grad_norm": 1.3948021707686127, + "learning_rate": 1.5840240290225713e-05, + "loss": 0.6084697842597961, + "step": 2302 + }, + { + "epoch": 0.6734902763561924, + "grad_norm": 1.3972987341637648, + "learning_rate": 1.5836315100254427e-05, + "loss": 0.5747361779212952, + "step": 2303 + }, + { + "epoch": 0.6737827167714578, + "grad_norm": 1.3042539657521541, + "learning_rate": 1.583238854607997e-05, + "loss": 0.6597394943237305, + "step": 2304 + }, + { + "epoch": 0.6740751571867232, + "grad_norm": 1.2885200657030746, + "learning_rate": 1.582846062862016e-05, + "loss": 0.6054418087005615, + "step": 2305 + }, + { + "epoch": 0.6743675976019886, + "grad_norm": 1.4670353156004656, + "learning_rate": 1.5824531348793106e-05, + "loss": 0.6897715330123901, + "step": 2306 + }, + { + "epoch": 0.6746600380172539, + "grad_norm": 1.2379672312585208, + "learning_rate": 1.5820600707517265e-05, + "loss": 0.5438888072967529, + "step": 2307 + }, + { + "epoch": 0.6749524784325194, + "grad_norm": 1.3511076823584265, + "learning_rate": 1.5816668705711402e-05, + "loss": 0.5139850378036499, + "step": 2308 + }, + { + "epoch": 0.6752449188477848, + "grad_norm": 1.3878243291723096, + "learning_rate": 1.5812735344294594e-05, + "loss": 0.5970615744590759, + "step": 2309 + }, + { + "epoch": 0.6755373592630501, + "grad_norm": 1.5290136714699685, + 
"learning_rate": 1.580880062418624e-05, + "loss": 0.6206730604171753, + "step": 2310 + }, + { + "epoch": 0.6758297996783156, + "grad_norm": 1.5283867982171593, + "learning_rate": 1.580486454630606e-05, + "loss": 0.6545864939689636, + "step": 2311 + }, + { + "epoch": 0.6761222400935809, + "grad_norm": 1.6726831788405112, + "learning_rate": 1.5800927111574084e-05, + "loss": 0.6284571290016174, + "step": 2312 + }, + { + "epoch": 0.6764146805088463, + "grad_norm": 1.3062366838416066, + "learning_rate": 1.5796988320910665e-05, + "loss": 0.6662822365760803, + "step": 2313 + }, + { + "epoch": 0.6767071209241117, + "grad_norm": 1.4857961720461585, + "learning_rate": 1.5793048175236477e-05, + "loss": 0.6952080130577087, + "step": 2314 + }, + { + "epoch": 0.6769995613393771, + "grad_norm": 1.1527122349254486, + "learning_rate": 1.5789106675472496e-05, + "loss": 0.55562424659729, + "step": 2315 + }, + { + "epoch": 0.6772920017546424, + "grad_norm": 1.417075363017466, + "learning_rate": 1.578516382254003e-05, + "loss": 0.696354866027832, + "step": 2316 + }, + { + "epoch": 0.6775844421699079, + "grad_norm": 1.2481046919985836, + "learning_rate": 1.5781219617360695e-05, + "loss": 0.5764954686164856, + "step": 2317 + }, + { + "epoch": 0.6778768825851733, + "grad_norm": 1.5617477082955222, + "learning_rate": 1.577727406085642e-05, + "loss": 0.6944533586502075, + "step": 2318 + }, + { + "epoch": 0.6781693230004386, + "grad_norm": 1.5273473613933928, + "learning_rate": 1.5773327153949465e-05, + "loss": 0.5517882704734802, + "step": 2319 + }, + { + "epoch": 0.6784617634157041, + "grad_norm": 1.3495609581159556, + "learning_rate": 1.576937889756239e-05, + "loss": 0.6151533126831055, + "step": 2320 + }, + { + "epoch": 0.6787542038309694, + "grad_norm": 1.3729348393231853, + "learning_rate": 1.5765429292618075e-05, + "loss": 0.6221417784690857, + "step": 2321 + }, + { + "epoch": 0.6790466442462348, + "grad_norm": 1.5561656408525308, + "learning_rate": 1.576147834003972e-05, + "loss": 
0.6218827962875366, + "step": 2322 + }, + { + "epoch": 0.6793390846615002, + "grad_norm": 1.2844085482190328, + "learning_rate": 1.575752604075083e-05, + "loss": 0.689696192741394, + "step": 2323 + }, + { + "epoch": 0.6796315250767656, + "grad_norm": 1.459910366351317, + "learning_rate": 1.5753572395675234e-05, + "loss": 0.6457825899124146, + "step": 2324 + }, + { + "epoch": 0.679923965492031, + "grad_norm": 1.660980107305809, + "learning_rate": 1.5749617405737075e-05, + "loss": 0.6261845827102661, + "step": 2325 + }, + { + "epoch": 0.6802164059072964, + "grad_norm": 1.5113706854166593, + "learning_rate": 1.5745661071860802e-05, + "loss": 0.6631760597229004, + "step": 2326 + }, + { + "epoch": 0.6805088463225618, + "grad_norm": 1.4700703601826162, + "learning_rate": 1.574170339497119e-05, + "loss": 0.6223125457763672, + "step": 2327 + }, + { + "epoch": 0.6808012867378271, + "grad_norm": 1.4289384563362724, + "learning_rate": 1.5737744375993318e-05, + "loss": 0.5649152398109436, + "step": 2328 + }, + { + "epoch": 0.6810937271530926, + "grad_norm": 1.3637036537520066, + "learning_rate": 1.573378401585259e-05, + "loss": 0.6822011470794678, + "step": 2329 + }, + { + "epoch": 0.6813861675683579, + "grad_norm": 1.243454490323945, + "learning_rate": 1.5729822315474704e-05, + "loss": 0.4853206276893616, + "step": 2330 + }, + { + "epoch": 0.6816786079836233, + "grad_norm": 1.3491879449563893, + "learning_rate": 1.572585927578569e-05, + "loss": 0.6410783529281616, + "step": 2331 + }, + { + "epoch": 0.6819710483988888, + "grad_norm": 1.2349335330440738, + "learning_rate": 1.572189489771189e-05, + "loss": 0.607154369354248, + "step": 2332 + }, + { + "epoch": 0.6822634888141541, + "grad_norm": 1.2303800918258645, + "learning_rate": 1.571792918217994e-05, + "loss": 0.5079061388969421, + "step": 2333 + }, + { + "epoch": 0.6825559292294195, + "grad_norm": 1.355109139858454, + "learning_rate": 1.5713962130116812e-05, + "loss": 0.534178614616394, + "step": 2334 + }, + { + "epoch": 
0.6828483696446849, + "grad_norm": 1.099124567807314, + "learning_rate": 1.5709993742449777e-05, + "loss": 0.6172807812690735, + "step": 2335 + }, + { + "epoch": 0.6831408100599503, + "grad_norm": 1.468863618054796, + "learning_rate": 1.5706024020106425e-05, + "loss": 0.6863975524902344, + "step": 2336 + }, + { + "epoch": 0.6834332504752156, + "grad_norm": 1.3542187494807805, + "learning_rate": 1.570205296401465e-05, + "loss": 0.6314880847930908, + "step": 2337 + }, + { + "epoch": 0.6837256908904811, + "grad_norm": 1.4888474767820694, + "learning_rate": 1.5698080575102662e-05, + "loss": 0.5420910120010376, + "step": 2338 + }, + { + "epoch": 0.6840181313057464, + "grad_norm": 1.545548665208996, + "learning_rate": 1.5694106854298988e-05, + "loss": 0.6598352789878845, + "step": 2339 + }, + { + "epoch": 0.6843105717210118, + "grad_norm": 1.1855737189309028, + "learning_rate": 1.5690131802532454e-05, + "loss": 0.49957770109176636, + "step": 2340 + }, + { + "epoch": 0.6846030121362773, + "grad_norm": 1.3910703437631544, + "learning_rate": 1.568615542073221e-05, + "loss": 0.7217017412185669, + "step": 2341 + }, + { + "epoch": 0.6848954525515426, + "grad_norm": 1.383168011584397, + "learning_rate": 1.5682177709827705e-05, + "loss": 0.5824606418609619, + "step": 2342 + }, + { + "epoch": 0.685187892966808, + "grad_norm": 1.4861418668417947, + "learning_rate": 1.567819867074871e-05, + "loss": 0.5932704210281372, + "step": 2343 + }, + { + "epoch": 0.6854803333820734, + "grad_norm": 1.1927307747773088, + "learning_rate": 1.5674218304425304e-05, + "loss": 0.6098836660385132, + "step": 2344 + }, + { + "epoch": 0.6857727737973388, + "grad_norm": 1.3302018518433079, + "learning_rate": 1.5670236611787865e-05, + "loss": 0.5158270597457886, + "step": 2345 + }, + { + "epoch": 0.6860652142126041, + "grad_norm": 1.431950758183516, + "learning_rate": 1.5666253593767095e-05, + "loss": 0.7840174436569214, + "step": 2346 + }, + { + "epoch": 0.6863576546278696, + "grad_norm": 
1.3462478651155303, + "learning_rate": 1.5662269251294e-05, + "loss": 0.5665150880813599, + "step": 2347 + }, + { + "epoch": 0.686650095043135, + "grad_norm": 1.2308130347699304, + "learning_rate": 1.5658283585299894e-05, + "loss": 0.5801588296890259, + "step": 2348 + }, + { + "epoch": 0.6869425354584003, + "grad_norm": 1.487298330014143, + "learning_rate": 1.56542965967164e-05, + "loss": 0.759188175201416, + "step": 2349 + }, + { + "epoch": 0.6872349758736658, + "grad_norm": 1.5717076197736846, + "learning_rate": 1.565030828647546e-05, + "loss": 0.7182703018188477, + "step": 2350 + }, + { + "epoch": 0.6875274162889311, + "grad_norm": 1.3681215378392677, + "learning_rate": 1.564631865550931e-05, + "loss": 0.7172018885612488, + "step": 2351 + }, + { + "epoch": 0.6878198567041965, + "grad_norm": 1.3897042930637002, + "learning_rate": 1.5642327704750502e-05, + "loss": 0.5959519743919373, + "step": 2352 + }, + { + "epoch": 0.6881122971194619, + "grad_norm": 1.3686338632915553, + "learning_rate": 1.5638335435131902e-05, + "loss": 0.5531836748123169, + "step": 2353 + }, + { + "epoch": 0.6884047375347273, + "grad_norm": 1.2097339017222586, + "learning_rate": 1.5634341847586676e-05, + "loss": 0.672225296497345, + "step": 2354 + }, + { + "epoch": 0.6886971779499926, + "grad_norm": 1.3740176007353215, + "learning_rate": 1.5630346943048297e-05, + "loss": 0.5721465349197388, + "step": 2355 + }, + { + "epoch": 0.6889896183652581, + "grad_norm": 1.2416767467837069, + "learning_rate": 1.5626350722450555e-05, + "loss": 0.6357900500297546, + "step": 2356 + }, + { + "epoch": 0.6892820587805235, + "grad_norm": 1.241847883566859, + "learning_rate": 1.5622353186727542e-05, + "loss": 0.6348878145217896, + "step": 2357 + }, + { + "epoch": 0.6895744991957888, + "grad_norm": 1.390537638221337, + "learning_rate": 1.5618354336813656e-05, + "loss": 0.5473623275756836, + "step": 2358 + }, + { + "epoch": 0.6898669396110543, + "grad_norm": 1.4299851255948683, + "learning_rate": 
1.5614354173643606e-05, + "loss": 0.8284158706665039, + "step": 2359 + }, + { + "epoch": 0.6901593800263196, + "grad_norm": 1.3561063303885135, + "learning_rate": 1.5610352698152396e-05, + "loss": 0.5915359854698181, + "step": 2360 + }, + { + "epoch": 0.690451820441585, + "grad_norm": 1.434488423567872, + "learning_rate": 1.560634991127536e-05, + "loss": 0.6173555254936218, + "step": 2361 + }, + { + "epoch": 0.6907442608568504, + "grad_norm": 1.2348756002421877, + "learning_rate": 1.560234581394812e-05, + "loss": 0.5551577806472778, + "step": 2362 + }, + { + "epoch": 0.6910367012721158, + "grad_norm": 1.6912535037446208, + "learning_rate": 1.559834040710661e-05, + "loss": 0.7160264253616333, + "step": 2363 + }, + { + "epoch": 0.6913291416873812, + "grad_norm": 1.4348139771874249, + "learning_rate": 1.5594333691687062e-05, + "loss": 0.5986248850822449, + "step": 2364 + }, + { + "epoch": 0.6916215821026466, + "grad_norm": 1.6827348555719241, + "learning_rate": 1.559032566862603e-05, + "loss": 0.7347019910812378, + "step": 2365 + }, + { + "epoch": 0.691914022517912, + "grad_norm": 1.1496166027771255, + "learning_rate": 1.5586316338860363e-05, + "loss": 0.502663791179657, + "step": 2366 + }, + { + "epoch": 0.6922064629331773, + "grad_norm": 1.1610976211375774, + "learning_rate": 1.558230570332722e-05, + "loss": 0.5026617050170898, + "step": 2367 + }, + { + "epoch": 0.6924989033484428, + "grad_norm": 1.3196703072069724, + "learning_rate": 1.5578293762964057e-05, + "loss": 0.6091101169586182, + "step": 2368 + }, + { + "epoch": 0.6927913437637081, + "grad_norm": 1.1607138049044183, + "learning_rate": 1.5574280518708645e-05, + "loss": 0.6202579736709595, + "step": 2369 + }, + { + "epoch": 0.6930837841789735, + "grad_norm": 1.3867301068189375, + "learning_rate": 1.557026597149905e-05, + "loss": 0.6532948017120361, + "step": 2370 + }, + { + "epoch": 0.693376224594239, + "grad_norm": 1.2799465632685962, + "learning_rate": 1.5566250122273658e-05, + "loss": 0.6197448372840881, 
+ "step": 2371 + }, + { + "epoch": 0.6936686650095043, + "grad_norm": 1.330123548058068, + "learning_rate": 1.556223297197114e-05, + "loss": 0.6181553602218628, + "step": 2372 + }, + { + "epoch": 0.6939611054247697, + "grad_norm": 1.3757625130132767, + "learning_rate": 1.5558214521530482e-05, + "loss": 0.6015427112579346, + "step": 2373 + }, + { + "epoch": 0.6942535458400351, + "grad_norm": 1.4511778478720454, + "learning_rate": 1.555419477189098e-05, + "loss": 0.6204534769058228, + "step": 2374 + }, + { + "epoch": 0.6945459862553005, + "grad_norm": 1.2237746404921626, + "learning_rate": 1.5550173723992218e-05, + "loss": 0.5914584994316101, + "step": 2375 + }, + { + "epoch": 0.6948384266705658, + "grad_norm": 1.2633817911858796, + "learning_rate": 1.554615137877409e-05, + "loss": 0.5077188611030579, + "step": 2376 + }, + { + "epoch": 0.6951308670858313, + "grad_norm": 1.1523903505061626, + "learning_rate": 1.55421277371768e-05, + "loss": 0.5560270547866821, + "step": 2377 + }, + { + "epoch": 0.6954233075010966, + "grad_norm": 1.6214020445600121, + "learning_rate": 1.553810280014085e-05, + "loss": 0.7064549922943115, + "step": 2378 + }, + { + "epoch": 0.695715747916362, + "grad_norm": 1.4249847873824701, + "learning_rate": 1.5534076568607043e-05, + "loss": 0.7433110475540161, + "step": 2379 + }, + { + "epoch": 0.6960081883316275, + "grad_norm": 1.4661372034410074, + "learning_rate": 1.553004904351648e-05, + "loss": 0.6061110496520996, + "step": 2380 + }, + { + "epoch": 0.6963006287468928, + "grad_norm": 1.3530915937691412, + "learning_rate": 1.5526020225810583e-05, + "loss": 0.604006290435791, + "step": 2381 + }, + { + "epoch": 0.6965930691621582, + "grad_norm": 1.3193058416919141, + "learning_rate": 1.5521990116431052e-05, + "loss": 0.6221635341644287, + "step": 2382 + }, + { + "epoch": 0.6968855095774236, + "grad_norm": 1.17260855579956, + "learning_rate": 1.551795871631991e-05, + "loss": 0.5848093032836914, + "step": 2383 + }, + { + "epoch": 0.697177949992689, + 
"grad_norm": 1.3909866883805502, + "learning_rate": 1.5513926026419464e-05, + "loss": 0.6451606154441833, + "step": 2384 + }, + { + "epoch": 0.6974703904079543, + "grad_norm": 1.2515682694896817, + "learning_rate": 1.5509892047672336e-05, + "loss": 0.7922245264053345, + "step": 2385 + }, + { + "epoch": 0.6977628308232198, + "grad_norm": 1.501698757307051, + "learning_rate": 1.5505856781021443e-05, + "loss": 0.6458885073661804, + "step": 2386 + }, + { + "epoch": 0.6980552712384852, + "grad_norm": 1.3253141303151825, + "learning_rate": 1.5501820227410002e-05, + "loss": 0.5989570617675781, + "step": 2387 + }, + { + "epoch": 0.6983477116537505, + "grad_norm": 1.4240123629840666, + "learning_rate": 1.5497782387781536e-05, + "loss": 0.740998387336731, + "step": 2388 + }, + { + "epoch": 0.698640152069016, + "grad_norm": 1.4547948512453808, + "learning_rate": 1.5493743263079866e-05, + "loss": 0.63981032371521, + "step": 2389 + }, + { + "epoch": 0.6989325924842813, + "grad_norm": 1.325001348454028, + "learning_rate": 1.5489702854249106e-05, + "loss": 0.766716480255127, + "step": 2390 + }, + { + "epoch": 0.6992250328995467, + "grad_norm": 1.541044208915787, + "learning_rate": 1.5485661162233684e-05, + "loss": 0.7879365086555481, + "step": 2391 + }, + { + "epoch": 0.6995174733148121, + "grad_norm": 1.3532949065271656, + "learning_rate": 1.5481618187978322e-05, + "loss": 0.6005786657333374, + "step": 2392 + }, + { + "epoch": 0.6998099137300775, + "grad_norm": 1.2952910023515818, + "learning_rate": 1.5477573932428033e-05, + "loss": 0.6207927465438843, + "step": 2393 + }, + { + "epoch": 0.7001023541453428, + "grad_norm": 1.4490674696543298, + "learning_rate": 1.5473528396528144e-05, + "loss": 0.5582053661346436, + "step": 2394 + }, + { + "epoch": 0.7003947945606083, + "grad_norm": 1.6315416515790502, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.5701307058334351, + "step": 2395 + }, + { + "epoch": 0.7006872349758737, + "grad_norm": 1.3804181292115258, + "learning_rate": 
1.546543348746233e-05, + "loss": 0.6201068162918091, + "step": 2396 + }, + { + "epoch": 0.700979675391139, + "grad_norm": 1.3282086716914991, + "learning_rate": 1.5461384116188546e-05, + "loss": 0.6102321147918701, + "step": 2397 + }, + { + "epoch": 0.7012721158064045, + "grad_norm": 1.361382387889105, + "learning_rate": 1.545733346834943e-05, + "loss": 0.5445820093154907, + "step": 2398 + }, + { + "epoch": 0.7015645562216698, + "grad_norm": 1.3134018034606705, + "learning_rate": 1.5453281544891797e-05, + "loss": 0.5278012752532959, + "step": 2399 + }, + { + "epoch": 0.7018569966369352, + "grad_norm": 1.6159840401286016, + "learning_rate": 1.544922834676276e-05, + "loss": 0.7051252126693726, + "step": 2400 + }, + { + "epoch": 0.7021494370522006, + "grad_norm": 1.3552623655435003, + "learning_rate": 1.544517387490973e-05, + "loss": 0.6024646759033203, + "step": 2401 + }, + { + "epoch": 0.702441877467466, + "grad_norm": 1.3323978020414873, + "learning_rate": 1.5441118130280406e-05, + "loss": 0.5563746094703674, + "step": 2402 + }, + { + "epoch": 0.7027343178827314, + "grad_norm": 1.3671297363224464, + "learning_rate": 1.5437061113822805e-05, + "loss": 0.5971669554710388, + "step": 2403 + }, + { + "epoch": 0.7030267582979968, + "grad_norm": 1.5082475685517047, + "learning_rate": 1.5433002826485234e-05, + "loss": 0.5846019983291626, + "step": 2404 + }, + { + "epoch": 0.7033191987132622, + "grad_norm": 1.2921876796744827, + "learning_rate": 1.5428943269216278e-05, + "loss": 0.5571885108947754, + "step": 2405 + }, + { + "epoch": 0.7036116391285275, + "grad_norm": 1.15652993390593, + "learning_rate": 1.542488244296484e-05, + "loss": 0.4770846962928772, + "step": 2406 + }, + { + "epoch": 0.703904079543793, + "grad_norm": 1.6398352091801953, + "learning_rate": 1.542082034868012e-05, + "loss": 0.636760950088501, + "step": 2407 + }, + { + "epoch": 0.7041965199590583, + "grad_norm": 1.6877906333209267, + "learning_rate": 1.5416756987311603e-05, + "loss": 0.7264662981033325, + 
"step": 2408 + }, + { + "epoch": 0.7044889603743237, + "grad_norm": 1.372256728403267, + "learning_rate": 1.5412692359809073e-05, + "loss": 0.6723978519439697, + "step": 2409 + }, + { + "epoch": 0.7047814007895892, + "grad_norm": 1.4362583031777838, + "learning_rate": 1.5408626467122612e-05, + "loss": 0.6205083727836609, + "step": 2410 + }, + { + "epoch": 0.7050738412048545, + "grad_norm": 1.4495567778043355, + "learning_rate": 1.54045593102026e-05, + "loss": 0.5980903506278992, + "step": 2411 + }, + { + "epoch": 0.7053662816201199, + "grad_norm": 1.4897959908790472, + "learning_rate": 1.540049088999971e-05, + "loss": 0.6311691999435425, + "step": 2412 + }, + { + "epoch": 0.7056587220353853, + "grad_norm": 1.428243709143454, + "learning_rate": 1.539642120746491e-05, + "loss": 0.5872593522071838, + "step": 2413 + }, + { + "epoch": 0.7059511624506507, + "grad_norm": 1.351001450570791, + "learning_rate": 1.5392350263549462e-05, + "loss": 0.5037539005279541, + "step": 2414 + }, + { + "epoch": 0.706243602865916, + "grad_norm": 1.4775045660401276, + "learning_rate": 1.538827805920493e-05, + "loss": 0.5917855501174927, + "step": 2415 + }, + { + "epoch": 0.7065360432811815, + "grad_norm": 1.3687769613569196, + "learning_rate": 1.538420459538316e-05, + "loss": 0.6350749731063843, + "step": 2416 + }, + { + "epoch": 0.7068284836964468, + "grad_norm": 1.330110483636511, + "learning_rate": 1.53801298730363e-05, + "loss": 0.6828908920288086, + "step": 2417 + }, + { + "epoch": 0.7071209241117122, + "grad_norm": 1.5864329436081315, + "learning_rate": 1.5376053893116796e-05, + "loss": 0.6307995319366455, + "step": 2418 + }, + { + "epoch": 0.7074133645269777, + "grad_norm": 1.3609756396375527, + "learning_rate": 1.5371976656577385e-05, + "loss": 0.5305014252662659, + "step": 2419 + }, + { + "epoch": 0.707705804942243, + "grad_norm": 1.2953614031977334, + "learning_rate": 1.536789816437109e-05, + "loss": 0.560103178024292, + "step": 2420 + }, + { + "epoch": 0.7079982453575084, + 
"grad_norm": 1.4823675619867462, + "learning_rate": 1.5363818417451236e-05, + "loss": 0.5449249148368835, + "step": 2421 + }, + { + "epoch": 0.7082906857727738, + "grad_norm": 1.575423149049035, + "learning_rate": 1.5359737416771438e-05, + "loss": 0.7456427812576294, + "step": 2422 + }, + { + "epoch": 0.7085831261880392, + "grad_norm": 1.4606336998212586, + "learning_rate": 1.5355655163285607e-05, + "loss": 0.5401932597160339, + "step": 2423 + }, + { + "epoch": 0.7088755666033045, + "grad_norm": 1.4384817217494414, + "learning_rate": 1.5351571657947947e-05, + "loss": 0.6215255856513977, + "step": 2424 + }, + { + "epoch": 0.70916800701857, + "grad_norm": 1.454238489435378, + "learning_rate": 1.5347486901712946e-05, + "loss": 0.724073052406311, + "step": 2425 + }, + { + "epoch": 0.7094604474338354, + "grad_norm": 1.280381472439187, + "learning_rate": 1.5343400895535402e-05, + "loss": 0.6375223398208618, + "step": 2426 + }, + { + "epoch": 0.7097528878491007, + "grad_norm": 1.4740965908748953, + "learning_rate": 1.533931364037038e-05, + "loss": 0.6087045669555664, + "step": 2427 + }, + { + "epoch": 0.7100453282643662, + "grad_norm": 1.4709664710326, + "learning_rate": 1.5335225137173262e-05, + "loss": 0.7927658557891846, + "step": 2428 + }, + { + "epoch": 0.7103377686796315, + "grad_norm": 1.4583904783773962, + "learning_rate": 1.5331135386899702e-05, + "loss": 0.6312417387962341, + "step": 2429 + }, + { + "epoch": 0.7106302090948969, + "grad_norm": 1.3514647325044575, + "learning_rate": 1.5327044390505666e-05, + "loss": 0.6856948137283325, + "step": 2430 + }, + { + "epoch": 0.7109226495101623, + "grad_norm": 1.3486136616110067, + "learning_rate": 1.532295214894739e-05, + "loss": 0.5683865547180176, + "step": 2431 + }, + { + "epoch": 0.7112150899254277, + "grad_norm": 1.5290242403967753, + "learning_rate": 1.5318858663181412e-05, + "loss": 0.6208291053771973, + "step": 2432 + }, + { + "epoch": 0.711507530340693, + "grad_norm": 1.5265891330435364, + "learning_rate": 
1.531476393416456e-05, + "loss": 0.6751389503479004, + "step": 2433 + }, + { + "epoch": 0.7117999707559585, + "grad_norm": 1.1685210774635664, + "learning_rate": 1.5310667962853954e-05, + "loss": 0.422024667263031, + "step": 2434 + }, + { + "epoch": 0.7120924111712239, + "grad_norm": 1.377587949543332, + "learning_rate": 1.5306570750207003e-05, + "loss": 0.6714169979095459, + "step": 2435 + }, + { + "epoch": 0.7123848515864892, + "grad_norm": 1.4808127088080212, + "learning_rate": 1.53024722971814e-05, + "loss": 0.5757386088371277, + "step": 2436 + }, + { + "epoch": 0.7126772920017547, + "grad_norm": 1.4790386820456973, + "learning_rate": 1.529837260473514e-05, + "loss": 0.5686037540435791, + "step": 2437 + }, + { + "epoch": 0.71296973241702, + "grad_norm": 1.570681384959534, + "learning_rate": 1.5294271673826498e-05, + "loss": 0.7601959705352783, + "step": 2438 + }, + { + "epoch": 0.7132621728322854, + "grad_norm": 1.44814607189911, + "learning_rate": 1.529016950541404e-05, + "loss": 0.5654840469360352, + "step": 2439 + }, + { + "epoch": 0.7135546132475508, + "grad_norm": 1.5844428110219366, + "learning_rate": 1.5286066100456623e-05, + "loss": 0.7009234428405762, + "step": 2440 + }, + { + "epoch": 0.7138470536628162, + "grad_norm": 1.166961279939158, + "learning_rate": 1.52819614599134e-05, + "loss": 0.4856370687484741, + "step": 2441 + }, + { + "epoch": 0.7141394940780816, + "grad_norm": 1.2283639685035557, + "learning_rate": 1.52778555847438e-05, + "loss": 0.5135019421577454, + "step": 2442 + }, + { + "epoch": 0.714431934493347, + "grad_norm": 1.3437978900697465, + "learning_rate": 1.5273748475907542e-05, + "loss": 0.7350283861160278, + "step": 2443 + }, + { + "epoch": 0.7147243749086124, + "grad_norm": 1.1274424242274286, + "learning_rate": 1.5269640134364646e-05, + "loss": 0.5985803604125977, + "step": 2444 + }, + { + "epoch": 0.7150168153238777, + "grad_norm": 1.2982732418248375, + "learning_rate": 1.5265530561075407e-05, + "loss": 0.6840892434120178, + 
"step": 2445 + }, + { + "epoch": 0.7153092557391432, + "grad_norm": 1.2979743249484705, + "learning_rate": 1.5261419757000417e-05, + "loss": 0.6921327114105225, + "step": 2446 + }, + { + "epoch": 0.7156016961544085, + "grad_norm": 1.429595570109343, + "learning_rate": 1.525730772310055e-05, + "loss": 0.6428500413894653, + "step": 2447 + }, + { + "epoch": 0.7158941365696739, + "grad_norm": 1.3812578358040712, + "learning_rate": 1.5253194460336964e-05, + "loss": 0.645559549331665, + "step": 2448 + }, + { + "epoch": 0.7161865769849394, + "grad_norm": 1.7222193716043204, + "learning_rate": 1.5249079969671114e-05, + "loss": 0.6211013793945312, + "step": 2449 + }, + { + "epoch": 0.7164790174002047, + "grad_norm": 1.2302035745629583, + "learning_rate": 1.5244964252064737e-05, + "loss": 0.5709721446037292, + "step": 2450 + }, + { + "epoch": 0.7167714578154701, + "grad_norm": 1.4516717315033434, + "learning_rate": 1.5240847308479855e-05, + "loss": 0.6781377196311951, + "step": 2451 + }, + { + "epoch": 0.7170638982307355, + "grad_norm": 1.5070563114338018, + "learning_rate": 1.523672913987878e-05, + "loss": 0.6476876735687256, + "step": 2452 + }, + { + "epoch": 0.7173563386460009, + "grad_norm": 1.5653544129198373, + "learning_rate": 1.523260974722411e-05, + "loss": 0.6564218997955322, + "step": 2453 + }, + { + "epoch": 0.7176487790612662, + "grad_norm": 1.3630096136191563, + "learning_rate": 1.5228489131478722e-05, + "loss": 0.6455773711204529, + "step": 2454 + }, + { + "epoch": 0.7179412194765317, + "grad_norm": 1.393672298684458, + "learning_rate": 1.5224367293605791e-05, + "loss": 0.6039570569992065, + "step": 2455 + }, + { + "epoch": 0.718233659891797, + "grad_norm": 1.337144764968105, + "learning_rate": 1.522024423456877e-05, + "loss": 0.7060747146606445, + "step": 2456 + }, + { + "epoch": 0.7185261003070624, + "grad_norm": 1.3843662041380984, + "learning_rate": 1.52161199553314e-05, + "loss": 0.561469316482544, + "step": 2457 + }, + { + "epoch": 0.7188185407223279, + 
"grad_norm": 1.4419398084710615, + "learning_rate": 1.5211994456857706e-05, + "loss": 0.6682697534561157, + "step": 2458 + }, + { + "epoch": 0.7191109811375932, + "grad_norm": 1.5477256567407798, + "learning_rate": 1.5207867740111994e-05, + "loss": 0.7893983125686646, + "step": 2459 + }, + { + "epoch": 0.7194034215528586, + "grad_norm": 1.3454706918314496, + "learning_rate": 1.5203739806058863e-05, + "loss": 0.617809534072876, + "step": 2460 + }, + { + "epoch": 0.719695861968124, + "grad_norm": 1.2866429351470308, + "learning_rate": 1.5199610655663193e-05, + "loss": 0.5444413423538208, + "step": 2461 + }, + { + "epoch": 0.7199883023833894, + "grad_norm": 1.342633438363169, + "learning_rate": 1.5195480289890146e-05, + "loss": 0.615330696105957, + "step": 2462 + }, + { + "epoch": 0.7202807427986547, + "grad_norm": 1.7658118623485195, + "learning_rate": 1.5191348709705169e-05, + "loss": 0.6811497211456299, + "step": 2463 + }, + { + "epoch": 0.7205731832139202, + "grad_norm": 1.3224396770739022, + "learning_rate": 1.5187215916073997e-05, + "loss": 0.612322211265564, + "step": 2464 + }, + { + "epoch": 0.7208656236291856, + "grad_norm": 1.2201627110269677, + "learning_rate": 1.518308190996264e-05, + "loss": 0.6106880903244019, + "step": 2465 + }, + { + "epoch": 0.7211580640444509, + "grad_norm": 1.2431923365136468, + "learning_rate": 1.5178946692337405e-05, + "loss": 0.4901464581489563, + "step": 2466 + }, + { + "epoch": 0.7214505044597164, + "grad_norm": 2.053814058775723, + "learning_rate": 1.5174810264164865e-05, + "loss": 0.6777167320251465, + "step": 2467 + }, + { + "epoch": 0.7217429448749817, + "grad_norm": 1.4212256530727148, + "learning_rate": 1.5170672626411888e-05, + "loss": 0.6353746056556702, + "step": 2468 + }, + { + "epoch": 0.7220353852902471, + "grad_norm": 1.4867453474426244, + "learning_rate": 1.516653378004563e-05, + "loss": 0.6218847632408142, + "step": 2469 + }, + { + "epoch": 0.7223278257055125, + "grad_norm": 1.2225434595050702, + "learning_rate": 
1.5162393726033508e-05, + "loss": 0.5001585483551025, + "step": 2470 + }, + { + "epoch": 0.7226202661207779, + "grad_norm": 1.5129628743171017, + "learning_rate": 1.5158252465343242e-05, + "loss": 0.6801280975341797, + "step": 2471 + }, + { + "epoch": 0.7229127065360432, + "grad_norm": 1.208746836224967, + "learning_rate": 1.5154109998942823e-05, + "loss": 0.6739565134048462, + "step": 2472 + }, + { + "epoch": 0.7232051469513087, + "grad_norm": 1.1414220178862078, + "learning_rate": 1.5149966327800532e-05, + "loss": 0.5970213413238525, + "step": 2473 + }, + { + "epoch": 0.7234975873665741, + "grad_norm": 1.295455082889375, + "learning_rate": 1.5145821452884923e-05, + "loss": 0.7367317080497742, + "step": 2474 + }, + { + "epoch": 0.7237900277818394, + "grad_norm": 1.3877158266331615, + "learning_rate": 1.5141675375164839e-05, + "loss": 0.6332153677940369, + "step": 2475 + }, + { + "epoch": 0.7240824681971049, + "grad_norm": 1.223786080062607, + "learning_rate": 1.5137528095609395e-05, + "loss": 0.6185739636421204, + "step": 2476 + }, + { + "epoch": 0.7243749086123702, + "grad_norm": 1.436341367228992, + "learning_rate": 1.5133379615187996e-05, + "loss": 0.5982746481895447, + "step": 2477 + }, + { + "epoch": 0.7246673490276356, + "grad_norm": 1.32306496712973, + "learning_rate": 1.512922993487032e-05, + "loss": 0.5946815013885498, + "step": 2478 + }, + { + "epoch": 0.724959789442901, + "grad_norm": 1.2916301226572995, + "learning_rate": 1.5125079055626337e-05, + "loss": 0.5645624399185181, + "step": 2479 + }, + { + "epoch": 0.7252522298581664, + "grad_norm": 1.0689440382368105, + "learning_rate": 1.5120926978426288e-05, + "loss": 0.43329858779907227, + "step": 2480 + }, + { + "epoch": 0.7255446702734318, + "grad_norm": 1.420557871943188, + "learning_rate": 1.5116773704240689e-05, + "loss": 0.64244544506073, + "step": 2481 + }, + { + "epoch": 0.7258371106886972, + "grad_norm": 1.3002221181867923, + "learning_rate": 1.5112619234040348e-05, + "loss": 0.6640222072601318, 
+ "step": 2482 + }, + { + "epoch": 0.7261295511039626, + "grad_norm": 1.4810661665547034, + "learning_rate": 1.5108463568796346e-05, + "loss": 0.6346921324729919, + "step": 2483 + }, + { + "epoch": 0.7264219915192279, + "grad_norm": 1.4101536258246594, + "learning_rate": 1.5104306709480045e-05, + "loss": 0.5891947746276855, + "step": 2484 + }, + { + "epoch": 0.7267144319344934, + "grad_norm": 1.2478330500785222, + "learning_rate": 1.5100148657063089e-05, + "loss": 0.616216242313385, + "step": 2485 + }, + { + "epoch": 0.7270068723497587, + "grad_norm": 1.3541911638943873, + "learning_rate": 1.5095989412517389e-05, + "loss": 0.5961766242980957, + "step": 2486 + }, + { + "epoch": 0.7272993127650241, + "grad_norm": 1.27681624299837, + "learning_rate": 1.509182897681515e-05, + "loss": 0.5629050731658936, + "step": 2487 + }, + { + "epoch": 0.7275917531802896, + "grad_norm": 1.3918382252124497, + "learning_rate": 1.5087667350928844e-05, + "loss": 0.6640661954879761, + "step": 2488 + }, + { + "epoch": 0.7278841935955549, + "grad_norm": 1.1741006713729014, + "learning_rate": 1.5083504535831233e-05, + "loss": 0.5884503126144409, + "step": 2489 + }, + { + "epoch": 0.7281766340108203, + "grad_norm": 1.2387841976936662, + "learning_rate": 1.5079340532495344e-05, + "loss": 0.5395207405090332, + "step": 2490 + }, + { + "epoch": 0.7284690744260857, + "grad_norm": 1.5570127298934886, + "learning_rate": 1.5075175341894487e-05, + "loss": 0.5713212490081787, + "step": 2491 + }, + { + "epoch": 0.7287615148413511, + "grad_norm": 1.8811783299638292, + "learning_rate": 1.5071008965002252e-05, + "loss": 0.5732176303863525, + "step": 2492 + }, + { + "epoch": 0.7290539552566164, + "grad_norm": 1.3314823409610355, + "learning_rate": 1.50668414027925e-05, + "loss": 0.6381006240844727, + "step": 2493 + }, + { + "epoch": 0.7293463956718819, + "grad_norm": 1.5687830928425197, + "learning_rate": 1.5062672656239381e-05, + "loss": 0.6533833742141724, + "step": 2494 + }, + { + "epoch": 
0.7296388360871472, + "grad_norm": 1.213698756503139, + "learning_rate": 1.5058502726317309e-05, + "loss": 0.5919456481933594, + "step": 2495 + }, + { + "epoch": 0.7299312765024126, + "grad_norm": 1.3954865057419796, + "learning_rate": 1.5054331614000984e-05, + "loss": 0.6128921508789062, + "step": 2496 + }, + { + "epoch": 0.7302237169176781, + "grad_norm": 1.3910630571139424, + "learning_rate": 1.5050159320265371e-05, + "loss": 0.5949394702911377, + "step": 2497 + }, + { + "epoch": 0.7305161573329434, + "grad_norm": 1.5386167534502115, + "learning_rate": 1.5045985846085724e-05, + "loss": 0.6262483596801758, + "step": 2498 + }, + { + "epoch": 0.7308085977482088, + "grad_norm": 1.4477928134421267, + "learning_rate": 1.5041811192437563e-05, + "loss": 0.5032243728637695, + "step": 2499 + }, + { + "epoch": 0.7311010381634742, + "grad_norm": 1.31776348667592, + "learning_rate": 1.5037635360296695e-05, + "loss": 0.6721810102462769, + "step": 2500 + }, + { + "epoch": 0.7313934785787396, + "grad_norm": 1.3556666925406757, + "learning_rate": 1.5033458350639185e-05, + "loss": 0.7091001272201538, + "step": 2501 + }, + { + "epoch": 0.7316859189940049, + "grad_norm": 3.8317594491760163, + "learning_rate": 1.5029280164441395e-05, + "loss": 0.5414971113204956, + "step": 2502 + }, + { + "epoch": 0.7319783594092704, + "grad_norm": 1.364096425695391, + "learning_rate": 1.5025100802679944e-05, + "loss": 0.6714789271354675, + "step": 2503 + }, + { + "epoch": 0.7322707998245358, + "grad_norm": 1.3566105024089323, + "learning_rate": 1.5020920266331733e-05, + "loss": 0.5008493065834045, + "step": 2504 + }, + { + "epoch": 0.7325632402398011, + "grad_norm": 1.652155025588763, + "learning_rate": 1.5016738556373936e-05, + "loss": 0.563892126083374, + "step": 2505 + }, + { + "epoch": 0.7328556806550666, + "grad_norm": 1.3313159442091285, + "learning_rate": 1.5012555673784004e-05, + "loss": 0.6371973752975464, + "step": 2506 + }, + { + "epoch": 0.7331481210703319, + "grad_norm": 
1.5289698261558242, + "learning_rate": 1.5008371619539661e-05, + "loss": 0.7365365624427795, + "step": 2507 + }, + { + "epoch": 0.7334405614855973, + "grad_norm": 1.357001447635837, + "learning_rate": 1.5004186394618906e-05, + "loss": 0.5401967763900757, + "step": 2508 + }, + { + "epoch": 0.7337330019008627, + "grad_norm": 1.4338213701683389, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.5827134847640991, + "step": 2509 + }, + { + "epoch": 0.7340254423161281, + "grad_norm": 1.5173171956884226, + "learning_rate": 1.49958124366615e-05, + "loss": 0.7655869126319885, + "step": 2510 + }, + { + "epoch": 0.7343178827313934, + "grad_norm": 1.3360976464033478, + "learning_rate": 1.4991623705582216e-05, + "loss": 0.5410823822021484, + "step": 2511 + }, + { + "epoch": 0.7346103231466589, + "grad_norm": 1.6453007873220271, + "learning_rate": 1.4987433807741242e-05, + "loss": 0.6831178665161133, + "step": 2512 + }, + { + "epoch": 0.7349027635619243, + "grad_norm": 1.2152056235269613, + "learning_rate": 1.498324274411794e-05, + "loss": 0.4952821731567383, + "step": 2513 + }, + { + "epoch": 0.7351952039771896, + "grad_norm": 1.4425254779779118, + "learning_rate": 1.4979050515691944e-05, + "loss": 0.6973339319229126, + "step": 2514 + }, + { + "epoch": 0.7354876443924551, + "grad_norm": 1.428400853551732, + "learning_rate": 1.4974857123443163e-05, + "loss": 0.6604373455047607, + "step": 2515 + }, + { + "epoch": 0.7357800848077204, + "grad_norm": 1.3355207439959806, + "learning_rate": 1.4970662568351776e-05, + "loss": 0.6523034572601318, + "step": 2516 + }, + { + "epoch": 0.7360725252229858, + "grad_norm": 1.2739776061453822, + "learning_rate": 1.4966466851398238e-05, + "loss": 0.6557538509368896, + "step": 2517 + }, + { + "epoch": 0.7363649656382512, + "grad_norm": 1.3243836594251046, + "learning_rate": 1.4962269973563269e-05, + "loss": 0.6993967294692993, + "step": 2518 + }, + { + "epoch": 0.7366574060535166, + "grad_norm": 1.3043008466806634, + "learning_rate": 
1.4958071935827862e-05, + "loss": 0.611979067325592, + "step": 2519 + }, + { + "epoch": 0.736949846468782, + "grad_norm": 1.5837280682600245, + "learning_rate": 1.4953872739173289e-05, + "loss": 0.9108786582946777, + "step": 2520 + }, + { + "epoch": 0.7372422868840474, + "grad_norm": 1.5471791396278156, + "learning_rate": 1.4949672384581082e-05, + "loss": 0.7086392045021057, + "step": 2521 + }, + { + "epoch": 0.7375347272993128, + "grad_norm": 1.341070279173996, + "learning_rate": 1.494547087303305e-05, + "loss": 0.6103025674819946, + "step": 2522 + }, + { + "epoch": 0.7378271677145781, + "grad_norm": 1.223930383405044, + "learning_rate": 1.4941268205511272e-05, + "loss": 0.5597528219223022, + "step": 2523 + }, + { + "epoch": 0.7381196081298436, + "grad_norm": 1.4817126292023657, + "learning_rate": 1.4937064382998091e-05, + "loss": 0.6222598552703857, + "step": 2524 + }, + { + "epoch": 0.7384120485451089, + "grad_norm": 1.4738198225513357, + "learning_rate": 1.4932859406476131e-05, + "loss": 0.6083353757858276, + "step": 2525 + }, + { + "epoch": 0.7387044889603743, + "grad_norm": 1.2716230350108357, + "learning_rate": 1.4928653276928275e-05, + "loss": 0.47920671105384827, + "step": 2526 + }, + { + "epoch": 0.7389969293756398, + "grad_norm": 1.2356122713189879, + "learning_rate": 1.4924445995337685e-05, + "loss": 0.5752983093261719, + "step": 2527 + }, + { + "epoch": 0.7392893697909051, + "grad_norm": 1.3500870063925003, + "learning_rate": 1.4920237562687784e-05, + "loss": 0.6275333762168884, + "step": 2528 + }, + { + "epoch": 0.7395818102061705, + "grad_norm": 1.3423023519178945, + "learning_rate": 1.4916027979962266e-05, + "loss": 0.6362103223800659, + "step": 2529 + }, + { + "epoch": 0.7398742506214359, + "grad_norm": 1.4246415171584412, + "learning_rate": 1.49118172481451e-05, + "loss": 0.5902664661407471, + "step": 2530 + }, + { + "epoch": 0.7401666910367013, + "grad_norm": 1.3036213595476636, + "learning_rate": 1.4907605368220514e-05, + "loss": 
0.5293874740600586, + "step": 2531 + }, + { + "epoch": 0.7404591314519666, + "grad_norm": 1.3590290047464213, + "learning_rate": 1.4903392341173013e-05, + "loss": 0.7298746109008789, + "step": 2532 + }, + { + "epoch": 0.7407515718672321, + "grad_norm": 1.3755489549876734, + "learning_rate": 1.4899178167987367e-05, + "loss": 0.6428382396697998, + "step": 2533 + }, + { + "epoch": 0.7410440122824974, + "grad_norm": 1.3444422145970576, + "learning_rate": 1.489496284964861e-05, + "loss": 0.6204425096511841, + "step": 2534 + }, + { + "epoch": 0.7413364526977628, + "grad_norm": 1.2627663029943075, + "learning_rate": 1.4890746387142052e-05, + "loss": 0.6025601625442505, + "step": 2535 + }, + { + "epoch": 0.7416288931130283, + "grad_norm": 1.212213289149315, + "learning_rate": 1.4886528781453258e-05, + "loss": 0.5570085644721985, + "step": 2536 + }, + { + "epoch": 0.7419213335282936, + "grad_norm": 1.387517207017057, + "learning_rate": 1.4882310033568072e-05, + "loss": 0.6816439628601074, + "step": 2537 + }, + { + "epoch": 0.742213773943559, + "grad_norm": 1.341130650337267, + "learning_rate": 1.4878090144472603e-05, + "loss": 0.5424396991729736, + "step": 2538 + }, + { + "epoch": 0.7425062143588244, + "grad_norm": 1.583973779595893, + "learning_rate": 1.4873869115153223e-05, + "loss": 0.58860182762146, + "step": 2539 + }, + { + "epoch": 0.7427986547740898, + "grad_norm": 1.227937032120959, + "learning_rate": 1.4869646946596568e-05, + "loss": 0.513140857219696, + "step": 2540 + }, + { + "epoch": 0.7430910951893551, + "grad_norm": 1.3321578929704418, + "learning_rate": 1.486542363978955e-05, + "loss": 0.5967035293579102, + "step": 2541 + }, + { + "epoch": 0.7433835356046206, + "grad_norm": 1.2958174333377406, + "learning_rate": 1.4861199195719334e-05, + "loss": 0.6988440752029419, + "step": 2542 + }, + { + "epoch": 0.743675976019886, + "grad_norm": 1.3279731889181368, + "learning_rate": 1.4856973615373366e-05, + "loss": 0.6176164746284485, + "step": 2543 + }, + { + "epoch": 
0.7439684164351513, + "grad_norm": 1.394214331783624, + "learning_rate": 1.4852746899739346e-05, + "loss": 0.5616505742073059, + "step": 2544 + }, + { + "epoch": 0.7442608568504168, + "grad_norm": 1.199172810090394, + "learning_rate": 1.4848519049805243e-05, + "loss": 0.5470465421676636, + "step": 2545 + }, + { + "epoch": 0.7445532972656821, + "grad_norm": 1.393649724579279, + "learning_rate": 1.4844290066559292e-05, + "loss": 0.6362754106521606, + "step": 2546 + }, + { + "epoch": 0.7448457376809475, + "grad_norm": 1.2298975206172837, + "learning_rate": 1.4840059950989992e-05, + "loss": 0.6290515661239624, + "step": 2547 + }, + { + "epoch": 0.7451381780962129, + "grad_norm": 1.4356832247939193, + "learning_rate": 1.4835828704086105e-05, + "loss": 0.7225647568702698, + "step": 2548 + }, + { + "epoch": 0.7454306185114783, + "grad_norm": 1.4603777863967904, + "learning_rate": 1.483159632683666e-05, + "loss": 0.6993023157119751, + "step": 2549 + }, + { + "epoch": 0.7457230589267436, + "grad_norm": 1.5062925776475273, + "learning_rate": 1.482736282023095e-05, + "loss": 0.6960086226463318, + "step": 2550 + }, + { + "epoch": 0.7460154993420091, + "grad_norm": 1.4783046017210701, + "learning_rate": 1.4823128185258535e-05, + "loss": 0.627712607383728, + "step": 2551 + }, + { + "epoch": 0.7463079397572745, + "grad_norm": 1.3756379084869055, + "learning_rate": 1.481889242290923e-05, + "loss": 0.6314729452133179, + "step": 2552 + }, + { + "epoch": 0.7466003801725398, + "grad_norm": 1.293029687195421, + "learning_rate": 1.4814655534173121e-05, + "loss": 0.5948070287704468, + "step": 2553 + }, + { + "epoch": 0.7468928205878053, + "grad_norm": 1.28283626174806, + "learning_rate": 1.4810417520040551e-05, + "loss": 0.6227586269378662, + "step": 2554 + }, + { + "epoch": 0.7471852610030706, + "grad_norm": 1.156874509923564, + "learning_rate": 1.4806178381502139e-05, + "loss": 0.589213490486145, + "step": 2555 + }, + { + "epoch": 0.747477701418336, + "grad_norm": 1.3920763104069633, + 
"learning_rate": 1.4801938119548748e-05, + "loss": 0.6748968362808228, + "step": 2556 + }, + { + "epoch": 0.7477701418336014, + "grad_norm": 1.5278244850962377, + "learning_rate": 1.4797696735171521e-05, + "loss": 0.627450704574585, + "step": 2557 + }, + { + "epoch": 0.7480625822488668, + "grad_norm": 1.3979513679962843, + "learning_rate": 1.479345422936185e-05, + "loss": 0.5816184878349304, + "step": 2558 + }, + { + "epoch": 0.7483550226641322, + "grad_norm": 1.3403975244231432, + "learning_rate": 1.4789210603111399e-05, + "loss": 0.5184855461120605, + "step": 2559 + }, + { + "epoch": 0.7486474630793976, + "grad_norm": 1.3184163367774433, + "learning_rate": 1.4784965857412088e-05, + "loss": 0.5747300982475281, + "step": 2560 + }, + { + "epoch": 0.748939903494663, + "grad_norm": 1.5154750654158269, + "learning_rate": 1.4780719993256104e-05, + "loss": 0.6957682371139526, + "step": 2561 + }, + { + "epoch": 0.7492323439099283, + "grad_norm": 1.3790848349629903, + "learning_rate": 1.4776473011635886e-05, + "loss": 0.5711330771446228, + "step": 2562 + }, + { + "epoch": 0.7495247843251938, + "grad_norm": 1.260228471581513, + "learning_rate": 1.4772224913544142e-05, + "loss": 0.687350869178772, + "step": 2563 + }, + { + "epoch": 0.7498172247404591, + "grad_norm": 1.549796921470129, + "learning_rate": 1.476797569997384e-05, + "loss": 0.71396803855896, + "step": 2564 + }, + { + "epoch": 0.7501096651557245, + "grad_norm": 1.3620133851355087, + "learning_rate": 1.4763725371918209e-05, + "loss": 0.5457814335823059, + "step": 2565 + }, + { + "epoch": 0.75040210557099, + "grad_norm": 1.4687420339775556, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.5889413952827454, + "step": 2566 + }, + { + "epoch": 0.7506945459862553, + "grad_norm": 1.8883582542449355, + "learning_rate": 1.4755221376325171e-05, + "loss": 0.6222226619720459, + "step": 2567 + }, + { + "epoch": 0.7509869864015207, + "grad_norm": 1.17580934018018, + "learning_rate": 1.475096771077552e-05, + "loss": 
0.5273243188858032, + "step": 2568 + }, + { + "epoch": 0.7512794268167861, + "grad_norm": 1.2062680853030614, + "learning_rate": 1.4746712934716055e-05, + "loss": 0.5665162801742554, + "step": 2569 + }, + { + "epoch": 0.7515718672320515, + "grad_norm": 1.6320800654071554, + "learning_rate": 1.4742457049141298e-05, + "loss": 0.5748391151428223, + "step": 2570 + }, + { + "epoch": 0.7518643076473168, + "grad_norm": 1.4197866961281498, + "learning_rate": 1.4738200055046044e-05, + "loss": 0.7002041339874268, + "step": 2571 + }, + { + "epoch": 0.7521567480625823, + "grad_norm": 1.3507056136966096, + "learning_rate": 1.4733941953425337e-05, + "loss": 0.6841630935668945, + "step": 2572 + }, + { + "epoch": 0.7524491884778476, + "grad_norm": 1.6017928671701795, + "learning_rate": 1.4729682745274478e-05, + "loss": 0.7047172784805298, + "step": 2573 + }, + { + "epoch": 0.752741628893113, + "grad_norm": 1.4397980876250445, + "learning_rate": 1.4725422431589035e-05, + "loss": 0.6979919672012329, + "step": 2574 + }, + { + "epoch": 0.7530340693083785, + "grad_norm": 1.3152000128748418, + "learning_rate": 1.4721161013364829e-05, + "loss": 0.6437125205993652, + "step": 2575 + }, + { + "epoch": 0.7533265097236438, + "grad_norm": 1.4573280156715103, + "learning_rate": 1.4716898491597942e-05, + "loss": 0.591254711151123, + "step": 2576 + }, + { + "epoch": 0.7536189501389092, + "grad_norm": 1.592793146861773, + "learning_rate": 1.4712634867284714e-05, + "loss": 0.6276297569274902, + "step": 2577 + }, + { + "epoch": 0.7539113905541746, + "grad_norm": 1.2004846116513588, + "learning_rate": 1.4708370141421737e-05, + "loss": 0.5310626029968262, + "step": 2578 + }, + { + "epoch": 0.75420383096944, + "grad_norm": 1.374287364754045, + "learning_rate": 1.4704104315005864e-05, + "loss": 0.5256849527359009, + "step": 2579 + }, + { + "epoch": 0.7544962713847053, + "grad_norm": 1.4473126972035357, + "learning_rate": 1.4699837389034212e-05, + "loss": 0.6050584316253662, + "step": 2580 + }, + { + 
"epoch": 0.7547887117999708, + "grad_norm": 1.3425248874126274, + "learning_rate": 1.4695569364504144e-05, + "loss": 0.5124386548995972, + "step": 2581 + }, + { + "epoch": 0.7550811522152362, + "grad_norm": 1.1600080124683732, + "learning_rate": 1.4691300242413289e-05, + "loss": 0.5631951093673706, + "step": 2582 + }, + { + "epoch": 0.7553735926305015, + "grad_norm": 1.3017433820111879, + "learning_rate": 1.4687030023759527e-05, + "loss": 0.6352444291114807, + "step": 2583 + }, + { + "epoch": 0.755666033045767, + "grad_norm": 1.4490307646785157, + "learning_rate": 1.4682758709540992e-05, + "loss": 0.6717500686645508, + "step": 2584 + }, + { + "epoch": 0.7559584734610323, + "grad_norm": 3.0905292476778428, + "learning_rate": 1.467848630075608e-05, + "loss": 0.5889217853546143, + "step": 2585 + }, + { + "epoch": 0.7562509138762977, + "grad_norm": 1.258529998432557, + "learning_rate": 1.4674212798403443e-05, + "loss": 0.49069908261299133, + "step": 2586 + }, + { + "epoch": 0.756543354291563, + "grad_norm": 1.1729027861993524, + "learning_rate": 1.4669938203481982e-05, + "loss": 0.6272397041320801, + "step": 2587 + }, + { + "epoch": 0.7568357947068285, + "grad_norm": 1.5090841451643915, + "learning_rate": 1.466566251699086e-05, + "loss": 0.6218451261520386, + "step": 2588 + }, + { + "epoch": 0.7571282351220939, + "grad_norm": 1.4025085245751263, + "learning_rate": 1.4661385739929492e-05, + "loss": 0.6174849271774292, + "step": 2589 + }, + { + "epoch": 0.7574206755373593, + "grad_norm": 1.3554209784525295, + "learning_rate": 1.465710787329755e-05, + "loss": 0.5595160126686096, + "step": 2590 + }, + { + "epoch": 0.7577131159526247, + "grad_norm": 1.5657464206953444, + "learning_rate": 1.4652828918094954e-05, + "loss": 0.757240891456604, + "step": 2591 + }, + { + "epoch": 0.75800555636789, + "grad_norm": 1.3337551846990978, + "learning_rate": 1.4648548875321893e-05, + "loss": 0.630811333656311, + "step": 2592 + }, + { + "epoch": 0.7582979967831555, + "grad_norm": 
1.208341715070646, + "learning_rate": 1.4644267745978797e-05, + "loss": 0.5857812762260437, + "step": 2593 + }, + { + "epoch": 0.7585904371984208, + "grad_norm": 1.1785954348430454, + "learning_rate": 1.463998553106635e-05, + "loss": 0.5869519710540771, + "step": 2594 + }, + { + "epoch": 0.7588828776136862, + "grad_norm": 1.2035584714461103, + "learning_rate": 1.4635702231585498e-05, + "loss": 0.5610413551330566, + "step": 2595 + }, + { + "epoch": 0.7591753180289516, + "grad_norm": 1.255732340436211, + "learning_rate": 1.4631417848537435e-05, + "loss": 0.5634676218032837, + "step": 2596 + }, + { + "epoch": 0.759467758444217, + "grad_norm": 1.2847976698363035, + "learning_rate": 1.4627132382923607e-05, + "loss": 0.6813392639160156, + "step": 2597 + }, + { + "epoch": 0.7597601988594824, + "grad_norm": 1.5611350123657577, + "learning_rate": 1.4622845835745723e-05, + "loss": 0.644945502281189, + "step": 2598 + }, + { + "epoch": 0.7600526392747478, + "grad_norm": 1.4458723370490596, + "learning_rate": 1.461855820800573e-05, + "loss": 0.7432133555412292, + "step": 2599 + }, + { + "epoch": 0.7603450796900132, + "grad_norm": 1.1406983279122715, + "learning_rate": 1.4614269500705832e-05, + "loss": 0.4729112982749939, + "step": 2600 + }, + { + "epoch": 0.7606375201052785, + "grad_norm": 1.4806970647351285, + "learning_rate": 1.4609979714848499e-05, + "loss": 0.7146443128585815, + "step": 2601 + }, + { + "epoch": 0.760929960520544, + "grad_norm": 1.4348530933940364, + "learning_rate": 1.4605688851436436e-05, + "loss": 0.5959945917129517, + "step": 2602 + }, + { + "epoch": 0.7612224009358093, + "grad_norm": 1.3380784718799885, + "learning_rate": 1.4601396911472605e-05, + "loss": 0.6091525554656982, + "step": 2603 + }, + { + "epoch": 0.7615148413510747, + "grad_norm": 1.3043703832448297, + "learning_rate": 1.4597103895960228e-05, + "loss": 0.5101523399353027, + "step": 2604 + }, + { + "epoch": 0.7618072817663402, + "grad_norm": 1.3937793894568855, + "learning_rate": 
1.4592809805902762e-05, + "loss": 0.6036165952682495, + "step": 2605 + }, + { + "epoch": 0.7620997221816055, + "grad_norm": 1.361507946530242, + "learning_rate": 1.4588514642303928e-05, + "loss": 0.6094970703125, + "step": 2606 + }, + { + "epoch": 0.7623921625968709, + "grad_norm": 1.3770518433820003, + "learning_rate": 1.4584218406167697e-05, + "loss": 0.49754881858825684, + "step": 2607 + }, + { + "epoch": 0.7626846030121363, + "grad_norm": 1.3703785644048119, + "learning_rate": 1.4579921098498285e-05, + "loss": 0.6066807508468628, + "step": 2608 + }, + { + "epoch": 0.7629770434274017, + "grad_norm": 1.4768479795454132, + "learning_rate": 1.4575622720300162e-05, + "loss": 0.5758910179138184, + "step": 2609 + }, + { + "epoch": 0.763269483842667, + "grad_norm": 1.4281250780822374, + "learning_rate": 1.457132327257805e-05, + "loss": 0.6641621589660645, + "step": 2610 + }, + { + "epoch": 0.7635619242579325, + "grad_norm": 1.506727865728889, + "learning_rate": 1.4567022756336916e-05, + "loss": 0.7024788856506348, + "step": 2611 + }, + { + "epoch": 0.7638543646731978, + "grad_norm": 1.2921755321984356, + "learning_rate": 1.4562721172581982e-05, + "loss": 0.6066344380378723, + "step": 2612 + }, + { + "epoch": 0.7641468050884632, + "grad_norm": 1.3533854830579282, + "learning_rate": 1.4558418522318713e-05, + "loss": 0.566038966178894, + "step": 2613 + }, + { + "epoch": 0.7644392455037287, + "grad_norm": 1.3370326372322123, + "learning_rate": 1.4554114806552833e-05, + "loss": 0.5817335844039917, + "step": 2614 + }, + { + "epoch": 0.764731685918994, + "grad_norm": 1.2813703243908812, + "learning_rate": 1.4549810026290305e-05, + "loss": 0.6001763343811035, + "step": 2615 + }, + { + "epoch": 0.7650241263342594, + "grad_norm": 1.617460530676573, + "learning_rate": 1.4545504182537346e-05, + "loss": 0.6363068222999573, + "step": 2616 + }, + { + "epoch": 0.7653165667495248, + "grad_norm": 1.4805158326873171, + "learning_rate": 1.4541197276300424e-05, + "loss": 0.669566810131073, 
+ "step": 2617 + }, + { + "epoch": 0.7656090071647902, + "grad_norm": 1.2122677055370945, + "learning_rate": 1.4536889308586245e-05, + "loss": 0.47967004776000977, + "step": 2618 + }, + { + "epoch": 0.7659014475800555, + "grad_norm": 1.310958704364757, + "learning_rate": 1.4532580280401777e-05, + "loss": 0.5803399085998535, + "step": 2619 + }, + { + "epoch": 0.766193887995321, + "grad_norm": 1.3185113057937472, + "learning_rate": 1.452827019275423e-05, + "loss": 0.6870115995407104, + "step": 2620 + }, + { + "epoch": 0.7664863284105864, + "grad_norm": 1.307156915151953, + "learning_rate": 1.4523959046651058e-05, + "loss": 0.6190885901451111, + "step": 2621 + }, + { + "epoch": 0.7667787688258517, + "grad_norm": 1.4891479565012034, + "learning_rate": 1.4519646843099961e-05, + "loss": 0.6624859571456909, + "step": 2622 + }, + { + "epoch": 0.7670712092411172, + "grad_norm": 1.253302711959068, + "learning_rate": 1.4515333583108896e-05, + "loss": 0.5770546197891235, + "step": 2623 + }, + { + "epoch": 0.7673636496563825, + "grad_norm": 1.3410371709150275, + "learning_rate": 1.451101926768606e-05, + "loss": 0.6843355894088745, + "step": 2624 + }, + { + "epoch": 0.7676560900716479, + "grad_norm": 1.0930173610522418, + "learning_rate": 1.4506703897839895e-05, + "loss": 0.5293717384338379, + "step": 2625 + }, + { + "epoch": 0.7679485304869133, + "grad_norm": 1.1789701874259584, + "learning_rate": 1.45023874745791e-05, + "loss": 0.44534316658973694, + "step": 2626 + }, + { + "epoch": 0.7682409709021787, + "grad_norm": 4.2234169958332295, + "learning_rate": 1.4498069998912603e-05, + "loss": 0.7279446721076965, + "step": 2627 + }, + { + "epoch": 0.7685334113174441, + "grad_norm": 1.3924343198630234, + "learning_rate": 1.4493751471849596e-05, + "loss": 0.6990453600883484, + "step": 2628 + }, + { + "epoch": 0.7688258517327095, + "grad_norm": 1.3337373981179779, + "learning_rate": 1.44894318943995e-05, + "loss": 0.6610965728759766, + "step": 2629 + }, + { + "epoch": 
0.7691182921479749, + "grad_norm": 1.285212706548779, + "learning_rate": 1.4485111267571999e-05, + "loss": 0.5124749541282654, + "step": 2630 + }, + { + "epoch": 0.7694107325632402, + "grad_norm": 1.3445630320041935, + "learning_rate": 1.448078959237701e-05, + "loss": 0.7191518545150757, + "step": 2631 + }, + { + "epoch": 0.7697031729785057, + "grad_norm": 1.1499690572165278, + "learning_rate": 1.4476466869824694e-05, + "loss": 0.5798880457878113, + "step": 2632 + }, + { + "epoch": 0.769995613393771, + "grad_norm": 1.3900006441925277, + "learning_rate": 1.4472143100925467e-05, + "loss": 0.5187106728553772, + "step": 2633 + }, + { + "epoch": 0.7702880538090364, + "grad_norm": 1.1672945310140501, + "learning_rate": 1.4467818286689981e-05, + "loss": 0.5794588327407837, + "step": 2634 + }, + { + "epoch": 0.7705804942243017, + "grad_norm": 1.2435528275045493, + "learning_rate": 1.4463492428129133e-05, + "loss": 0.4884936809539795, + "step": 2635 + }, + { + "epoch": 0.7708729346395672, + "grad_norm": 1.3037745440935204, + "learning_rate": 1.4459165526254074e-05, + "loss": 0.5782946348190308, + "step": 2636 + }, + { + "epoch": 0.7711653750548326, + "grad_norm": 1.2531837165046444, + "learning_rate": 1.445483758207618e-05, + "loss": 0.5173349380493164, + "step": 2637 + }, + { + "epoch": 0.771457815470098, + "grad_norm": 1.4752149684021225, + "learning_rate": 1.4450508596607087e-05, + "loss": 0.616407573223114, + "step": 2638 + }, + { + "epoch": 0.7717502558853634, + "grad_norm": 1.4855666629653779, + "learning_rate": 1.4446178570858672e-05, + "loss": 0.537878155708313, + "step": 2639 + }, + { + "epoch": 0.7720426963006287, + "grad_norm": 1.2968861628303388, + "learning_rate": 1.4441847505843048e-05, + "loss": 0.674277663230896, + "step": 2640 + }, + { + "epoch": 0.7723351367158942, + "grad_norm": 1.440782866010467, + "learning_rate": 1.4437515402572576e-05, + "loss": 0.5064860582351685, + "step": 2641 + }, + { + "epoch": 0.7726275771311595, + "grad_norm": 
1.2859384806045262, + "learning_rate": 1.4433182262059861e-05, + "loss": 0.6256883144378662, + "step": 2642 + }, + { + "epoch": 0.7729200175464249, + "grad_norm": 1.2490391757844836, + "learning_rate": 1.4428848085317744e-05, + "loss": 0.6023700833320618, + "step": 2643 + }, + { + "epoch": 0.7732124579616904, + "grad_norm": 1.5137270909206324, + "learning_rate": 1.4424512873359316e-05, + "loss": 0.5670932531356812, + "step": 2644 + }, + { + "epoch": 0.7735048983769557, + "grad_norm": 1.406486208295682, + "learning_rate": 1.4420176627197906e-05, + "loss": 0.760460376739502, + "step": 2645 + }, + { + "epoch": 0.7737973387922211, + "grad_norm": 1.3383411751300025, + "learning_rate": 1.4415839347847082e-05, + "loss": 0.5680848956108093, + "step": 2646 + }, + { + "epoch": 0.7740897792074865, + "grad_norm": 1.2948318300140997, + "learning_rate": 1.4411501036320661e-05, + "loss": 0.5962368249893188, + "step": 2647 + }, + { + "epoch": 0.7743822196227519, + "grad_norm": 1.3851281269469669, + "learning_rate": 1.4407161693632697e-05, + "loss": 0.7149791121482849, + "step": 2648 + }, + { + "epoch": 0.7746746600380172, + "grad_norm": 1.4438569377090373, + "learning_rate": 1.440282132079748e-05, + "loss": 0.5943992733955383, + "step": 2649 + }, + { + "epoch": 0.7749671004532827, + "grad_norm": 1.681920535370579, + "learning_rate": 1.439847991882955e-05, + "loss": 0.7265899181365967, + "step": 2650 + }, + { + "epoch": 0.775259540868548, + "grad_norm": 1.257384791880329, + "learning_rate": 1.4394137488743682e-05, + "loss": 0.6011309027671814, + "step": 2651 + }, + { + "epoch": 0.7755519812838134, + "grad_norm": 1.4419500386554907, + "learning_rate": 1.4389794031554894e-05, + "loss": 0.6853964328765869, + "step": 2652 + }, + { + "epoch": 0.7758444216990789, + "grad_norm": 1.4140520249216477, + "learning_rate": 1.438544954827844e-05, + "loss": 0.6598547697067261, + "step": 2653 + }, + { + "epoch": 0.7761368621143442, + "grad_norm": 1.3919438302264315, + "learning_rate": 
1.4381104039929819e-05, + "loss": 0.5776119232177734, + "step": 2654 + }, + { + "epoch": 0.7764293025296096, + "grad_norm": 1.182931573556341, + "learning_rate": 1.4376757507524766e-05, + "loss": 0.6026376485824585, + "step": 2655 + }, + { + "epoch": 0.776721742944875, + "grad_norm": 1.2883148172478378, + "learning_rate": 1.4372409952079256e-05, + "loss": 0.5776997804641724, + "step": 2656 + }, + { + "epoch": 0.7770141833601404, + "grad_norm": 1.5317545348037325, + "learning_rate": 1.4368061374609505e-05, + "loss": 0.5766068696975708, + "step": 2657 + }, + { + "epoch": 0.7773066237754057, + "grad_norm": 1.0428168520269592, + "learning_rate": 1.4363711776131966e-05, + "loss": 0.4783105254173279, + "step": 2658 + }, + { + "epoch": 0.7775990641906712, + "grad_norm": 1.4837098758543301, + "learning_rate": 1.4359361157663332e-05, + "loss": 0.6563695073127747, + "step": 2659 + }, + { + "epoch": 0.7778915046059366, + "grad_norm": 1.0898257169197185, + "learning_rate": 1.4355009520220531e-05, + "loss": 0.5177119374275208, + "step": 2660 + }, + { + "epoch": 0.7781839450212019, + "grad_norm": 1.3520526907259511, + "learning_rate": 1.4350656864820733e-05, + "loss": 0.6590641736984253, + "step": 2661 + }, + { + "epoch": 0.7784763854364674, + "grad_norm": 1.2923155412118275, + "learning_rate": 1.4346303192481348e-05, + "loss": 0.6012274622917175, + "step": 2662 + }, + { + "epoch": 0.7787688258517327, + "grad_norm": 1.439032337982527, + "learning_rate": 1.4341948504220016e-05, + "loss": 0.6731704473495483, + "step": 2663 + }, + { + "epoch": 0.7790612662669981, + "grad_norm": 1.4598986218346195, + "learning_rate": 1.4337592801054623e-05, + "loss": 0.6827171444892883, + "step": 2664 + }, + { + "epoch": 0.7793537066822634, + "grad_norm": 1.3963311439466064, + "learning_rate": 1.4333236084003282e-05, + "loss": 0.6654937267303467, + "step": 2665 + }, + { + "epoch": 0.7796461470975289, + "grad_norm": 1.276825216432019, + "learning_rate": 1.4328878354084355e-05, + "loss": 
0.5673532485961914, + "step": 2666 + }, + { + "epoch": 0.7799385875127943, + "grad_norm": 1.3049192363130713, + "learning_rate": 1.432451961231643e-05, + "loss": 0.5401986241340637, + "step": 2667 + }, + { + "epoch": 0.7802310279280597, + "grad_norm": 1.2877259559166432, + "learning_rate": 1.4320159859718341e-05, + "loss": 0.6134701371192932, + "step": 2668 + }, + { + "epoch": 0.7805234683433251, + "grad_norm": 1.5022932512908924, + "learning_rate": 1.4315799097309152e-05, + "loss": 0.6913554668426514, + "step": 2669 + }, + { + "epoch": 0.7808159087585904, + "grad_norm": 1.6126405133572825, + "learning_rate": 1.4311437326108167e-05, + "loss": 0.6969482898712158, + "step": 2670 + }, + { + "epoch": 0.7811083491738559, + "grad_norm": 1.343855488902383, + "learning_rate": 1.4307074547134918e-05, + "loss": 0.6612537503242493, + "step": 2671 + }, + { + "epoch": 0.7814007895891212, + "grad_norm": 1.1627822310905236, + "learning_rate": 1.430271076140918e-05, + "loss": 0.5545899868011475, + "step": 2672 + }, + { + "epoch": 0.7816932300043866, + "grad_norm": 1.1885930128001867, + "learning_rate": 1.4298345969950965e-05, + "loss": 0.6635574698448181, + "step": 2673 + }, + { + "epoch": 0.781985670419652, + "grad_norm": 1.4316816688950922, + "learning_rate": 1.4293980173780514e-05, + "loss": 0.5859510898590088, + "step": 2674 + }, + { + "epoch": 0.7822781108349174, + "grad_norm": 1.246244040215616, + "learning_rate": 1.4289613373918304e-05, + "loss": 0.5839825868606567, + "step": 2675 + }, + { + "epoch": 0.7825705512501828, + "grad_norm": 1.7192756445293216, + "learning_rate": 1.428524557138505e-05, + "loss": 0.6376889944076538, + "step": 2676 + }, + { + "epoch": 0.7828629916654481, + "grad_norm": 1.2061132029389496, + "learning_rate": 1.4280876767201696e-05, + "loss": 0.5473129749298096, + "step": 2677 + }, + { + "epoch": 0.7831554320807136, + "grad_norm": 1.2355367438994083, + "learning_rate": 1.4276506962389429e-05, + "loss": 0.6723904609680176, + "step": 2678 + }, + { + 
"epoch": 0.7834478724959789, + "grad_norm": 1.318329485547163, + "learning_rate": 1.4272136157969658e-05, + "loss": 0.6036845445632935, + "step": 2679 + }, + { + "epoch": 0.7837403129112444, + "grad_norm": 1.4527977807212105, + "learning_rate": 1.4267764354964038e-05, + "loss": 0.5993655920028687, + "step": 2680 + }, + { + "epoch": 0.7840327533265097, + "grad_norm": 1.5159579383707373, + "learning_rate": 1.4263391554394448e-05, + "loss": 0.6678075194358826, + "step": 2681 + }, + { + "epoch": 0.7843251937417751, + "grad_norm": 1.2588619303254647, + "learning_rate": 1.4259017757283003e-05, + "loss": 0.5627151727676392, + "step": 2682 + }, + { + "epoch": 0.7846176341570406, + "grad_norm": 1.2632820141578516, + "learning_rate": 1.4254642964652053e-05, + "loss": 0.6060316562652588, + "step": 2683 + }, + { + "epoch": 0.7849100745723059, + "grad_norm": 1.590473454276912, + "learning_rate": 1.4250267177524177e-05, + "loss": 0.6535854935646057, + "step": 2684 + }, + { + "epoch": 0.7852025149875713, + "grad_norm": 1.499355267260573, + "learning_rate": 1.4245890396922195e-05, + "loss": 0.7141643762588501, + "step": 2685 + }, + { + "epoch": 0.7854949554028366, + "grad_norm": 1.5067703709229516, + "learning_rate": 1.4241512623869143e-05, + "loss": 0.6685847640037537, + "step": 2686 + }, + { + "epoch": 0.7857873958181021, + "grad_norm": 1.4195544467165693, + "learning_rate": 1.4237133859388305e-05, + "loss": 0.6745196580886841, + "step": 2687 + }, + { + "epoch": 0.7860798362333674, + "grad_norm": 1.5617010746630147, + "learning_rate": 1.423275410450319e-05, + "loss": 0.6891968250274658, + "step": 2688 + }, + { + "epoch": 0.7863722766486329, + "grad_norm": 1.3584703297700564, + "learning_rate": 1.422837336023754e-05, + "loss": 0.5614763498306274, + "step": 2689 + }, + { + "epoch": 0.7866647170638982, + "grad_norm": 1.3595148335065306, + "learning_rate": 1.4223991627615324e-05, + "loss": 0.5867494344711304, + "step": 2690 + }, + { + "epoch": 0.7869571574791636, + "grad_norm": 
1.453264768444311, + "learning_rate": 1.421960890766075e-05, + "loss": 0.644777774810791, + "step": 2691 + }, + { + "epoch": 0.787249597894429, + "grad_norm": 1.3023857436912896, + "learning_rate": 1.4215225201398249e-05, + "loss": 0.7237588167190552, + "step": 2692 + }, + { + "epoch": 0.7875420383096944, + "grad_norm": 1.45851809360972, + "learning_rate": 1.4210840509852484e-05, + "loss": 0.6314423680305481, + "step": 2693 + }, + { + "epoch": 0.7878344787249598, + "grad_norm": 1.2286351961246127, + "learning_rate": 1.4206454834048353e-05, + "loss": 0.5298433303833008, + "step": 2694 + }, + { + "epoch": 0.7881269191402251, + "grad_norm": 1.1185262454319822, + "learning_rate": 1.420206817501098e-05, + "loss": 0.507548451423645, + "step": 2695 + }, + { + "epoch": 0.7884193595554906, + "grad_norm": 1.7207072983596743, + "learning_rate": 1.4197680533765721e-05, + "loss": 0.7742520570755005, + "step": 2696 + }, + { + "epoch": 0.7887117999707559, + "grad_norm": 1.3752660802878722, + "learning_rate": 1.4193291911338161e-05, + "loss": 0.6261187195777893, + "step": 2697 + }, + { + "epoch": 0.7890042403860213, + "grad_norm": 1.521521524262885, + "learning_rate": 1.4188902308754108e-05, + "loss": 0.7501171827316284, + "step": 2698 + }, + { + "epoch": 0.7892966808012868, + "grad_norm": 1.3001128857102173, + "learning_rate": 1.4184511727039612e-05, + "loss": 0.5590647459030151, + "step": 2699 + }, + { + "epoch": 0.7895891212165521, + "grad_norm": 1.4479349527989895, + "learning_rate": 1.4180120167220941e-05, + "loss": 0.586786150932312, + "step": 2700 + }, + { + "epoch": 0.7898815616318176, + "grad_norm": 1.2133244570308048, + "learning_rate": 1.4175727630324598e-05, + "loss": 0.5208219289779663, + "step": 2701 + }, + { + "epoch": 0.7901740020470829, + "grad_norm": 1.2365924450408214, + "learning_rate": 1.4171334117377312e-05, + "loss": 0.5925623178482056, + "step": 2702 + }, + { + "epoch": 0.7904664424623483, + "grad_norm": 1.5006045037979843, + "learning_rate": 
1.4166939629406034e-05, + "loss": 0.7095032930374146, + "step": 2703 + }, + { + "epoch": 0.7907588828776136, + "grad_norm": 1.167282378609361, + "learning_rate": 1.4162544167437955e-05, + "loss": 0.5683872699737549, + "step": 2704 + }, + { + "epoch": 0.7910513232928791, + "grad_norm": 1.2605941476894575, + "learning_rate": 1.4158147732500482e-05, + "loss": 0.7079274654388428, + "step": 2705 + }, + { + "epoch": 0.7913437637081445, + "grad_norm": 1.3186161570017685, + "learning_rate": 1.415375032562126e-05, + "loss": 0.6336439847946167, + "step": 2706 + }, + { + "epoch": 0.7916362041234098, + "grad_norm": 1.14446239802259, + "learning_rate": 1.414935194782816e-05, + "loss": 0.4842381477355957, + "step": 2707 + }, + { + "epoch": 0.7919286445386753, + "grad_norm": 1.4296190875249344, + "learning_rate": 1.4144952600149267e-05, + "loss": 0.5439653396606445, + "step": 2708 + }, + { + "epoch": 0.7922210849539406, + "grad_norm": 1.2988205927389838, + "learning_rate": 1.4140552283612906e-05, + "loss": 0.6365468502044678, + "step": 2709 + }, + { + "epoch": 0.792513525369206, + "grad_norm": 1.3854921286863888, + "learning_rate": 1.4136150999247623e-05, + "loss": 0.6192438006401062, + "step": 2710 + }, + { + "epoch": 0.7928059657844714, + "grad_norm": 1.2293031316317269, + "learning_rate": 1.4131748748082191e-05, + "loss": 0.5695269703865051, + "step": 2711 + }, + { + "epoch": 0.7930984061997368, + "grad_norm": 1.3405661548900325, + "learning_rate": 1.4127345531145614e-05, + "loss": 0.6892319321632385, + "step": 2712 + }, + { + "epoch": 0.7933908466150021, + "grad_norm": 1.5220370415080073, + "learning_rate": 1.4122941349467109e-05, + "loss": 0.6294678449630737, + "step": 2713 + }, + { + "epoch": 0.7936832870302676, + "grad_norm": 1.2086123903849104, + "learning_rate": 1.4118536204076135e-05, + "loss": 0.6666272878646851, + "step": 2714 + }, + { + "epoch": 0.793975727445533, + "grad_norm": 1.2066166036349477, + "learning_rate": 1.4114130096002363e-05, + "loss": 
0.5981796383857727, + "step": 2715 + }, + { + "epoch": 0.7942681678607983, + "grad_norm": 1.5676320725913573, + "learning_rate": 1.4109723026275695e-05, + "loss": 0.6120023131370544, + "step": 2716 + }, + { + "epoch": 0.7945606082760638, + "grad_norm": 1.536602454646116, + "learning_rate": 1.4105314995926257e-05, + "loss": 0.5892866849899292, + "step": 2717 + }, + { + "epoch": 0.7948530486913291, + "grad_norm": 1.504529299257153, + "learning_rate": 1.4100906005984404e-05, + "loss": 0.7625553607940674, + "step": 2718 + }, + { + "epoch": 0.7951454891065945, + "grad_norm": 1.4565362056936688, + "learning_rate": 1.40964960574807e-05, + "loss": 0.643633246421814, + "step": 2719 + }, + { + "epoch": 0.7954379295218599, + "grad_norm": 1.2108583839611744, + "learning_rate": 1.4092085151445953e-05, + "loss": 0.46422284841537476, + "step": 2720 + }, + { + "epoch": 0.7957303699371253, + "grad_norm": 1.2654408745652597, + "learning_rate": 1.4087673288911182e-05, + "loss": 0.6290001273155212, + "step": 2721 + }, + { + "epoch": 0.7960228103523908, + "grad_norm": 1.2400549293858325, + "learning_rate": 1.4083260470907632e-05, + "loss": 0.5175197124481201, + "step": 2722 + }, + { + "epoch": 0.7963152507676561, + "grad_norm": 1.4748861405916942, + "learning_rate": 1.4078846698466776e-05, + "loss": 0.6475427150726318, + "step": 2723 + }, + { + "epoch": 0.7966076911829215, + "grad_norm": 1.3254407316825372, + "learning_rate": 1.40744319726203e-05, + "loss": 0.5978254079818726, + "step": 2724 + }, + { + "epoch": 0.7969001315981868, + "grad_norm": 1.2991181525686113, + "learning_rate": 1.4070016294400124e-05, + "loss": 0.5738629102706909, + "step": 2725 + }, + { + "epoch": 0.7971925720134523, + "grad_norm": 1.3493198611941248, + "learning_rate": 1.4065599664838388e-05, + "loss": 0.5809024572372437, + "step": 2726 + }, + { + "epoch": 0.7974850124287176, + "grad_norm": 1.1539725667160117, + "learning_rate": 1.4061182084967446e-05, + "loss": 0.5907782316207886, + "step": 2727 + }, + { + 
"epoch": 0.797777452843983, + "grad_norm": 1.4493981600012322, + "learning_rate": 1.4056763555819887e-05, + "loss": 0.7640036344528198, + "step": 2728 + }, + { + "epoch": 0.7980698932592484, + "grad_norm": 1.5601806517528776, + "learning_rate": 1.4052344078428513e-05, + "loss": 0.7472168207168579, + "step": 2729 + }, + { + "epoch": 0.7983623336745138, + "grad_norm": 1.6018546047693625, + "learning_rate": 1.4047923653826347e-05, + "loss": 0.6726990342140198, + "step": 2730 + }, + { + "epoch": 0.7986547740897793, + "grad_norm": 1.3791137229331067, + "learning_rate": 1.404350228304664e-05, + "loss": 0.5949650406837463, + "step": 2731 + }, + { + "epoch": 0.7989472145050446, + "grad_norm": 1.386756095528374, + "learning_rate": 1.403907996712286e-05, + "loss": 0.5578774213790894, + "step": 2732 + }, + { + "epoch": 0.79923965492031, + "grad_norm": 1.5271585141569006, + "learning_rate": 1.4034656707088692e-05, + "loss": 0.6092333197593689, + "step": 2733 + }, + { + "epoch": 0.7995320953355753, + "grad_norm": 1.3098390209876276, + "learning_rate": 1.4030232503978053e-05, + "loss": 0.5095718502998352, + "step": 2734 + }, + { + "epoch": 0.7998245357508408, + "grad_norm": 1.3675399597044373, + "learning_rate": 1.4025807358825072e-05, + "loss": 0.5155727863311768, + "step": 2735 + }, + { + "epoch": 0.8001169761661061, + "grad_norm": 1.3309663791332569, + "learning_rate": 1.4021381272664094e-05, + "loss": 0.5752589702606201, + "step": 2736 + }, + { + "epoch": 0.8004094165813715, + "grad_norm": 1.3619611747950222, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.6334787607192993, + "step": 2737 + }, + { + "epoch": 0.800701856996637, + "grad_norm": 1.3830503239164076, + "learning_rate": 1.4012526281456666e-05, + "loss": 0.7406032085418701, + "step": 2738 + }, + { + "epoch": 0.8009942974119023, + "grad_norm": 1.2904369174268238, + "learning_rate": 1.4008097378480014e-05, + "loss": 0.5805078744888306, + "step": 2739 + }, + { + "epoch": 0.8012867378271677, + "grad_norm": 
1.3584200788658642, + "learning_rate": 1.4003667538634972e-05, + "loss": 0.6849163770675659, + "step": 2740 + }, + { + "epoch": 0.8015791782424331, + "grad_norm": 1.5354340760410032, + "learning_rate": 1.3999236762956985e-05, + "loss": 0.7707695960998535, + "step": 2741 + }, + { + "epoch": 0.8018716186576985, + "grad_norm": 1.426293329050591, + "learning_rate": 1.3994805052481715e-05, + "loss": 0.6253059506416321, + "step": 2742 + }, + { + "epoch": 0.8021640590729638, + "grad_norm": 1.274928204575108, + "learning_rate": 1.3990372408245057e-05, + "loss": 0.6450316905975342, + "step": 2743 + }, + { + "epoch": 0.8024564994882293, + "grad_norm": 1.2867865996346037, + "learning_rate": 1.398593883128311e-05, + "loss": 0.672899603843689, + "step": 2744 + }, + { + "epoch": 0.8027489399034947, + "grad_norm": 1.38176481949922, + "learning_rate": 1.3981504322632198e-05, + "loss": 0.6203787326812744, + "step": 2745 + }, + { + "epoch": 0.80304138031876, + "grad_norm": 1.296034523853111, + "learning_rate": 1.3977068883328854e-05, + "loss": 0.541740894317627, + "step": 2746 + }, + { + "epoch": 0.8033338207340255, + "grad_norm": 1.3608273440615848, + "learning_rate": 1.3972632514409843e-05, + "loss": 0.5566504001617432, + "step": 2747 + }, + { + "epoch": 0.8036262611492908, + "grad_norm": 1.378445494532888, + "learning_rate": 1.3968195216912135e-05, + "loss": 0.6911404728889465, + "step": 2748 + }, + { + "epoch": 0.8039187015645562, + "grad_norm": 1.3758218413869647, + "learning_rate": 1.3963756991872921e-05, + "loss": 0.6744735240936279, + "step": 2749 + }, + { + "epoch": 0.8042111419798216, + "grad_norm": 1.3810636187989935, + "learning_rate": 1.3959317840329613e-05, + "loss": 0.6660502552986145, + "step": 2750 + }, + { + "epoch": 0.804503582395087, + "grad_norm": 1.611467815082346, + "learning_rate": 1.3954877763319832e-05, + "loss": 0.607395589351654, + "step": 2751 + }, + { + "epoch": 0.8047960228103523, + "grad_norm": 1.3065536354182021, + "learning_rate": 
1.395043676188142e-05, + "loss": 0.53249192237854, + "step": 2752 + }, + { + "epoch": 0.8050884632256178, + "grad_norm": 1.384670069600496, + "learning_rate": 1.394599483705243e-05, + "loss": 0.5728630423545837, + "step": 2753 + }, + { + "epoch": 0.8053809036408832, + "grad_norm": 1.354298055615179, + "learning_rate": 1.3941551989871142e-05, + "loss": 0.6912537813186646, + "step": 2754 + }, + { + "epoch": 0.8056733440561485, + "grad_norm": 1.2211163784496284, + "learning_rate": 1.3937108221376041e-05, + "loss": 0.6002523899078369, + "step": 2755 + }, + { + "epoch": 0.805965784471414, + "grad_norm": 1.165855753943377, + "learning_rate": 1.3932663532605832e-05, + "loss": 0.6573797464370728, + "step": 2756 + }, + { + "epoch": 0.8062582248866793, + "grad_norm": 1.2846173311931015, + "learning_rate": 1.3928217924599433e-05, + "loss": 0.6997278928756714, + "step": 2757 + }, + { + "epoch": 0.8065506653019447, + "grad_norm": 1.3457721921363819, + "learning_rate": 1.3923771398395978e-05, + "loss": 0.565264105796814, + "step": 2758 + }, + { + "epoch": 0.8068431057172101, + "grad_norm": 1.7064740069380804, + "learning_rate": 1.3919323955034815e-05, + "loss": 0.8065239191055298, + "step": 2759 + }, + { + "epoch": 0.8071355461324755, + "grad_norm": 1.4850507802988735, + "learning_rate": 1.3914875595555509e-05, + "loss": 0.556678056716919, + "step": 2760 + }, + { + "epoch": 0.807427986547741, + "grad_norm": 1.653442619870376, + "learning_rate": 1.3910426320997834e-05, + "loss": 0.5528635382652283, + "step": 2761 + }, + { + "epoch": 0.8077204269630063, + "grad_norm": 1.4210714864438183, + "learning_rate": 1.3905976132401785e-05, + "loss": 0.6127038598060608, + "step": 2762 + }, + { + "epoch": 0.8080128673782717, + "grad_norm": 1.4473812948635245, + "learning_rate": 1.390152503080756e-05, + "loss": 0.6311757564544678, + "step": 2763 + }, + { + "epoch": 0.808305307793537, + "grad_norm": 1.256496005559394, + "learning_rate": 1.389707301725558e-05, + "loss": 0.669788122177124, + 
"step": 2764 + }, + { + "epoch": 0.8085977482088025, + "grad_norm": 1.1602455830470428, + "learning_rate": 1.3892620092786477e-05, + "loss": 0.48408570885658264, + "step": 2765 + }, + { + "epoch": 0.8088901886240678, + "grad_norm": 1.3816192110102654, + "learning_rate": 1.3888166258441098e-05, + "loss": 0.5648288726806641, + "step": 2766 + }, + { + "epoch": 0.8091826290393332, + "grad_norm": 1.359222924847667, + "learning_rate": 1.3883711515260497e-05, + "loss": 0.5894806385040283, + "step": 2767 + }, + { + "epoch": 0.8094750694545986, + "grad_norm": 1.609438084965147, + "learning_rate": 1.3879255864285939e-05, + "loss": 0.8325392603874207, + "step": 2768 + }, + { + "epoch": 0.809767509869864, + "grad_norm": 1.3200888192290248, + "learning_rate": 1.387479930655891e-05, + "loss": 0.5282119512557983, + "step": 2769 + }, + { + "epoch": 0.8100599502851294, + "grad_norm": 1.2020970963419326, + "learning_rate": 1.3870341843121104e-05, + "loss": 0.7565277218818665, + "step": 2770 + }, + { + "epoch": 0.8103523907003948, + "grad_norm": 1.20769025145285, + "learning_rate": 1.3865883475014424e-05, + "loss": 0.5767146944999695, + "step": 2771 + }, + { + "epoch": 0.8106448311156602, + "grad_norm": 1.3747646237948088, + "learning_rate": 1.3861424203280987e-05, + "loss": 0.5988898873329163, + "step": 2772 + }, + { + "epoch": 0.8109372715309255, + "grad_norm": 1.2837797411261327, + "learning_rate": 1.3856964028963119e-05, + "loss": 0.5752500295639038, + "step": 2773 + }, + { + "epoch": 0.811229711946191, + "grad_norm": 1.3281997353125305, + "learning_rate": 1.385250295310336e-05, + "loss": 0.6834297776222229, + "step": 2774 + }, + { + "epoch": 0.8115221523614563, + "grad_norm": 1.376792748908409, + "learning_rate": 1.3848040976744459e-05, + "loss": 0.5667037963867188, + "step": 2775 + }, + { + "epoch": 0.8118145927767217, + "grad_norm": 1.33236222276005, + "learning_rate": 1.3843578100929375e-05, + "loss": 0.5618781447410583, + "step": 2776 + }, + { + "epoch": 0.8121070331919872, 
+ "grad_norm": 1.4974631308124338, + "learning_rate": 1.3839114326701281e-05, + "loss": 0.538033664226532, + "step": 2777 + }, + { + "epoch": 0.8123994736072525, + "grad_norm": 1.3236430994846111, + "learning_rate": 1.3834649655103556e-05, + "loss": 0.7218335270881653, + "step": 2778 + }, + { + "epoch": 0.812691914022518, + "grad_norm": 1.3045533775783231, + "learning_rate": 1.383018408717979e-05, + "loss": 0.5979611873626709, + "step": 2779 + }, + { + "epoch": 0.8129843544377833, + "grad_norm": 1.191818251767074, + "learning_rate": 1.3825717623973775e-05, + "loss": 0.4958215355873108, + "step": 2780 + }, + { + "epoch": 0.8132767948530487, + "grad_norm": 1.4132643925978479, + "learning_rate": 1.3821250266529531e-05, + "loss": 0.6759654879570007, + "step": 2781 + }, + { + "epoch": 0.813569235268314, + "grad_norm": 1.1873413404245543, + "learning_rate": 1.3816782015891272e-05, + "loss": 0.5499521493911743, + "step": 2782 + }, + { + "epoch": 0.8138616756835795, + "grad_norm": 1.327517100573182, + "learning_rate": 1.3812312873103425e-05, + "loss": 0.5308753252029419, + "step": 2783 + }, + { + "epoch": 0.8141541160988449, + "grad_norm": 1.4850132833469487, + "learning_rate": 1.3807842839210617e-05, + "loss": 0.585492730140686, + "step": 2784 + }, + { + "epoch": 0.8144465565141102, + "grad_norm": 1.5985853231384999, + "learning_rate": 1.3803371915257702e-05, + "loss": 0.6598281860351562, + "step": 2785 + }, + { + "epoch": 0.8147389969293757, + "grad_norm": 1.2500600856454092, + "learning_rate": 1.3798900102289726e-05, + "loss": 0.6819334030151367, + "step": 2786 + }, + { + "epoch": 0.815031437344641, + "grad_norm": 2.1106639284366877, + "learning_rate": 1.3794427401351946e-05, + "loss": 0.6548545360565186, + "step": 2787 + }, + { + "epoch": 0.8153238777599064, + "grad_norm": 1.4934248295829666, + "learning_rate": 1.3789953813489834e-05, + "loss": 0.7836263179779053, + "step": 2788 + }, + { + "epoch": 0.8156163181751718, + "grad_norm": 1.3092153960785353, + 
"learning_rate": 1.3785479339749062e-05, + "loss": 0.6108324527740479, + "step": 2789 + }, + { + "epoch": 0.8159087585904372, + "grad_norm": 1.4189973842835568, + "learning_rate": 1.378100398117551e-05, + "loss": 0.7079485058784485, + "step": 2790 + }, + { + "epoch": 0.8162011990057025, + "grad_norm": 1.2593140459847156, + "learning_rate": 1.3776527738815264e-05, + "loss": 0.5935578346252441, + "step": 2791 + }, + { + "epoch": 0.816493639420968, + "grad_norm": 1.159439153093783, + "learning_rate": 1.3772050613714623e-05, + "loss": 0.5559983253479004, + "step": 2792 + }, + { + "epoch": 0.8167860798362334, + "grad_norm": 1.2282449471592758, + "learning_rate": 1.3767572606920083e-05, + "loss": 0.6230447292327881, + "step": 2793 + }, + { + "epoch": 0.8170785202514987, + "grad_norm": 1.3750755360912204, + "learning_rate": 1.3763093719478357e-05, + "loss": 0.5672184824943542, + "step": 2794 + }, + { + "epoch": 0.8173709606667642, + "grad_norm": 1.3345649111405589, + "learning_rate": 1.3758613952436353e-05, + "loss": 0.6933468580245972, + "step": 2795 + }, + { + "epoch": 0.8176634010820295, + "grad_norm": 1.299919441217989, + "learning_rate": 1.3754133306841188e-05, + "loss": 0.5873827934265137, + "step": 2796 + }, + { + "epoch": 0.8179558414972949, + "grad_norm": 1.3238138716227077, + "learning_rate": 1.3749651783740188e-05, + "loss": 0.6061393022537231, + "step": 2797 + }, + { + "epoch": 0.8182482819125603, + "grad_norm": 1.3503137209197107, + "learning_rate": 1.3745169384180886e-05, + "loss": 0.6218947768211365, + "step": 2798 + }, + { + "epoch": 0.8185407223278257, + "grad_norm": 1.584036085033884, + "learning_rate": 1.3740686109211008e-05, + "loss": 0.6092264652252197, + "step": 2799 + }, + { + "epoch": 0.8188331627430911, + "grad_norm": 1.4327213465282531, + "learning_rate": 1.3736201959878497e-05, + "loss": 0.6145539283752441, + "step": 2800 + }, + { + "epoch": 0.8191256031583565, + "grad_norm": 1.1433366189059146, + "learning_rate": 1.3731716937231493e-05, + 
"loss": 0.4637746214866638, + "step": 2801 + }, + { + "epoch": 0.8194180435736219, + "grad_norm": 1.2802202387296946, + "learning_rate": 1.3727231042318345e-05, + "loss": 0.6102726459503174, + "step": 2802 + }, + { + "epoch": 0.8197104839888872, + "grad_norm": 1.3432330324336637, + "learning_rate": 1.3722744276187603e-05, + "loss": 0.5885297060012817, + "step": 2803 + }, + { + "epoch": 0.8200029244041527, + "grad_norm": 1.4575985112282515, + "learning_rate": 1.3718256639888021e-05, + "loss": 0.592369019985199, + "step": 2804 + }, + { + "epoch": 0.820295364819418, + "grad_norm": 1.4943856663354038, + "learning_rate": 1.3713768134468557e-05, + "loss": 0.5194098949432373, + "step": 2805 + }, + { + "epoch": 0.8205878052346834, + "grad_norm": 1.3716539173176907, + "learning_rate": 1.370927876097837e-05, + "loss": 0.6033506393432617, + "step": 2806 + }, + { + "epoch": 0.8208802456499488, + "grad_norm": 1.686602588559283, + "learning_rate": 1.3704788520466828e-05, + "loss": 0.6866108179092407, + "step": 2807 + }, + { + "epoch": 0.8211726860652142, + "grad_norm": 1.564205528186879, + "learning_rate": 1.3700297413983492e-05, + "loss": 0.7325261831283569, + "step": 2808 + }, + { + "epoch": 0.8214651264804796, + "grad_norm": 1.531257665763453, + "learning_rate": 1.3695805442578136e-05, + "loss": 0.5422608852386475, + "step": 2809 + }, + { + "epoch": 0.821757566895745, + "grad_norm": 1.5581516895112182, + "learning_rate": 1.369131260730073e-05, + "loss": 0.6124732494354248, + "step": 2810 + }, + { + "epoch": 0.8220500073110104, + "grad_norm": 1.3009124551880797, + "learning_rate": 1.3686818909201442e-05, + "loss": 0.6097716093063354, + "step": 2811 + }, + { + "epoch": 0.8223424477262757, + "grad_norm": 1.302794206877671, + "learning_rate": 1.3682324349330652e-05, + "loss": 0.6283478140830994, + "step": 2812 + }, + { + "epoch": 0.8226348881415412, + "grad_norm": 1.6179042229288885, + "learning_rate": 1.3677828928738934e-05, + "loss": 0.6590027213096619, + "step": 2813 + }, + { 
+ "epoch": 0.8229273285568065, + "grad_norm": 1.5247617474384554, + "learning_rate": 1.3673332648477065e-05, + "loss": 0.6417049169540405, + "step": 2814 + }, + { + "epoch": 0.8232197689720719, + "grad_norm": 1.510678230362789, + "learning_rate": 1.3668835509596023e-05, + "loss": 0.6217149496078491, + "step": 2815 + }, + { + "epoch": 0.8235122093873374, + "grad_norm": 1.9022694632783144, + "learning_rate": 1.3664337513146993e-05, + "loss": 0.7530043125152588, + "step": 2816 + }, + { + "epoch": 0.8238046498026027, + "grad_norm": 1.3235640761468095, + "learning_rate": 1.3659838660181341e-05, + "loss": 0.6690578460693359, + "step": 2817 + }, + { + "epoch": 0.8240970902178681, + "grad_norm": 1.5311368229830338, + "learning_rate": 1.3655338951750657e-05, + "loss": 0.5348777174949646, + "step": 2818 + }, + { + "epoch": 0.8243895306331335, + "grad_norm": 1.494896630136579, + "learning_rate": 1.3650838388906718e-05, + "loss": 0.7076361179351807, + "step": 2819 + }, + { + "epoch": 0.8246819710483989, + "grad_norm": 1.611810759372966, + "learning_rate": 1.3646336972701507e-05, + "loss": 0.6649855375289917, + "step": 2820 + }, + { + "epoch": 0.8249744114636642, + "grad_norm": 1.4188027146347701, + "learning_rate": 1.3641834704187194e-05, + "loss": 0.6484942436218262, + "step": 2821 + }, + { + "epoch": 0.8252668518789297, + "grad_norm": 1.066364944063908, + "learning_rate": 1.3637331584416163e-05, + "loss": 0.5167717337608337, + "step": 2822 + }, + { + "epoch": 0.8255592922941951, + "grad_norm": 1.4320675291883214, + "learning_rate": 1.3632827614440988e-05, + "loss": 0.7808440327644348, + "step": 2823 + }, + { + "epoch": 0.8258517327094604, + "grad_norm": 1.6437853600585473, + "learning_rate": 1.3628322795314449e-05, + "loss": 0.551183819770813, + "step": 2824 + }, + { + "epoch": 0.8261441731247259, + "grad_norm": 1.3439080199790612, + "learning_rate": 1.3623817128089513e-05, + "loss": 0.6084691286087036, + "step": 2825 + }, + { + "epoch": 0.8264366135399912, + "grad_norm": 
1.3974747336185755, + "learning_rate": 1.3619310613819363e-05, + "loss": 0.6251019239425659, + "step": 2826 + }, + { + "epoch": 0.8267290539552566, + "grad_norm": 1.237260204163714, + "learning_rate": 1.3614803253557358e-05, + "loss": 0.5037761926651001, + "step": 2827 + }, + { + "epoch": 0.827021494370522, + "grad_norm": 1.3461097726205675, + "learning_rate": 1.3610295048357072e-05, + "loss": 0.5606831312179565, + "step": 2828 + }, + { + "epoch": 0.8273139347857874, + "grad_norm": 1.3850167464051482, + "learning_rate": 1.360578599927227e-05, + "loss": 0.6664785146713257, + "step": 2829 + }, + { + "epoch": 0.8276063752010527, + "grad_norm": 1.3613746427457352, + "learning_rate": 1.360127610735691e-05, + "loss": 0.7105492353439331, + "step": 2830 + }, + { + "epoch": 0.8278988156163182, + "grad_norm": 1.3577681820511107, + "learning_rate": 1.3596765373665162e-05, + "loss": 0.6255359053611755, + "step": 2831 + }, + { + "epoch": 0.8281912560315836, + "grad_norm": 1.3150522794807806, + "learning_rate": 1.3592253799251377e-05, + "loss": 0.5422149300575256, + "step": 2832 + }, + { + "epoch": 0.8284836964468489, + "grad_norm": 1.4383576380181533, + "learning_rate": 1.3587741385170104e-05, + "loss": 0.6044044494628906, + "step": 2833 + }, + { + "epoch": 0.8287761368621144, + "grad_norm": 1.2478223452248756, + "learning_rate": 1.3583228132476094e-05, + "loss": 0.6256763935089111, + "step": 2834 + }, + { + "epoch": 0.8290685772773797, + "grad_norm": 1.2507601544621354, + "learning_rate": 1.3578714042224297e-05, + "loss": 0.6759064793586731, + "step": 2835 + }, + { + "epoch": 0.8293610176926451, + "grad_norm": 1.3610869198536528, + "learning_rate": 1.3574199115469852e-05, + "loss": 0.5819023251533508, + "step": 2836 + }, + { + "epoch": 0.8296534581079105, + "grad_norm": 1.331505314238688, + "learning_rate": 1.3569683353268098e-05, + "loss": 0.5412642359733582, + "step": 2837 + }, + { + "epoch": 0.8299458985231759, + "grad_norm": 1.0998151045906572, + "learning_rate": 
1.356516675667456e-05, + "loss": 0.5129171013832092, + "step": 2838 + }, + { + "epoch": 0.8302383389384413, + "grad_norm": 1.310393887156268, + "learning_rate": 1.356064932674497e-05, + "loss": 0.5165198445320129, + "step": 2839 + }, + { + "epoch": 0.8305307793537067, + "grad_norm": 1.287643091691659, + "learning_rate": 1.3556131064535249e-05, + "loss": 0.6545724272727966, + "step": 2840 + }, + { + "epoch": 0.8308232197689721, + "grad_norm": 1.2180901867245224, + "learning_rate": 1.3551611971101513e-05, + "loss": 0.5715968608856201, + "step": 2841 + }, + { + "epoch": 0.8311156601842374, + "grad_norm": 1.1619522611517994, + "learning_rate": 1.3547092047500074e-05, + "loss": 0.7063779830932617, + "step": 2842 + }, + { + "epoch": 0.8314081005995029, + "grad_norm": 1.2876429096537105, + "learning_rate": 1.3542571294787437e-05, + "loss": 0.6391212940216064, + "step": 2843 + }, + { + "epoch": 0.8317005410147682, + "grad_norm": 1.3047489403917027, + "learning_rate": 1.3538049714020298e-05, + "loss": 0.7145380973815918, + "step": 2844 + }, + { + "epoch": 0.8319929814300336, + "grad_norm": 1.4749234473747483, + "learning_rate": 1.3533527306255547e-05, + "loss": 0.7262213230133057, + "step": 2845 + }, + { + "epoch": 0.832285421845299, + "grad_norm": 1.5661213009447377, + "learning_rate": 1.3529004072550276e-05, + "loss": 0.7621959447860718, + "step": 2846 + }, + { + "epoch": 0.8325778622605644, + "grad_norm": 1.2349365167185542, + "learning_rate": 1.3524480013961757e-05, + "loss": 0.6372592449188232, + "step": 2847 + }, + { + "epoch": 0.8328703026758298, + "grad_norm": 1.5746526285594844, + "learning_rate": 1.3519955131547469e-05, + "loss": 0.6223774552345276, + "step": 2848 + }, + { + "epoch": 0.8331627430910952, + "grad_norm": 1.3246634087041118, + "learning_rate": 1.3515429426365066e-05, + "loss": 0.6500433683395386, + "step": 2849 + }, + { + "epoch": 0.8334551835063606, + "grad_norm": 1.4424195637381385, + "learning_rate": 1.3510902899472408e-05, + "loss": 
0.6136040687561035, + "step": 2850 + }, + { + "epoch": 0.8337476239216259, + "grad_norm": 1.512738908953339, + "learning_rate": 1.3506375551927546e-05, + "loss": 0.5297173261642456, + "step": 2851 + }, + { + "epoch": 0.8340400643368914, + "grad_norm": 1.4629352546381682, + "learning_rate": 1.3501847384788718e-05, + "loss": 0.6215870976448059, + "step": 2852 + }, + { + "epoch": 0.8343325047521567, + "grad_norm": 1.3184866454725659, + "learning_rate": 1.3497318399114354e-05, + "loss": 0.5507583618164062, + "step": 2853 + }, + { + "epoch": 0.8346249451674221, + "grad_norm": 1.6022185079697295, + "learning_rate": 1.349278859596308e-05, + "loss": 0.6348794102668762, + "step": 2854 + }, + { + "epoch": 0.8349173855826876, + "grad_norm": 1.4038791520130975, + "learning_rate": 1.3488257976393708e-05, + "loss": 0.7009605765342712, + "step": 2855 + }, + { + "epoch": 0.8352098259979529, + "grad_norm": 1.2288500000369813, + "learning_rate": 1.3483726541465238e-05, + "loss": 0.6268658638000488, + "step": 2856 + }, + { + "epoch": 0.8355022664132183, + "grad_norm": 1.1391793971559063, + "learning_rate": 1.3479194292236875e-05, + "loss": 0.7187683582305908, + "step": 2857 + }, + { + "epoch": 0.8357947068284837, + "grad_norm": 1.5724396660128028, + "learning_rate": 1.3474661229768002e-05, + "loss": 0.7016449570655823, + "step": 2858 + }, + { + "epoch": 0.8360871472437491, + "grad_norm": 1.5882858400771258, + "learning_rate": 1.347012735511819e-05, + "loss": 0.5852428674697876, + "step": 2859 + }, + { + "epoch": 0.8363795876590144, + "grad_norm": 1.4143289380031852, + "learning_rate": 1.3465592669347207e-05, + "loss": 0.6232450008392334, + "step": 2860 + }, + { + "epoch": 0.8366720280742799, + "grad_norm": 1.3444277392597084, + "learning_rate": 1.346105717351501e-05, + "loss": 0.526097297668457, + "step": 2861 + }, + { + "epoch": 0.8369644684895453, + "grad_norm": 1.5627282993073515, + "learning_rate": 1.3456520868681741e-05, + "loss": 0.6065535545349121, + "step": 2862 + }, + { + 
"epoch": 0.8372569089048106, + "grad_norm": 1.3941305759607394, + "learning_rate": 1.3451983755907736e-05, + "loss": 0.5836296677589417, + "step": 2863 + }, + { + "epoch": 0.8375493493200761, + "grad_norm": 1.336778139255592, + "learning_rate": 1.3447445836253519e-05, + "loss": 0.678827166557312, + "step": 2864 + }, + { + "epoch": 0.8378417897353414, + "grad_norm": 1.3002974651392025, + "learning_rate": 1.3442907110779794e-05, + "loss": 0.5206096172332764, + "step": 2865 + }, + { + "epoch": 0.8381342301506068, + "grad_norm": 1.3468789034772342, + "learning_rate": 1.3438367580547468e-05, + "loss": 0.6424980163574219, + "step": 2866 + }, + { + "epoch": 0.8384266705658722, + "grad_norm": 1.1467777796306478, + "learning_rate": 1.3433827246617624e-05, + "loss": 0.6293484568595886, + "step": 2867 + }, + { + "epoch": 0.8387191109811376, + "grad_norm": 1.2601562582063903, + "learning_rate": 1.3429286110051539e-05, + "loss": 0.5912167429924011, + "step": 2868 + }, + { + "epoch": 0.8390115513964029, + "grad_norm": 1.5181261084157656, + "learning_rate": 1.342474417191068e-05, + "loss": 0.6571674346923828, + "step": 2869 + }, + { + "epoch": 0.8393039918116684, + "grad_norm": 1.421037061270542, + "learning_rate": 1.342020143325669e-05, + "loss": 0.5519720911979675, + "step": 2870 + }, + { + "epoch": 0.8395964322269338, + "grad_norm": 1.3997247827352193, + "learning_rate": 1.341565789515141e-05, + "loss": 0.6465001106262207, + "step": 2871 + }, + { + "epoch": 0.8398888726421991, + "grad_norm": 1.398359818513133, + "learning_rate": 1.3411113558656865e-05, + "loss": 0.6022073030471802, + "step": 2872 + }, + { + "epoch": 0.8401813130574646, + "grad_norm": 1.361775248337709, + "learning_rate": 1.3406568424835264e-05, + "loss": 0.610893726348877, + "step": 2873 + }, + { + "epoch": 0.8404737534727299, + "grad_norm": 1.427563498701008, + "learning_rate": 1.340202249474901e-05, + "loss": 0.5296563506126404, + "step": 2874 + }, + { + "epoch": 0.8407661938879953, + "grad_norm": 
1.170906744718837, + "learning_rate": 1.3397475769460679e-05, + "loss": 0.6327008605003357, + "step": 2875 + }, + { + "epoch": 0.8410586343032607, + "grad_norm": 1.3517928558744952, + "learning_rate": 1.3392928250033045e-05, + "loss": 0.6437617540359497, + "step": 2876 + }, + { + "epoch": 0.8413510747185261, + "grad_norm": 1.3416431365752262, + "learning_rate": 1.3388379937529063e-05, + "loss": 0.5627291202545166, + "step": 2877 + }, + { + "epoch": 0.8416435151337915, + "grad_norm": 1.3602688623647594, + "learning_rate": 1.3383830833011871e-05, + "loss": 0.5921163558959961, + "step": 2878 + }, + { + "epoch": 0.8419359555490569, + "grad_norm": 1.2033937218328357, + "learning_rate": 1.3379280937544797e-05, + "loss": 0.5749082565307617, + "step": 2879 + }, + { + "epoch": 0.8422283959643223, + "grad_norm": 1.462463173522237, + "learning_rate": 1.3374730252191347e-05, + "loss": 0.6294553279876709, + "step": 2880 + }, + { + "epoch": 0.8425208363795876, + "grad_norm": 1.222130659730857, + "learning_rate": 1.3370178778015223e-05, + "loss": 0.5172078609466553, + "step": 2881 + }, + { + "epoch": 0.8428132767948531, + "grad_norm": 1.3695607626504847, + "learning_rate": 1.3365626516080301e-05, + "loss": 0.44069811701774597, + "step": 2882 + }, + { + "epoch": 0.8431057172101184, + "grad_norm": 1.31704500891114, + "learning_rate": 1.336107346745064e-05, + "loss": 0.72663813829422, + "step": 2883 + }, + { + "epoch": 0.8433981576253838, + "grad_norm": 1.3488066557741722, + "learning_rate": 1.3356519633190495e-05, + "loss": 0.6562269926071167, + "step": 2884 + }, + { + "epoch": 0.8436905980406492, + "grad_norm": 1.3994820366244107, + "learning_rate": 1.3351965014364293e-05, + "loss": 0.699925422668457, + "step": 2885 + }, + { + "epoch": 0.8439830384559146, + "grad_norm": 1.3294441855934318, + "learning_rate": 1.3347409612036651e-05, + "loss": 0.5902425646781921, + "step": 2886 + }, + { + "epoch": 0.84427547887118, + "grad_norm": 1.401705271294413, + "learning_rate": 
1.3342853427272362e-05, + "loss": 0.613966703414917, + "step": 2887 + }, + { + "epoch": 0.8445679192864454, + "grad_norm": 1.2630848315271062, + "learning_rate": 1.333829646113641e-05, + "loss": 0.5864139199256897, + "step": 2888 + }, + { + "epoch": 0.8448603597017108, + "grad_norm": 1.5447722719058155, + "learning_rate": 1.3333738714693958e-05, + "loss": 0.5851572751998901, + "step": 2889 + }, + { + "epoch": 0.8451528001169761, + "grad_norm": 1.4679598706703352, + "learning_rate": 1.3329180189010348e-05, + "loss": 0.6564328074455261, + "step": 2890 + }, + { + "epoch": 0.8454452405322416, + "grad_norm": 1.3794930949186583, + "learning_rate": 1.3324620885151115e-05, + "loss": 0.6745615005493164, + "step": 2891 + }, + { + "epoch": 0.8457376809475069, + "grad_norm": 1.277678612967463, + "learning_rate": 1.3320060804181962e-05, + "loss": 0.5003606081008911, + "step": 2892 + }, + { + "epoch": 0.8460301213627723, + "grad_norm": 1.4995028165986726, + "learning_rate": 1.3315499947168781e-05, + "loss": 0.6646369695663452, + "step": 2893 + }, + { + "epoch": 0.8463225617780378, + "grad_norm": 1.3696086888087433, + "learning_rate": 1.3310938315177647e-05, + "loss": 0.6903572082519531, + "step": 2894 + }, + { + "epoch": 0.8466150021933031, + "grad_norm": 1.633835119151456, + "learning_rate": 1.330637590927481e-05, + "loss": 0.6221956610679626, + "step": 2895 + }, + { + "epoch": 0.8469074426085685, + "grad_norm": 1.5369372818354106, + "learning_rate": 1.3301812730526713e-05, + "loss": 0.5602666139602661, + "step": 2896 + }, + { + "epoch": 0.8471998830238339, + "grad_norm": 1.2910113915198014, + "learning_rate": 1.3297248779999963e-05, + "loss": 0.5843783617019653, + "step": 2897 + }, + { + "epoch": 0.8474923234390993, + "grad_norm": 1.304495064263293, + "learning_rate": 1.3292684058761357e-05, + "loss": 0.5040254592895508, + "step": 2898 + }, + { + "epoch": 0.8477847638543646, + "grad_norm": 1.4968280315795712, + "learning_rate": 1.3288118567877874e-05, + "loss": 
0.6180210709571838, + "step": 2899 + }, + { + "epoch": 0.8480772042696301, + "grad_norm": 1.345230482752467, + "learning_rate": 1.3283552308416668e-05, + "loss": 0.5050851106643677, + "step": 2900 + }, + { + "epoch": 0.8483696446848955, + "grad_norm": 1.604217394640997, + "learning_rate": 1.3278985281445072e-05, + "loss": 0.6627126932144165, + "step": 2901 + }, + { + "epoch": 0.8486620851001608, + "grad_norm": 1.40930260394039, + "learning_rate": 1.3274417488030607e-05, + "loss": 0.5984441041946411, + "step": 2902 + }, + { + "epoch": 0.8489545255154263, + "grad_norm": 1.3584927833580034, + "learning_rate": 1.3269848929240958e-05, + "loss": 0.611599326133728, + "step": 2903 + }, + { + "epoch": 0.8492469659306916, + "grad_norm": 1.4743229169395644, + "learning_rate": 1.3265279606144006e-05, + "loss": 0.6057847142219543, + "step": 2904 + }, + { + "epoch": 0.849539406345957, + "grad_norm": 1.5324921987406994, + "learning_rate": 1.3260709519807797e-05, + "loss": 0.7123644948005676, + "step": 2905 + }, + { + "epoch": 0.8498318467612224, + "grad_norm": 1.4337194400937256, + "learning_rate": 1.3256138671300564e-05, + "loss": 0.6193811893463135, + "step": 2906 + }, + { + "epoch": 0.8501242871764878, + "grad_norm": 1.6102821646068017, + "learning_rate": 1.3251567061690717e-05, + "loss": 0.5775484442710876, + "step": 2907 + }, + { + "epoch": 0.8504167275917531, + "grad_norm": 1.5171257755680165, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.5655511617660522, + "step": 2908 + }, + { + "epoch": 0.8507091680070186, + "grad_norm": 1.8299545213851978, + "learning_rate": 1.3242421563437688e-05, + "loss": 0.6216102838516235, + "step": 2909 + }, + { + "epoch": 0.851001608422284, + "grad_norm": 1.4045274179517395, + "learning_rate": 1.3237847676932217e-05, + "loss": 0.649554967880249, + "step": 2910 + }, + { + "epoch": 0.8512940488375493, + "grad_norm": 1.5965021256139, + "learning_rate": 1.3233273033599534e-05, + "loss": 0.6688281297683716, + "step": 2911 + }, + { + "epoch": 
0.8515864892528148, + "grad_norm": 1.2158705367599922, + "learning_rate": 1.322869763450894e-05, + "loss": 0.664188027381897, + "step": 2912 + }, + { + "epoch": 0.8518789296680801, + "grad_norm": 1.27994094299147, + "learning_rate": 1.3224121480729905e-05, + "loss": 0.47189265489578247, + "step": 2913 + }, + { + "epoch": 0.8521713700833455, + "grad_norm": 1.387813816085696, + "learning_rate": 1.3219544573332075e-05, + "loss": 0.6190480589866638, + "step": 2914 + }, + { + "epoch": 0.8524638104986109, + "grad_norm": 1.3459335682790516, + "learning_rate": 1.3214966913385277e-05, + "loss": 0.6564091444015503, + "step": 2915 + }, + { + "epoch": 0.8527562509138763, + "grad_norm": 1.563994961699158, + "learning_rate": 1.321038850195951e-05, + "loss": 0.6083766222000122, + "step": 2916 + }, + { + "epoch": 0.8530486913291417, + "grad_norm": 1.2689051257322506, + "learning_rate": 1.3205809340124951e-05, + "loss": 0.5262473821640015, + "step": 2917 + }, + { + "epoch": 0.8533411317444071, + "grad_norm": 1.3633671661320785, + "learning_rate": 1.320122942895195e-05, + "loss": 0.6170297861099243, + "step": 2918 + }, + { + "epoch": 0.8536335721596725, + "grad_norm": 1.3838619263880951, + "learning_rate": 1.3196648769511036e-05, + "loss": 0.5791536569595337, + "step": 2919 + }, + { + "epoch": 0.8539260125749378, + "grad_norm": 1.4116909766151964, + "learning_rate": 1.3192067362872904e-05, + "loss": 0.5870766639709473, + "step": 2920 + }, + { + "epoch": 0.8542184529902033, + "grad_norm": 1.5317627298998806, + "learning_rate": 1.3187485210108438e-05, + "loss": 0.604548990726471, + "step": 2921 + }, + { + "epoch": 0.8545108934054686, + "grad_norm": 1.3458362989469688, + "learning_rate": 1.3182902312288682e-05, + "loss": 0.5292568206787109, + "step": 2922 + }, + { + "epoch": 0.854803333820734, + "grad_norm": 1.276264176970529, + "learning_rate": 1.3178318670484862e-05, + "loss": 0.5638582706451416, + "step": 2923 + }, + { + "epoch": 0.8550957742359994, + "grad_norm": 
1.5369089697533718, + "learning_rate": 1.317373428576838e-05, + "loss": 0.5730164051055908, + "step": 2924 + }, + { + "epoch": 0.8553882146512648, + "grad_norm": 1.369500285153578, + "learning_rate": 1.3169149159210803e-05, + "loss": 0.6170799732208252, + "step": 2925 + }, + { + "epoch": 0.8556806550665302, + "grad_norm": 1.3401436683949477, + "learning_rate": 1.3164563291883879e-05, + "loss": 0.591925323009491, + "step": 2926 + }, + { + "epoch": 0.8559730954817956, + "grad_norm": 1.5178314176439451, + "learning_rate": 1.3159976684859528e-05, + "loss": 0.7269439697265625, + "step": 2927 + }, + { + "epoch": 0.856265535897061, + "grad_norm": 1.552203527248451, + "learning_rate": 1.3155389339209839e-05, + "loss": 0.615471363067627, + "step": 2928 + }, + { + "epoch": 0.8565579763123263, + "grad_norm": 1.4397776020126687, + "learning_rate": 1.3150801256007076e-05, + "loss": 0.6264692544937134, + "step": 2929 + }, + { + "epoch": 0.8568504167275918, + "grad_norm": 1.203302342126932, + "learning_rate": 1.314621243632368e-05, + "loss": 0.5729779005050659, + "step": 2930 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 1.3833464526102248, + "learning_rate": 1.314162288123225e-05, + "loss": 0.6462980508804321, + "step": 2931 + }, + { + "epoch": 0.8574352975581225, + "grad_norm": 1.1795102455310789, + "learning_rate": 1.3137032591805577e-05, + "loss": 0.5493176579475403, + "step": 2932 + }, + { + "epoch": 0.857727737973388, + "grad_norm": 1.1422942251299026, + "learning_rate": 1.3132441569116608e-05, + "loss": 0.49161234498023987, + "step": 2933 + }, + { + "epoch": 0.8580201783886533, + "grad_norm": 1.428090020215004, + "learning_rate": 1.312784981423847e-05, + "loss": 0.6724506616592407, + "step": 2934 + }, + { + "epoch": 0.8583126188039187, + "grad_norm": 1.6216709335890533, + "learning_rate": 1.3123257328244455e-05, + "loss": 0.6180965900421143, + "step": 2935 + }, + { + "epoch": 0.8586050592191841, + "grad_norm": 1.6797724821518334, + "learning_rate": 
1.3118664112208027e-05, + "loss": 0.6676491498947144, + "step": 2936 + }, + { + "epoch": 0.8588974996344495, + "grad_norm": 1.1911121778916818, + "learning_rate": 1.3114070167202827e-05, + "loss": 0.5964041948318481, + "step": 2937 + }, + { + "epoch": 0.8591899400497148, + "grad_norm": 1.3660050885815391, + "learning_rate": 1.3109475494302657e-05, + "loss": 0.708328366279602, + "step": 2938 + }, + { + "epoch": 0.8594823804649803, + "grad_norm": 1.6146616988047677, + "learning_rate": 1.3104880094581495e-05, + "loss": 0.6360403299331665, + "step": 2939 + }, + { + "epoch": 0.8597748208802457, + "grad_norm": 1.5628439078603966, + "learning_rate": 1.3100283969113494e-05, + "loss": 0.5450131893157959, + "step": 2940 + }, + { + "epoch": 0.860067261295511, + "grad_norm": 1.2422442713506727, + "learning_rate": 1.3095687118972962e-05, + "loss": 0.4472329020500183, + "step": 2941 + }, + { + "epoch": 0.8603597017107765, + "grad_norm": 1.2824654152788901, + "learning_rate": 1.3091089545234387e-05, + "loss": 0.6853972673416138, + "step": 2942 + }, + { + "epoch": 0.8606521421260418, + "grad_norm": 1.5236765495118778, + "learning_rate": 1.3086491248972429e-05, + "loss": 0.6547979116439819, + "step": 2943 + }, + { + "epoch": 0.8609445825413072, + "grad_norm": 1.2521364069886292, + "learning_rate": 1.3081892231261903e-05, + "loss": 0.46194693446159363, + "step": 2944 + }, + { + "epoch": 0.8612370229565726, + "grad_norm": 1.3749685968664958, + "learning_rate": 1.307729249317781e-05, + "loss": 0.5715345144271851, + "step": 2945 + }, + { + "epoch": 0.861529463371838, + "grad_norm": 1.2925136251134925, + "learning_rate": 1.3072692035795305e-05, + "loss": 0.5590982437133789, + "step": 2946 + }, + { + "epoch": 0.8618219037871033, + "grad_norm": 1.4594997051230878, + "learning_rate": 1.3068090860189719e-05, + "loss": 0.5435009002685547, + "step": 2947 + }, + { + "epoch": 0.8621143442023688, + "grad_norm": 1.1604259212434795, + "learning_rate": 1.3063488967436548e-05, + "loss": 
0.4528965651988983, + "step": 2948 + }, + { + "epoch": 0.8624067846176342, + "grad_norm": 1.1967844606343032, + "learning_rate": 1.3058886358611457e-05, + "loss": 0.5520291328430176, + "step": 2949 + }, + { + "epoch": 0.8626992250328995, + "grad_norm": 1.3959982999797578, + "learning_rate": 1.305428303479028e-05, + "loss": 0.6444021463394165, + "step": 2950 + }, + { + "epoch": 0.862991665448165, + "grad_norm": 1.597979452275331, + "learning_rate": 1.3049678997049016e-05, + "loss": 0.7808041572570801, + "step": 2951 + }, + { + "epoch": 0.8632841058634303, + "grad_norm": 1.6855013913251111, + "learning_rate": 1.3045074246463825e-05, + "loss": 0.6297428607940674, + "step": 2952 + }, + { + "epoch": 0.8635765462786957, + "grad_norm": 1.5203533995419023, + "learning_rate": 1.3040468784111045e-05, + "loss": 0.5776612162590027, + "step": 2953 + }, + { + "epoch": 0.8638689866939611, + "grad_norm": 1.3696314111811954, + "learning_rate": 1.3035862611067169e-05, + "loss": 0.49298524856567383, + "step": 2954 + }, + { + "epoch": 0.8641614271092265, + "grad_norm": 1.7023849342400221, + "learning_rate": 1.303125572840887e-05, + "loss": 0.8061650991439819, + "step": 2955 + }, + { + "epoch": 0.8644538675244919, + "grad_norm": 1.4509821363343893, + "learning_rate": 1.3026648137212976e-05, + "loss": 0.7741662859916687, + "step": 2956 + }, + { + "epoch": 0.8647463079397573, + "grad_norm": 1.350671993753925, + "learning_rate": 1.302203983855648e-05, + "loss": 0.5589889287948608, + "step": 2957 + }, + { + "epoch": 0.8650387483550227, + "grad_norm": 1.400755532782556, + "learning_rate": 1.3017430833516547e-05, + "loss": 0.5801941752433777, + "step": 2958 + }, + { + "epoch": 0.865331188770288, + "grad_norm": 1.3298019485580883, + "learning_rate": 1.30128211231705e-05, + "loss": 0.5874185562133789, + "step": 2959 + }, + { + "epoch": 0.8656236291855535, + "grad_norm": 1.1737111706818832, + "learning_rate": 1.3008210708595837e-05, + "loss": 0.6062727570533752, + "step": 2960 + }, + { + 
"epoch": 0.8659160696008188, + "grad_norm": 1.3334829952801492, + "learning_rate": 1.3003599590870209e-05, + "loss": 0.571448802947998, + "step": 2961 + }, + { + "epoch": 0.8662085100160842, + "grad_norm": 1.3654619359177553, + "learning_rate": 1.2998987771071442e-05, + "loss": 0.7001944780349731, + "step": 2962 + }, + { + "epoch": 0.8665009504313496, + "grad_norm": 1.48577297171421, + "learning_rate": 1.2994375250277516e-05, + "loss": 0.49182790517807007, + "step": 2963 + }, + { + "epoch": 0.866793390846615, + "grad_norm": 1.342673325945858, + "learning_rate": 1.298976202956658e-05, + "loss": 0.5299041271209717, + "step": 2964 + }, + { + "epoch": 0.8670858312618804, + "grad_norm": 1.1975267191215118, + "learning_rate": 1.2985148110016947e-05, + "loss": 0.4955265522003174, + "step": 2965 + }, + { + "epoch": 0.8673782716771458, + "grad_norm": 1.132262479106049, + "learning_rate": 1.2980533492707094e-05, + "loss": 0.6395630836486816, + "step": 2966 + }, + { + "epoch": 0.8676707120924112, + "grad_norm": 1.1303573523984183, + "learning_rate": 1.2975918178715661e-05, + "loss": 0.5926274061203003, + "step": 2967 + }, + { + "epoch": 0.8679631525076765, + "grad_norm": 1.194805436445147, + "learning_rate": 1.2971302169121447e-05, + "loss": 0.5556914806365967, + "step": 2968 + }, + { + "epoch": 0.868255592922942, + "grad_norm": 1.2766981949480176, + "learning_rate": 1.2966685465003415e-05, + "loss": 0.5347195863723755, + "step": 2969 + }, + { + "epoch": 0.8685480333382073, + "grad_norm": 1.3728880032694415, + "learning_rate": 1.2962068067440694e-05, + "loss": 0.6839208006858826, + "step": 2970 + }, + { + "epoch": 0.8688404737534727, + "grad_norm": 1.1132776608061867, + "learning_rate": 1.295744997751257e-05, + "loss": 0.5741337537765503, + "step": 2971 + }, + { + "epoch": 0.8691329141687382, + "grad_norm": 1.536125480269087, + "learning_rate": 1.29528311962985e-05, + "loss": 0.7383404970169067, + "step": 2972 + }, + { + "epoch": 0.8694253545840035, + "grad_norm": 
1.4560088611056379, + "learning_rate": 1.294821172487809e-05, + "loss": 0.5075374245643616, + "step": 2973 + }, + { + "epoch": 0.8697177949992689, + "grad_norm": 1.235849675897421, + "learning_rate": 1.2943591564331113e-05, + "loss": 0.557248592376709, + "step": 2974 + }, + { + "epoch": 0.8700102354145343, + "grad_norm": 1.3655420768672006, + "learning_rate": 1.2938970715737506e-05, + "loss": 0.5687203407287598, + "step": 2975 + }, + { + "epoch": 0.8703026758297997, + "grad_norm": 1.3479345698129241, + "learning_rate": 1.2934349180177364e-05, + "loss": 0.5946108102798462, + "step": 2976 + }, + { + "epoch": 0.870595116245065, + "grad_norm": 1.258994257926457, + "learning_rate": 1.2929726958730942e-05, + "loss": 0.6103173494338989, + "step": 2977 + }, + { + "epoch": 0.8708875566603305, + "grad_norm": 1.4914714674105345, + "learning_rate": 1.2925104052478657e-05, + "loss": 0.7007244825363159, + "step": 2978 + }, + { + "epoch": 0.8711799970755959, + "grad_norm": 1.4140285074261345, + "learning_rate": 1.2920480462501082e-05, + "loss": 0.6157742142677307, + "step": 2979 + }, + { + "epoch": 0.8714724374908612, + "grad_norm": 1.4708644175648395, + "learning_rate": 1.2915856189878956e-05, + "loss": 0.6501113176345825, + "step": 2980 + }, + { + "epoch": 0.8717648779061267, + "grad_norm": 1.2555000815915451, + "learning_rate": 1.2911231235693178e-05, + "loss": 0.5084626078605652, + "step": 2981 + }, + { + "epoch": 0.872057318321392, + "grad_norm": 1.343175395168551, + "learning_rate": 1.2906605601024796e-05, + "loss": 0.5953651666641235, + "step": 2982 + }, + { + "epoch": 0.8723497587366574, + "grad_norm": 1.218776434986359, + "learning_rate": 1.290197928695503e-05, + "loss": 0.5733205676078796, + "step": 2983 + }, + { + "epoch": 0.8726421991519228, + "grad_norm": 1.5420791901099857, + "learning_rate": 1.2897352294565248e-05, + "loss": 0.5976133942604065, + "step": 2984 + }, + { + "epoch": 0.8729346395671882, + "grad_norm": 1.2904353456419873, + "learning_rate": 
1.2892724624936983e-05, + "loss": 0.5092414617538452, + "step": 2985 + }, + { + "epoch": 0.8732270799824535, + "grad_norm": 1.4935525581566107, + "learning_rate": 1.2888096279151926e-05, + "loss": 0.7244688272476196, + "step": 2986 + }, + { + "epoch": 0.873519520397719, + "grad_norm": 1.5818576721862576, + "learning_rate": 1.2883467258291922e-05, + "loss": 0.6943881511688232, + "step": 2987 + }, + { + "epoch": 0.8738119608129844, + "grad_norm": 1.422762914124539, + "learning_rate": 1.287883756343898e-05, + "loss": 0.6484338641166687, + "step": 2988 + }, + { + "epoch": 0.8741044012282497, + "grad_norm": 1.23046146833686, + "learning_rate": 1.2874207195675262e-05, + "loss": 0.620865523815155, + "step": 2989 + }, + { + "epoch": 0.8743968416435152, + "grad_norm": 1.245843663622743, + "learning_rate": 1.2869576156083085e-05, + "loss": 0.5290236473083496, + "step": 2990 + }, + { + "epoch": 0.8746892820587805, + "grad_norm": 1.383695697280258, + "learning_rate": 1.2864944445744932e-05, + "loss": 0.7140257358551025, + "step": 2991 + }, + { + "epoch": 0.8749817224740459, + "grad_norm": 1.3901579888827407, + "learning_rate": 1.286031206574343e-05, + "loss": 0.7167611122131348, + "step": 2992 + }, + { + "epoch": 0.8752741628893113, + "grad_norm": 1.4097752029885913, + "learning_rate": 1.2855679017161372e-05, + "loss": 0.5631322860717773, + "step": 2993 + }, + { + "epoch": 0.8755666033045767, + "grad_norm": 1.4535459078300315, + "learning_rate": 1.2851045301081714e-05, + "loss": 0.6250770092010498, + "step": 2994 + }, + { + "epoch": 0.8758590437198421, + "grad_norm": 1.3041015408341177, + "learning_rate": 1.2846410918587546e-05, + "loss": 0.5121266841888428, + "step": 2995 + }, + { + "epoch": 0.8761514841351075, + "grad_norm": 1.2982813372349626, + "learning_rate": 1.2841775870762134e-05, + "loss": 0.6075780987739563, + "step": 2996 + }, + { + "epoch": 0.8764439245503729, + "grad_norm": 1.2610269556078437, + "learning_rate": 1.283714015868889e-05, + "loss": 0.516838014125824, 
+ "step": 2997 + }, + { + "epoch": 0.8767363649656382, + "grad_norm": 1.3997368275790003, + "learning_rate": 1.2832503783451384e-05, + "loss": 0.6952051520347595, + "step": 2998 + }, + { + "epoch": 0.8770288053809037, + "grad_norm": 1.668277066498958, + "learning_rate": 1.2827866746133342e-05, + "loss": 0.8039685487747192, + "step": 2999 + }, + { + "epoch": 0.877321245796169, + "grad_norm": 1.3275322129226486, + "learning_rate": 1.2823229047818642e-05, + "loss": 0.6200549602508545, + "step": 3000 + }, + { + "epoch": 0.8776136862114344, + "grad_norm": 1.3153882408773916, + "learning_rate": 1.2818590689591315e-05, + "loss": 0.6666116714477539, + "step": 3001 + }, + { + "epoch": 0.8779061266266998, + "grad_norm": 1.4255915593552042, + "learning_rate": 1.2813951672535551e-05, + "loss": 0.566741943359375, + "step": 3002 + }, + { + "epoch": 0.8781985670419652, + "grad_norm": 1.2390037918473238, + "learning_rate": 1.2809311997735697e-05, + "loss": 0.6103402376174927, + "step": 3003 + }, + { + "epoch": 0.8784910074572306, + "grad_norm": 1.1444583076116077, + "learning_rate": 1.280467166627624e-05, + "loss": 0.48296916484832764, + "step": 3004 + }, + { + "epoch": 0.878783447872496, + "grad_norm": 1.4235586871910597, + "learning_rate": 1.2800030679241834e-05, + "loss": 0.5995723605155945, + "step": 3005 + }, + { + "epoch": 0.8790758882877614, + "grad_norm": 1.5173093942193803, + "learning_rate": 1.2795389037717286e-05, + "loss": 0.6199642419815063, + "step": 3006 + }, + { + "epoch": 0.8793683287030267, + "grad_norm": 1.5757356892284924, + "learning_rate": 1.279074674278754e-05, + "loss": 0.6740807294845581, + "step": 3007 + }, + { + "epoch": 0.8796607691182922, + "grad_norm": 1.4923318097982954, + "learning_rate": 1.2786103795537714e-05, + "loss": 0.7330688238143921, + "step": 3008 + }, + { + "epoch": 0.8799532095335575, + "grad_norm": 1.1357910142893406, + "learning_rate": 1.2781460197053066e-05, + "loss": 0.5048441290855408, + "step": 3009 + }, + { + "epoch": 
0.8802456499488229, + "grad_norm": 1.2484561154788956, + "learning_rate": 1.277681594841901e-05, + "loss": 0.6103702187538147, + "step": 3010 + }, + { + "epoch": 0.8805380903640884, + "grad_norm": 1.3117487221252475, + "learning_rate": 1.2772171050721107e-05, + "loss": 0.5223366022109985, + "step": 3011 + }, + { + "epoch": 0.8808305307793537, + "grad_norm": 1.5806437295259135, + "learning_rate": 1.2767525505045078e-05, + "loss": 0.708305835723877, + "step": 3012 + }, + { + "epoch": 0.8811229711946191, + "grad_norm": 1.324207789268205, + "learning_rate": 1.2762879312476785e-05, + "loss": 0.6827911734580994, + "step": 3013 + }, + { + "epoch": 0.8814154116098845, + "grad_norm": 1.7302207886555443, + "learning_rate": 1.2758232474102254e-05, + "loss": 0.6977027654647827, + "step": 3014 + }, + { + "epoch": 0.8817078520251499, + "grad_norm": 1.235299173012923, + "learning_rate": 1.2753584991007654e-05, + "loss": 0.5534720420837402, + "step": 3015 + }, + { + "epoch": 0.8820002924404152, + "grad_norm": 1.1722300923390174, + "learning_rate": 1.2748936864279305e-05, + "loss": 0.541682243347168, + "step": 3016 + }, + { + "epoch": 0.8822927328556807, + "grad_norm": 1.4134630737456748, + "learning_rate": 1.2744288095003674e-05, + "loss": 0.6195456981658936, + "step": 3017 + }, + { + "epoch": 0.8825851732709461, + "grad_norm": 1.1963339495389647, + "learning_rate": 1.2739638684267387e-05, + "loss": 0.5050234794616699, + "step": 3018 + }, + { + "epoch": 0.8828776136862114, + "grad_norm": 1.1967088542641229, + "learning_rate": 1.2734988633157218e-05, + "loss": 0.5397066473960876, + "step": 3019 + }, + { + "epoch": 0.8831700541014769, + "grad_norm": 1.3480056981854442, + "learning_rate": 1.273033794276008e-05, + "loss": 0.5932190418243408, + "step": 3020 + }, + { + "epoch": 0.8834624945167422, + "grad_norm": 1.2383533139434324, + "learning_rate": 1.2725686614163055e-05, + "loss": 0.5780059099197388, + "step": 3021 + }, + { + "epoch": 0.8837549349320076, + "grad_norm": 
1.4379159594856536, + "learning_rate": 1.2721034648453353e-05, + "loss": 0.5850226879119873, + "step": 3022 + }, + { + "epoch": 0.884047375347273, + "grad_norm": 1.351057706249645, + "learning_rate": 1.2716382046718346e-05, + "loss": 0.6684393882751465, + "step": 3023 + }, + { + "epoch": 0.8843398157625384, + "grad_norm": 1.3578422906902012, + "learning_rate": 1.271172881004555e-05, + "loss": 0.6045842170715332, + "step": 3024 + }, + { + "epoch": 0.8846322561778037, + "grad_norm": 1.4246831207517041, + "learning_rate": 1.2707074939522633e-05, + "loss": 0.6769551038742065, + "step": 3025 + }, + { + "epoch": 0.8849246965930692, + "grad_norm": 1.541147063192512, + "learning_rate": 1.2702420436237408e-05, + "loss": 0.5581091642379761, + "step": 3026 + }, + { + "epoch": 0.8852171370083346, + "grad_norm": 1.381695049653859, + "learning_rate": 1.269776530127784e-05, + "loss": 0.5010186433792114, + "step": 3027 + }, + { + "epoch": 0.8855095774235999, + "grad_norm": 1.3620137613749654, + "learning_rate": 1.2693109535732034e-05, + "loss": 0.4537884294986725, + "step": 3028 + }, + { + "epoch": 0.8858020178388654, + "grad_norm": 1.155156838639785, + "learning_rate": 1.2688453140688246e-05, + "loss": 0.5920443534851074, + "step": 3029 + }, + { + "epoch": 0.8860944582541307, + "grad_norm": 1.4193738144287875, + "learning_rate": 1.2683796117234884e-05, + "loss": 0.564072847366333, + "step": 3030 + }, + { + "epoch": 0.8863868986693961, + "grad_norm": 1.3206014730711304, + "learning_rate": 1.26791384664605e-05, + "loss": 0.657585620880127, + "step": 3031 + }, + { + "epoch": 0.8866793390846615, + "grad_norm": 1.4850669504718117, + "learning_rate": 1.2674480189453786e-05, + "loss": 0.6864298582077026, + "step": 3032 + }, + { + "epoch": 0.8869717794999269, + "grad_norm": 1.4143994971740543, + "learning_rate": 1.266982128730359e-05, + "loss": 0.6416069865226746, + "step": 3033 + }, + { + "epoch": 0.8872642199151923, + "grad_norm": 1.3298070008922416, + "learning_rate": 
1.2665161761098899e-05, + "loss": 0.6405118703842163, + "step": 3034 + }, + { + "epoch": 0.8875566603304577, + "grad_norm": 1.4036133965159712, + "learning_rate": 1.266050161192885e-05, + "loss": 0.649673342704773, + "step": 3035 + }, + { + "epoch": 0.8878491007457231, + "grad_norm": 1.359043965576467, + "learning_rate": 1.2655840840882729e-05, + "loss": 0.5914620161056519, + "step": 3036 + }, + { + "epoch": 0.8881415411609884, + "grad_norm": 1.5837746169822255, + "learning_rate": 1.2651179449049958e-05, + "loss": 0.6080621480941772, + "step": 3037 + }, + { + "epoch": 0.8884339815762539, + "grad_norm": 1.5302588008128089, + "learning_rate": 1.264651743752011e-05, + "loss": 0.657015860080719, + "step": 3038 + }, + { + "epoch": 0.8887264219915192, + "grad_norm": 1.3603604072518423, + "learning_rate": 1.26418548073829e-05, + "loss": 0.5384848713874817, + "step": 3039 + }, + { + "epoch": 0.8890188624067846, + "grad_norm": 1.5457096573294893, + "learning_rate": 1.2637191559728195e-05, + "loss": 0.7452554106712341, + "step": 3040 + }, + { + "epoch": 0.88931130282205, + "grad_norm": 1.4411555623785637, + "learning_rate": 1.2632527695645993e-05, + "loss": 0.743236780166626, + "step": 3041 + }, + { + "epoch": 0.8896037432373154, + "grad_norm": 1.5417347407679962, + "learning_rate": 1.2627863216226453e-05, + "loss": 0.557692289352417, + "step": 3042 + }, + { + "epoch": 0.8898961836525808, + "grad_norm": 1.3302198914823486, + "learning_rate": 1.2623198122559863e-05, + "loss": 0.5637259483337402, + "step": 3043 + }, + { + "epoch": 0.8901886240678462, + "grad_norm": 1.4403910054587767, + "learning_rate": 1.261853241573666e-05, + "loss": 0.5217350721359253, + "step": 3044 + }, + { + "epoch": 0.8904810644831116, + "grad_norm": 1.4659582389098327, + "learning_rate": 1.2613866096847423e-05, + "loss": 0.5971624255180359, + "step": 3045 + }, + { + "epoch": 0.8907735048983769, + "grad_norm": 1.5641010174504344, + "learning_rate": 1.260919916698288e-05, + "loss": 0.6586427092552185, + 
"step": 3046 + }, + { + "epoch": 0.8910659453136424, + "grad_norm": 1.8045032510726307, + "learning_rate": 1.2604531627233895e-05, + "loss": 0.7059915661811829, + "step": 3047 + }, + { + "epoch": 0.8913583857289077, + "grad_norm": 1.3406441666811264, + "learning_rate": 1.2599863478691483e-05, + "loss": 0.582252025604248, + "step": 3048 + }, + { + "epoch": 0.8916508261441731, + "grad_norm": 1.2760858553291834, + "learning_rate": 1.2595194722446786e-05, + "loss": 0.6901981830596924, + "step": 3049 + }, + { + "epoch": 0.8919432665594386, + "grad_norm": 1.5789638647855007, + "learning_rate": 1.2590525359591101e-05, + "loss": 0.7462388873100281, + "step": 3050 + }, + { + "epoch": 0.8922357069747039, + "grad_norm": 1.1893369289763132, + "learning_rate": 1.2585855391215866e-05, + "loss": 0.4963245391845703, + "step": 3051 + }, + { + "epoch": 0.8925281473899693, + "grad_norm": 1.427293357699651, + "learning_rate": 1.2581184818412655e-05, + "loss": 0.6408337354660034, + "step": 3052 + }, + { + "epoch": 0.8928205878052347, + "grad_norm": 1.3357664905418998, + "learning_rate": 1.257651364227319e-05, + "loss": 0.44528326392173767, + "step": 3053 + }, + { + "epoch": 0.8931130282205001, + "grad_norm": 1.4527206031665332, + "learning_rate": 1.2571841863889322e-05, + "loss": 0.4595017731189728, + "step": 3054 + }, + { + "epoch": 0.8934054686357654, + "grad_norm": 1.435143014894245, + "learning_rate": 1.2567169484353057e-05, + "loss": 0.6934910416603088, + "step": 3055 + }, + { + "epoch": 0.8936979090510309, + "grad_norm": 1.3543177360296097, + "learning_rate": 1.2562496504756535e-05, + "loss": 0.6392845511436462, + "step": 3056 + }, + { + "epoch": 0.8939903494662963, + "grad_norm": 1.3638361282130094, + "learning_rate": 1.255782292619203e-05, + "loss": 0.5506458878517151, + "step": 3057 + }, + { + "epoch": 0.8942827898815616, + "grad_norm": 1.3861859212756857, + "learning_rate": 1.255314874975197e-05, + "loss": 0.5871223211288452, + "step": 3058 + }, + { + "epoch": 
0.8945752302968271, + "grad_norm": 1.4446737131271559, + "learning_rate": 1.254847397652892e-05, + "loss": 0.603033185005188, + "step": 3059 + }, + { + "epoch": 0.8948676707120924, + "grad_norm": 1.4764688506929942, + "learning_rate": 1.2543798607615566e-05, + "loss": 0.667452335357666, + "step": 3060 + }, + { + "epoch": 0.8951601111273578, + "grad_norm": 1.5052245195755742, + "learning_rate": 1.2539122644104755e-05, + "loss": 0.6264449954032898, + "step": 3061 + }, + { + "epoch": 0.8954525515426232, + "grad_norm": 1.2694525054193362, + "learning_rate": 1.2534446087089465e-05, + "loss": 0.6085609793663025, + "step": 3062 + }, + { + "epoch": 0.8957449919578886, + "grad_norm": 1.5027824768205942, + "learning_rate": 1.252976893766281e-05, + "loss": 0.6414828896522522, + "step": 3063 + }, + { + "epoch": 0.8960374323731539, + "grad_norm": 1.5067492390612103, + "learning_rate": 1.2525091196918049e-05, + "loss": 0.714614987373352, + "step": 3064 + }, + { + "epoch": 0.8963298727884194, + "grad_norm": 1.4473594871396505, + "learning_rate": 1.2520412865948574e-05, + "loss": 0.5966176986694336, + "step": 3065 + }, + { + "epoch": 0.8966223132036848, + "grad_norm": 1.234582474772498, + "learning_rate": 1.2515733945847914e-05, + "loss": 0.5162957906723022, + "step": 3066 + }, + { + "epoch": 0.8969147536189501, + "grad_norm": 1.5378382727824902, + "learning_rate": 1.2511054437709743e-05, + "loss": 0.6460821628570557, + "step": 3067 + }, + { + "epoch": 0.8972071940342156, + "grad_norm": 1.3526579806372556, + "learning_rate": 1.2506374342627861e-05, + "loss": 0.6802507638931274, + "step": 3068 + }, + { + "epoch": 0.8974996344494809, + "grad_norm": 1.4306769896677902, + "learning_rate": 1.2501693661696218e-05, + "loss": 0.5966957807540894, + "step": 3069 + }, + { + "epoch": 0.8977920748647463, + "grad_norm": 1.336293797847081, + "learning_rate": 1.2497012396008893e-05, + "loss": 0.607227087020874, + "step": 3070 + }, + { + "epoch": 0.8980845152800117, + "grad_norm": 
1.360686606627987, + "learning_rate": 1.2492330546660098e-05, + "loss": 0.6544637084007263, + "step": 3071 + }, + { + "epoch": 0.8983769556952771, + "grad_norm": 1.410133865972111, + "learning_rate": 1.2487648114744196e-05, + "loss": 0.5896593332290649, + "step": 3072 + }, + { + "epoch": 0.8986693961105425, + "grad_norm": 1.296908458370691, + "learning_rate": 1.248296510135567e-05, + "loss": 0.5710231065750122, + "step": 3073 + }, + { + "epoch": 0.8989618365258079, + "grad_norm": 1.2057046094411794, + "learning_rate": 1.2478281507589147e-05, + "loss": 0.5918926000595093, + "step": 3074 + }, + { + "epoch": 0.8992542769410733, + "grad_norm": 1.5306817529094334, + "learning_rate": 1.2473597334539392e-05, + "loss": 0.681663453578949, + "step": 3075 + }, + { + "epoch": 0.8995467173563386, + "grad_norm": 1.2671727964507529, + "learning_rate": 1.24689125833013e-05, + "loss": 0.5229436159133911, + "step": 3076 + }, + { + "epoch": 0.8998391577716041, + "grad_norm": 1.5769374861363958, + "learning_rate": 1.2464227254969903e-05, + "loss": 0.7165119051933289, + "step": 3077 + }, + { + "epoch": 0.9001315981868694, + "grad_norm": 1.2324966791017462, + "learning_rate": 1.2459541350640368e-05, + "loss": 0.514594554901123, + "step": 3078 + }, + { + "epoch": 0.9004240386021348, + "grad_norm": 1.4144268048636097, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.6173784732818604, + "step": 3079 + }, + { + "epoch": 0.9007164790174002, + "grad_norm": 1.6555744107314199, + "learning_rate": 1.245016781836822e-05, + "loss": 0.6796407103538513, + "step": 3080 + }, + { + "epoch": 0.9010089194326656, + "grad_norm": 1.3666754181554102, + "learning_rate": 1.2445480192616619e-05, + "loss": 0.6901683807373047, + "step": 3081 + }, + { + "epoch": 0.901301359847931, + "grad_norm": 1.295839204252469, + "learning_rate": 1.2440791995248886e-05, + "loss": 0.6215920448303223, + "step": 3082 + }, + { + "epoch": 0.9015938002631964, + "grad_norm": 1.29381925555321, + "learning_rate": 
1.243610322736087e-05, + "loss": 0.6109690070152283, + "step": 3083 + }, + { + "epoch": 0.9018862406784618, + "grad_norm": 1.3751453546430485, + "learning_rate": 1.2431413890048534e-05, + "loss": 0.5273362398147583, + "step": 3084 + }, + { + "epoch": 0.9021786810937271, + "grad_norm": 1.197511083408015, + "learning_rate": 1.2426723984407982e-05, + "loss": 0.5219408273696899, + "step": 3085 + }, + { + "epoch": 0.9024711215089926, + "grad_norm": 1.4389803986869047, + "learning_rate": 1.2422033511535458e-05, + "loss": 0.6894690990447998, + "step": 3086 + }, + { + "epoch": 0.9027635619242579, + "grad_norm": 1.2949596320128054, + "learning_rate": 1.2417342472527325e-05, + "loss": 0.6135656833648682, + "step": 3087 + }, + { + "epoch": 0.9030560023395233, + "grad_norm": 1.4997841327771624, + "learning_rate": 1.2412650868480088e-05, + "loss": 0.595108151435852, + "step": 3088 + }, + { + "epoch": 0.9033484427547888, + "grad_norm": 1.4068106482758378, + "learning_rate": 1.2407958700490376e-05, + "loss": 0.6445261240005493, + "step": 3089 + }, + { + "epoch": 0.9036408831700541, + "grad_norm": 1.1391728287440939, + "learning_rate": 1.240326596965496e-05, + "loss": 0.5601890087127686, + "step": 3090 + }, + { + "epoch": 0.9039333235853195, + "grad_norm": 1.4556896662499954, + "learning_rate": 1.239857267707074e-05, + "loss": 0.6229134798049927, + "step": 3091 + }, + { + "epoch": 0.9042257640005849, + "grad_norm": 1.3633245090329542, + "learning_rate": 1.2393878823834737e-05, + "loss": 0.5769803524017334, + "step": 3092 + }, + { + "epoch": 0.9045182044158503, + "grad_norm": 1.5373386649577192, + "learning_rate": 1.2389184411044113e-05, + "loss": 0.8101233243942261, + "step": 3093 + }, + { + "epoch": 0.9048106448311156, + "grad_norm": 1.3507156228218853, + "learning_rate": 1.2384489439796159e-05, + "loss": 0.5562945604324341, + "step": 3094 + }, + { + "epoch": 0.9051030852463811, + "grad_norm": 1.6942487879562902, + "learning_rate": 1.2379793911188299e-05, + "loss": 
0.5764975547790527, + "step": 3095 + }, + { + "epoch": 0.9053955256616465, + "grad_norm": 1.4280019855873591, + "learning_rate": 1.2375097826318079e-05, + "loss": 0.5951659083366394, + "step": 3096 + }, + { + "epoch": 0.9056879660769118, + "grad_norm": 1.3804272066554735, + "learning_rate": 1.2370401186283186e-05, + "loss": 0.5550940632820129, + "step": 3097 + }, + { + "epoch": 0.9059804064921773, + "grad_norm": 1.5012418323017303, + "learning_rate": 1.2365703992181425e-05, + "loss": 0.5423737168312073, + "step": 3098 + }, + { + "epoch": 0.9062728469074426, + "grad_norm": 1.3277873552974655, + "learning_rate": 1.236100624511074e-05, + "loss": 0.633366048336029, + "step": 3099 + }, + { + "epoch": 0.906565287322708, + "grad_norm": 1.2113954677804317, + "learning_rate": 1.2356307946169202e-05, + "loss": 0.6067361831665039, + "step": 3100 + }, + { + "epoch": 0.9068577277379734, + "grad_norm": 1.1800329005672614, + "learning_rate": 1.2351609096455006e-05, + "loss": 0.6039519309997559, + "step": 3101 + }, + { + "epoch": 0.9071501681532388, + "grad_norm": 1.3373962705942997, + "learning_rate": 1.2346909697066486e-05, + "loss": 0.5643757581710815, + "step": 3102 + }, + { + "epoch": 0.9074426085685041, + "grad_norm": 1.4963223668806274, + "learning_rate": 1.2342209749102088e-05, + "loss": 0.5406394004821777, + "step": 3103 + }, + { + "epoch": 0.9077350489837696, + "grad_norm": 1.3377232980381308, + "learning_rate": 1.2337509253660404e-05, + "loss": 0.5845915079116821, + "step": 3104 + }, + { + "epoch": 0.908027489399035, + "grad_norm": 1.614536146442758, + "learning_rate": 1.2332808211840147e-05, + "loss": 0.6912981271743774, + "step": 3105 + }, + { + "epoch": 0.9083199298143003, + "grad_norm": 1.2433178855630291, + "learning_rate": 1.2328106624740151e-05, + "loss": 0.5571672320365906, + "step": 3106 + }, + { + "epoch": 0.9086123702295658, + "grad_norm": 1.0866011599268561, + "learning_rate": 1.2323404493459386e-05, + "loss": 0.5219087600708008, + "step": 3107 + }, + { + 
"epoch": 0.9089048106448311, + "grad_norm": 1.3110052749572634, + "learning_rate": 1.2318701819096952e-05, + "loss": 0.5780971050262451, + "step": 3108 + }, + { + "epoch": 0.9091972510600965, + "grad_norm": 1.4311943893173962, + "learning_rate": 1.2313998602752063e-05, + "loss": 0.6206589937210083, + "step": 3109 + }, + { + "epoch": 0.9094896914753618, + "grad_norm": 1.4768884476442792, + "learning_rate": 1.2309294845524068e-05, + "loss": 0.6063584089279175, + "step": 3110 + }, + { + "epoch": 0.9097821318906273, + "grad_norm": 1.7547035202334638, + "learning_rate": 1.2304590548512445e-05, + "loss": 0.5733555555343628, + "step": 3111 + }, + { + "epoch": 0.9100745723058927, + "grad_norm": 1.0786362412869268, + "learning_rate": 1.2299885712816792e-05, + "loss": 0.5227848887443542, + "step": 3112 + }, + { + "epoch": 0.910367012721158, + "grad_norm": 1.3268713618037162, + "learning_rate": 1.2295180339536839e-05, + "loss": 0.6357969045639038, + "step": 3113 + }, + { + "epoch": 0.9106594531364235, + "grad_norm": 1.4243975329678797, + "learning_rate": 1.2290474429772438e-05, + "loss": 0.6194056272506714, + "step": 3114 + }, + { + "epoch": 0.9109518935516888, + "grad_norm": 1.3151715542581663, + "learning_rate": 1.2285767984623563e-05, + "loss": 0.5274733304977417, + "step": 3115 + }, + { + "epoch": 0.9112443339669543, + "grad_norm": 1.370068266036648, + "learning_rate": 1.228106100519032e-05, + "loss": 0.5612698197364807, + "step": 3116 + }, + { + "epoch": 0.9115367743822196, + "grad_norm": 1.578530779654035, + "learning_rate": 1.2276353492572937e-05, + "loss": 0.6261074542999268, + "step": 3117 + }, + { + "epoch": 0.911829214797485, + "grad_norm": 1.2011662273206838, + "learning_rate": 1.2271645447871764e-05, + "loss": 0.6407681703567505, + "step": 3118 + }, + { + "epoch": 0.9121216552127503, + "grad_norm": 1.869370443317622, + "learning_rate": 1.226693687218728e-05, + "loss": 0.7862328290939331, + "step": 3119 + }, + { + "epoch": 0.9124140956280158, + "grad_norm": 
1.4175623746202768, + "learning_rate": 1.2262227766620083e-05, + "loss": 0.5079205632209778, + "step": 3120 + }, + { + "epoch": 0.9127065360432812, + "grad_norm": 1.5666620241066453, + "learning_rate": 1.2257518132270903e-05, + "loss": 0.6074210405349731, + "step": 3121 + }, + { + "epoch": 0.9129989764585466, + "grad_norm": 1.5222891825114737, + "learning_rate": 1.2252807970240582e-05, + "loss": 0.642460823059082, + "step": 3122 + }, + { + "epoch": 0.913291416873812, + "grad_norm": 1.5105961127505823, + "learning_rate": 1.22480972816301e-05, + "loss": 0.5996612310409546, + "step": 3123 + }, + { + "epoch": 0.9135838572890773, + "grad_norm": 1.4191755584361432, + "learning_rate": 1.2243386067540548e-05, + "loss": 0.5629523992538452, + "step": 3124 + }, + { + "epoch": 0.9138762977043428, + "grad_norm": 1.488297008451051, + "learning_rate": 1.223867432907314e-05, + "loss": 0.5794960260391235, + "step": 3125 + }, + { + "epoch": 0.9141687381196081, + "grad_norm": 1.4839380471480481, + "learning_rate": 1.2233962067329217e-05, + "loss": 0.6665213108062744, + "step": 3126 + }, + { + "epoch": 0.9144611785348735, + "grad_norm": 1.7069185609011637, + "learning_rate": 1.2229249283410245e-05, + "loss": 0.6834249496459961, + "step": 3127 + }, + { + "epoch": 0.914753618950139, + "grad_norm": 1.472483487554638, + "learning_rate": 1.2224535978417809e-05, + "loss": 0.5709845423698425, + "step": 3128 + }, + { + "epoch": 0.9150460593654043, + "grad_norm": 1.3783113695609808, + "learning_rate": 1.2219822153453613e-05, + "loss": 0.5455344915390015, + "step": 3129 + }, + { + "epoch": 0.9153384997806697, + "grad_norm": 1.5138708664001599, + "learning_rate": 1.2215107809619483e-05, + "loss": 0.6291406154632568, + "step": 3130 + }, + { + "epoch": 0.915630940195935, + "grad_norm": 1.340686035335307, + "learning_rate": 1.2210392948017371e-05, + "loss": 0.5953069925308228, + "step": 3131 + }, + { + "epoch": 0.9159233806112005, + "grad_norm": 1.3390197673162056, + "learning_rate": 
1.2205677569749347e-05, + "loss": 0.6958901882171631, + "step": 3132 + }, + { + "epoch": 0.9162158210264658, + "grad_norm": 2.251590691230911, + "learning_rate": 1.2200961675917605e-05, + "loss": 0.5867033004760742, + "step": 3133 + }, + { + "epoch": 0.9165082614417313, + "grad_norm": 1.2167957981489814, + "learning_rate": 1.2196245267624449e-05, + "loss": 0.5364042520523071, + "step": 3134 + }, + { + "epoch": 0.9168007018569967, + "grad_norm": 1.0997310314063415, + "learning_rate": 1.2191528345972318e-05, + "loss": 0.5141438841819763, + "step": 3135 + }, + { + "epoch": 0.917093142272262, + "grad_norm": 1.1435709173541644, + "learning_rate": 1.218681091206376e-05, + "loss": 0.5024605393409729, + "step": 3136 + }, + { + "epoch": 0.9173855826875275, + "grad_norm": 1.4583614763595478, + "learning_rate": 1.2182092967001447e-05, + "loss": 0.567114531993866, + "step": 3137 + }, + { + "epoch": 0.9176780231027928, + "grad_norm": 1.4993671644221835, + "learning_rate": 1.217737451188817e-05, + "loss": 0.7224113941192627, + "step": 3138 + }, + { + "epoch": 0.9179704635180582, + "grad_norm": 1.368376715547139, + "learning_rate": 1.2172655547826839e-05, + "loss": 0.6033936738967896, + "step": 3139 + }, + { + "epoch": 0.9182629039333235, + "grad_norm": 1.4327847369216065, + "learning_rate": 1.2167936075920486e-05, + "loss": 0.5555745363235474, + "step": 3140 + }, + { + "epoch": 0.918555344348589, + "grad_norm": 1.1757378939927343, + "learning_rate": 1.2163216097272255e-05, + "loss": 0.5939170718193054, + "step": 3141 + }, + { + "epoch": 0.9188477847638543, + "grad_norm": 1.49535441688526, + "learning_rate": 1.2158495612985415e-05, + "loss": 0.7141895294189453, + "step": 3142 + }, + { + "epoch": 0.9191402251791198, + "grad_norm": 1.5558405168210478, + "learning_rate": 1.2153774624163345e-05, + "loss": 0.585646390914917, + "step": 3143 + }, + { + "epoch": 0.9194326655943852, + "grad_norm": 1.114182805953909, + "learning_rate": 1.2149053131909556e-05, + "loss": 0.5378825068473816, 
+ "step": 3144 + }, + { + "epoch": 0.9197251060096505, + "grad_norm": 1.383902731385194, + "learning_rate": 1.2144331137327663e-05, + "loss": 0.569821834564209, + "step": 3145 + }, + { + "epoch": 0.920017546424916, + "grad_norm": 1.6457891792908532, + "learning_rate": 1.2139608641521406e-05, + "loss": 0.6101462244987488, + "step": 3146 + }, + { + "epoch": 0.9203099868401813, + "grad_norm": 1.2016357640033675, + "learning_rate": 1.2134885645594637e-05, + "loss": 0.5481746792793274, + "step": 3147 + }, + { + "epoch": 0.9206024272554467, + "grad_norm": 1.538402380383642, + "learning_rate": 1.2130162150651326e-05, + "loss": 0.7075197696685791, + "step": 3148 + }, + { + "epoch": 0.920894867670712, + "grad_norm": 1.7217246005422928, + "learning_rate": 1.2125438157795567e-05, + "loss": 0.6375464200973511, + "step": 3149 + }, + { + "epoch": 0.9211873080859775, + "grad_norm": 1.3850395600859229, + "learning_rate": 1.2120713668131558e-05, + "loss": 0.6954327821731567, + "step": 3150 + }, + { + "epoch": 0.9214797485012429, + "grad_norm": 1.3658544095341296, + "learning_rate": 1.2115988682763626e-05, + "loss": 0.5855636596679688, + "step": 3151 + }, + { + "epoch": 0.9217721889165083, + "grad_norm": 1.4751760026778278, + "learning_rate": 1.2111263202796206e-05, + "loss": 0.6056143641471863, + "step": 3152 + }, + { + "epoch": 0.9220646293317737, + "grad_norm": 1.551741495670365, + "learning_rate": 1.2106537229333848e-05, + "loss": 0.7918239831924438, + "step": 3153 + }, + { + "epoch": 0.922357069747039, + "grad_norm": 1.7033588700340108, + "learning_rate": 1.2101810763481218e-05, + "loss": 0.7772212028503418, + "step": 3154 + }, + { + "epoch": 0.9226495101623045, + "grad_norm": 1.511966147005096, + "learning_rate": 1.2097083806343104e-05, + "loss": 0.6332443356513977, + "step": 3155 + }, + { + "epoch": 0.9229419505775698, + "grad_norm": 1.358434184305942, + "learning_rate": 1.2092356359024399e-05, + "loss": 0.6254568099975586, + "step": 3156 + }, + { + "epoch": 
0.9232343909928352, + "grad_norm": 1.5630990314712985, + "learning_rate": 1.208762842263012e-05, + "loss": 0.6178697347640991, + "step": 3157 + }, + { + "epoch": 0.9235268314081005, + "grad_norm": 1.1998616171531247, + "learning_rate": 1.2082899998265387e-05, + "loss": 0.5049355030059814, + "step": 3158 + }, + { + "epoch": 0.923819271823366, + "grad_norm": 1.4513160919924062, + "learning_rate": 1.2078171087035444e-05, + "loss": 0.7013234496116638, + "step": 3159 + }, + { + "epoch": 0.9241117122386314, + "grad_norm": 1.4119575222677514, + "learning_rate": 1.2073441690045647e-05, + "loss": 0.576643705368042, + "step": 3160 + }, + { + "epoch": 0.9244041526538967, + "grad_norm": 1.2307321356514476, + "learning_rate": 1.2068711808401459e-05, + "loss": 0.5163617134094238, + "step": 3161 + }, + { + "epoch": 0.9246965930691622, + "grad_norm": 1.39625806011197, + "learning_rate": 1.2063981443208466e-05, + "loss": 0.571370005607605, + "step": 3162 + }, + { + "epoch": 0.9249890334844275, + "grad_norm": 1.3814954844513003, + "learning_rate": 1.2059250595572358e-05, + "loss": 0.7424927949905396, + "step": 3163 + }, + { + "epoch": 0.925281473899693, + "grad_norm": 1.398481393831642, + "learning_rate": 1.2054519266598946e-05, + "loss": 0.6661131381988525, + "step": 3164 + }, + { + "epoch": 0.9255739143149583, + "grad_norm": 1.382448951979987, + "learning_rate": 1.2049787457394145e-05, + "loss": 0.6416351795196533, + "step": 3165 + }, + { + "epoch": 0.9258663547302237, + "grad_norm": 1.5012000035545232, + "learning_rate": 1.2045055169063988e-05, + "loss": 0.6708394289016724, + "step": 3166 + }, + { + "epoch": 0.9261587951454892, + "grad_norm": 1.5269915566780659, + "learning_rate": 1.2040322402714624e-05, + "loss": 0.536340057849884, + "step": 3167 + }, + { + "epoch": 0.9264512355607545, + "grad_norm": 1.4556897812811458, + "learning_rate": 1.20355891594523e-05, + "loss": 0.5621340274810791, + "step": 3168 + }, + { + "epoch": 0.9267436759760199, + "grad_norm": 1.274628172323648, + 
"learning_rate": 1.2030855440383387e-05, + "loss": 0.5972496271133423, + "step": 3169 + }, + { + "epoch": 0.9270361163912852, + "grad_norm": 1.4230845419048714, + "learning_rate": 1.2026121246614362e-05, + "loss": 0.567542314529419, + "step": 3170 + }, + { + "epoch": 0.9273285568065507, + "grad_norm": 1.092340586033623, + "learning_rate": 1.2021386579251814e-05, + "loss": 0.5487483739852905, + "step": 3171 + }, + { + "epoch": 0.927620997221816, + "grad_norm": 1.6219751059797927, + "learning_rate": 1.2016651439402445e-05, + "loss": 0.7988057136535645, + "step": 3172 + }, + { + "epoch": 0.9279134376370815, + "grad_norm": 1.2231171520157942, + "learning_rate": 1.2011915828173066e-05, + "loss": 0.5333850979804993, + "step": 3173 + }, + { + "epoch": 0.9282058780523469, + "grad_norm": 1.1146388373256622, + "learning_rate": 1.2007179746670592e-05, + "loss": 0.5640296936035156, + "step": 3174 + }, + { + "epoch": 0.9284983184676122, + "grad_norm": 1.7918188640848236, + "learning_rate": 1.2002443196002057e-05, + "loss": 0.7154449820518494, + "step": 3175 + }, + { + "epoch": 0.9287907588828777, + "grad_norm": 1.533684329230312, + "learning_rate": 1.1997706177274597e-05, + "loss": 0.8660446405410767, + "step": 3176 + }, + { + "epoch": 0.929083199298143, + "grad_norm": 1.498753630747748, + "learning_rate": 1.1992968691595465e-05, + "loss": 0.601166307926178, + "step": 3177 + }, + { + "epoch": 0.9293756397134084, + "grad_norm": 1.4563708289231845, + "learning_rate": 1.1988230740072022e-05, + "loss": 0.6197638511657715, + "step": 3178 + }, + { + "epoch": 0.9296680801286737, + "grad_norm": 1.2218794629813654, + "learning_rate": 1.198349232381173e-05, + "loss": 0.5716423988342285, + "step": 3179 + }, + { + "epoch": 0.9299605205439392, + "grad_norm": 1.1601969521725652, + "learning_rate": 1.197875344392217e-05, + "loss": 0.4319373071193695, + "step": 3180 + }, + { + "epoch": 0.9302529609592045, + "grad_norm": 1.3226372570662766, + "learning_rate": 1.1974014101511018e-05, + "loss": 
0.5299028158187866, + "step": 3181 + }, + { + "epoch": 0.93054540137447, + "grad_norm": 1.4024951088839022, + "learning_rate": 1.1969274297686075e-05, + "loss": 0.7085509300231934, + "step": 3182 + }, + { + "epoch": 0.9308378417897354, + "grad_norm": 1.3237854936063287, + "learning_rate": 1.1964534033555237e-05, + "loss": 0.6025770902633667, + "step": 3183 + }, + { + "epoch": 0.9311302822050007, + "grad_norm": 1.2585066067859425, + "learning_rate": 1.1959793310226518e-05, + "loss": 0.5624677538871765, + "step": 3184 + }, + { + "epoch": 0.9314227226202662, + "grad_norm": 1.3607236544497474, + "learning_rate": 1.1955052128808025e-05, + "loss": 0.602645754814148, + "step": 3185 + }, + { + "epoch": 0.9317151630355315, + "grad_norm": 1.5196424442530971, + "learning_rate": 1.1950310490407984e-05, + "loss": 0.6495026350021362, + "step": 3186 + }, + { + "epoch": 0.9320076034507969, + "grad_norm": 1.2037819566859902, + "learning_rate": 1.1945568396134721e-05, + "loss": 0.50370192527771, + "step": 3187 + }, + { + "epoch": 0.9323000438660622, + "grad_norm": 1.4578860564520788, + "learning_rate": 1.1940825847096677e-05, + "loss": 0.5717373490333557, + "step": 3188 + }, + { + "epoch": 0.9325924842813277, + "grad_norm": 1.2463647398252022, + "learning_rate": 1.1936082844402395e-05, + "loss": 0.5863519310951233, + "step": 3189 + }, + { + "epoch": 0.9328849246965931, + "grad_norm": 1.3634372027202455, + "learning_rate": 1.1931339389160516e-05, + "loss": 0.6607284545898438, + "step": 3190 + }, + { + "epoch": 0.9331773651118584, + "grad_norm": 1.2667041686104175, + "learning_rate": 1.1926595482479799e-05, + "loss": 0.5578058958053589, + "step": 3191 + }, + { + "epoch": 0.9334698055271239, + "grad_norm": 1.577459199872034, + "learning_rate": 1.19218511254691e-05, + "loss": 0.6839171648025513, + "step": 3192 + }, + { + "epoch": 0.9337622459423892, + "grad_norm": 1.4197717809462, + "learning_rate": 1.1917106319237386e-05, + "loss": 0.5071141719818115, + "step": 3193 + }, + { + "epoch": 
0.9340546863576547, + "grad_norm": 1.3302825340941604, + "learning_rate": 1.1912361064893726e-05, + "loss": 0.5112525820732117, + "step": 3194 + }, + { + "epoch": 0.93434712677292, + "grad_norm": 1.3701575961238917, + "learning_rate": 1.1907615363547299e-05, + "loss": 0.5661873817443848, + "step": 3195 + }, + { + "epoch": 0.9346395671881854, + "grad_norm": 1.3078991902724904, + "learning_rate": 1.190286921630737e-05, + "loss": 0.5520195364952087, + "step": 3196 + }, + { + "epoch": 0.9349320076034507, + "grad_norm": 1.1923433518822224, + "learning_rate": 1.1898122624283337e-05, + "loss": 0.560089111328125, + "step": 3197 + }, + { + "epoch": 0.9352244480187162, + "grad_norm": 1.3393482355065873, + "learning_rate": 1.1893375588584681e-05, + "loss": 0.6431207656860352, + "step": 3198 + }, + { + "epoch": 0.9355168884339816, + "grad_norm": 1.6025933525200546, + "learning_rate": 1.1888628110320995e-05, + "loss": 0.7365666031837463, + "step": 3199 + }, + { + "epoch": 0.935809328849247, + "grad_norm": 1.5181397488734587, + "learning_rate": 1.1883880190601968e-05, + "loss": 0.5455417633056641, + "step": 3200 + }, + { + "epoch": 0.9361017692645124, + "grad_norm": 1.2648151177686433, + "learning_rate": 1.1879131830537403e-05, + "loss": 0.5749938488006592, + "step": 3201 + }, + { + "epoch": 0.9363942096797777, + "grad_norm": 1.4774526931967815, + "learning_rate": 1.1874383031237196e-05, + "loss": 0.588424563407898, + "step": 3202 + }, + { + "epoch": 0.9366866500950431, + "grad_norm": 1.7045519601542285, + "learning_rate": 1.1869633793811352e-05, + "loss": 0.7039792537689209, + "step": 3203 + }, + { + "epoch": 0.9369790905103085, + "grad_norm": 1.3777530310932211, + "learning_rate": 1.1864884119369977e-05, + "loss": 0.5972777009010315, + "step": 3204 + }, + { + "epoch": 0.9372715309255739, + "grad_norm": 1.5348242749242778, + "learning_rate": 1.1860134009023281e-05, + "loss": 0.6510647535324097, + "step": 3205 + }, + { + "epoch": 0.9375639713408394, + "grad_norm": 
1.3174058455781212, + "learning_rate": 1.1855383463881566e-05, + "loss": 0.606874406337738, + "step": 3206 + }, + { + "epoch": 0.9378564117561047, + "grad_norm": 1.4675285988638056, + "learning_rate": 1.1850632485055247e-05, + "loss": 0.5527048110961914, + "step": 3207 + }, + { + "epoch": 0.9381488521713701, + "grad_norm": 1.3531723389548285, + "learning_rate": 1.1845881073654838e-05, + "loss": 0.6297399997711182, + "step": 3208 + }, + { + "epoch": 0.9384412925866354, + "grad_norm": 1.4561464002236073, + "learning_rate": 1.184112923079095e-05, + "loss": 0.5852634310722351, + "step": 3209 + }, + { + "epoch": 0.9387337330019009, + "grad_norm": 1.276124242645333, + "learning_rate": 1.1836376957574301e-05, + "loss": 0.5648211240768433, + "step": 3210 + }, + { + "epoch": 0.9390261734171662, + "grad_norm": 1.4542765956455581, + "learning_rate": 1.1831624255115703e-05, + "loss": 0.5547506213188171, + "step": 3211 + }, + { + "epoch": 0.9393186138324316, + "grad_norm": 1.3882723904405088, + "learning_rate": 1.1826871124526072e-05, + "loss": 0.5927829146385193, + "step": 3212 + }, + { + "epoch": 0.9396110542476971, + "grad_norm": 1.4870159815211654, + "learning_rate": 1.182211756691642e-05, + "loss": 0.5705278515815735, + "step": 3213 + }, + { + "epoch": 0.9399034946629624, + "grad_norm": 1.3481561389317809, + "learning_rate": 1.1817363583397868e-05, + "loss": 0.547038197517395, + "step": 3214 + }, + { + "epoch": 0.9401959350782279, + "grad_norm": 1.6799026497887648, + "learning_rate": 1.1812609175081626e-05, + "loss": 0.6136760115623474, + "step": 3215 + }, + { + "epoch": 0.9404883754934932, + "grad_norm": 1.3697737055687615, + "learning_rate": 1.1807854343079015e-05, + "loss": 0.5784845352172852, + "step": 3216 + }, + { + "epoch": 0.9407808159087586, + "grad_norm": 1.306268521565337, + "learning_rate": 1.1803099088501439e-05, + "loss": 0.6629599332809448, + "step": 3217 + }, + { + "epoch": 0.9410732563240239, + "grad_norm": 1.3560413521315915, + "learning_rate": 
1.1798343412460416e-05, + "loss": 0.6058052778244019, + "step": 3218 + }, + { + "epoch": 0.9413656967392894, + "grad_norm": 1.236587656133179, + "learning_rate": 1.1793587316067552e-05, + "loss": 0.5689725875854492, + "step": 3219 + }, + { + "epoch": 0.9416581371545547, + "grad_norm": 1.2722209400014248, + "learning_rate": 1.1788830800434561e-05, + "loss": 0.5718861818313599, + "step": 3220 + }, + { + "epoch": 0.9419505775698201, + "grad_norm": 1.4517063699959183, + "learning_rate": 1.1784073866673245e-05, + "loss": 0.6061254739761353, + "step": 3221 + }, + { + "epoch": 0.9422430179850856, + "grad_norm": 1.3732176542504997, + "learning_rate": 1.1779316515895511e-05, + "loss": 0.6805517077445984, + "step": 3222 + }, + { + "epoch": 0.9425354584003509, + "grad_norm": 1.3828844754339646, + "learning_rate": 1.1774558749213358e-05, + "loss": 0.5553466081619263, + "step": 3223 + }, + { + "epoch": 0.9428278988156163, + "grad_norm": 1.2173236944216692, + "learning_rate": 1.176980056773889e-05, + "loss": 0.6408798694610596, + "step": 3224 + }, + { + "epoch": 0.9431203392308817, + "grad_norm": 1.222815565053331, + "learning_rate": 1.1765041972584296e-05, + "loss": 0.5269505381584167, + "step": 3225 + }, + { + "epoch": 0.9434127796461471, + "grad_norm": 1.424391391794669, + "learning_rate": 1.1760282964861873e-05, + "loss": 0.682415246963501, + "step": 3226 + }, + { + "epoch": 0.9437052200614124, + "grad_norm": 1.4623421356805024, + "learning_rate": 1.1755523545684016e-05, + "loss": 0.507567286491394, + "step": 3227 + }, + { + "epoch": 0.9439976604766779, + "grad_norm": 1.4192334343942388, + "learning_rate": 1.1750763716163199e-05, + "loss": 0.6977763175964355, + "step": 3228 + }, + { + "epoch": 0.9442901008919433, + "grad_norm": 1.3754010773945908, + "learning_rate": 1.1746003477412007e-05, + "loss": 0.5626407861709595, + "step": 3229 + }, + { + "epoch": 0.9445825413072086, + "grad_norm": 1.537446067568307, + "learning_rate": 1.1741242830543118e-05, + "loss": 
0.5280323624610901, + "step": 3230 + }, + { + "epoch": 0.9448749817224741, + "grad_norm": 1.564549447099706, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.6236885190010071, + "step": 3231 + }, + { + "epoch": 0.9451674221377394, + "grad_norm": 1.2957140073878561, + "learning_rate": 1.1731720316903435e-05, + "loss": 0.5250823497772217, + "step": 3232 + }, + { + "epoch": 0.9454598625530048, + "grad_norm": 1.3562245135276858, + "learning_rate": 1.1726958452358472e-05, + "loss": 0.5885770320892334, + "step": 3233 + }, + { + "epoch": 0.9457523029682702, + "grad_norm": 1.5466392002562799, + "learning_rate": 1.1722196184147467e-05, + "loss": 0.7812498807907104, + "step": 3234 + }, + { + "epoch": 0.9460447433835356, + "grad_norm": 2.1182720670568678, + "learning_rate": 1.1717433513383575e-05, + "loss": 0.6763796210289001, + "step": 3235 + }, + { + "epoch": 0.9463371837988009, + "grad_norm": 1.4130641179603503, + "learning_rate": 1.1712670441180045e-05, + "loss": 0.5983982682228088, + "step": 3236 + }, + { + "epoch": 0.9466296242140664, + "grad_norm": 1.4075974845813908, + "learning_rate": 1.1707906968650214e-05, + "loss": 0.6665002107620239, + "step": 3237 + }, + { + "epoch": 0.9469220646293318, + "grad_norm": 1.3129047594602676, + "learning_rate": 1.1703143096907507e-05, + "loss": 0.7676652669906616, + "step": 3238 + }, + { + "epoch": 0.9472145050445971, + "grad_norm": 1.552106023331421, + "learning_rate": 1.1698378827065461e-05, + "loss": 0.710014820098877, + "step": 3239 + }, + { + "epoch": 0.9475069454598626, + "grad_norm": 1.3709978679968329, + "learning_rate": 1.169361416023769e-05, + "loss": 0.5800554752349854, + "step": 3240 + }, + { + "epoch": 0.9477993858751279, + "grad_norm": 1.2790925568283578, + "learning_rate": 1.1688849097537904e-05, + "loss": 0.602012574672699, + "step": 3241 + }, + { + "epoch": 0.9480918262903933, + "grad_norm": 1.4089569844293444, + "learning_rate": 1.1684083640079912e-05, + "loss": 0.4943910241127014, + "step": 3242 + }, + { + 
"epoch": 0.9483842667056587, + "grad_norm": 1.3173293444454082, + "learning_rate": 1.1679317788977609e-05, + "loss": 0.49094298481941223, + "step": 3243 + }, + { + "epoch": 0.9486767071209241, + "grad_norm": 1.1684708220820899, + "learning_rate": 1.1674551545344983e-05, + "loss": 0.46416157484054565, + "step": 3244 + }, + { + "epoch": 0.9489691475361896, + "grad_norm": 1.3422229221849986, + "learning_rate": 1.1669784910296114e-05, + "loss": 0.5170255899429321, + "step": 3245 + }, + { + "epoch": 0.9492615879514549, + "grad_norm": 1.3467691134757651, + "learning_rate": 1.1665017884945174e-05, + "loss": 0.7673200368881226, + "step": 3246 + }, + { + "epoch": 0.9495540283667203, + "grad_norm": 1.194998950326605, + "learning_rate": 1.1660250470406426e-05, + "loss": 0.49335333704948425, + "step": 3247 + }, + { + "epoch": 0.9498464687819856, + "grad_norm": 1.5055569823397887, + "learning_rate": 1.1655482667794228e-05, + "loss": 0.6620640754699707, + "step": 3248 + }, + { + "epoch": 0.9501389091972511, + "grad_norm": 1.5536985980342881, + "learning_rate": 1.1650714478223022e-05, + "loss": 0.600047767162323, + "step": 3249 + }, + { + "epoch": 0.9504313496125164, + "grad_norm": 1.449375702915225, + "learning_rate": 1.164594590280734e-05, + "loss": 0.668572187423706, + "step": 3250 + }, + { + "epoch": 0.9507237900277818, + "grad_norm": 1.28696773590094, + "learning_rate": 1.1641176942661812e-05, + "loss": 0.4460945725440979, + "step": 3251 + }, + { + "epoch": 0.9510162304430473, + "grad_norm": 1.553130185640807, + "learning_rate": 1.1636407598901154e-05, + "loss": 0.6650545597076416, + "step": 3252 + }, + { + "epoch": 0.9513086708583126, + "grad_norm": 1.4537452557116313, + "learning_rate": 1.1631637872640166e-05, + "loss": 0.5631237030029297, + "step": 3253 + }, + { + "epoch": 0.951601111273578, + "grad_norm": 1.2642307643713007, + "learning_rate": 1.162686776499375e-05, + "loss": 0.650580883026123, + "step": 3254 + }, + { + "epoch": 0.9518935516888434, + "grad_norm": 
1.2808622379645098, + "learning_rate": 1.1622097277076883e-05, + "loss": 0.5606606602668762, + "step": 3255 + }, + { + "epoch": 0.9521859921041088, + "grad_norm": 1.6059525544711786, + "learning_rate": 1.1617326410004639e-05, + "loss": 0.667366623878479, + "step": 3256 + }, + { + "epoch": 0.9524784325193741, + "grad_norm": 1.2848877829061671, + "learning_rate": 1.1612555164892181e-05, + "loss": 0.5895084738731384, + "step": 3257 + }, + { + "epoch": 0.9527708729346396, + "grad_norm": 1.3031742059601414, + "learning_rate": 1.1607783542854759e-05, + "loss": 0.6468119025230408, + "step": 3258 + }, + { + "epoch": 0.9530633133499049, + "grad_norm": 1.567653748749065, + "learning_rate": 1.1603011545007708e-05, + "loss": 0.7178056240081787, + "step": 3259 + }, + { + "epoch": 0.9533557537651703, + "grad_norm": 1.0796246328531958, + "learning_rate": 1.1598239172466457e-05, + "loss": 0.42994585633277893, + "step": 3260 + }, + { + "epoch": 0.9536481941804358, + "grad_norm": 1.3208710287997751, + "learning_rate": 1.1593466426346513e-05, + "loss": 0.4939822554588318, + "step": 3261 + }, + { + "epoch": 0.9539406345957011, + "grad_norm": 1.4828958620285886, + "learning_rate": 1.1588693307763483e-05, + "loss": 0.4252137839794159, + "step": 3262 + }, + { + "epoch": 0.9542330750109665, + "grad_norm": 1.4293991408504185, + "learning_rate": 1.1583919817833051e-05, + "loss": 0.5772995948791504, + "step": 3263 + }, + { + "epoch": 0.9545255154262319, + "grad_norm": 1.4892265763022432, + "learning_rate": 1.1579145957670992e-05, + "loss": 0.6784560680389404, + "step": 3264 + }, + { + "epoch": 0.9548179558414973, + "grad_norm": 1.4340903064465058, + "learning_rate": 1.1574371728393169e-05, + "loss": 0.5373483896255493, + "step": 3265 + }, + { + "epoch": 0.9551103962567626, + "grad_norm": 1.5590731671081544, + "learning_rate": 1.1569597131115523e-05, + "loss": 0.7517837285995483, + "step": 3266 + }, + { + "epoch": 0.9554028366720281, + "grad_norm": 1.2323534514024168, + "learning_rate": 
1.1564822166954092e-05, + "loss": 0.6715551614761353, + "step": 3267 + }, + { + "epoch": 0.9556952770872935, + "grad_norm": 1.5740418428519831, + "learning_rate": 1.1560046837024994e-05, + "loss": 0.6892265677452087, + "step": 3268 + }, + { + "epoch": 0.9559877175025588, + "grad_norm": 1.1845546480418727, + "learning_rate": 1.1555271142444433e-05, + "loss": 0.5564894676208496, + "step": 3269 + }, + { + "epoch": 0.9562801579178243, + "grad_norm": 1.4735106062071393, + "learning_rate": 1.15504950843287e-05, + "loss": 0.6211465001106262, + "step": 3270 + }, + { + "epoch": 0.9565725983330896, + "grad_norm": 1.360797371118281, + "learning_rate": 1.1545718663794165e-05, + "loss": 0.6189093589782715, + "step": 3271 + }, + { + "epoch": 0.956865038748355, + "grad_norm": 1.332461163898103, + "learning_rate": 1.1540941881957293e-05, + "loss": 0.6600508689880371, + "step": 3272 + }, + { + "epoch": 0.9571574791636204, + "grad_norm": 1.1722369932825303, + "learning_rate": 1.1536164739934626e-05, + "loss": 0.5891202688217163, + "step": 3273 + }, + { + "epoch": 0.9574499195788858, + "grad_norm": 1.450456789269031, + "learning_rate": 1.1531387238842788e-05, + "loss": 0.5996856093406677, + "step": 3274 + }, + { + "epoch": 0.9577423599941511, + "grad_norm": 1.3947581203143906, + "learning_rate": 1.15266093797985e-05, + "loss": 0.5645085573196411, + "step": 3275 + }, + { + "epoch": 0.9580348004094166, + "grad_norm": 1.3192013477387883, + "learning_rate": 1.1521831163918545e-05, + "loss": 0.5934250354766846, + "step": 3276 + }, + { + "epoch": 0.958327240824682, + "grad_norm": 1.3125475487560205, + "learning_rate": 1.151705259231981e-05, + "loss": 0.6659657955169678, + "step": 3277 + }, + { + "epoch": 0.9586196812399473, + "grad_norm": 1.4439329469838202, + "learning_rate": 1.1512273666119255e-05, + "loss": 0.518921434879303, + "step": 3278 + }, + { + "epoch": 0.9589121216552128, + "grad_norm": 1.5520324796179028, + "learning_rate": 1.1507494386433927e-05, + "loss": 0.6015551686286926, 
+ "step": 3279 + }, + { + "epoch": 0.9592045620704781, + "grad_norm": 1.3864839845404684, + "learning_rate": 1.150271475438095e-05, + "loss": 0.5590265393257141, + "step": 3280 + }, + { + "epoch": 0.9594970024857435, + "grad_norm": 2.135782810317134, + "learning_rate": 1.149793477107754e-05, + "loss": 0.5820340514183044, + "step": 3281 + }, + { + "epoch": 0.9597894429010089, + "grad_norm": 1.5263684685914536, + "learning_rate": 1.1493154437640981e-05, + "loss": 0.5356709957122803, + "step": 3282 + }, + { + "epoch": 0.9600818833162743, + "grad_norm": 1.6754028625571513, + "learning_rate": 1.1488373755188651e-05, + "loss": 0.7024146318435669, + "step": 3283 + }, + { + "epoch": 0.9603743237315397, + "grad_norm": 1.1672092433368113, + "learning_rate": 1.1483592724838007e-05, + "loss": 0.4929785132408142, + "step": 3284 + }, + { + "epoch": 0.9606667641468051, + "grad_norm": 1.288237919875972, + "learning_rate": 1.147881134770658e-05, + "loss": 0.6902902126312256, + "step": 3285 + }, + { + "epoch": 0.9609592045620705, + "grad_norm": 1.3348356135288268, + "learning_rate": 1.1474029624911997e-05, + "loss": 0.5339258313179016, + "step": 3286 + }, + { + "epoch": 0.9612516449773358, + "grad_norm": 1.4657145875756896, + "learning_rate": 1.146924755757195e-05, + "loss": 0.6998730897903442, + "step": 3287 + }, + { + "epoch": 0.9615440853926013, + "grad_norm": 1.257948537764273, + "learning_rate": 1.1464465146804218e-05, + "loss": 0.6174519062042236, + "step": 3288 + }, + { + "epoch": 0.9618365258078666, + "grad_norm": 1.812192547108516, + "learning_rate": 1.145968239372666e-05, + "loss": 0.5395258665084839, + "step": 3289 + }, + { + "epoch": 0.962128966223132, + "grad_norm": 1.4759469600623887, + "learning_rate": 1.1454899299457221e-05, + "loss": 0.6355341672897339, + "step": 3290 + }, + { + "epoch": 0.9624214066383975, + "grad_norm": 1.519697305957534, + "learning_rate": 1.1450115865113916e-05, + "loss": 0.5315179228782654, + "step": 3291 + }, + { + "epoch": 0.9627138470536628, 
+ "grad_norm": 1.468105168017502, + "learning_rate": 1.1445332091814844e-05, + "loss": 0.5595142841339111, + "step": 3292 + }, + { + "epoch": 0.9630062874689282, + "grad_norm": 1.2033736096293444, + "learning_rate": 1.1440547980678185e-05, + "loss": 0.5509291291236877, + "step": 3293 + }, + { + "epoch": 0.9632987278841936, + "grad_norm": 1.5381505996084959, + "learning_rate": 1.1435763532822191e-05, + "loss": 0.6831322908401489, + "step": 3294 + }, + { + "epoch": 0.963591168299459, + "grad_norm": 1.3733453232745707, + "learning_rate": 1.1430978749365203e-05, + "loss": 0.5494598150253296, + "step": 3295 + }, + { + "epoch": 0.9638836087147243, + "grad_norm": 1.498661160088125, + "learning_rate": 1.142619363142563e-05, + "loss": 0.5613550543785095, + "step": 3296 + }, + { + "epoch": 0.9641760491299898, + "grad_norm": 1.5212850266198317, + "learning_rate": 1.1421408180121972e-05, + "loss": 0.656089186668396, + "step": 3297 + }, + { + "epoch": 0.9644684895452551, + "grad_norm": 1.1510410875603876, + "learning_rate": 1.1416622396572791e-05, + "loss": 0.5913431644439697, + "step": 3298 + }, + { + "epoch": 0.9647609299605205, + "grad_norm": 1.3644056514467953, + "learning_rate": 1.1411836281896737e-05, + "loss": 0.6706565022468567, + "step": 3299 + }, + { + "epoch": 0.965053370375786, + "grad_norm": 1.3661421058655916, + "learning_rate": 1.1407049837212539e-05, + "loss": 0.6169217824935913, + "step": 3300 + }, + { + "epoch": 0.9653458107910513, + "grad_norm": 1.2988460072876178, + "learning_rate": 1.1402263063638994e-05, + "loss": 0.5516680479049683, + "step": 3301 + }, + { + "epoch": 0.9656382512063167, + "grad_norm": 1.2914486970247845, + "learning_rate": 1.1397475962294986e-05, + "loss": 0.7105098962783813, + "step": 3302 + }, + { + "epoch": 0.9659306916215821, + "grad_norm": 1.5297340917133426, + "learning_rate": 1.139268853429947e-05, + "loss": 0.6183327436447144, + "step": 3303 + }, + { + "epoch": 0.9662231320368475, + "grad_norm": 1.4183780196378124, + 
"learning_rate": 1.1387900780771472e-05, + "loss": 0.6160033941268921, + "step": 3304 + }, + { + "epoch": 0.9665155724521128, + "grad_norm": 1.4212044707464202, + "learning_rate": 1.1383112702830108e-05, + "loss": 0.5526994466781616, + "step": 3305 + }, + { + "epoch": 0.9668080128673783, + "grad_norm": 1.381901469460175, + "learning_rate": 1.137832430159456e-05, + "loss": 0.5476477742195129, + "step": 3306 + }, + { + "epoch": 0.9671004532826437, + "grad_norm": 1.3794404018811846, + "learning_rate": 1.1373535578184083e-05, + "loss": 0.558393657207489, + "step": 3307 + }, + { + "epoch": 0.967392893697909, + "grad_norm": 1.4577860579810487, + "learning_rate": 1.1368746533718017e-05, + "loss": 0.6302276849746704, + "step": 3308 + }, + { + "epoch": 0.9676853341131745, + "grad_norm": 1.2805956031485568, + "learning_rate": 1.1363957169315773e-05, + "loss": 0.619697630405426, + "step": 3309 + }, + { + "epoch": 0.9679777745284398, + "grad_norm": 1.4119075289775231, + "learning_rate": 1.135916748609683e-05, + "loss": 0.564563512802124, + "step": 3310 + }, + { + "epoch": 0.9682702149437052, + "grad_norm": 1.6014783450991135, + "learning_rate": 1.1354377485180756e-05, + "loss": 0.6238751411437988, + "step": 3311 + }, + { + "epoch": 0.9685626553589706, + "grad_norm": 1.4620948350058627, + "learning_rate": 1.1349587167687177e-05, + "loss": 0.8079221844673157, + "step": 3312 + }, + { + "epoch": 0.968855095774236, + "grad_norm": 1.4034979651528738, + "learning_rate": 1.1344796534735805e-05, + "loss": 0.5547629594802856, + "step": 3313 + }, + { + "epoch": 0.9691475361895013, + "grad_norm": 1.2187187942390127, + "learning_rate": 1.134000558744642e-05, + "loss": 0.630042552947998, + "step": 3314 + }, + { + "epoch": 0.9694399766047668, + "grad_norm": 1.284912675244452, + "learning_rate": 1.1335214326938872e-05, + "loss": 0.5283412337303162, + "step": 3315 + }, + { + "epoch": 0.9697324170200322, + "grad_norm": 1.3484514955842084, + "learning_rate": 1.1330422754333097e-05, + "loss": 
0.6356452703475952, + "step": 3316 + }, + { + "epoch": 0.9700248574352975, + "grad_norm": 1.265116321608699, + "learning_rate": 1.132563087074909e-05, + "loss": 0.6531886458396912, + "step": 3317 + }, + { + "epoch": 0.970317297850563, + "grad_norm": 1.6209665553722108, + "learning_rate": 1.1320838677306927e-05, + "loss": 0.5725178718566895, + "step": 3318 + }, + { + "epoch": 0.9706097382658283, + "grad_norm": 1.460783947968998, + "learning_rate": 1.1316046175126758e-05, + "loss": 0.6341495513916016, + "step": 3319 + }, + { + "epoch": 0.9709021786810937, + "grad_norm": 1.428850290510927, + "learning_rate": 1.1311253365328794e-05, + "loss": 0.5792768597602844, + "step": 3320 + }, + { + "epoch": 0.9711946190963591, + "grad_norm": 1.2539734431492524, + "learning_rate": 1.1306460249033326e-05, + "loss": 0.5495700836181641, + "step": 3321 + }, + { + "epoch": 0.9714870595116245, + "grad_norm": 1.3779597112573112, + "learning_rate": 1.1301666827360721e-05, + "loss": 0.7092291116714478, + "step": 3322 + }, + { + "epoch": 0.97177949992689, + "grad_norm": 1.210154083257435, + "learning_rate": 1.1296873101431409e-05, + "loss": 0.5368257761001587, + "step": 3323 + }, + { + "epoch": 0.9720719403421553, + "grad_norm": 1.2901315838159502, + "learning_rate": 1.1292079072365898e-05, + "loss": 0.6116393804550171, + "step": 3324 + }, + { + "epoch": 0.9723643807574207, + "grad_norm": 1.6375876584807947, + "learning_rate": 1.1287284741284757e-05, + "loss": 0.5654028654098511, + "step": 3325 + }, + { + "epoch": 0.972656821172686, + "grad_norm": 1.4007947938241085, + "learning_rate": 1.1282490109308633e-05, + "loss": 0.6436389684677124, + "step": 3326 + }, + { + "epoch": 0.9729492615879515, + "grad_norm": 1.6286174854172328, + "learning_rate": 1.1277695177558243e-05, + "loss": 0.7687330842018127, + "step": 3327 + }, + { + "epoch": 0.9732417020032168, + "grad_norm": 1.3338540478099405, + "learning_rate": 1.1272899947154377e-05, + "loss": 0.5350443124771118, + "step": 3328 + }, + { + 
"epoch": 0.9735341424184822, + "grad_norm": 1.5528633763871835, + "learning_rate": 1.1268104419217884e-05, + "loss": 0.6032785773277283, + "step": 3329 + }, + { + "epoch": 0.9738265828337477, + "grad_norm": 1.410347655987774, + "learning_rate": 1.1263308594869697e-05, + "loss": 0.5756093263626099, + "step": 3330 + }, + { + "epoch": 0.974119023249013, + "grad_norm": 1.5831169693775362, + "learning_rate": 1.1258512475230807e-05, + "loss": 0.6977418065071106, + "step": 3331 + }, + { + "epoch": 0.9744114636642784, + "grad_norm": 1.3726893652594243, + "learning_rate": 1.1253716061422275e-05, + "loss": 0.5409448146820068, + "step": 3332 + }, + { + "epoch": 0.9747039040795438, + "grad_norm": 1.3626349639764654, + "learning_rate": 1.1248919354565237e-05, + "loss": 0.5863862037658691, + "step": 3333 + }, + { + "epoch": 0.9749963444948092, + "grad_norm": 1.313934697737098, + "learning_rate": 1.1244122355780895e-05, + "loss": 0.6039433479309082, + "step": 3334 + }, + { + "epoch": 0.9752887849100745, + "grad_norm": 1.4813691831553626, + "learning_rate": 1.1239325066190513e-05, + "loss": 0.6696581840515137, + "step": 3335 + }, + { + "epoch": 0.97558122532534, + "grad_norm": 1.5159715106591773, + "learning_rate": 1.1234527486915439e-05, + "loss": 0.6308715343475342, + "step": 3336 + }, + { + "epoch": 0.9758736657406053, + "grad_norm": 1.4927391317525602, + "learning_rate": 1.1229729619077065e-05, + "loss": 0.580268383026123, + "step": 3337 + }, + { + "epoch": 0.9761661061558707, + "grad_norm": 1.775582999909584, + "learning_rate": 1.1224931463796871e-05, + "loss": 0.8080834746360779, + "step": 3338 + }, + { + "epoch": 0.9764585465711362, + "grad_norm": 1.3814988427954438, + "learning_rate": 1.1220133022196395e-05, + "loss": 0.4933619499206543, + "step": 3339 + }, + { + "epoch": 0.9767509869864015, + "grad_norm": 1.26412210808527, + "learning_rate": 1.1215334295397244e-05, + "loss": 0.5639102458953857, + "step": 3340 + }, + { + "epoch": 0.9770434274016669, + "grad_norm": 
1.3947001629341338, + "learning_rate": 1.1210535284521094e-05, + "loss": 0.6332741975784302, + "step": 3341 + }, + { + "epoch": 0.9773358678169323, + "grad_norm": 1.4234927806293247, + "learning_rate": 1.1205735990689677e-05, + "loss": 0.5425227880477905, + "step": 3342 + }, + { + "epoch": 0.9776283082321977, + "grad_norm": 1.2841671137073696, + "learning_rate": 1.1200936415024804e-05, + "loss": 0.48746997117996216, + "step": 3343 + }, + { + "epoch": 0.977920748647463, + "grad_norm": 1.3045240526527524, + "learning_rate": 1.1196136558648345e-05, + "loss": 0.5509577393531799, + "step": 3344 + }, + { + "epoch": 0.9782131890627285, + "grad_norm": 1.5306708658005588, + "learning_rate": 1.1191336422682237e-05, + "loss": 0.5939484238624573, + "step": 3345 + }, + { + "epoch": 0.9785056294779939, + "grad_norm": 1.4772741629174198, + "learning_rate": 1.1186536008248487e-05, + "loss": 0.6078917384147644, + "step": 3346 + }, + { + "epoch": 0.9787980698932592, + "grad_norm": 1.4449426772113496, + "learning_rate": 1.1181735316469157e-05, + "loss": 0.5578145980834961, + "step": 3347 + }, + { + "epoch": 0.9790905103085247, + "grad_norm": 1.5556898331182667, + "learning_rate": 1.1176934348466384e-05, + "loss": 0.6809493899345398, + "step": 3348 + }, + { + "epoch": 0.97938295072379, + "grad_norm": 1.3454886518258895, + "learning_rate": 1.117213310536236e-05, + "loss": 0.6057093143463135, + "step": 3349 + }, + { + "epoch": 0.9796753911390554, + "grad_norm": 1.2918762120947054, + "learning_rate": 1.1167331588279351e-05, + "loss": 0.6656113266944885, + "step": 3350 + }, + { + "epoch": 0.9799678315543208, + "grad_norm": 1.3588186351553628, + "learning_rate": 1.1162529798339682e-05, + "loss": 0.5260547399520874, + "step": 3351 + }, + { + "epoch": 0.9802602719695862, + "grad_norm": 1.4059510686804249, + "learning_rate": 1.115772773666574e-05, + "loss": 0.6918379068374634, + "step": 3352 + }, + { + "epoch": 0.9805527123848515, + "grad_norm": 1.4859264660633271, + "learning_rate": 
1.115292540437998e-05, + "loss": 0.7128825187683105, + "step": 3353 + }, + { + "epoch": 0.980845152800117, + "grad_norm": 1.7806281788252345, + "learning_rate": 1.1148122802604913e-05, + "loss": 0.6858257055282593, + "step": 3354 + }, + { + "epoch": 0.9811375932153824, + "grad_norm": 1.3250069966815017, + "learning_rate": 1.1143319932463124e-05, + "loss": 0.540290117263794, + "step": 3355 + }, + { + "epoch": 0.9814300336306477, + "grad_norm": 1.3692222106755043, + "learning_rate": 1.1138516795077251e-05, + "loss": 0.7293038368225098, + "step": 3356 + }, + { + "epoch": 0.9817224740459132, + "grad_norm": 1.2337952733643827, + "learning_rate": 1.1133713391570003e-05, + "loss": 0.5981270670890808, + "step": 3357 + }, + { + "epoch": 0.9820149144611785, + "grad_norm": 1.282642205016649, + "learning_rate": 1.1128909723064138e-05, + "loss": 0.6175673604011536, + "step": 3358 + }, + { + "epoch": 0.9823073548764439, + "grad_norm": 1.233452486411816, + "learning_rate": 1.112410579068249e-05, + "loss": 0.5385074615478516, + "step": 3359 + }, + { + "epoch": 0.9825997952917093, + "grad_norm": 1.372295513124522, + "learning_rate": 1.1119301595547952e-05, + "loss": 0.5754122734069824, + "step": 3360 + }, + { + "epoch": 0.9828922357069747, + "grad_norm": 1.4139982265628481, + "learning_rate": 1.1114497138783469e-05, + "loss": 0.5817348957061768, + "step": 3361 + }, + { + "epoch": 0.9831846761222401, + "grad_norm": 1.5953096945649214, + "learning_rate": 1.1109692421512058e-05, + "loss": 0.7561115026473999, + "step": 3362 + }, + { + "epoch": 0.9834771165375055, + "grad_norm": 1.4339527302516233, + "learning_rate": 1.1104887444856786e-05, + "loss": 0.5972003936767578, + "step": 3363 + }, + { + "epoch": 0.9837695569527709, + "grad_norm": 1.7933233288020083, + "learning_rate": 1.1100082209940795e-05, + "loss": 0.7569154500961304, + "step": 3364 + }, + { + "epoch": 0.9840619973680362, + "grad_norm": 1.6291951934588174, + "learning_rate": 1.1095276717887273e-05, + "loss": 
0.587831437587738, + "step": 3365 + }, + { + "epoch": 0.9843544377833017, + "grad_norm": 1.3893746663182953, + "learning_rate": 1.109047096981948e-05, + "loss": 0.5265868902206421, + "step": 3366 + }, + { + "epoch": 0.984646878198567, + "grad_norm": 1.5308570155926502, + "learning_rate": 1.1085664966860728e-05, + "loss": 0.6065980792045593, + "step": 3367 + }, + { + "epoch": 0.9849393186138324, + "grad_norm": 1.2582827679300745, + "learning_rate": 1.1080858710134392e-05, + "loss": 0.5859705209732056, + "step": 3368 + }, + { + "epoch": 0.9852317590290979, + "grad_norm": 1.2323676627113982, + "learning_rate": 1.1076052200763903e-05, + "loss": 0.508766770362854, + "step": 3369 + }, + { + "epoch": 0.9855241994443632, + "grad_norm": 1.36193145330846, + "learning_rate": 1.1071245439872752e-05, + "loss": 0.569848358631134, + "step": 3370 + }, + { + "epoch": 0.9858166398596286, + "grad_norm": 1.5268801014665052, + "learning_rate": 1.1066438428584496e-05, + "loss": 0.6665600538253784, + "step": 3371 + }, + { + "epoch": 0.986109080274894, + "grad_norm": 4.0352208239875536, + "learning_rate": 1.1061631168022742e-05, + "loss": 0.5942315459251404, + "step": 3372 + }, + { + "epoch": 0.9864015206901594, + "grad_norm": 1.3552035470831052, + "learning_rate": 1.1056823659311158e-05, + "loss": 0.5270178318023682, + "step": 3373 + }, + { + "epoch": 0.9866939611054247, + "grad_norm": 1.484191192307279, + "learning_rate": 1.1052015903573465e-05, + "loss": 0.6879183053970337, + "step": 3374 + }, + { + "epoch": 0.9869864015206902, + "grad_norm": 1.3455375539569006, + "learning_rate": 1.1047207901933453e-05, + "loss": 0.5980993509292603, + "step": 3375 + }, + { + "epoch": 0.9872788419359555, + "grad_norm": 1.3905728698834559, + "learning_rate": 1.1042399655514961e-05, + "loss": 0.5616245865821838, + "step": 3376 + }, + { + "epoch": 0.9875712823512209, + "grad_norm": 1.186489901347366, + "learning_rate": 1.1037591165441887e-05, + "loss": 0.6233900785446167, + "step": 3377 + }, + { + 
"epoch": 0.9878637227664864, + "grad_norm": 1.2146885941659273, + "learning_rate": 1.1032782432838188e-05, + "loss": 0.612476110458374, + "step": 3378 + }, + { + "epoch": 0.9881561631817517, + "grad_norm": 1.4001611534955285, + "learning_rate": 1.1027973458827874e-05, + "loss": 0.7109482288360596, + "step": 3379 + }, + { + "epoch": 0.9884486035970171, + "grad_norm": 1.4339596644962305, + "learning_rate": 1.1023164244535013e-05, + "loss": 0.7105005383491516, + "step": 3380 + }, + { + "epoch": 0.9887410440122825, + "grad_norm": 1.1897152470249062, + "learning_rate": 1.1018354791083731e-05, + "loss": 0.5401301383972168, + "step": 3381 + }, + { + "epoch": 0.9890334844275479, + "grad_norm": 1.2391450524860042, + "learning_rate": 1.101354509959821e-05, + "loss": 0.504487156867981, + "step": 3382 + }, + { + "epoch": 0.9893259248428132, + "grad_norm": 1.5778073649668172, + "learning_rate": 1.1008735171202685e-05, + "loss": 0.5634675025939941, + "step": 3383 + }, + { + "epoch": 0.9896183652580787, + "grad_norm": 1.2596231385186676, + "learning_rate": 1.1003925007021444e-05, + "loss": 0.4828820824623108, + "step": 3384 + }, + { + "epoch": 0.9899108056733441, + "grad_norm": 1.5274466661026922, + "learning_rate": 1.0999114608178837e-05, + "loss": 0.7154384851455688, + "step": 3385 + }, + { + "epoch": 0.9902032460886094, + "grad_norm": 1.4762279403432657, + "learning_rate": 1.0994303975799268e-05, + "loss": 0.626085638999939, + "step": 3386 + }, + { + "epoch": 0.9904956865038749, + "grad_norm": 1.2276097303271793, + "learning_rate": 1.0989493111007186e-05, + "loss": 0.5179756283760071, + "step": 3387 + }, + { + "epoch": 0.9907881269191402, + "grad_norm": 1.443725456432181, + "learning_rate": 1.0984682014927108e-05, + "loss": 0.6992131471633911, + "step": 3388 + }, + { + "epoch": 0.9910805673344056, + "grad_norm": 1.3252934977411588, + "learning_rate": 1.0979870688683598e-05, + "loss": 0.5791709423065186, + "step": 3389 + }, + { + "epoch": 0.991373007749671, + "grad_norm": 
1.2293406038140111, + "learning_rate": 1.097505913340127e-05, + "loss": 0.4703817367553711, + "step": 3390 + }, + { + "epoch": 0.9916654481649364, + "grad_norm": 1.7130975290215298, + "learning_rate": 1.0970247350204797e-05, + "loss": 0.6042051911354065, + "step": 3391 + }, + { + "epoch": 0.9919578885802017, + "grad_norm": 1.5075227997294136, + "learning_rate": 1.0965435340218905e-05, + "loss": 0.6806557178497314, + "step": 3392 + }, + { + "epoch": 0.9922503289954672, + "grad_norm": 1.4336313879655775, + "learning_rate": 1.0960623104568373e-05, + "loss": 0.6372751593589783, + "step": 3393 + }, + { + "epoch": 0.9925427694107326, + "grad_norm": 1.2403325317456615, + "learning_rate": 1.0955810644378031e-05, + "loss": 0.48651185631752014, + "step": 3394 + }, + { + "epoch": 0.9928352098259979, + "grad_norm": 1.5056465468012041, + "learning_rate": 1.0950997960772764e-05, + "loss": 0.5244222283363342, + "step": 3395 + }, + { + "epoch": 0.9931276502412634, + "grad_norm": 1.4445958557594307, + "learning_rate": 1.0946185054877505e-05, + "loss": 0.6194322109222412, + "step": 3396 + }, + { + "epoch": 0.9934200906565287, + "grad_norm": 1.4199918179889868, + "learning_rate": 1.0941371927817241e-05, + "loss": 0.690010666847229, + "step": 3397 + }, + { + "epoch": 0.9937125310717941, + "grad_norm": 1.9110036566867663, + "learning_rate": 1.0936558580717013e-05, + "loss": 0.7332549095153809, + "step": 3398 + }, + { + "epoch": 0.9940049714870595, + "grad_norm": 1.428619260140058, + "learning_rate": 1.093174501470191e-05, + "loss": 0.5264838337898254, + "step": 3399 + }, + { + "epoch": 0.9942974119023249, + "grad_norm": 1.1922668548863515, + "learning_rate": 1.092693123089708e-05, + "loss": 0.624382734298706, + "step": 3400 + }, + { + "epoch": 0.9945898523175903, + "grad_norm": 1.6559518933415514, + "learning_rate": 1.0922117230427705e-05, + "loss": 0.6340548992156982, + "step": 3401 + }, + { + "epoch": 0.9948822927328557, + "grad_norm": 1.194444639014181, + "learning_rate": 
1.0917303014419036e-05, + "loss": 0.4452754855155945, + "step": 3402 + }, + { + "epoch": 0.9951747331481211, + "grad_norm": 1.4241998861848877, + "learning_rate": 1.0912488583996364e-05, + "loss": 0.6180763244628906, + "step": 3403 + }, + { + "epoch": 0.9954671735633864, + "grad_norm": 1.7347993099568695, + "learning_rate": 1.0907673940285032e-05, + "loss": 0.7079293727874756, + "step": 3404 + }, + { + "epoch": 0.9957596139786519, + "grad_norm": 1.6216897448198107, + "learning_rate": 1.090285908441044e-05, + "loss": 0.6608254909515381, + "step": 3405 + }, + { + "epoch": 0.9960520543939172, + "grad_norm": 1.6873856420041173, + "learning_rate": 1.0898044017498024e-05, + "loss": 0.6450251340866089, + "step": 3406 + }, + { + "epoch": 0.9963444948091826, + "grad_norm": 1.4055094844579619, + "learning_rate": 1.089322874067328e-05, + "loss": 0.6267623901367188, + "step": 3407 + }, + { + "epoch": 0.9966369352244481, + "grad_norm": 1.6519553259967432, + "learning_rate": 1.0888413255061747e-05, + "loss": 0.6756424903869629, + "step": 3408 + }, + { + "epoch": 0.9969293756397134, + "grad_norm": 1.4122044676522614, + "learning_rate": 1.0883597561789017e-05, + "loss": 0.6578212976455688, + "step": 3409 + }, + { + "epoch": 0.9972218160549788, + "grad_norm": 1.600222297323414, + "learning_rate": 1.087878166198073e-05, + "loss": 0.8186248540878296, + "step": 3410 + }, + { + "epoch": 0.9975142564702442, + "grad_norm": 1.4575083835366422, + "learning_rate": 1.0873965556762573e-05, + "loss": 0.6689319610595703, + "step": 3411 + }, + { + "epoch": 0.9978066968855096, + "grad_norm": 1.5562694813418687, + "learning_rate": 1.0869149247260282e-05, + "loss": 0.5471278429031372, + "step": 3412 + }, + { + "epoch": 0.9980991373007749, + "grad_norm": 1.239131034827953, + "learning_rate": 1.0864332734599636e-05, + "loss": 0.4673747420310974, + "step": 3413 + }, + { + "epoch": 0.9983915777160404, + "grad_norm": 1.4054798008983762, + "learning_rate": 1.085951601990647e-05, + "loss": 
0.5777568221092224, + "step": 3414 + }, + { + "epoch": 0.9986840181313057, + "grad_norm": 1.6708797545900484, + "learning_rate": 1.0854699104306661e-05, + "loss": 0.6758528351783752, + "step": 3415 + }, + { + "epoch": 0.9989764585465711, + "grad_norm": 1.169154860422915, + "learning_rate": 1.0849881988926132e-05, + "loss": 0.5759919881820679, + "step": 3416 + }, + { + "epoch": 0.9992688989618366, + "grad_norm": 1.3291108456245637, + "learning_rate": 1.0845064674890857e-05, + "loss": 0.606694221496582, + "step": 3417 + }, + { + "epoch": 0.9995613393771019, + "grad_norm": 1.475290016916602, + "learning_rate": 1.0840247163326851e-05, + "loss": 0.627873957157135, + "step": 3418 + }, + { + "epoch": 0.9998537797923673, + "grad_norm": 1.4144594545282698, + "learning_rate": 1.083542945536018e-05, + "loss": 0.5560880303382874, + "step": 3419 + }, + { + "epoch": 1.0, + "grad_norm": 2.3650000488034633, + "learning_rate": 1.0830611552116952e-05, + "loss": 0.5983354449272156, + "step": 3420 + }, + { + "epoch": 1.0002924404152653, + "grad_norm": 1.1169918975180415, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.5012353658676147, + "step": 3421 + }, + { + "epoch": 1.0005848808305309, + "grad_norm": 1.6136465051179143, + "learning_rate": 1.0820975164305498e-05, + "loss": 0.4585106372833252, + "step": 3422 + }, + { + "epoch": 1.0008773212457962, + "grad_norm": 1.2831850675969656, + "learning_rate": 1.0816156681989717e-05, + "loss": 0.5790318846702576, + "step": 3423 + }, + { + "epoch": 1.0011697616610615, + "grad_norm": 1.5258008126885618, + "learning_rate": 1.0811338008902277e-05, + "loss": 0.6016381978988647, + "step": 3424 + }, + { + "epoch": 1.0014622020763269, + "grad_norm": 1.328199543518758, + "learning_rate": 1.0806519146169507e-05, + "loss": 0.5756744146347046, + "step": 3425 + }, + { + "epoch": 1.0017546424915924, + "grad_norm": 1.1865012964818713, + "learning_rate": 1.0801700094917792e-05, + "loss": 0.4776861369609833, + "step": 3426 + }, + { + "epoch": 
1.0020470829068577, + "grad_norm": 1.8629358545914494, + "learning_rate": 1.0796880856273557e-05, + "loss": 0.645842969417572, + "step": 3427 + }, + { + "epoch": 1.002339523322123, + "grad_norm": 1.1125775865964678, + "learning_rate": 1.0792061431363266e-05, + "loss": 0.5645815134048462, + "step": 3428 + }, + { + "epoch": 1.0026319637373886, + "grad_norm": 1.4821141209987578, + "learning_rate": 1.0787241821313428e-05, + "loss": 0.5477975606918335, + "step": 3429 + }, + { + "epoch": 1.002924404152654, + "grad_norm": 1.0992693186116131, + "learning_rate": 1.0782422027250604e-05, + "loss": 0.4064188599586487, + "step": 3430 + }, + { + "epoch": 1.0032168445679193, + "grad_norm": 1.3634803374266724, + "learning_rate": 1.0777602050301384e-05, + "loss": 0.5360208749771118, + "step": 3431 + }, + { + "epoch": 1.0035092849831846, + "grad_norm": 1.4203435807547533, + "learning_rate": 1.0772781891592419e-05, + "loss": 0.6189982891082764, + "step": 3432 + }, + { + "epoch": 1.0038017253984501, + "grad_norm": 1.4406563602891276, + "learning_rate": 1.0767961552250382e-05, + "loss": 0.4623541533946991, + "step": 3433 + }, + { + "epoch": 1.0040941658137155, + "grad_norm": 1.4714321386033957, + "learning_rate": 1.0763141033402e-05, + "loss": 0.6094095706939697, + "step": 3434 + }, + { + "epoch": 1.0043866062289808, + "grad_norm": 1.8852494834868845, + "learning_rate": 1.0758320336174042e-05, + "loss": 0.6997445821762085, + "step": 3435 + }, + { + "epoch": 1.0046790466442463, + "grad_norm": 1.3591852438815977, + "learning_rate": 1.0753499461693316e-05, + "loss": 0.5447323322296143, + "step": 3436 + }, + { + "epoch": 1.0049714870595117, + "grad_norm": 1.526403087538078, + "learning_rate": 1.0748678411086672e-05, + "loss": 0.5851927995681763, + "step": 3437 + }, + { + "epoch": 1.005263927474777, + "grad_norm": 1.2443699762001765, + "learning_rate": 1.0743857185481006e-05, + "loss": 0.5897810459136963, + "step": 3438 + }, + { + "epoch": 1.0055563678900423, + "grad_norm": 
1.277276792826896, + "learning_rate": 1.073903578600324e-05, + "loss": 0.47671592235565186, + "step": 3439 + }, + { + "epoch": 1.0058488083053079, + "grad_norm": 1.5091606917661848, + "learning_rate": 1.0734214213780355e-05, + "loss": 0.5586696863174438, + "step": 3440 + }, + { + "epoch": 1.0061412487205732, + "grad_norm": 1.7171075095449666, + "learning_rate": 1.0729392469939362e-05, + "loss": 0.6817598342895508, + "step": 3441 + }, + { + "epoch": 1.0064336891358385, + "grad_norm": 1.4899951597044825, + "learning_rate": 1.0724570555607311e-05, + "loss": 0.6503750085830688, + "step": 3442 + }, + { + "epoch": 1.0067261295511039, + "grad_norm": 1.516461978227071, + "learning_rate": 1.07197484719113e-05, + "loss": 0.7121564149856567, + "step": 3443 + }, + { + "epoch": 1.0070185699663694, + "grad_norm": 1.2899445236891802, + "learning_rate": 1.071492621997846e-05, + "loss": 0.5760178565979004, + "step": 3444 + }, + { + "epoch": 1.0073110103816347, + "grad_norm": 1.2567067936293974, + "learning_rate": 1.0710103800935965e-05, + "loss": 0.4555765390396118, + "step": 3445 + }, + { + "epoch": 1.0076034507969, + "grad_norm": 1.73824720674272, + "learning_rate": 1.0705281215911021e-05, + "loss": 0.6098523736000061, + "step": 3446 + }, + { + "epoch": 1.0078958912121656, + "grad_norm": 1.3529009112365886, + "learning_rate": 1.070045846603088e-05, + "loss": 0.49828749895095825, + "step": 3447 + }, + { + "epoch": 1.008188331627431, + "grad_norm": 1.6747165622943363, + "learning_rate": 1.0695635552422834e-05, + "loss": 0.5134999752044678, + "step": 3448 + }, + { + "epoch": 1.0084807720426963, + "grad_norm": 1.6379844761327287, + "learning_rate": 1.0690812476214209e-05, + "loss": 0.53546142578125, + "step": 3449 + }, + { + "epoch": 1.0087732124579616, + "grad_norm": 1.353591975524027, + "learning_rate": 1.0685989238532364e-05, + "loss": 0.4955276846885681, + "step": 3450 + }, + { + "epoch": 1.0090656528732271, + "grad_norm": 1.5308502126967132, + "learning_rate": 
1.0681165840504708e-05, + "loss": 0.5693827271461487, + "step": 3451 + }, + { + "epoch": 1.0093580932884925, + "grad_norm": 1.2544327118971752, + "learning_rate": 1.0676342283258676e-05, + "loss": 0.5023596286773682, + "step": 3452 + }, + { + "epoch": 1.0096505337037578, + "grad_norm": 1.4830383604575028, + "learning_rate": 1.0671518567921748e-05, + "loss": 0.5601100921630859, + "step": 3453 + }, + { + "epoch": 1.0099429741190233, + "grad_norm": 1.5483896672555095, + "learning_rate": 1.0666694695621438e-05, + "loss": 0.5744563341140747, + "step": 3454 + }, + { + "epoch": 1.0102354145342887, + "grad_norm": 1.2243241739970807, + "learning_rate": 1.0661870667485298e-05, + "loss": 0.531909704208374, + "step": 3455 + }, + { + "epoch": 1.010527854949554, + "grad_norm": 1.5063779223920848, + "learning_rate": 1.0657046484640911e-05, + "loss": 0.5737274885177612, + "step": 3456 + }, + { + "epoch": 1.0108202953648193, + "grad_norm": 1.3852723907754825, + "learning_rate": 1.0652222148215905e-05, + "loss": 0.5550329089164734, + "step": 3457 + }, + { + "epoch": 1.0111127357800849, + "grad_norm": 1.6139287553682227, + "learning_rate": 1.0647397659337936e-05, + "loss": 0.47795504331588745, + "step": 3458 + }, + { + "epoch": 1.0114051761953502, + "grad_norm": 1.4543285146976004, + "learning_rate": 1.0642573019134703e-05, + "loss": 0.6817550659179688, + "step": 3459 + }, + { + "epoch": 1.0116976166106155, + "grad_norm": 1.1722820118460164, + "learning_rate": 1.063774822873393e-05, + "loss": 0.45271044969558716, + "step": 3460 + }, + { + "epoch": 1.011990057025881, + "grad_norm": 1.537598582173988, + "learning_rate": 1.0632923289263389e-05, + "loss": 0.611709475517273, + "step": 3461 + }, + { + "epoch": 1.0122824974411464, + "grad_norm": 1.4188302760105698, + "learning_rate": 1.0628098201850876e-05, + "loss": 0.5101709961891174, + "step": 3462 + }, + { + "epoch": 1.0125749378564117, + "grad_norm": 1.433548611715836, + "learning_rate": 1.0623272967624227e-05, + "loss": 
0.6550514698028564, + "step": 3463 + }, + { + "epoch": 1.012867378271677, + "grad_norm": 1.2796248072280718, + "learning_rate": 1.0618447587711312e-05, + "loss": 0.479978084564209, + "step": 3464 + }, + { + "epoch": 1.0131598186869426, + "grad_norm": 1.5575466316491844, + "learning_rate": 1.0613622063240035e-05, + "loss": 0.5616719722747803, + "step": 3465 + }, + { + "epoch": 1.013452259102208, + "grad_norm": 1.5865800035698945, + "learning_rate": 1.060879639533833e-05, + "loss": 0.5160953998565674, + "step": 3466 + }, + { + "epoch": 1.0137446995174733, + "grad_norm": 1.5690447549246889, + "learning_rate": 1.0603970585134168e-05, + "loss": 0.6069898009300232, + "step": 3467 + }, + { + "epoch": 1.0140371399327388, + "grad_norm": 1.4806335128762829, + "learning_rate": 1.0599144633755555e-05, + "loss": 0.5800961256027222, + "step": 3468 + }, + { + "epoch": 1.0143295803480041, + "grad_norm": 1.2794607035027592, + "learning_rate": 1.0594318542330528e-05, + "loss": 0.5286555290222168, + "step": 3469 + }, + { + "epoch": 1.0146220207632695, + "grad_norm": 1.3098421389423984, + "learning_rate": 1.0589492311987157e-05, + "loss": 0.44960829615592957, + "step": 3470 + }, + { + "epoch": 1.0149144611785348, + "grad_norm": 1.787788159345536, + "learning_rate": 1.0584665943853538e-05, + "loss": 0.5799434781074524, + "step": 3471 + }, + { + "epoch": 1.0152069015938003, + "grad_norm": 1.3655057393381103, + "learning_rate": 1.057983943905781e-05, + "loss": 0.5142421126365662, + "step": 3472 + }, + { + "epoch": 1.0154993420090657, + "grad_norm": 1.3605211166498987, + "learning_rate": 1.0575012798728141e-05, + "loss": 0.5184981226921082, + "step": 3473 + }, + { + "epoch": 1.015791782424331, + "grad_norm": 1.6630390830837942, + "learning_rate": 1.0570186023992724e-05, + "loss": 0.5747173428535461, + "step": 3474 + }, + { + "epoch": 1.0160842228395965, + "grad_norm": 1.4307323575447104, + "learning_rate": 1.0565359115979792e-05, + "loss": 0.5994119644165039, + "step": 3475 + }, + { + 
"epoch": 1.0163766632548619, + "grad_norm": 1.4001969418816858, + "learning_rate": 1.0560532075817605e-05, + "loss": 0.5020599365234375, + "step": 3476 + }, + { + "epoch": 1.0166691036701272, + "grad_norm": 1.5266027572877992, + "learning_rate": 1.0555704904634451e-05, + "loss": 0.5023698806762695, + "step": 3477 + }, + { + "epoch": 1.0169615440853925, + "grad_norm": 1.3247610849347196, + "learning_rate": 1.0550877603558656e-05, + "loss": 0.3998676538467407, + "step": 3478 + }, + { + "epoch": 1.017253984500658, + "grad_norm": 1.2513443496343235, + "learning_rate": 1.0546050173718569e-05, + "loss": 0.5083760619163513, + "step": 3479 + }, + { + "epoch": 1.0175464249159234, + "grad_norm": 1.3684676716830397, + "learning_rate": 1.0541222616242575e-05, + "loss": 0.49840620160102844, + "step": 3480 + }, + { + "epoch": 1.0178388653311887, + "grad_norm": 1.3303553104888959, + "learning_rate": 1.0536394932259085e-05, + "loss": 0.5302960276603699, + "step": 3481 + }, + { + "epoch": 1.018131305746454, + "grad_norm": 1.338379797222235, + "learning_rate": 1.0531567122896543e-05, + "loss": 0.5694236755371094, + "step": 3482 + }, + { + "epoch": 1.0184237461617196, + "grad_norm": 1.4305833876226657, + "learning_rate": 1.0526739189283414e-05, + "loss": 0.5155326128005981, + "step": 3483 + }, + { + "epoch": 1.018716186576985, + "grad_norm": 1.3829306833852764, + "learning_rate": 1.0521911132548207e-05, + "loss": 0.6254806518554688, + "step": 3484 + }, + { + "epoch": 1.0190086269922503, + "grad_norm": 1.9177430357611984, + "learning_rate": 1.0517082953819442e-05, + "loss": 0.5623525977134705, + "step": 3485 + }, + { + "epoch": 1.0193010674075158, + "grad_norm": 1.67092732120196, + "learning_rate": 1.051225465422568e-05, + "loss": 0.6289865970611572, + "step": 3486 + }, + { + "epoch": 1.0195935078227811, + "grad_norm": 1.4045798370952283, + "learning_rate": 1.050742623489551e-05, + "loss": 0.5935345888137817, + "step": 3487 + }, + { + "epoch": 1.0198859482380465, + "grad_norm": 
1.696103524125264, + "learning_rate": 1.0502597696957542e-05, + "loss": 0.5223839282989502, + "step": 3488 + }, + { + "epoch": 1.0201783886533118, + "grad_norm": 1.9382869881093494, + "learning_rate": 1.0497769041540418e-05, + "loss": 0.6766373515129089, + "step": 3489 + }, + { + "epoch": 1.0204708290685773, + "grad_norm": 1.7017290392950901, + "learning_rate": 1.0492940269772806e-05, + "loss": 0.4934672713279724, + "step": 3490 + }, + { + "epoch": 1.0207632694838427, + "grad_norm": 1.345123127698455, + "learning_rate": 1.0488111382783403e-05, + "loss": 0.5207735300064087, + "step": 3491 + }, + { + "epoch": 1.021055709899108, + "grad_norm": 1.6293706929191067, + "learning_rate": 1.0483282381700933e-05, + "loss": 0.6090695261955261, + "step": 3492 + }, + { + "epoch": 1.0213481503143735, + "grad_norm": 1.2927953162345942, + "learning_rate": 1.0478453267654147e-05, + "loss": 0.5777665376663208, + "step": 3493 + }, + { + "epoch": 1.0216405907296389, + "grad_norm": 1.5951555841510592, + "learning_rate": 1.0473624041771814e-05, + "loss": 0.7241395711898804, + "step": 3494 + }, + { + "epoch": 1.0219330311449042, + "grad_norm": 1.4480767991556562, + "learning_rate": 1.0468794705182742e-05, + "loss": 0.45545506477355957, + "step": 3495 + }, + { + "epoch": 1.0222254715601695, + "grad_norm": 1.422698945534055, + "learning_rate": 1.0463965259015761e-05, + "loss": 0.5519885420799255, + "step": 3496 + }, + { + "epoch": 1.022517911975435, + "grad_norm": 1.509316262763282, + "learning_rate": 1.045913570439972e-05, + "loss": 0.558646559715271, + "step": 3497 + }, + { + "epoch": 1.0228103523907004, + "grad_norm": 1.4960690347564465, + "learning_rate": 1.0454306042463499e-05, + "loss": 0.5259999632835388, + "step": 3498 + }, + { + "epoch": 1.0231027928059657, + "grad_norm": 1.2679527875669403, + "learning_rate": 1.0449476274336004e-05, + "loss": 0.4711627960205078, + "step": 3499 + }, + { + "epoch": 1.0233952332212313, + "grad_norm": 1.5395810801486782, + "learning_rate": 
1.0444646401146161e-05, + "loss": 0.5893874168395996, + "step": 3500 + }, + { + "epoch": 1.0236876736364966, + "grad_norm": 1.498228532943397, + "learning_rate": 1.0439816424022926e-05, + "loss": 0.5596123933792114, + "step": 3501 + }, + { + "epoch": 1.023980114051762, + "grad_norm": 1.3706228388690522, + "learning_rate": 1.0434986344095276e-05, + "loss": 0.5228658318519592, + "step": 3502 + }, + { + "epoch": 1.0242725544670273, + "grad_norm": 1.3956010390337459, + "learning_rate": 1.0430156162492216e-05, + "loss": 0.5520567297935486, + "step": 3503 + }, + { + "epoch": 1.0245649948822928, + "grad_norm": 1.2988010194163804, + "learning_rate": 1.0425325880342762e-05, + "loss": 0.531911313533783, + "step": 3504 + }, + { + "epoch": 1.0248574352975581, + "grad_norm": 1.5296749459710133, + "learning_rate": 1.0420495498775974e-05, + "loss": 0.58717942237854, + "step": 3505 + }, + { + "epoch": 1.0251498757128235, + "grad_norm": 1.3937094974123596, + "learning_rate": 1.0415665018920919e-05, + "loss": 0.4972108006477356, + "step": 3506 + }, + { + "epoch": 1.025442316128089, + "grad_norm": 1.4653045497635373, + "learning_rate": 1.0410834441906692e-05, + "loss": 0.567977249622345, + "step": 3507 + }, + { + "epoch": 1.0257347565433543, + "grad_norm": 1.4984249963013099, + "learning_rate": 1.0406003768862416e-05, + "loss": 0.568755567073822, + "step": 3508 + }, + { + "epoch": 1.0260271969586197, + "grad_norm": 1.5140899451878516, + "learning_rate": 1.0401173000917224e-05, + "loss": 0.5668960809707642, + "step": 3509 + }, + { + "epoch": 1.026319637373885, + "grad_norm": 1.5737165138245863, + "learning_rate": 1.0396342139200282e-05, + "loss": 0.5956743955612183, + "step": 3510 + }, + { + "epoch": 1.0266120777891505, + "grad_norm": 1.3000472899601168, + "learning_rate": 1.0391511184840775e-05, + "loss": 0.5258834362030029, + "step": 3511 + }, + { + "epoch": 1.0269045182044159, + "grad_norm": 1.52676259543146, + "learning_rate": 1.038668013896791e-05, + "loss": 0.7358168363571167, + 
"step": 3512 + }, + { + "epoch": 1.0271969586196812, + "grad_norm": 1.6868440270891885, + "learning_rate": 1.0381849002710914e-05, + "loss": 0.5845209956169128, + "step": 3513 + }, + { + "epoch": 1.0274893990349467, + "grad_norm": 1.4837942506085555, + "learning_rate": 1.0377017777199034e-05, + "loss": 0.4475495219230652, + "step": 3514 + }, + { + "epoch": 1.027781839450212, + "grad_norm": 1.2830033919091985, + "learning_rate": 1.0372186463561542e-05, + "loss": 0.5555804371833801, + "step": 3515 + }, + { + "epoch": 1.0280742798654774, + "grad_norm": 1.65016913167245, + "learning_rate": 1.0367355062927726e-05, + "loss": 0.5927316546440125, + "step": 3516 + }, + { + "epoch": 1.0283667202807427, + "grad_norm": 1.3376999356667882, + "learning_rate": 1.0362523576426897e-05, + "loss": 0.47281715273857117, + "step": 3517 + }, + { + "epoch": 1.0286591606960083, + "grad_norm": 1.4195049172993812, + "learning_rate": 1.0357692005188387e-05, + "loss": 0.5275483727455139, + "step": 3518 + }, + { + "epoch": 1.0289516011112736, + "grad_norm": 1.6670234220228792, + "learning_rate": 1.0352860350341547e-05, + "loss": 0.5740839242935181, + "step": 3519 + }, + { + "epoch": 1.029244041526539, + "grad_norm": 1.3668449892598942, + "learning_rate": 1.0348028613015747e-05, + "loss": 0.6030054688453674, + "step": 3520 + }, + { + "epoch": 1.0295364819418042, + "grad_norm": 1.4423080423666719, + "learning_rate": 1.034319679434037e-05, + "loss": 0.5415347814559937, + "step": 3521 + }, + { + "epoch": 1.0298289223570698, + "grad_norm": 1.4756281264212951, + "learning_rate": 1.033836489544483e-05, + "loss": 0.5850083231925964, + "step": 3522 + }, + { + "epoch": 1.0301213627723351, + "grad_norm": 1.516707487989418, + "learning_rate": 1.0333532917458556e-05, + "loss": 0.47614163160324097, + "step": 3523 + }, + { + "epoch": 1.0304138031876005, + "grad_norm": 1.5357316287676814, + "learning_rate": 1.0328700861510987e-05, + "loss": 0.5645745992660522, + "step": 3524 + }, + { + "epoch": 
1.030706243602866, + "grad_norm": 1.3186548714848774, + "learning_rate": 1.0323868728731591e-05, + "loss": 0.5729008913040161, + "step": 3525 + }, + { + "epoch": 1.0309986840181313, + "grad_norm": 1.373781447264802, + "learning_rate": 1.031903652024985e-05, + "loss": 0.5177778005599976, + "step": 3526 + }, + { + "epoch": 1.0312911244333967, + "grad_norm": 1.390457184292636, + "learning_rate": 1.0314204237195263e-05, + "loss": 0.49413079023361206, + "step": 3527 + }, + { + "epoch": 1.031583564848662, + "grad_norm": 1.4789369230243037, + "learning_rate": 1.0309371880697342e-05, + "loss": 0.5074756145477295, + "step": 3528 + }, + { + "epoch": 1.0318760052639275, + "grad_norm": 1.590543948205407, + "learning_rate": 1.0304539451885629e-05, + "loss": 0.5601285696029663, + "step": 3529 + }, + { + "epoch": 1.0321684456791929, + "grad_norm": 1.3273904087281212, + "learning_rate": 1.029970695188967e-05, + "loss": 0.48358121514320374, + "step": 3530 + }, + { + "epoch": 1.0324608860944582, + "grad_norm": 1.4772927313727484, + "learning_rate": 1.0294874381839033e-05, + "loss": 0.4472161829471588, + "step": 3531 + }, + { + "epoch": 1.0327533265097237, + "grad_norm": 1.4129544794929634, + "learning_rate": 1.02900417428633e-05, + "loss": 0.6011627912521362, + "step": 3532 + }, + { + "epoch": 1.033045766924989, + "grad_norm": 1.354725840134447, + "learning_rate": 1.0285209036092076e-05, + "loss": 0.5212395191192627, + "step": 3533 + }, + { + "epoch": 1.0333382073402544, + "grad_norm": 1.844431950477259, + "learning_rate": 1.0280376262654971e-05, + "loss": 0.5433810949325562, + "step": 3534 + }, + { + "epoch": 1.0336306477555197, + "grad_norm": 1.4124385690995565, + "learning_rate": 1.0275543423681622e-05, + "loss": 0.5215464234352112, + "step": 3535 + }, + { + "epoch": 1.0339230881707853, + "grad_norm": 1.3386210311441036, + "learning_rate": 1.0270710520301672e-05, + "loss": 0.511099100112915, + "step": 3536 + }, + { + "epoch": 1.0342155285860506, + "grad_norm": 1.3822305233430652, 
+ "learning_rate": 1.0265877553644783e-05, + "loss": 0.4954407811164856, + "step": 3537 + }, + { + "epoch": 1.034507969001316, + "grad_norm": 1.5424734752588294, + "learning_rate": 1.0261044524840633e-05, + "loss": 0.5491081476211548, + "step": 3538 + }, + { + "epoch": 1.0348004094165815, + "grad_norm": 1.5108040554468096, + "learning_rate": 1.0256211435018912e-05, + "loss": 0.43202829360961914, + "step": 3539 + }, + { + "epoch": 1.0350928498318468, + "grad_norm": 1.5814180623509084, + "learning_rate": 1.0251378285309326e-05, + "loss": 0.4721212089061737, + "step": 3540 + }, + { + "epoch": 1.0353852902471121, + "grad_norm": 1.6070602892086314, + "learning_rate": 1.0246545076841596e-05, + "loss": 0.5621099472045898, + "step": 3541 + }, + { + "epoch": 1.0356777306623774, + "grad_norm": 1.5170284121136077, + "learning_rate": 1.0241711810745452e-05, + "loss": 0.5572346448898315, + "step": 3542 + }, + { + "epoch": 1.035970171077643, + "grad_norm": 1.3590672633285579, + "learning_rate": 1.023687848815064e-05, + "loss": 0.40916550159454346, + "step": 3543 + }, + { + "epoch": 1.0362626114929083, + "grad_norm": 1.5018716604616227, + "learning_rate": 1.0232045110186926e-05, + "loss": 0.5370572805404663, + "step": 3544 + }, + { + "epoch": 1.0365550519081737, + "grad_norm": 1.603253593979403, + "learning_rate": 1.0227211677984074e-05, + "loss": 0.5381634831428528, + "step": 3545 + }, + { + "epoch": 1.0368474923234392, + "grad_norm": 1.3795492267662186, + "learning_rate": 1.0222378192671878e-05, + "loss": 0.4807749092578888, + "step": 3546 + }, + { + "epoch": 1.0371399327387045, + "grad_norm": 1.4973562396665303, + "learning_rate": 1.0217544655380129e-05, + "loss": 0.5673447847366333, + "step": 3547 + }, + { + "epoch": 1.0374323731539699, + "grad_norm": 1.6360254172890698, + "learning_rate": 1.0212711067238639e-05, + "loss": 0.5259549021720886, + "step": 3548 + }, + { + "epoch": 1.0377248135692352, + "grad_norm": 1.4439961362376934, + "learning_rate": 1.0207877429377232e-05, + 
"loss": 0.48267534375190735, + "step": 3549 + }, + { + "epoch": 1.0380172539845007, + "grad_norm": 1.438603988067733, + "learning_rate": 1.0203043742925738e-05, + "loss": 0.44843387603759766, + "step": 3550 + }, + { + "epoch": 1.038309694399766, + "grad_norm": 1.5765887333733293, + "learning_rate": 1.0198210009014005e-05, + "loss": 0.8050575256347656, + "step": 3551 + }, + { + "epoch": 1.0386021348150314, + "grad_norm": 1.3559927051954717, + "learning_rate": 1.0193376228771887e-05, + "loss": 0.590203046798706, + "step": 3552 + }, + { + "epoch": 1.0388945752302967, + "grad_norm": 1.4420953878245995, + "learning_rate": 1.0188542403329252e-05, + "loss": 0.5974458456039429, + "step": 3553 + }, + { + "epoch": 1.0391870156455623, + "grad_norm": 1.4408311686918343, + "learning_rate": 1.0183708533815975e-05, + "loss": 0.4628743827342987, + "step": 3554 + }, + { + "epoch": 1.0394794560608276, + "grad_norm": 1.538902326182442, + "learning_rate": 1.0178874621361944e-05, + "loss": 0.6738137006759644, + "step": 3555 + }, + { + "epoch": 1.039771896476093, + "grad_norm": 1.2584091446339778, + "learning_rate": 1.0174040667097061e-05, + "loss": 0.48062413930892944, + "step": 3556 + }, + { + "epoch": 1.0400643368913585, + "grad_norm": 1.4180020858721523, + "learning_rate": 1.016920667215123e-05, + "loss": 0.564401388168335, + "step": 3557 + }, + { + "epoch": 1.0403567773066238, + "grad_norm": 1.5220611788966263, + "learning_rate": 1.0164372637654367e-05, + "loss": 0.4035246968269348, + "step": 3558 + }, + { + "epoch": 1.0406492177218891, + "grad_norm": 1.3759176374876299, + "learning_rate": 1.0159538564736399e-05, + "loss": 0.4484536051750183, + "step": 3559 + }, + { + "epoch": 1.0409416581371547, + "grad_norm": 1.5320485493087415, + "learning_rate": 1.0154704454527265e-05, + "loss": 0.6257200837135315, + "step": 3560 + }, + { + "epoch": 1.04123409855242, + "grad_norm": 1.7250809702027206, + "learning_rate": 1.0149870308156899e-05, + "loss": 0.5541477799415588, + "step": 3561 + }, + 
{ + "epoch": 1.0415265389676853, + "grad_norm": 1.5360272319586679, + "learning_rate": 1.0145036126755264e-05, + "loss": 0.6248821020126343, + "step": 3562 + }, + { + "epoch": 1.0418189793829506, + "grad_norm": 1.3930925306710389, + "learning_rate": 1.0140201911452318e-05, + "loss": 0.574689507484436, + "step": 3563 + }, + { + "epoch": 1.0421114197982162, + "grad_norm": 1.45907196010364, + "learning_rate": 1.0135367663378025e-05, + "loss": 0.5873313546180725, + "step": 3564 + }, + { + "epoch": 1.0424038602134815, + "grad_norm": 1.7911480245961826, + "learning_rate": 1.0130533383662361e-05, + "loss": 0.6662088632583618, + "step": 3565 + }, + { + "epoch": 1.0426963006287469, + "grad_norm": 1.688392121046196, + "learning_rate": 1.0125699073435316e-05, + "loss": 0.6517773866653442, + "step": 3566 + }, + { + "epoch": 1.0429887410440122, + "grad_norm": 1.8273298961737783, + "learning_rate": 1.0120864733826877e-05, + "loss": 0.6311444640159607, + "step": 3567 + }, + { + "epoch": 1.0432811814592777, + "grad_norm": 1.4367651958960501, + "learning_rate": 1.0116030365967037e-05, + "loss": 0.49060457944869995, + "step": 3568 + }, + { + "epoch": 1.043573621874543, + "grad_norm": 1.609897253932932, + "learning_rate": 1.0111195970985813e-05, + "loss": 0.5405893921852112, + "step": 3569 + }, + { + "epoch": 1.0438660622898084, + "grad_norm": 1.4830806836977097, + "learning_rate": 1.01063615500132e-05, + "loss": 0.482162743806839, + "step": 3570 + }, + { + "epoch": 1.044158502705074, + "grad_norm": 1.4107369824500982, + "learning_rate": 1.0101527104179224e-05, + "loss": 0.4542362093925476, + "step": 3571 + }, + { + "epoch": 1.0444509431203393, + "grad_norm": 1.5628480243599212, + "learning_rate": 1.00966926346139e-05, + "loss": 0.6157265305519104, + "step": 3572 + }, + { + "epoch": 1.0447433835356046, + "grad_norm": 1.6143915430154057, + "learning_rate": 1.0091858142447266e-05, + "loss": 0.6591875553131104, + "step": 3573 + }, + { + "epoch": 1.04503582395087, + "grad_norm": 
1.410506710976703, + "learning_rate": 1.0087023628809347e-05, + "loss": 0.5686256885528564, + "step": 3574 + }, + { + "epoch": 1.0453282643661355, + "grad_norm": 1.2971662039691743, + "learning_rate": 1.0082189094830183e-05, + "loss": 0.45131799578666687, + "step": 3575 + }, + { + "epoch": 1.0456207047814008, + "grad_norm": 1.6508365467694242, + "learning_rate": 1.0077354541639821e-05, + "loss": 0.5787829160690308, + "step": 3576 + }, + { + "epoch": 1.0459131451966661, + "grad_norm": 1.6915833775625508, + "learning_rate": 1.0072519970368303e-05, + "loss": 0.5755574107170105, + "step": 3577 + }, + { + "epoch": 1.0462055856119317, + "grad_norm": 1.4591194150184388, + "learning_rate": 1.0067685382145683e-05, + "loss": 0.5017693638801575, + "step": 3578 + }, + { + "epoch": 1.046498026027197, + "grad_norm": 1.508478769597254, + "learning_rate": 1.0062850778102017e-05, + "loss": 0.5096016526222229, + "step": 3579 + }, + { + "epoch": 1.0467904664424623, + "grad_norm": 1.443966956114079, + "learning_rate": 1.0058016159367365e-05, + "loss": 0.4988967180252075, + "step": 3580 + }, + { + "epoch": 1.0470829068577276, + "grad_norm": 1.5186890104543016, + "learning_rate": 1.0053181527071786e-05, + "loss": 0.5410172939300537, + "step": 3581 + }, + { + "epoch": 1.0473753472729932, + "grad_norm": 1.7546625585964495, + "learning_rate": 1.004834688234535e-05, + "loss": 0.5980710983276367, + "step": 3582 + }, + { + "epoch": 1.0476677876882585, + "grad_norm": 1.347751797857706, + "learning_rate": 1.0043512226318124e-05, + "loss": 0.4737449586391449, + "step": 3583 + }, + { + "epoch": 1.0479602281035238, + "grad_norm": 1.5493397390355739, + "learning_rate": 1.003867756012018e-05, + "loss": 0.6106469631195068, + "step": 3584 + }, + { + "epoch": 1.0482526685187894, + "grad_norm": 1.6077524420960543, + "learning_rate": 1.0033842884881593e-05, + "loss": 0.48002901673316956, + "step": 3585 + }, + { + "epoch": 1.0485451089340547, + "grad_norm": 1.4065529576638647, + "learning_rate": 
1.0029008201732433e-05, + "loss": 0.5101731419563293, + "step": 3586 + }, + { + "epoch": 1.04883754934932, + "grad_norm": 1.6961382740739117, + "learning_rate": 1.0024173511802786e-05, + "loss": 0.6350706219673157, + "step": 3587 + }, + { + "epoch": 1.0491299897645854, + "grad_norm": 1.4947432010936612, + "learning_rate": 1.0019338816222725e-05, + "loss": 0.5268979072570801, + "step": 3588 + }, + { + "epoch": 1.049422430179851, + "grad_norm": 1.4955724361545546, + "learning_rate": 1.0014504116122335e-05, + "loss": 0.5670457482337952, + "step": 3589 + }, + { + "epoch": 1.0497148705951163, + "grad_norm": 1.7472274991386971, + "learning_rate": 1.0009669412631697e-05, + "loss": 0.6200711727142334, + "step": 3590 + }, + { + "epoch": 1.0500073110103816, + "grad_norm": 1.5117580085419962, + "learning_rate": 1.0004834706880891e-05, + "loss": 0.44014686346054077, + "step": 3591 + }, + { + "epoch": 1.050299751425647, + "grad_norm": 1.4806608082423456, + "learning_rate": 1e-05, + "loss": 0.4690900146961212, + "step": 3592 + }, + { + "epoch": 1.0505921918409125, + "grad_norm": 1.5061085663062508, + "learning_rate": 9.995165293119112e-06, + "loss": 0.5791969299316406, + "step": 3593 + }, + { + "epoch": 1.0508846322561778, + "grad_norm": 1.403652610849375, + "learning_rate": 9.990330587368306e-06, + "loss": 0.5566244125366211, + "step": 3594 + }, + { + "epoch": 1.0511770726714431, + "grad_norm": 1.47068511144412, + "learning_rate": 9.985495883877668e-06, + "loss": 0.5201646685600281, + "step": 3595 + }, + { + "epoch": 1.0514695130867087, + "grad_norm": 1.3147681531847344, + "learning_rate": 9.980661183777277e-06, + "loss": 0.44774526357650757, + "step": 3596 + }, + { + "epoch": 1.051761953501974, + "grad_norm": 1.641682032458417, + "learning_rate": 9.975826488197217e-06, + "loss": 0.5346901416778564, + "step": 3597 + }, + { + "epoch": 1.0520543939172393, + "grad_norm": 1.516503297952313, + "learning_rate": 9.970991798267568e-06, + "loss": 0.4639764428138733, + "step": 3598 + }, 
+ { + "epoch": 1.0523468343325049, + "grad_norm": 1.5385061459553095, + "learning_rate": 9.966157115118412e-06, + "loss": 0.5505763292312622, + "step": 3599 + }, + { + "epoch": 1.0526392747477702, + "grad_norm": 1.5065604638146801, + "learning_rate": 9.961322439879821e-06, + "loss": 0.5187631845474243, + "step": 3600 + }, + { + "epoch": 1.0529317151630355, + "grad_norm": 1.5837365707911437, + "learning_rate": 9.95648777368188e-06, + "loss": 0.5990081429481506, + "step": 3601 + }, + { + "epoch": 1.0532241555783008, + "grad_norm": 1.5943954940503307, + "learning_rate": 9.951653117654653e-06, + "loss": 0.5926306843757629, + "step": 3602 + }, + { + "epoch": 1.0535165959935664, + "grad_norm": 1.5828616151591308, + "learning_rate": 9.946818472928215e-06, + "loss": 0.5294582843780518, + "step": 3603 + }, + { + "epoch": 1.0538090364088317, + "grad_norm": 1.4492789926079117, + "learning_rate": 9.941983840632637e-06, + "loss": 0.5442140102386475, + "step": 3604 + }, + { + "epoch": 1.054101476824097, + "grad_norm": 1.5960181258924353, + "learning_rate": 9.937149221897984e-06, + "loss": 0.5888028740882874, + "step": 3605 + }, + { + "epoch": 1.0543939172393624, + "grad_norm": 1.6823030520405429, + "learning_rate": 9.93231461785432e-06, + "loss": 0.7545796632766724, + "step": 3606 + }, + { + "epoch": 1.054686357654628, + "grad_norm": 1.4193397986001617, + "learning_rate": 9.9274800296317e-06, + "loss": 0.4850383996963501, + "step": 3607 + }, + { + "epoch": 1.0549787980698933, + "grad_norm": 1.7761903590602732, + "learning_rate": 9.922645458360182e-06, + "loss": 0.5658243894577026, + "step": 3608 + }, + { + "epoch": 1.0552712384851586, + "grad_norm": 1.913627443584159, + "learning_rate": 9.917810905169818e-06, + "loss": 0.6526712775230408, + "step": 3609 + }, + { + "epoch": 1.0555636789004241, + "grad_norm": 1.7132894383948376, + "learning_rate": 9.912976371190657e-06, + "loss": 0.6125987768173218, + "step": 3610 + }, + { + "epoch": 1.0558561193156895, + "grad_norm": 
1.3139938490016692, + "learning_rate": 9.908141857552737e-06, + "loss": 0.40159785747528076, + "step": 3611 + }, + { + "epoch": 1.0561485597309548, + "grad_norm": 1.7052081125083998, + "learning_rate": 9.903307365386103e-06, + "loss": 0.6628924608230591, + "step": 3612 + }, + { + "epoch": 1.05644100014622, + "grad_norm": 1.638888923278887, + "learning_rate": 9.898472895820783e-06, + "loss": 0.6083816289901733, + "step": 3613 + }, + { + "epoch": 1.0567334405614857, + "grad_norm": 1.564812875636552, + "learning_rate": 9.893638449986806e-06, + "loss": 0.5349488854408264, + "step": 3614 + }, + { + "epoch": 1.057025880976751, + "grad_norm": 1.5340813216184335, + "learning_rate": 9.888804029014194e-06, + "loss": 0.6119222044944763, + "step": 3615 + }, + { + "epoch": 1.0573183213920163, + "grad_norm": 1.367693459120948, + "learning_rate": 9.883969634032964e-06, + "loss": 0.531359851360321, + "step": 3616 + }, + { + "epoch": 1.0576107618072819, + "grad_norm": 1.6344237981695606, + "learning_rate": 9.879135266173127e-06, + "loss": 0.6604791879653931, + "step": 3617 + }, + { + "epoch": 1.0579032022225472, + "grad_norm": 1.4352324880813543, + "learning_rate": 9.874300926564689e-06, + "loss": 0.4691445231437683, + "step": 3618 + }, + { + "epoch": 1.0581956426378125, + "grad_norm": 1.2910646539258182, + "learning_rate": 9.869466616337642e-06, + "loss": 0.5690087080001831, + "step": 3619 + }, + { + "epoch": 1.0584880830530778, + "grad_norm": 1.403700057828388, + "learning_rate": 9.86463233662198e-06, + "loss": 0.5426729917526245, + "step": 3620 + }, + { + "epoch": 1.0587805234683434, + "grad_norm": 1.578075476325045, + "learning_rate": 9.859798088547687e-06, + "loss": 0.5640411376953125, + "step": 3621 + }, + { + "epoch": 1.0590729638836087, + "grad_norm": 1.4838032713556162, + "learning_rate": 9.854963873244738e-06, + "loss": 0.6724091172218323, + "step": 3622 + }, + { + "epoch": 1.059365404298874, + "grad_norm": 1.4145337335983883, + "learning_rate": 9.850129691843105e-06, + 
"loss": 0.5448887348175049, + "step": 3623 + }, + { + "epoch": 1.0596578447141396, + "grad_norm": 1.5190623574509117, + "learning_rate": 9.845295545472742e-06, + "loss": 0.5555344820022583, + "step": 3624 + }, + { + "epoch": 1.059950285129405, + "grad_norm": 1.6879154347320564, + "learning_rate": 9.840461435263604e-06, + "loss": 0.5053969621658325, + "step": 3625 + }, + { + "epoch": 1.0602427255446703, + "grad_norm": 1.5675488432589333, + "learning_rate": 9.835627362345636e-06, + "loss": 0.5866390466690063, + "step": 3626 + }, + { + "epoch": 1.0605351659599356, + "grad_norm": 1.81247497722172, + "learning_rate": 9.830793327848773e-06, + "loss": 0.5936717987060547, + "step": 3627 + }, + { + "epoch": 1.0608276063752011, + "grad_norm": 1.5536122437945554, + "learning_rate": 9.82595933290294e-06, + "loss": 0.6009070873260498, + "step": 3628 + }, + { + "epoch": 1.0611200467904665, + "grad_norm": 1.588445125911092, + "learning_rate": 9.821125378638059e-06, + "loss": 0.5361435413360596, + "step": 3629 + }, + { + "epoch": 1.0614124872057318, + "grad_norm": 1.4856331412797505, + "learning_rate": 9.816291466184025e-06, + "loss": 0.5763939619064331, + "step": 3630 + }, + { + "epoch": 1.061704927620997, + "grad_norm": 1.618308780160016, + "learning_rate": 9.81145759667075e-06, + "loss": 0.57512366771698, + "step": 3631 + }, + { + "epoch": 1.0619973680362627, + "grad_norm": 1.4990484363196022, + "learning_rate": 9.806623771228115e-06, + "loss": 0.6144367456436157, + "step": 3632 + }, + { + "epoch": 1.062289808451528, + "grad_norm": 1.5222649609215075, + "learning_rate": 9.801789990985997e-06, + "loss": 0.5715698003768921, + "step": 3633 + }, + { + "epoch": 1.0625822488667933, + "grad_norm": 1.3438421364889925, + "learning_rate": 9.796956257074263e-06, + "loss": 0.632681131362915, + "step": 3634 + }, + { + "epoch": 1.0628746892820589, + "grad_norm": 1.2996961363437054, + "learning_rate": 9.79212257062277e-06, + "loss": 0.5362547636032104, + "step": 3635 + }, + { + "epoch": 
1.0631671296973242, + "grad_norm": 1.2451948790215157, + "learning_rate": 9.787288932761361e-06, + "loss": 0.553846538066864, + "step": 3636 + }, + { + "epoch": 1.0634595701125895, + "grad_norm": 2.0033616068213456, + "learning_rate": 9.782455344619871e-06, + "loss": 0.7200362682342529, + "step": 3637 + }, + { + "epoch": 1.063752010527855, + "grad_norm": 1.5986858016901493, + "learning_rate": 9.777621807328126e-06, + "loss": 0.5544596910476685, + "step": 3638 + }, + { + "epoch": 1.0640444509431204, + "grad_norm": 1.9336329750915207, + "learning_rate": 9.772788322015926e-06, + "loss": 0.687321126461029, + "step": 3639 + }, + { + "epoch": 1.0643368913583857, + "grad_norm": 1.4658162923896687, + "learning_rate": 9.767954889813076e-06, + "loss": 0.4986167550086975, + "step": 3640 + }, + { + "epoch": 1.064629331773651, + "grad_norm": 1.6835767903522258, + "learning_rate": 9.763121511849358e-06, + "loss": 0.5021307468414307, + "step": 3641 + }, + { + "epoch": 1.0649217721889166, + "grad_norm": 1.6084332451713093, + "learning_rate": 9.758288189254548e-06, + "loss": 0.5542711019515991, + "step": 3642 + }, + { + "epoch": 1.065214212604182, + "grad_norm": 1.4567212868909125, + "learning_rate": 9.753454923158407e-06, + "loss": 0.5161126852035522, + "step": 3643 + }, + { + "epoch": 1.0655066530194472, + "grad_norm": 1.3588587385016027, + "learning_rate": 9.748621714690674e-06, + "loss": 0.6041361093521118, + "step": 3644 + }, + { + "epoch": 1.0657990934347126, + "grad_norm": 1.5312936542968558, + "learning_rate": 9.74378856498109e-06, + "loss": 0.5252672433853149, + "step": 3645 + }, + { + "epoch": 1.0660915338499781, + "grad_norm": 1.508976518247356, + "learning_rate": 9.738955475159369e-06, + "loss": 0.5198208093643188, + "step": 3646 + }, + { + "epoch": 1.0663839742652435, + "grad_norm": 1.617831688267231, + "learning_rate": 9.734122446355219e-06, + "loss": 0.5547968149185181, + "step": 3647 + }, + { + "epoch": 1.0666764146805088, + "grad_norm": 1.3192996989880752, + 
"learning_rate": 9.72928947969833e-06, + "loss": 0.5854370594024658, + "step": 3648 + }, + { + "epoch": 1.0669688550957743, + "grad_norm": 1.4612935433441103, + "learning_rate": 9.724456576318383e-06, + "loss": 0.5199173092842102, + "step": 3649 + }, + { + "epoch": 1.0672612955110397, + "grad_norm": 1.5597306303032106, + "learning_rate": 9.71962373734503e-06, + "loss": 0.49684566259384155, + "step": 3650 + }, + { + "epoch": 1.067553735926305, + "grad_norm": 1.5081407431370675, + "learning_rate": 9.714790963907927e-06, + "loss": 0.593805193901062, + "step": 3651 + }, + { + "epoch": 1.0678461763415703, + "grad_norm": 1.6501383657240702, + "learning_rate": 9.7099582571367e-06, + "loss": 0.5524622201919556, + "step": 3652 + }, + { + "epoch": 1.0681386167568359, + "grad_norm": 1.589706723326761, + "learning_rate": 9.70512561816097e-06, + "loss": 0.5796955227851868, + "step": 3653 + }, + { + "epoch": 1.0684310571721012, + "grad_norm": 1.6252059263075247, + "learning_rate": 9.700293048110335e-06, + "loss": 0.5470535159111023, + "step": 3654 + }, + { + "epoch": 1.0687234975873665, + "grad_norm": 1.180447413588476, + "learning_rate": 9.695460548114374e-06, + "loss": 0.5438790321350098, + "step": 3655 + }, + { + "epoch": 1.069015938002632, + "grad_norm": 1.5271792603913512, + "learning_rate": 9.69062811930266e-06, + "loss": 0.6324823498725891, + "step": 3656 + }, + { + "epoch": 1.0693083784178974, + "grad_norm": 1.5347219744388463, + "learning_rate": 9.68579576280474e-06, + "loss": 0.5261266231536865, + "step": 3657 + }, + { + "epoch": 1.0696008188331627, + "grad_norm": 1.408009396375569, + "learning_rate": 9.680963479750152e-06, + "loss": 0.49827292561531067, + "step": 3658 + }, + { + "epoch": 1.069893259248428, + "grad_norm": 1.8715423798930795, + "learning_rate": 9.67613127126841e-06, + "loss": 0.5273935794830322, + "step": 3659 + }, + { + "epoch": 1.0701856996636936, + "grad_norm": 1.5578682729768194, + "learning_rate": 9.671299138489017e-06, + "loss": 
0.5816709995269775, + "step": 3660 + }, + { + "epoch": 1.070478140078959, + "grad_norm": 1.7016426471813102, + "learning_rate": 9.66646708254145e-06, + "loss": 0.5591616630554199, + "step": 3661 + }, + { + "epoch": 1.0707705804942242, + "grad_norm": 1.5738449439513973, + "learning_rate": 9.661635104555172e-06, + "loss": 0.581566572189331, + "step": 3662 + }, + { + "epoch": 1.0710630209094898, + "grad_norm": 1.5518333497561696, + "learning_rate": 9.656803205659632e-06, + "loss": 0.5339047312736511, + "step": 3663 + }, + { + "epoch": 1.0713554613247551, + "grad_norm": 1.6271916881343873, + "learning_rate": 9.651971386984258e-06, + "loss": 0.5200103521347046, + "step": 3664 + }, + { + "epoch": 1.0716479017400204, + "grad_norm": 1.6521270716003156, + "learning_rate": 9.647139649658454e-06, + "loss": 0.7201805114746094, + "step": 3665 + }, + { + "epoch": 1.0719403421552858, + "grad_norm": 1.534541270100013, + "learning_rate": 9.642307994811614e-06, + "loss": 0.4801551103591919, + "step": 3666 + }, + { + "epoch": 1.0722327825705513, + "grad_norm": 1.5215862158184845, + "learning_rate": 9.637476423573106e-06, + "loss": 0.5809728503227234, + "step": 3667 + }, + { + "epoch": 1.0725252229858167, + "grad_norm": 1.6423129831570165, + "learning_rate": 9.632644937072277e-06, + "loss": 0.6493573188781738, + "step": 3668 + }, + { + "epoch": 1.072817663401082, + "grad_norm": 1.5984538738730298, + "learning_rate": 9.627813536438461e-06, + "loss": 0.5858349800109863, + "step": 3669 + }, + { + "epoch": 1.0731101038163473, + "grad_norm": 1.5154205099747375, + "learning_rate": 9.622982222800968e-06, + "loss": 0.604835033416748, + "step": 3670 + }, + { + "epoch": 1.0734025442316129, + "grad_norm": 1.6814842296922758, + "learning_rate": 9.618150997289091e-06, + "loss": 0.6168441772460938, + "step": 3671 + }, + { + "epoch": 1.0736949846468782, + "grad_norm": 1.4221905571438933, + "learning_rate": 9.613319861032093e-06, + "loss": 0.5297094583511353, + "step": 3672 + }, + { + "epoch": 
1.0739874250621435, + "grad_norm": 1.4440813284349416, + "learning_rate": 9.608488815159226e-06, + "loss": 0.513571560382843, + "step": 3673 + }, + { + "epoch": 1.074279865477409, + "grad_norm": 1.4202335692197015, + "learning_rate": 9.603657860799721e-06, + "loss": 0.4383837580680847, + "step": 3674 + }, + { + "epoch": 1.0745723058926744, + "grad_norm": 1.660966167075539, + "learning_rate": 9.59882699908278e-06, + "loss": 0.5428420305252075, + "step": 3675 + }, + { + "epoch": 1.0748647463079397, + "grad_norm": 1.331252403406651, + "learning_rate": 9.593996231137587e-06, + "loss": 0.5193662047386169, + "step": 3676 + }, + { + "epoch": 1.0751571867232053, + "grad_norm": 1.1890998376752542, + "learning_rate": 9.589165558093311e-06, + "loss": 0.47949904203414917, + "step": 3677 + }, + { + "epoch": 1.0754496271384706, + "grad_norm": 1.4440336102087743, + "learning_rate": 9.584334981079085e-06, + "loss": 0.5092326402664185, + "step": 3678 + }, + { + "epoch": 1.075742067553736, + "grad_norm": 1.642845621448486, + "learning_rate": 9.579504501224028e-06, + "loss": 0.6627280712127686, + "step": 3679 + }, + { + "epoch": 1.0760345079690012, + "grad_norm": 1.4633415466571795, + "learning_rate": 9.57467411965724e-06, + "loss": 0.45087775588035583, + "step": 3680 + }, + { + "epoch": 1.0763269483842668, + "grad_norm": 1.5441336288481917, + "learning_rate": 9.569843837507788e-06, + "loss": 0.5745380520820618, + "step": 3681 + }, + { + "epoch": 1.0766193887995321, + "grad_norm": 1.4663672637613454, + "learning_rate": 9.565013655904728e-06, + "loss": 0.4410436749458313, + "step": 3682 + }, + { + "epoch": 1.0769118292147974, + "grad_norm": 1.5197962338342057, + "learning_rate": 9.560183575977079e-06, + "loss": 0.4991244375705719, + "step": 3683 + }, + { + "epoch": 1.0772042696300628, + "grad_norm": 1.760205368894331, + "learning_rate": 9.555353598853842e-06, + "loss": 0.6316145658493042, + "step": 3684 + }, + { + "epoch": 1.0774967100453283, + "grad_norm": 1.7400994246729, + 
"learning_rate": 9.550523725664e-06, + "loss": 0.5593908429145813, + "step": 3685 + }, + { + "epoch": 1.0777891504605936, + "grad_norm": 1.360696277932948, + "learning_rate": 9.545693957536503e-06, + "loss": 0.5491319894790649, + "step": 3686 + }, + { + "epoch": 1.078081590875859, + "grad_norm": 1.6733496726210937, + "learning_rate": 9.540864295600282e-06, + "loss": 0.6299821138381958, + "step": 3687 + }, + { + "epoch": 1.0783740312911245, + "grad_norm": 1.584478567774571, + "learning_rate": 9.536034740984244e-06, + "loss": 0.5673841238021851, + "step": 3688 + }, + { + "epoch": 1.0786664717063899, + "grad_norm": 1.2029070866459273, + "learning_rate": 9.53120529481726e-06, + "loss": 0.45966464281082153, + "step": 3689 + }, + { + "epoch": 1.0789589121216552, + "grad_norm": 1.5763188044346095, + "learning_rate": 9.526375958228191e-06, + "loss": 0.5831631422042847, + "step": 3690 + }, + { + "epoch": 1.0792513525369205, + "grad_norm": 1.6299976133727174, + "learning_rate": 9.52154673234586e-06, + "loss": 0.5456256866455078, + "step": 3691 + }, + { + "epoch": 1.079543792952186, + "grad_norm": 1.4868906970264604, + "learning_rate": 9.516717618299069e-06, + "loss": 0.46428292989730835, + "step": 3692 + }, + { + "epoch": 1.0798362333674514, + "grad_norm": 1.4498481381133475, + "learning_rate": 9.511888617216602e-06, + "loss": 0.47320839762687683, + "step": 3693 + }, + { + "epoch": 1.0801286737827167, + "grad_norm": 1.4932376641022789, + "learning_rate": 9.507059730227199e-06, + "loss": 0.5205492973327637, + "step": 3694 + }, + { + "epoch": 1.0804211141979823, + "grad_norm": 1.631704411581211, + "learning_rate": 9.502230958459587e-06, + "loss": 0.42696553468704224, + "step": 3695 + }, + { + "epoch": 1.0807135546132476, + "grad_norm": 1.5001123816983175, + "learning_rate": 9.497402303042463e-06, + "loss": 0.5147116780281067, + "step": 3696 + }, + { + "epoch": 1.081005995028513, + "grad_norm": 1.38029323867701, + "learning_rate": 9.492573765104494e-06, + "loss": 
0.5080294609069824, + "step": 3697 + }, + { + "epoch": 1.0812984354437782, + "grad_norm": 1.6652094239637947, + "learning_rate": 9.487745345774323e-06, + "loss": 0.6228866577148438, + "step": 3698 + }, + { + "epoch": 1.0815908758590438, + "grad_norm": 1.5822778586922481, + "learning_rate": 9.482917046180563e-06, + "loss": 0.5560915470123291, + "step": 3699 + }, + { + "epoch": 1.0818833162743091, + "grad_norm": 1.5535091238731367, + "learning_rate": 9.4780888674518e-06, + "loss": 0.5245859622955322, + "step": 3700 + }, + { + "epoch": 1.0821757566895744, + "grad_norm": 1.5051094804368905, + "learning_rate": 9.47326081071659e-06, + "loss": 0.6462790966033936, + "step": 3701 + }, + { + "epoch": 1.08246819710484, + "grad_norm": 1.5924758840128848, + "learning_rate": 9.468432877103462e-06, + "loss": 0.5196692943572998, + "step": 3702 + }, + { + "epoch": 1.0827606375201053, + "grad_norm": 1.7568328506180717, + "learning_rate": 9.463605067740917e-06, + "loss": 0.5487779974937439, + "step": 3703 + }, + { + "epoch": 1.0830530779353706, + "grad_norm": 1.6433197945872438, + "learning_rate": 9.458777383757428e-06, + "loss": 0.5471592545509338, + "step": 3704 + }, + { + "epoch": 1.083345518350636, + "grad_norm": 1.7295248979937683, + "learning_rate": 9.453949826281436e-06, + "loss": 0.6927378177642822, + "step": 3705 + }, + { + "epoch": 1.0836379587659015, + "grad_norm": 1.645450906929874, + "learning_rate": 9.449122396441344e-06, + "loss": 0.569003164768219, + "step": 3706 + }, + { + "epoch": 1.0839303991811668, + "grad_norm": 1.5204128580175535, + "learning_rate": 9.444295095365549e-06, + "loss": 0.5655964612960815, + "step": 3707 + }, + { + "epoch": 1.0842228395964322, + "grad_norm": 1.5653417821245283, + "learning_rate": 9.439467924182397e-06, + "loss": 0.6223032474517822, + "step": 3708 + }, + { + "epoch": 1.0845152800116975, + "grad_norm": 1.8058201614843348, + "learning_rate": 9.43464088402021e-06, + "loss": 0.6553555727005005, + "step": 3709 + }, + { + "epoch": 
1.084807720426963, + "grad_norm": 1.7065419655088354, + "learning_rate": 9.429813976007277e-06, + "loss": 0.534509539604187, + "step": 3710 + }, + { + "epoch": 1.0851001608422284, + "grad_norm": 1.7341944929762452, + "learning_rate": 9.42498720127186e-06, + "loss": 0.5801417827606201, + "step": 3711 + }, + { + "epoch": 1.0853926012574937, + "grad_norm": 1.4311879630985456, + "learning_rate": 9.42016056094219e-06, + "loss": 0.47260361909866333, + "step": 3712 + }, + { + "epoch": 1.0856850416727593, + "grad_norm": 1.5640804855296242, + "learning_rate": 9.415334056146464e-06, + "loss": 0.5924841165542603, + "step": 3713 + }, + { + "epoch": 1.0859774820880246, + "grad_norm": 1.7346051575584198, + "learning_rate": 9.410507688012847e-06, + "loss": 0.6029725074768066, + "step": 3714 + }, + { + "epoch": 1.08626992250329, + "grad_norm": 1.6762909361099274, + "learning_rate": 9.405681457669472e-06, + "loss": 0.5838413834571838, + "step": 3715 + }, + { + "epoch": 1.0865623629185555, + "grad_norm": 1.277586165055191, + "learning_rate": 9.400855366244445e-06, + "loss": 0.4739546775817871, + "step": 3716 + }, + { + "epoch": 1.0868548033338208, + "grad_norm": 1.5391172094714582, + "learning_rate": 9.396029414865832e-06, + "loss": 0.4870055913925171, + "step": 3717 + }, + { + "epoch": 1.0871472437490861, + "grad_norm": 1.4254039758246118, + "learning_rate": 9.39120360466167e-06, + "loss": 0.5572132468223572, + "step": 3718 + }, + { + "epoch": 1.0874396841643514, + "grad_norm": 1.6824352313774058, + "learning_rate": 9.386377936759966e-06, + "loss": 0.5601439476013184, + "step": 3719 + }, + { + "epoch": 1.087732124579617, + "grad_norm": 1.4548205788512927, + "learning_rate": 9.38155241228869e-06, + "loss": 0.4551504850387573, + "step": 3720 + }, + { + "epoch": 1.0880245649948823, + "grad_norm": 1.447968175073075, + "learning_rate": 9.376727032375773e-06, + "loss": 0.5656375885009766, + "step": 3721 + }, + { + "epoch": 1.0883170054101476, + "grad_norm": 1.4767808933411752, + 
"learning_rate": 9.371901798149124e-06, + "loss": 0.5597153902053833, + "step": 3722 + }, + { + "epoch": 1.088609445825413, + "grad_norm": 1.5252235269095387, + "learning_rate": 9.367076710736613e-06, + "loss": 0.5946288108825684, + "step": 3723 + }, + { + "epoch": 1.0889018862406785, + "grad_norm": 1.9924638298376933, + "learning_rate": 9.36225177126607e-06, + "loss": 0.5951449871063232, + "step": 3724 + }, + { + "epoch": 1.0891943266559438, + "grad_norm": 1.7845167649533908, + "learning_rate": 9.3574269808653e-06, + "loss": 0.5755487680435181, + "step": 3725 + }, + { + "epoch": 1.0894867670712092, + "grad_norm": 1.5254834641419546, + "learning_rate": 9.352602340662065e-06, + "loss": 0.5118892788887024, + "step": 3726 + }, + { + "epoch": 1.0897792074864747, + "grad_norm": 1.596558008598135, + "learning_rate": 9.347777851784097e-06, + "loss": 0.5652351975440979, + "step": 3727 + }, + { + "epoch": 1.09007164790174, + "grad_norm": 1.5215560380827415, + "learning_rate": 9.34295351535909e-06, + "loss": 0.624887228012085, + "step": 3728 + }, + { + "epoch": 1.0903640883170054, + "grad_norm": 1.447383452488018, + "learning_rate": 9.338129332514705e-06, + "loss": 0.534363329410553, + "step": 3729 + }, + { + "epoch": 1.0906565287322707, + "grad_norm": 1.477841435635963, + "learning_rate": 9.333305304378565e-06, + "loss": 0.6203521490097046, + "step": 3730 + }, + { + "epoch": 1.0909489691475363, + "grad_norm": 1.7401174715864398, + "learning_rate": 9.328481432078254e-06, + "loss": 0.64560866355896, + "step": 3731 + }, + { + "epoch": 1.0912414095628016, + "grad_norm": 1.5841972191853104, + "learning_rate": 9.323657716741327e-06, + "loss": 0.5389514565467834, + "step": 3732 + }, + { + "epoch": 1.091533849978067, + "grad_norm": 1.4621625707128454, + "learning_rate": 9.318834159495295e-06, + "loss": 0.5245277881622314, + "step": 3733 + }, + { + "epoch": 1.0918262903933325, + "grad_norm": 1.6486990138865423, + "learning_rate": 9.314010761467637e-06, + "loss": 0.603967010974884, + 
"step": 3734 + }, + { + "epoch": 1.0921187308085978, + "grad_norm": 1.7983997195133608, + "learning_rate": 9.309187523785794e-06, + "loss": 0.5426995754241943, + "step": 3735 + }, + { + "epoch": 1.092411171223863, + "grad_norm": 1.6248514181798874, + "learning_rate": 9.30436444757717e-06, + "loss": 0.5400352478027344, + "step": 3736 + }, + { + "epoch": 1.0927036116391284, + "grad_norm": 1.5009984854869718, + "learning_rate": 9.299541533969121e-06, + "loss": 0.5016524195671082, + "step": 3737 + }, + { + "epoch": 1.092996052054394, + "grad_norm": 1.7929437285814107, + "learning_rate": 9.294718784088982e-06, + "loss": 0.526217520236969, + "step": 3738 + }, + { + "epoch": 1.0932884924696593, + "grad_norm": 1.7293517567202035, + "learning_rate": 9.289896199064038e-06, + "loss": 0.525063157081604, + "step": 3739 + }, + { + "epoch": 1.0935809328849246, + "grad_norm": 1.269101628653969, + "learning_rate": 9.285073780021541e-06, + "loss": 0.3792048692703247, + "step": 3740 + }, + { + "epoch": 1.0938733733001902, + "grad_norm": 1.4416380651624152, + "learning_rate": 9.280251528088702e-06, + "loss": 0.5326308012008667, + "step": 3741 + }, + { + "epoch": 1.0941658137154555, + "grad_norm": 1.3946561055322027, + "learning_rate": 9.275429444392692e-06, + "loss": 0.5675199627876282, + "step": 3742 + }, + { + "epoch": 1.0944582541307208, + "grad_norm": 1.640552639536372, + "learning_rate": 9.270607530060643e-06, + "loss": 0.6525516510009766, + "step": 3743 + }, + { + "epoch": 1.0947506945459862, + "grad_norm": 1.563647681973335, + "learning_rate": 9.265785786219647e-06, + "loss": 0.6376343369483948, + "step": 3744 + }, + { + "epoch": 1.0950431349612517, + "grad_norm": 1.7701418719133022, + "learning_rate": 9.260964213996763e-06, + "loss": 0.6440377235412598, + "step": 3745 + }, + { + "epoch": 1.095335575376517, + "grad_norm": 1.886853414823259, + "learning_rate": 9.256142814518997e-06, + "loss": 0.5971434116363525, + "step": 3746 + }, + { + "epoch": 1.0956280157917824, + 
"grad_norm": 1.3797760891901851, + "learning_rate": 9.251321588913331e-06, + "loss": 0.5096890330314636, + "step": 3747 + }, + { + "epoch": 1.0959204562070477, + "grad_norm": 1.7099901744739332, + "learning_rate": 9.246500538306686e-06, + "loss": 0.4303498864173889, + "step": 3748 + }, + { + "epoch": 1.0962128966223132, + "grad_norm": 1.5934571510718554, + "learning_rate": 9.241679663825961e-06, + "loss": 0.5484192371368408, + "step": 3749 + }, + { + "epoch": 1.0965053370375786, + "grad_norm": 1.6268147624989107, + "learning_rate": 9.236858966598004e-06, + "loss": 0.6057884693145752, + "step": 3750 + }, + { + "epoch": 1.096797777452844, + "grad_norm": 1.565840426411154, + "learning_rate": 9.232038447749623e-06, + "loss": 0.5261536836624146, + "step": 3751 + }, + { + "epoch": 1.0970902178681095, + "grad_norm": 1.3242416099520606, + "learning_rate": 9.227218108407586e-06, + "loss": 0.470365047454834, + "step": 3752 + }, + { + "epoch": 1.0973826582833748, + "grad_norm": 1.8694075496184692, + "learning_rate": 9.222397949698618e-06, + "loss": 0.6158323287963867, + "step": 3753 + }, + { + "epoch": 1.09767509869864, + "grad_norm": 1.4353847976975904, + "learning_rate": 9.217577972749401e-06, + "loss": 0.582190990447998, + "step": 3754 + }, + { + "epoch": 1.0979675391139057, + "grad_norm": 1.5377732823861585, + "learning_rate": 9.212758178686575e-06, + "loss": 0.4939305782318115, + "step": 3755 + }, + { + "epoch": 1.098259979529171, + "grad_norm": 1.501946006392042, + "learning_rate": 9.207938568636739e-06, + "loss": 0.576829731464386, + "step": 3756 + }, + { + "epoch": 1.0985524199444363, + "grad_norm": 1.582387804664269, + "learning_rate": 9.203119143726445e-06, + "loss": 0.581257164478302, + "step": 3757 + }, + { + "epoch": 1.0988448603597016, + "grad_norm": 1.4501950316688965, + "learning_rate": 9.19829990508221e-06, + "loss": 0.6105127334594727, + "step": 3758 + }, + { + "epoch": 1.0991373007749672, + "grad_norm": 1.7379854400774775, + "learning_rate": 
9.193480853830495e-06, + "loss": 0.5311432480812073, + "step": 3759 + }, + { + "epoch": 1.0994297411902325, + "grad_norm": 1.3707297007944412, + "learning_rate": 9.188661991097726e-06, + "loss": 0.44334596395492554, + "step": 3760 + }, + { + "epoch": 1.0997221816054978, + "grad_norm": 1.6175971035022318, + "learning_rate": 9.183843318010285e-06, + "loss": 0.5795773267745972, + "step": 3761 + }, + { + "epoch": 1.1000146220207632, + "grad_norm": 1.4465404341375856, + "learning_rate": 9.179024835694504e-06, + "loss": 0.619825541973114, + "step": 3762 + }, + { + "epoch": 1.1003070624360287, + "grad_norm": 1.754450237430447, + "learning_rate": 9.174206545276678e-06, + "loss": 0.633934497833252, + "step": 3763 + }, + { + "epoch": 1.100599502851294, + "grad_norm": 1.34560762533496, + "learning_rate": 9.169388447883053e-06, + "loss": 0.48922473192214966, + "step": 3764 + }, + { + "epoch": 1.1008919432665594, + "grad_norm": 1.7340747304342141, + "learning_rate": 9.164570544639825e-06, + "loss": 0.6125025153160095, + "step": 3765 + }, + { + "epoch": 1.101184383681825, + "grad_norm": 1.4327034643571392, + "learning_rate": 9.159752836673154e-06, + "loss": 0.5428078174591064, + "step": 3766 + }, + { + "epoch": 1.1014768240970902, + "grad_norm": 1.4335551572069505, + "learning_rate": 9.154935325109148e-06, + "loss": 0.5848157405853271, + "step": 3767 + }, + { + "epoch": 1.1017692645123556, + "grad_norm": 1.5053579548838565, + "learning_rate": 9.150118011073872e-06, + "loss": 0.5150102376937866, + "step": 3768 + }, + { + "epoch": 1.102061704927621, + "grad_norm": 1.429407171536289, + "learning_rate": 9.145300895693344e-06, + "loss": 0.6106699705123901, + "step": 3769 + }, + { + "epoch": 1.1023541453428864, + "grad_norm": 1.4079938603953852, + "learning_rate": 9.140483980093534e-06, + "loss": 0.5819482803344727, + "step": 3770 + }, + { + "epoch": 1.1026465857581518, + "grad_norm": 1.7060315490040079, + "learning_rate": 9.135667265400369e-06, + "loss": 0.6499812602996826, + "step": 
3771 + }, + { + "epoch": 1.102939026173417, + "grad_norm": 1.520551323323022, + "learning_rate": 9.130850752739724e-06, + "loss": 0.5375189781188965, + "step": 3772 + }, + { + "epoch": 1.1032314665886827, + "grad_norm": 1.5200340564855783, + "learning_rate": 9.12603444323743e-06, + "loss": 0.5582318902015686, + "step": 3773 + }, + { + "epoch": 1.103523907003948, + "grad_norm": 1.6010357553720616, + "learning_rate": 9.121218338019273e-06, + "loss": 0.5549799203872681, + "step": 3774 + }, + { + "epoch": 1.1038163474192133, + "grad_norm": 1.672600820514396, + "learning_rate": 9.116402438210988e-06, + "loss": 0.4942197799682617, + "step": 3775 + }, + { + "epoch": 1.1041087878344786, + "grad_norm": 1.294858704528479, + "learning_rate": 9.11158674493826e-06, + "loss": 0.5039837956428528, + "step": 3776 + }, + { + "epoch": 1.1044012282497442, + "grad_norm": 1.4904483423531274, + "learning_rate": 9.106771259326726e-06, + "loss": 0.49781280755996704, + "step": 3777 + }, + { + "epoch": 1.1046936686650095, + "grad_norm": 1.5058975394537781, + "learning_rate": 9.101955982501981e-06, + "loss": 0.41755813360214233, + "step": 3778 + }, + { + "epoch": 1.1049861090802748, + "grad_norm": 1.3247888444316807, + "learning_rate": 9.097140915589564e-06, + "loss": 0.5605067014694214, + "step": 3779 + }, + { + "epoch": 1.1052785494955404, + "grad_norm": 1.6960736504408462, + "learning_rate": 9.092326059714971e-06, + "loss": 0.6291122436523438, + "step": 3780 + }, + { + "epoch": 1.1055709899108057, + "grad_norm": 1.5309788529424204, + "learning_rate": 9.087511416003636e-06, + "loss": 0.5164260864257812, + "step": 3781 + }, + { + "epoch": 1.105863430326071, + "grad_norm": 1.481065256446166, + "learning_rate": 9.082696985580964e-06, + "loss": 0.5002986192703247, + "step": 3782 + }, + { + "epoch": 1.1061558707413364, + "grad_norm": 1.8553995759252653, + "learning_rate": 9.077882769572295e-06, + "loss": 0.5149055123329163, + "step": 3783 + }, + { + "epoch": 1.106448311156602, + "grad_norm": 
1.4637547819206846, + "learning_rate": 9.073068769102925e-06, + "loss": 0.5375808477401733, + "step": 3784 + }, + { + "epoch": 1.1067407515718672, + "grad_norm": 1.4438276838658128, + "learning_rate": 9.06825498529809e-06, + "loss": 0.5574408173561096, + "step": 3785 + }, + { + "epoch": 1.1070331919871326, + "grad_norm": 1.8566533611842586, + "learning_rate": 9.063441419282989e-06, + "loss": 0.7410034537315369, + "step": 3786 + }, + { + "epoch": 1.107325632402398, + "grad_norm": 1.4780218137550694, + "learning_rate": 9.058628072182759e-06, + "loss": 0.4890757203102112, + "step": 3787 + }, + { + "epoch": 1.1076180728176634, + "grad_norm": 1.449027088222319, + "learning_rate": 9.053814945122496e-06, + "loss": 0.5012304782867432, + "step": 3788 + }, + { + "epoch": 1.1079105132329288, + "grad_norm": 1.6277147220392454, + "learning_rate": 9.049002039227239e-06, + "loss": 0.5235648155212402, + "step": 3789 + }, + { + "epoch": 1.108202953648194, + "grad_norm": 1.5099212526378973, + "learning_rate": 9.044189355621969e-06, + "loss": 0.44732457399368286, + "step": 3790 + }, + { + "epoch": 1.1084953940634596, + "grad_norm": 1.6131396298332503, + "learning_rate": 9.039376895431627e-06, + "loss": 0.5771712064743042, + "step": 3791 + }, + { + "epoch": 1.108787834478725, + "grad_norm": 2.537465666899194, + "learning_rate": 9.034564659781096e-06, + "loss": 0.5361784100532532, + "step": 3792 + }, + { + "epoch": 1.1090802748939903, + "grad_norm": 1.3520934517992165, + "learning_rate": 9.029752649795203e-06, + "loss": 0.5305893421173096, + "step": 3793 + }, + { + "epoch": 1.1093727153092559, + "grad_norm": 1.3303918593615456, + "learning_rate": 9.02494086659873e-06, + "loss": 0.5094715356826782, + "step": 3794 + }, + { + "epoch": 1.1096651557245212, + "grad_norm": 1.4162243148383913, + "learning_rate": 9.020129311316405e-06, + "loss": 0.5406676530838013, + "step": 3795 + }, + { + "epoch": 1.1099575961397865, + "grad_norm": 1.5431545303983976, + "learning_rate": 9.015317985072893e-06, 
+ "loss": 0.5170687437057495, + "step": 3796 + }, + { + "epoch": 1.1102500365550518, + "grad_norm": 1.454438976249235, + "learning_rate": 9.010506888992814e-06, + "loss": 0.4632429778575897, + "step": 3797 + }, + { + "epoch": 1.1105424769703174, + "grad_norm": 1.8257270837662332, + "learning_rate": 9.005696024200734e-06, + "loss": 0.5614180564880371, + "step": 3798 + }, + { + "epoch": 1.1108349173855827, + "grad_norm": 1.5187438448472135, + "learning_rate": 9.000885391821164e-06, + "loss": 0.5660920143127441, + "step": 3799 + }, + { + "epoch": 1.111127357800848, + "grad_norm": 1.616333702810617, + "learning_rate": 8.996074992978558e-06, + "loss": 0.6346436142921448, + "step": 3800 + }, + { + "epoch": 1.1114197982161134, + "grad_norm": 1.2613316779938173, + "learning_rate": 8.991264828797319e-06, + "loss": 0.4295850396156311, + "step": 3801 + }, + { + "epoch": 1.111712238631379, + "grad_norm": 1.4545086499056976, + "learning_rate": 8.986454900401791e-06, + "loss": 0.4797070622444153, + "step": 3802 + }, + { + "epoch": 1.1120046790466442, + "grad_norm": 1.3353593055033692, + "learning_rate": 8.98164520891627e-06, + "loss": 0.4912114143371582, + "step": 3803 + }, + { + "epoch": 1.1122971194619096, + "grad_norm": 1.6135433736276805, + "learning_rate": 8.976835755464988e-06, + "loss": 0.4156647026538849, + "step": 3804 + }, + { + "epoch": 1.1125895598771751, + "grad_norm": 1.6120031027815822, + "learning_rate": 8.97202654117213e-06, + "loss": 0.4527992010116577, + "step": 3805 + }, + { + "epoch": 1.1128820002924404, + "grad_norm": 1.6881758541294942, + "learning_rate": 8.967217567161817e-06, + "loss": 0.5969425439834595, + "step": 3806 + }, + { + "epoch": 1.1131744407077058, + "grad_norm": 1.5313114259080804, + "learning_rate": 8.962408834558116e-06, + "loss": 0.5867633819580078, + "step": 3807 + }, + { + "epoch": 1.113466881122971, + "grad_norm": 1.4924056676350326, + "learning_rate": 8.957600344485042e-06, + "loss": 0.549109697341919, + "step": 3808 + }, + { + 
"epoch": 1.1137593215382366, + "grad_norm": 1.6602567019426782, + "learning_rate": 8.952792098066549e-06, + "loss": 0.6336593627929688, + "step": 3809 + }, + { + "epoch": 1.114051761953502, + "grad_norm": 1.4867429859275132, + "learning_rate": 8.947984096426537e-06, + "loss": 0.5403220653533936, + "step": 3810 + }, + { + "epoch": 1.1143442023687673, + "grad_norm": 1.3422567204959701, + "learning_rate": 8.943176340688846e-06, + "loss": 0.37941914796829224, + "step": 3811 + }, + { + "epoch": 1.1146366427840328, + "grad_norm": 1.7322077540170269, + "learning_rate": 8.938368831977262e-06, + "loss": 0.5509335994720459, + "step": 3812 + }, + { + "epoch": 1.1149290831992982, + "grad_norm": 1.7077554301344111, + "learning_rate": 8.933561571415506e-06, + "loss": 0.5798860788345337, + "step": 3813 + }, + { + "epoch": 1.1152215236145635, + "grad_norm": 1.8693354922278385, + "learning_rate": 8.92875456012725e-06, + "loss": 0.5549412965774536, + "step": 3814 + }, + { + "epoch": 1.1155139640298288, + "grad_norm": 1.5992402094758784, + "learning_rate": 8.9239477992361e-06, + "loss": 0.4707058072090149, + "step": 3815 + }, + { + "epoch": 1.1158064044450944, + "grad_norm": 1.5838333385974708, + "learning_rate": 8.919141289865611e-06, + "loss": 0.4717002511024475, + "step": 3816 + }, + { + "epoch": 1.1160988448603597, + "grad_norm": 1.288572308356885, + "learning_rate": 8.914335033139274e-06, + "loss": 0.48403650522232056, + "step": 3817 + }, + { + "epoch": 1.116391285275625, + "grad_norm": 1.6715157915340426, + "learning_rate": 8.909529030180522e-06, + "loss": 0.48592090606689453, + "step": 3818 + }, + { + "epoch": 1.1166837256908906, + "grad_norm": 1.566157541574177, + "learning_rate": 8.904723282112728e-06, + "loss": 0.5052220225334167, + "step": 3819 + }, + { + "epoch": 1.116976166106156, + "grad_norm": 1.6151321192825796, + "learning_rate": 8.899917790059208e-06, + "loss": 0.7858535051345825, + "step": 3820 + }, + { + "epoch": 1.1172686065214212, + "grad_norm": 
1.8369545909174703, + "learning_rate": 8.895112555143217e-06, + "loss": 0.6768159866333008, + "step": 3821 + }, + { + "epoch": 1.1175610469366866, + "grad_norm": 1.8079763728482598, + "learning_rate": 8.890307578487947e-06, + "loss": 0.5661243200302124, + "step": 3822 + }, + { + "epoch": 1.1178534873519521, + "grad_norm": 1.7067515294047517, + "learning_rate": 8.885502861216535e-06, + "loss": 0.5129438638687134, + "step": 3823 + }, + { + "epoch": 1.1181459277672174, + "grad_norm": 1.5735393429941704, + "learning_rate": 8.880698404452051e-06, + "loss": 0.4813467264175415, + "step": 3824 + }, + { + "epoch": 1.1184383681824828, + "grad_norm": 1.5840908667031388, + "learning_rate": 8.87589420931751e-06, + "loss": 0.5165577530860901, + "step": 3825 + }, + { + "epoch": 1.118730808597748, + "grad_norm": 1.4425390765128903, + "learning_rate": 8.871090276935863e-06, + "loss": 0.47335073351860046, + "step": 3826 + }, + { + "epoch": 1.1190232490130136, + "grad_norm": 1.6934955516318184, + "learning_rate": 8.86628660843e-06, + "loss": 0.4902348518371582, + "step": 3827 + }, + { + "epoch": 1.119315689428279, + "grad_norm": 1.7245920316429901, + "learning_rate": 8.861483204922752e-06, + "loss": 0.5933388471603394, + "step": 3828 + }, + { + "epoch": 1.1196081298435443, + "grad_norm": 1.5505961542425288, + "learning_rate": 8.85668006753688e-06, + "loss": 0.4898201823234558, + "step": 3829 + }, + { + "epoch": 1.1199005702588098, + "grad_norm": 1.3287782031202422, + "learning_rate": 8.851877197395088e-06, + "loss": 0.4745003879070282, + "step": 3830 + }, + { + "epoch": 1.1201930106740752, + "grad_norm": 1.5081067046883336, + "learning_rate": 8.847074595620024e-06, + "loss": 0.5246972441673279, + "step": 3831 + }, + { + "epoch": 1.1204854510893405, + "grad_norm": 1.498399687409688, + "learning_rate": 8.842272263334263e-06, + "loss": 0.5196787714958191, + "step": 3832 + }, + { + "epoch": 1.120777891504606, + "grad_norm": 2.0301798075149446, + "learning_rate": 8.83747020166032e-06, + 
"loss": 0.6721034049987793, + "step": 3833 + }, + { + "epoch": 1.1210703319198714, + "grad_norm": 1.4930580521199184, + "learning_rate": 8.832668411720652e-06, + "loss": 0.5654234886169434, + "step": 3834 + }, + { + "epoch": 1.1213627723351367, + "grad_norm": 1.91001506609742, + "learning_rate": 8.827866894637642e-06, + "loss": 0.7520767450332642, + "step": 3835 + }, + { + "epoch": 1.121655212750402, + "grad_norm": 1.3753523987373926, + "learning_rate": 8.82306565153362e-06, + "loss": 0.43645960092544556, + "step": 3836 + }, + { + "epoch": 1.1219476531656676, + "grad_norm": 1.688542605024225, + "learning_rate": 8.818264683530845e-06, + "loss": 0.5802274942398071, + "step": 3837 + }, + { + "epoch": 1.122240093580933, + "grad_norm": 1.5706370149670577, + "learning_rate": 8.813463991751516e-06, + "loss": 0.5593410134315491, + "step": 3838 + }, + { + "epoch": 1.1225325339961982, + "grad_norm": 1.56949134961986, + "learning_rate": 8.808663577317765e-06, + "loss": 0.6126681566238403, + "step": 3839 + }, + { + "epoch": 1.1228249744114636, + "grad_norm": 1.6396156905409707, + "learning_rate": 8.80386344135166e-06, + "loss": 0.6245180368423462, + "step": 3840 + }, + { + "epoch": 1.123117414826729, + "grad_norm": 1.3541654958690765, + "learning_rate": 8.799063584975201e-06, + "loss": 0.6611473560333252, + "step": 3841 + }, + { + "epoch": 1.1234098552419944, + "grad_norm": 1.2310988629927149, + "learning_rate": 8.79426400931033e-06, + "loss": 0.40020978450775146, + "step": 3842 + }, + { + "epoch": 1.1237022956572598, + "grad_norm": 1.3644507366239775, + "learning_rate": 8.789464715478913e-06, + "loss": 0.4965318441390991, + "step": 3843 + }, + { + "epoch": 1.1239947360725253, + "grad_norm": 1.4326851806590044, + "learning_rate": 8.784665704602758e-06, + "loss": 0.4838374853134155, + "step": 3844 + }, + { + "epoch": 1.1242871764877906, + "grad_norm": 1.389039662475551, + "learning_rate": 8.77986697780361e-06, + "loss": 0.5756508708000183, + "step": 3845 + }, + { + "epoch": 
1.124579616903056, + "grad_norm": 1.484286888056792, + "learning_rate": 8.775068536203132e-06, + "loss": 0.5341511964797974, + "step": 3846 + }, + { + "epoch": 1.1248720573183213, + "grad_norm": 1.5007549282773276, + "learning_rate": 8.77027038092294e-06, + "loss": 0.6239134073257446, + "step": 3847 + }, + { + "epoch": 1.1251644977335868, + "grad_norm": 1.818555508500906, + "learning_rate": 8.765472513084566e-06, + "loss": 0.5642406940460205, + "step": 3848 + }, + { + "epoch": 1.1254569381488522, + "grad_norm": 1.5841401225303304, + "learning_rate": 8.760674933809488e-06, + "loss": 0.5242771506309509, + "step": 3849 + }, + { + "epoch": 1.1257493785641175, + "grad_norm": 1.5608207104848433, + "learning_rate": 8.755877644219108e-06, + "loss": 0.5205737352371216, + "step": 3850 + }, + { + "epoch": 1.126041818979383, + "grad_norm": 1.6760248633979633, + "learning_rate": 8.751080645434768e-06, + "loss": 0.5005168318748474, + "step": 3851 + }, + { + "epoch": 1.1263342593946484, + "grad_norm": 1.539730717074913, + "learning_rate": 8.74628393857773e-06, + "loss": 0.44978275895118713, + "step": 3852 + }, + { + "epoch": 1.1266266998099137, + "grad_norm": 1.3558571119447433, + "learning_rate": 8.741487524769198e-06, + "loss": 0.43631571531295776, + "step": 3853 + }, + { + "epoch": 1.126919140225179, + "grad_norm": 1.220093214706796, + "learning_rate": 8.736691405130306e-06, + "loss": 0.4196016788482666, + "step": 3854 + }, + { + "epoch": 1.1272115806404446, + "grad_norm": 1.6296314839875645, + "learning_rate": 8.731895580782118e-06, + "loss": 0.6389856338500977, + "step": 3855 + }, + { + "epoch": 1.12750402105571, + "grad_norm": 1.726359030533187, + "learning_rate": 8.72710005284563e-06, + "loss": 0.5465584993362427, + "step": 3856 + }, + { + "epoch": 1.1277964614709752, + "grad_norm": 1.469192647678069, + "learning_rate": 8.722304822441757e-06, + "loss": 0.5513765811920166, + "step": 3857 + }, + { + "epoch": 1.1280889018862408, + "grad_norm": 1.5516390698184288, + 
"learning_rate": 8.717509890691369e-06, + "loss": 0.6984349489212036, + "step": 3858 + }, + { + "epoch": 1.128381342301506, + "grad_norm": 1.6096511723205336, + "learning_rate": 8.712715258715248e-06, + "loss": 0.5311027765274048, + "step": 3859 + }, + { + "epoch": 1.1286737827167714, + "grad_norm": 1.5113126886002746, + "learning_rate": 8.707920927634105e-06, + "loss": 0.4598672091960907, + "step": 3860 + }, + { + "epoch": 1.1289662231320368, + "grad_norm": 1.8202302284240548, + "learning_rate": 8.703126898568591e-06, + "loss": 0.6177612543106079, + "step": 3861 + }, + { + "epoch": 1.1292586635473023, + "grad_norm": 1.8043964275332298, + "learning_rate": 8.69833317263928e-06, + "loss": 0.6442389488220215, + "step": 3862 + }, + { + "epoch": 1.1295511039625676, + "grad_norm": 1.4793698971631246, + "learning_rate": 8.693539750966672e-06, + "loss": 0.5925737023353577, + "step": 3863 + }, + { + "epoch": 1.129843544377833, + "grad_norm": 1.3730688779887357, + "learning_rate": 8.688746634671207e-06, + "loss": 0.46009114384651184, + "step": 3864 + }, + { + "epoch": 1.1301359847930983, + "grad_norm": 1.6065358861472605, + "learning_rate": 8.683953824873246e-06, + "loss": 0.5438460111618042, + "step": 3865 + }, + { + "epoch": 1.1304284252083638, + "grad_norm": 1.6436751318662282, + "learning_rate": 8.679161322693073e-06, + "loss": 0.5355101823806763, + "step": 3866 + }, + { + "epoch": 1.1307208656236292, + "grad_norm": 1.5636124606467166, + "learning_rate": 8.67436912925091e-06, + "loss": 0.4494459629058838, + "step": 3867 + }, + { + "epoch": 1.1310133060388945, + "grad_norm": 1.5118698872161136, + "learning_rate": 8.669577245666905e-06, + "loss": 0.5828550457954407, + "step": 3868 + }, + { + "epoch": 1.13130574645416, + "grad_norm": 1.43455699505813, + "learning_rate": 8.664785673061127e-06, + "loss": 0.4956590235233307, + "step": 3869 + }, + { + "epoch": 1.1315981868694254, + "grad_norm": 1.5146504272638424, + "learning_rate": 8.659994412553582e-06, + "loss": 
0.5447779893875122, + "step": 3870 + }, + { + "epoch": 1.1318906272846907, + "grad_norm": 1.6512585184867246, + "learning_rate": 8.655203465264196e-06, + "loss": 0.6275361776351929, + "step": 3871 + }, + { + "epoch": 1.1321830676999562, + "grad_norm": 1.564521343459816, + "learning_rate": 8.650412832312823e-06, + "loss": 0.47899991273880005, + "step": 3872 + }, + { + "epoch": 1.1324755081152216, + "grad_norm": 1.1875547206815094, + "learning_rate": 8.645622514819243e-06, + "loss": 0.3356127142906189, + "step": 3873 + }, + { + "epoch": 1.132767948530487, + "grad_norm": 1.4442401622701144, + "learning_rate": 8.640832513903168e-06, + "loss": 0.48855727910995483, + "step": 3874 + }, + { + "epoch": 1.1330603889457522, + "grad_norm": 1.4528018972795056, + "learning_rate": 8.636042830684227e-06, + "loss": 0.46642380952835083, + "step": 3875 + }, + { + "epoch": 1.1333528293610178, + "grad_norm": 1.8421536572224761, + "learning_rate": 8.631253466281984e-06, + "loss": 0.6179598569869995, + "step": 3876 + }, + { + "epoch": 1.133645269776283, + "grad_norm": 1.6762180368596016, + "learning_rate": 8.626464421815919e-06, + "loss": 0.6361704468727112, + "step": 3877 + }, + { + "epoch": 1.1339377101915484, + "grad_norm": 1.574443230288469, + "learning_rate": 8.621675698405446e-06, + "loss": 0.6243701577186584, + "step": 3878 + }, + { + "epoch": 1.1342301506068138, + "grad_norm": 1.6113304231540622, + "learning_rate": 8.616887297169895e-06, + "loss": 0.5402215123176575, + "step": 3879 + }, + { + "epoch": 1.1345225910220793, + "grad_norm": 1.6390191276422172, + "learning_rate": 8.61209921922853e-06, + "loss": 0.6050009727478027, + "step": 3880 + }, + { + "epoch": 1.1348150314373446, + "grad_norm": 1.6106875040973343, + "learning_rate": 8.607311465700534e-06, + "loss": 0.5705801248550415, + "step": 3881 + }, + { + "epoch": 1.13510747185261, + "grad_norm": 1.5380461037587805, + "learning_rate": 8.602524037705018e-06, + "loss": 0.5467248558998108, + "step": 3882 + }, + { + "epoch": 
1.1353999122678755, + "grad_norm": 1.7121108266736746, + "learning_rate": 8.597736936361007e-06, + "loss": 0.5903012752532959, + "step": 3883 + }, + { + "epoch": 1.1356923526831408, + "grad_norm": 1.6218348221942134, + "learning_rate": 8.592950162787463e-06, + "loss": 0.6034090518951416, + "step": 3884 + }, + { + "epoch": 1.1359847930984062, + "grad_norm": 1.3056254339924755, + "learning_rate": 8.588163718103264e-06, + "loss": 0.4282987117767334, + "step": 3885 + }, + { + "epoch": 1.1362772335136717, + "grad_norm": 1.5127630417626896, + "learning_rate": 8.583377603427212e-06, + "loss": 0.47374194860458374, + "step": 3886 + }, + { + "epoch": 1.136569673928937, + "grad_norm": 1.5841934947134406, + "learning_rate": 8.578591819878033e-06, + "loss": 0.43954724073410034, + "step": 3887 + }, + { + "epoch": 1.1368621143442024, + "grad_norm": 1.4278799477191386, + "learning_rate": 8.573806368574372e-06, + "loss": 0.4731065034866333, + "step": 3888 + }, + { + "epoch": 1.1371545547594677, + "grad_norm": 1.4184800646863156, + "learning_rate": 8.5690212506348e-06, + "loss": 0.5241256356239319, + "step": 3889 + }, + { + "epoch": 1.1374469951747332, + "grad_norm": 1.631021419370316, + "learning_rate": 8.56423646717781e-06, + "loss": 0.5823307037353516, + "step": 3890 + }, + { + "epoch": 1.1377394355899986, + "grad_norm": 1.654201038343883, + "learning_rate": 8.55945201932182e-06, + "loss": 0.5360631346702576, + "step": 3891 + }, + { + "epoch": 1.138031876005264, + "grad_norm": 1.5773624073994579, + "learning_rate": 8.554667908185158e-06, + "loss": 0.5227797627449036, + "step": 3892 + }, + { + "epoch": 1.1383243164205292, + "grad_norm": 1.7414634806893152, + "learning_rate": 8.549884134886089e-06, + "loss": 0.6232806444168091, + "step": 3893 + }, + { + "epoch": 1.1386167568357948, + "grad_norm": 1.567438316916472, + "learning_rate": 8.545100700542782e-06, + "loss": 0.6697877049446106, + "step": 3894 + }, + { + "epoch": 1.13890919725106, + "grad_norm": 1.5115348655280192, + 
"learning_rate": 8.540317606273343e-06, + "loss": 0.6348206400871277, + "step": 3895 + }, + { + "epoch": 1.1392016376663254, + "grad_norm": 1.5453537409734852, + "learning_rate": 8.535534853195786e-06, + "loss": 0.5578476190567017, + "step": 3896 + }, + { + "epoch": 1.139494078081591, + "grad_norm": 1.6674507953444782, + "learning_rate": 8.530752442428055e-06, + "loss": 0.6439946889877319, + "step": 3897 + }, + { + "epoch": 1.1397865184968563, + "grad_norm": 1.5233786551580588, + "learning_rate": 8.525970375088006e-06, + "loss": 0.5292261242866516, + "step": 3898 + }, + { + "epoch": 1.1400789589121216, + "grad_norm": 1.6870433422022266, + "learning_rate": 8.521188652293421e-06, + "loss": 0.5836480855941772, + "step": 3899 + }, + { + "epoch": 1.140371399327387, + "grad_norm": 1.731988866581243, + "learning_rate": 8.516407275161998e-06, + "loss": 0.5166354775428772, + "step": 3900 + }, + { + "epoch": 1.1406638397426525, + "grad_norm": 1.61853635946673, + "learning_rate": 8.511626244811352e-06, + "loss": 0.5236127972602844, + "step": 3901 + }, + { + "epoch": 1.1409562801579178, + "grad_norm": 1.3903461786321225, + "learning_rate": 8.506845562359022e-06, + "loss": 0.4900703430175781, + "step": 3902 + }, + { + "epoch": 1.1412487205731832, + "grad_norm": 1.484704749479714, + "learning_rate": 8.502065228922464e-06, + "loss": 0.5200212001800537, + "step": 3903 + }, + { + "epoch": 1.1415411609884485, + "grad_norm": 1.588712114908106, + "learning_rate": 8.497285245619053e-06, + "loss": 0.5553300976753235, + "step": 3904 + }, + { + "epoch": 1.141833601403714, + "grad_norm": 1.5947362241383982, + "learning_rate": 8.492505613566075e-06, + "loss": 0.5650131702423096, + "step": 3905 + }, + { + "epoch": 1.1421260418189794, + "grad_norm": 1.7370414648582224, + "learning_rate": 8.487726333880746e-06, + "loss": 0.4732077121734619, + "step": 3906 + }, + { + "epoch": 1.1424184822342447, + "grad_norm": 1.4560698890341355, + "learning_rate": 8.482947407680193e-06, + "loss": 
0.46741920709609985, + "step": 3907 + }, + { + "epoch": 1.1427109226495102, + "grad_norm": 1.717154367813477, + "learning_rate": 8.478168836081457e-06, + "loss": 0.606191873550415, + "step": 3908 + }, + { + "epoch": 1.1430033630647756, + "grad_norm": 1.4018605845855592, + "learning_rate": 8.473390620201505e-06, + "loss": 0.4373897314071655, + "step": 3909 + }, + { + "epoch": 1.143295803480041, + "grad_norm": 1.4705540951964773, + "learning_rate": 8.468612761157215e-06, + "loss": 0.5460623502731323, + "step": 3910 + }, + { + "epoch": 1.1435882438953064, + "grad_norm": 1.311062743935516, + "learning_rate": 8.463835260065379e-06, + "loss": 0.4939531087875366, + "step": 3911 + }, + { + "epoch": 1.1438806843105718, + "grad_norm": 1.4297382144675803, + "learning_rate": 8.459058118042708e-06, + "loss": 0.544964611530304, + "step": 3912 + }, + { + "epoch": 1.144173124725837, + "grad_norm": 1.600083847682917, + "learning_rate": 8.454281336205836e-06, + "loss": 0.6118921041488647, + "step": 3913 + }, + { + "epoch": 1.1444655651411024, + "grad_norm": 1.4018893231050742, + "learning_rate": 8.449504915671304e-06, + "loss": 0.561060905456543, + "step": 3914 + }, + { + "epoch": 1.144758005556368, + "grad_norm": 1.3967184209578762, + "learning_rate": 8.444728857555572e-06, + "loss": 0.430827796459198, + "step": 3915 + }, + { + "epoch": 1.1450504459716333, + "grad_norm": 1.7776445971156332, + "learning_rate": 8.439953162975011e-06, + "loss": 0.5482884645462036, + "step": 3916 + }, + { + "epoch": 1.1453428863868986, + "grad_norm": 1.8487697311002218, + "learning_rate": 8.435177833045911e-06, + "loss": 0.6614879965782166, + "step": 3917 + }, + { + "epoch": 1.145635326802164, + "grad_norm": 1.686073678884194, + "learning_rate": 8.430402868884482e-06, + "loss": 0.6290509104728699, + "step": 3918 + }, + { + "epoch": 1.1459277672174295, + "grad_norm": 1.479686951025449, + "learning_rate": 8.425628271606836e-06, + "loss": 0.404970645904541, + "step": 3919 + }, + { + "epoch": 
1.1462202076326948, + "grad_norm": 1.3978968237521616, + "learning_rate": 8.420854042329011e-06, + "loss": 0.4902762174606323, + "step": 3920 + }, + { + "epoch": 1.1465126480479602, + "grad_norm": 1.6869046118960203, + "learning_rate": 8.416080182166955e-06, + "loss": 0.5757346153259277, + "step": 3921 + }, + { + "epoch": 1.1468050884632257, + "grad_norm": 1.5541954452670608, + "learning_rate": 8.41130669223652e-06, + "loss": 0.5453485250473022, + "step": 3922 + }, + { + "epoch": 1.147097528878491, + "grad_norm": 1.7189844130617113, + "learning_rate": 8.40653357365349e-06, + "loss": 0.5660290122032166, + "step": 3923 + }, + { + "epoch": 1.1473899692937564, + "grad_norm": 1.576466831282747, + "learning_rate": 8.40176082753355e-06, + "loss": 0.46013498306274414, + "step": 3924 + }, + { + "epoch": 1.147682409709022, + "grad_norm": 1.4364824711460213, + "learning_rate": 8.396988454992296e-06, + "loss": 0.5183000564575195, + "step": 3925 + }, + { + "epoch": 1.1479748501242872, + "grad_norm": 1.7258149850246205, + "learning_rate": 8.392216457145246e-06, + "loss": 0.5407284498214722, + "step": 3926 + }, + { + "epoch": 1.1482672905395526, + "grad_norm": 1.735168999167248, + "learning_rate": 8.387444835107824e-06, + "loss": 0.5960655808448792, + "step": 3927 + }, + { + "epoch": 1.148559730954818, + "grad_norm": 1.2891916158500891, + "learning_rate": 8.382673589995365e-06, + "loss": 0.4363316297531128, + "step": 3928 + }, + { + "epoch": 1.1488521713700834, + "grad_norm": 1.4413045514377891, + "learning_rate": 8.377902722923122e-06, + "loss": 0.5143908262252808, + "step": 3929 + }, + { + "epoch": 1.1491446117853488, + "grad_norm": 1.696244956095385, + "learning_rate": 8.373132235006254e-06, + "loss": 0.6016460657119751, + "step": 3930 + }, + { + "epoch": 1.149437052200614, + "grad_norm": 1.4319561508465357, + "learning_rate": 8.368362127359835e-06, + "loss": 0.5120511651039124, + "step": 3931 + }, + { + "epoch": 1.1497294926158794, + "grad_norm": 1.5254857110351325, + 
"learning_rate": 8.363592401098853e-06, + "loss": 0.49658435583114624, + "step": 3932 + }, + { + "epoch": 1.150021933031145, + "grad_norm": 1.7705521617533395, + "learning_rate": 8.358823057338188e-06, + "loss": 0.584032416343689, + "step": 3933 + }, + { + "epoch": 1.1503143734464103, + "grad_norm": 1.5012587623360505, + "learning_rate": 8.35405409719266e-06, + "loss": 0.4673706293106079, + "step": 3934 + }, + { + "epoch": 1.1506068138616756, + "grad_norm": 1.618555555366979, + "learning_rate": 8.349285521776982e-06, + "loss": 0.633565366268158, + "step": 3935 + }, + { + "epoch": 1.1508992542769412, + "grad_norm": 1.6576478038135816, + "learning_rate": 8.344517332205774e-06, + "loss": 0.6029015779495239, + "step": 3936 + }, + { + "epoch": 1.1511916946922065, + "grad_norm": 1.519081286345544, + "learning_rate": 8.339749529593574e-06, + "loss": 0.45594489574432373, + "step": 3937 + }, + { + "epoch": 1.1514841351074718, + "grad_norm": 1.5262842564669963, + "learning_rate": 8.334982115054828e-06, + "loss": 0.4413541257381439, + "step": 3938 + }, + { + "epoch": 1.1517765755227372, + "grad_norm": 1.6373893488771099, + "learning_rate": 8.330215089703887e-06, + "loss": 0.5674389004707336, + "step": 3939 + }, + { + "epoch": 1.1520690159380027, + "grad_norm": 1.386401958621656, + "learning_rate": 8.325448454655019e-06, + "loss": 0.43449294567108154, + "step": 3940 + }, + { + "epoch": 1.152361456353268, + "grad_norm": 1.6283403091444353, + "learning_rate": 8.320682211022393e-06, + "loss": 0.5190714597702026, + "step": 3941 + }, + { + "epoch": 1.1526538967685334, + "grad_norm": 1.5774508757028434, + "learning_rate": 8.31591635992009e-06, + "loss": 0.56162428855896, + "step": 3942 + }, + { + "epoch": 1.1529463371837987, + "grad_norm": 1.4891934876919055, + "learning_rate": 8.311150902462096e-06, + "loss": 0.5588958263397217, + "step": 3943 + }, + { + "epoch": 1.1532387775990642, + "grad_norm": 1.5108312938903155, + "learning_rate": 8.306385839762312e-06, + "loss": 
0.5438264608383179, + "step": 3944 + }, + { + "epoch": 1.1535312180143296, + "grad_norm": 1.575513080138648, + "learning_rate": 8.30162117293454e-06, + "loss": 0.5860258340835571, + "step": 3945 + }, + { + "epoch": 1.153823658429595, + "grad_norm": 1.552005958726473, + "learning_rate": 8.296856903092494e-06, + "loss": 0.4742947220802307, + "step": 3946 + }, + { + "epoch": 1.1541160988448604, + "grad_norm": 1.44195573685015, + "learning_rate": 8.292093031349791e-06, + "loss": 0.47963109612464905, + "step": 3947 + }, + { + "epoch": 1.1544085392601258, + "grad_norm": 1.5340226225614597, + "learning_rate": 8.287329558819957e-06, + "loss": 0.5404704213142395, + "step": 3948 + }, + { + "epoch": 1.154700979675391, + "grad_norm": 1.8054477659796657, + "learning_rate": 8.282566486616425e-06, + "loss": 0.6559766530990601, + "step": 3949 + }, + { + "epoch": 1.1549934200906566, + "grad_norm": 1.507763379787764, + "learning_rate": 8.277803815852535e-06, + "loss": 0.4462929368019104, + "step": 3950 + }, + { + "epoch": 1.155285860505922, + "grad_norm": 1.6398920335039024, + "learning_rate": 8.273041547641531e-06, + "loss": 0.5672504901885986, + "step": 3951 + }, + { + "epoch": 1.1555783009211873, + "grad_norm": 1.5384582587859306, + "learning_rate": 8.268279683096567e-06, + "loss": 0.4040188193321228, + "step": 3952 + }, + { + "epoch": 1.1558707413364526, + "grad_norm": 1.4954603260099153, + "learning_rate": 8.263518223330698e-06, + "loss": 0.4639814794063568, + "step": 3953 + }, + { + "epoch": 1.1561631817517182, + "grad_norm": 1.3560290444841174, + "learning_rate": 8.258757169456885e-06, + "loss": 0.384866327047348, + "step": 3954 + }, + { + "epoch": 1.1564556221669835, + "grad_norm": 1.5360587849114566, + "learning_rate": 8.253996522587997e-06, + "loss": 0.452106773853302, + "step": 3955 + }, + { + "epoch": 1.1567480625822488, + "grad_norm": 1.5044138285106523, + "learning_rate": 8.249236283836806e-06, + "loss": 0.487504780292511, + "step": 3956 + }, + { + "epoch": 
1.1570405029975142, + "grad_norm": 1.6199121483000312, + "learning_rate": 8.244476454315989e-06, + "loss": 0.6225916147232056, + "step": 3957 + }, + { + "epoch": 1.1573329434127797, + "grad_norm": 1.7421167385988239, + "learning_rate": 8.239717035138128e-06, + "loss": 0.5254271030426025, + "step": 3958 + }, + { + "epoch": 1.157625383828045, + "grad_norm": 1.6240162719096014, + "learning_rate": 8.234958027415707e-06, + "loss": 0.5759135484695435, + "step": 3959 + }, + { + "epoch": 1.1579178242433104, + "grad_norm": 1.6959935899735565, + "learning_rate": 8.230199432261115e-06, + "loss": 0.5720966458320618, + "step": 3960 + }, + { + "epoch": 1.158210264658576, + "grad_norm": 1.5797174163929866, + "learning_rate": 8.225441250786643e-06, + "loss": 0.4807323217391968, + "step": 3961 + }, + { + "epoch": 1.1585027050738412, + "grad_norm": 1.6197693861653146, + "learning_rate": 8.22068348410449e-06, + "loss": 0.5049746036529541, + "step": 3962 + }, + { + "epoch": 1.1587951454891066, + "grad_norm": 1.673364031578337, + "learning_rate": 8.215926133326758e-06, + "loss": 0.5321973562240601, + "step": 3963 + }, + { + "epoch": 1.159087585904372, + "grad_norm": 1.3992709586079797, + "learning_rate": 8.211169199565444e-06, + "loss": 0.5176634788513184, + "step": 3964 + }, + { + "epoch": 1.1593800263196374, + "grad_norm": 1.5661593234971032, + "learning_rate": 8.20641268393245e-06, + "loss": 0.5345112681388855, + "step": 3965 + }, + { + "epoch": 1.1596724667349028, + "grad_norm": 1.8309312482061675, + "learning_rate": 8.201656587539589e-06, + "loss": 0.47578325867652893, + "step": 3966 + }, + { + "epoch": 1.159964907150168, + "grad_norm": 1.5996140092470157, + "learning_rate": 8.196900911498563e-06, + "loss": 0.5018264651298523, + "step": 3967 + }, + { + "epoch": 1.1602573475654336, + "grad_norm": 1.530612277867195, + "learning_rate": 8.192145656920989e-06, + "loss": 0.4643394351005554, + "step": 3968 + }, + { + "epoch": 1.160549787980699, + "grad_norm": 1.6066179328722245, + 
"learning_rate": 8.187390824918375e-06, + "loss": 0.5391045808792114, + "step": 3969 + }, + { + "epoch": 1.1608422283959643, + "grad_norm": 1.4691594768883462, + "learning_rate": 8.182636416602136e-06, + "loss": 0.5168124437332153, + "step": 3970 + }, + { + "epoch": 1.1611346688112296, + "grad_norm": 1.4702658109064293, + "learning_rate": 8.177882433083583e-06, + "loss": 0.5821055173873901, + "step": 3971 + }, + { + "epoch": 1.1614271092264952, + "grad_norm": 1.597748811964364, + "learning_rate": 8.173128875473933e-06, + "loss": 0.6031824946403503, + "step": 3972 + }, + { + "epoch": 1.1617195496417605, + "grad_norm": 1.598311083454874, + "learning_rate": 8.1683757448843e-06, + "loss": 0.5085259675979614, + "step": 3973 + }, + { + "epoch": 1.1620119900570258, + "grad_norm": 1.6218562380492636, + "learning_rate": 8.163623042425702e-06, + "loss": 0.5654903650283813, + "step": 3974 + }, + { + "epoch": 1.1623044304722914, + "grad_norm": 1.6279393236171642, + "learning_rate": 8.158870769209051e-06, + "loss": 0.3920902609825134, + "step": 3975 + }, + { + "epoch": 1.1625968708875567, + "grad_norm": 1.6100798425685794, + "learning_rate": 8.154118926345165e-06, + "loss": 0.5334979891777039, + "step": 3976 + }, + { + "epoch": 1.162889311302822, + "grad_norm": 1.7332980039574648, + "learning_rate": 8.149367514944754e-06, + "loss": 0.6212184429168701, + "step": 3977 + }, + { + "epoch": 1.1631817517180874, + "grad_norm": 1.847204612085083, + "learning_rate": 8.144616536118437e-06, + "loss": 0.71863853931427, + "step": 3978 + }, + { + "epoch": 1.163474192133353, + "grad_norm": 1.7297963031597574, + "learning_rate": 8.139865990976722e-06, + "loss": 0.5263794660568237, + "step": 3979 + }, + { + "epoch": 1.1637666325486182, + "grad_norm": 1.5706968019905152, + "learning_rate": 8.135115880630025e-06, + "loss": 0.5035576224327087, + "step": 3980 + }, + { + "epoch": 1.1640590729638836, + "grad_norm": 1.4183002447341373, + "learning_rate": 8.130366206188651e-06, + "loss": 
0.5695084929466248, + "step": 3981 + }, + { + "epoch": 1.1643515133791489, + "grad_norm": 1.51980370598088, + "learning_rate": 8.125616968762806e-06, + "loss": 0.5826396942138672, + "step": 3982 + }, + { + "epoch": 1.1646439537944144, + "grad_norm": 1.5991682342910063, + "learning_rate": 8.1208681694626e-06, + "loss": 0.5132841467857361, + "step": 3983 + }, + { + "epoch": 1.1649363942096798, + "grad_norm": 1.7073185800473716, + "learning_rate": 8.116119809398034e-06, + "loss": 0.6572669744491577, + "step": 3984 + }, + { + "epoch": 1.165228834624945, + "grad_norm": 1.8729301131644296, + "learning_rate": 8.111371889679007e-06, + "loss": 0.5365801453590393, + "step": 3985 + }, + { + "epoch": 1.1655212750402106, + "grad_norm": 1.4561472169130645, + "learning_rate": 8.10662441141532e-06, + "loss": 0.44511687755584717, + "step": 3986 + }, + { + "epoch": 1.165813715455476, + "grad_norm": 1.596383666869324, + "learning_rate": 8.101877375716666e-06, + "loss": 0.47212404012680054, + "step": 3987 + }, + { + "epoch": 1.1661061558707413, + "grad_norm": 1.5859450593798408, + "learning_rate": 8.097130783692631e-06, + "loss": 0.5942205786705017, + "step": 3988 + }, + { + "epoch": 1.1663985962860068, + "grad_norm": 1.6678058947227146, + "learning_rate": 8.092384636452708e-06, + "loss": 0.49162304401397705, + "step": 3989 + }, + { + "epoch": 1.1666910367012722, + "grad_norm": 2.635849062548634, + "learning_rate": 8.087638935106277e-06, + "loss": 0.6544803380966187, + "step": 3990 + }, + { + "epoch": 1.1669834771165375, + "grad_norm": 1.677008396527972, + "learning_rate": 8.082893680762619e-06, + "loss": 0.5572186708450317, + "step": 3991 + }, + { + "epoch": 1.1672759175318028, + "grad_norm": 1.6443546400872178, + "learning_rate": 8.078148874530906e-06, + "loss": 0.5836775898933411, + "step": 3992 + }, + { + "epoch": 1.1675683579470684, + "grad_norm": 1.3079140035223278, + "learning_rate": 8.073404517520208e-06, + "loss": 0.5507068634033203, + "step": 3993 + }, + { + "epoch": 
1.1678607983623337, + "grad_norm": 1.9861505555993526, + "learning_rate": 8.068660610839489e-06, + "loss": 0.5312684178352356, + "step": 3994 + }, + { + "epoch": 1.168153238777599, + "grad_norm": 1.6552821635427635, + "learning_rate": 8.06391715559761e-06, + "loss": 0.5555688142776489, + "step": 3995 + }, + { + "epoch": 1.1684456791928644, + "grad_norm": 1.6871575092969338, + "learning_rate": 8.059174152903324e-06, + "loss": 0.5724596977233887, + "step": 3996 + }, + { + "epoch": 1.16873811960813, + "grad_norm": 1.6937240718062052, + "learning_rate": 8.054431603865282e-06, + "loss": 0.6212218999862671, + "step": 3997 + }, + { + "epoch": 1.1690305600233952, + "grad_norm": 1.4230492945656301, + "learning_rate": 8.049689509592023e-06, + "loss": 0.5061509609222412, + "step": 3998 + }, + { + "epoch": 1.1693230004386606, + "grad_norm": 1.695677070671476, + "learning_rate": 8.044947871191982e-06, + "loss": 0.6143001914024353, + "step": 3999 + }, + { + "epoch": 1.169615440853926, + "grad_norm": 1.32203821023621, + "learning_rate": 8.040206689773487e-06, + "loss": 0.5079911351203918, + "step": 4000 + }, + { + "epoch": 1.1699078812691914, + "grad_norm": 1.6638666755727167, + "learning_rate": 8.035465966444764e-06, + "loss": 0.6104908585548401, + "step": 4001 + }, + { + "epoch": 1.1702003216844568, + "grad_norm": 1.680147004679776, + "learning_rate": 8.03072570231393e-06, + "loss": 0.5953013896942139, + "step": 4002 + }, + { + "epoch": 1.1704927620997223, + "grad_norm": 1.3075175590117196, + "learning_rate": 8.025985898488986e-06, + "loss": 0.4541323781013489, + "step": 4003 + }, + { + "epoch": 1.1707852025149876, + "grad_norm": 1.5142685350846732, + "learning_rate": 8.021246556077838e-06, + "loss": 0.5708850026130676, + "step": 4004 + }, + { + "epoch": 1.171077642930253, + "grad_norm": 1.6574340180310174, + "learning_rate": 8.016507676188275e-06, + "loss": 0.5430601835250854, + "step": 4005 + }, + { + "epoch": 1.1713700833455183, + "grad_norm": 1.504278683910439, + 
"learning_rate": 8.011769259927981e-06, + "loss": 0.5621174573898315, + "step": 4006 + }, + { + "epoch": 1.1716625237607838, + "grad_norm": 1.5473800123062453, + "learning_rate": 8.007031308404536e-06, + "loss": 0.48092782497406006, + "step": 4007 + }, + { + "epoch": 1.1719549641760492, + "grad_norm": 1.9401766125340165, + "learning_rate": 8.002293822725404e-06, + "loss": 0.5770663022994995, + "step": 4008 + }, + { + "epoch": 1.1722474045913145, + "grad_norm": 1.7123399188942874, + "learning_rate": 7.997556803997945e-06, + "loss": 0.5692728757858276, + "step": 4009 + }, + { + "epoch": 1.1725398450065798, + "grad_norm": 1.6496884851556144, + "learning_rate": 7.99282025332941e-06, + "loss": 0.6256895065307617, + "step": 4010 + }, + { + "epoch": 1.1728322854218454, + "grad_norm": 1.4529405935008253, + "learning_rate": 7.988084171826937e-06, + "loss": 0.4272884130477905, + "step": 4011 + }, + { + "epoch": 1.1731247258371107, + "grad_norm": 1.548325850009333, + "learning_rate": 7.983348560597557e-06, + "loss": 0.5113184452056885, + "step": 4012 + }, + { + "epoch": 1.173417166252376, + "grad_norm": 1.4489276426544837, + "learning_rate": 7.978613420748186e-06, + "loss": 0.45635539293289185, + "step": 4013 + }, + { + "epoch": 1.1737096066676416, + "grad_norm": 1.6347983233600756, + "learning_rate": 7.973878753385638e-06, + "loss": 0.5539636611938477, + "step": 4014 + }, + { + "epoch": 1.174002047082907, + "grad_norm": 1.4140693021111321, + "learning_rate": 7.969144559616615e-06, + "loss": 0.6083431243896484, + "step": 4015 + }, + { + "epoch": 1.1742944874981722, + "grad_norm": 1.372102806580561, + "learning_rate": 7.9644108405477e-06, + "loss": 0.5268326997756958, + "step": 4016 + }, + { + "epoch": 1.1745869279134376, + "grad_norm": 1.5989300144328094, + "learning_rate": 7.95967759728538e-06, + "loss": 0.5680301189422607, + "step": 4017 + }, + { + "epoch": 1.174879368328703, + "grad_norm": 1.62105042514946, + "learning_rate": 7.954944830936012e-06, + "loss": 
0.5457121133804321, + "step": 4018 + }, + { + "epoch": 1.1751718087439684, + "grad_norm": 1.59579517284719, + "learning_rate": 7.950212542605857e-06, + "loss": 0.5358338356018066, + "step": 4019 + }, + { + "epoch": 1.1754642491592338, + "grad_norm": 1.5630110417390142, + "learning_rate": 7.945480733401056e-06, + "loss": 0.6094579696655273, + "step": 4020 + }, + { + "epoch": 1.175756689574499, + "grad_norm": 1.6732097827507912, + "learning_rate": 7.940749404427642e-06, + "loss": 0.5108463764190674, + "step": 4021 + }, + { + "epoch": 1.1760491299897646, + "grad_norm": 1.6495017651653137, + "learning_rate": 7.936018556791537e-06, + "loss": 0.4946494698524475, + "step": 4022 + }, + { + "epoch": 1.17634157040503, + "grad_norm": 1.7757854212337651, + "learning_rate": 7.931288191598543e-06, + "loss": 0.5056017637252808, + "step": 4023 + }, + { + "epoch": 1.1766340108202953, + "grad_norm": 1.5276820294687934, + "learning_rate": 7.926558309954354e-06, + "loss": 0.5242294073104858, + "step": 4024 + }, + { + "epoch": 1.1769264512355608, + "grad_norm": 1.682705103807737, + "learning_rate": 7.921828912964556e-06, + "loss": 0.5667276382446289, + "step": 4025 + }, + { + "epoch": 1.1772188916508262, + "grad_norm": 1.6370912674167624, + "learning_rate": 7.917100001734614e-06, + "loss": 0.5282422304153442, + "step": 4026 + }, + { + "epoch": 1.1775113320660915, + "grad_norm": 1.6026370834828365, + "learning_rate": 7.912371577369881e-06, + "loss": 0.4887520670890808, + "step": 4027 + }, + { + "epoch": 1.177803772481357, + "grad_norm": 1.6287890532743194, + "learning_rate": 7.907643640975603e-06, + "loss": 0.5082155466079712, + "step": 4028 + }, + { + "epoch": 1.1780962128966224, + "grad_norm": 1.649507152949628, + "learning_rate": 7.902916193656898e-06, + "loss": 0.5432984828948975, + "step": 4029 + }, + { + "epoch": 1.1783886533118877, + "grad_norm": 1.7249606112651144, + "learning_rate": 7.898189236518783e-06, + "loss": 0.4313681721687317, + "step": 4030 + }, + { + "epoch": 
1.178681093727153, + "grad_norm": 1.494399406404666, + "learning_rate": 7.893462770666155e-06, + "loss": 0.6051831245422363, + "step": 4031 + }, + { + "epoch": 1.1789735341424186, + "grad_norm": 1.5057449817059945, + "learning_rate": 7.888736797203796e-06, + "loss": 0.45805442333221436, + "step": 4032 + }, + { + "epoch": 1.179265974557684, + "grad_norm": 1.7917233044229635, + "learning_rate": 7.884011317236376e-06, + "loss": 0.4998340606689453, + "step": 4033 + }, + { + "epoch": 1.1795584149729492, + "grad_norm": 1.651259706746187, + "learning_rate": 7.879286331868443e-06, + "loss": 0.5298212170600891, + "step": 4034 + }, + { + "epoch": 1.1798508553882145, + "grad_norm": 1.7028183419777814, + "learning_rate": 7.874561842204437e-06, + "loss": 0.5104682445526123, + "step": 4035 + }, + { + "epoch": 1.18014329580348, + "grad_norm": 1.6393724776910414, + "learning_rate": 7.869837849348676e-06, + "loss": 0.5793051719665527, + "step": 4036 + }, + { + "epoch": 1.1804357362187454, + "grad_norm": 1.4839435154715734, + "learning_rate": 7.865114354405367e-06, + "loss": 0.42913323640823364, + "step": 4037 + }, + { + "epoch": 1.1807281766340108, + "grad_norm": 2.104724599006863, + "learning_rate": 7.860391358478596e-06, + "loss": 0.5183675289154053, + "step": 4038 + }, + { + "epoch": 1.1810206170492763, + "grad_norm": 1.5685744104736703, + "learning_rate": 7.855668862672339e-06, + "loss": 0.444034218788147, + "step": 4039 + }, + { + "epoch": 1.1813130574645416, + "grad_norm": 1.486556561749613, + "learning_rate": 7.850946868090446e-06, + "loss": 0.4357207417488098, + "step": 4040 + }, + { + "epoch": 1.181605497879807, + "grad_norm": 1.6923285770365775, + "learning_rate": 7.846225375836657e-06, + "loss": 0.4517707824707031, + "step": 4041 + }, + { + "epoch": 1.1818979382950725, + "grad_norm": 1.5865011864132745, + "learning_rate": 7.841504387014589e-06, + "loss": 0.4437381625175476, + "step": 4042 + }, + { + "epoch": 1.1821903787103378, + "grad_norm": 1.4744521314451464, + 
"learning_rate": 7.836783902727746e-06, + "loss": 0.5364828109741211, + "step": 4043 + }, + { + "epoch": 1.1824828191256032, + "grad_norm": 1.650227369991675, + "learning_rate": 7.832063924079516e-06, + "loss": 0.4814251661300659, + "step": 4044 + }, + { + "epoch": 1.1827752595408685, + "grad_norm": 1.9016693432010778, + "learning_rate": 7.827344452173163e-06, + "loss": 0.5376232862472534, + "step": 4045 + }, + { + "epoch": 1.183067699956134, + "grad_norm": 1.7007887018924743, + "learning_rate": 7.822625488111833e-06, + "loss": 0.6005147695541382, + "step": 4046 + }, + { + "epoch": 1.1833601403713994, + "grad_norm": 1.5696670197669271, + "learning_rate": 7.817907032998556e-06, + "loss": 0.5276827216148376, + "step": 4047 + }, + { + "epoch": 1.1836525807866647, + "grad_norm": 2.1978111734105994, + "learning_rate": 7.813189087936243e-06, + "loss": 0.6425626277923584, + "step": 4048 + }, + { + "epoch": 1.18394502120193, + "grad_norm": 1.272646490936496, + "learning_rate": 7.808471654027685e-06, + "loss": 0.44388407468795776, + "step": 4049 + }, + { + "epoch": 1.1842374616171956, + "grad_norm": 1.743245771156321, + "learning_rate": 7.803754732375554e-06, + "loss": 0.5044336318969727, + "step": 4050 + }, + { + "epoch": 1.184529902032461, + "grad_norm": 1.9415496480441554, + "learning_rate": 7.7990383240824e-06, + "loss": 0.6964906454086304, + "step": 4051 + }, + { + "epoch": 1.1848223424477262, + "grad_norm": 1.9029191440552455, + "learning_rate": 7.794322430250654e-06, + "loss": 0.6093637943267822, + "step": 4052 + }, + { + "epoch": 1.1851147828629918, + "grad_norm": 1.8079016024144563, + "learning_rate": 7.78960705198263e-06, + "loss": 0.5264803171157837, + "step": 4053 + }, + { + "epoch": 1.185407223278257, + "grad_norm": 1.444425047773482, + "learning_rate": 7.78489219038052e-06, + "loss": 0.5336456298828125, + "step": 4054 + }, + { + "epoch": 1.1856996636935224, + "grad_norm": 1.7563642817078289, + "learning_rate": 7.78017784654639e-06, + "loss": 
0.5266311168670654, + "step": 4055 + }, + { + "epoch": 1.1859921041087877, + "grad_norm": 1.6538609406479838, + "learning_rate": 7.775464021582195e-06, + "loss": 0.6281685829162598, + "step": 4056 + }, + { + "epoch": 1.1862845445240533, + "grad_norm": 1.6081255371588656, + "learning_rate": 7.770750716589758e-06, + "loss": 0.560591995716095, + "step": 4057 + }, + { + "epoch": 1.1865769849393186, + "grad_norm": 1.7912692279763305, + "learning_rate": 7.766037932670786e-06, + "loss": 0.5751859545707703, + "step": 4058 + }, + { + "epoch": 1.186869425354584, + "grad_norm": 1.629657999448518, + "learning_rate": 7.761325670926864e-06, + "loss": 0.5404624938964844, + "step": 4059 + }, + { + "epoch": 1.1871618657698493, + "grad_norm": 1.6997280704374504, + "learning_rate": 7.756613932459456e-06, + "loss": 0.4714626669883728, + "step": 4060 + }, + { + "epoch": 1.1874543061851148, + "grad_norm": 1.4471766418666208, + "learning_rate": 7.751902718369903e-06, + "loss": 0.5449519157409668, + "step": 4061 + }, + { + "epoch": 1.1877467466003802, + "grad_norm": 1.6279611933236646, + "learning_rate": 7.747192029759419e-06, + "loss": 0.6518754959106445, + "step": 4062 + }, + { + "epoch": 1.1880391870156455, + "grad_norm": 1.4655931664348079, + "learning_rate": 7.7424818677291e-06, + "loss": 0.47224369645118713, + "step": 4063 + }, + { + "epoch": 1.188331627430911, + "grad_norm": 1.6924798895194766, + "learning_rate": 7.737772233379919e-06, + "loss": 0.5482417345046997, + "step": 4064 + }, + { + "epoch": 1.1886240678461764, + "grad_norm": 1.3910277085667344, + "learning_rate": 7.733063127812724e-06, + "loss": 0.5401996374130249, + "step": 4065 + }, + { + "epoch": 1.1889165082614417, + "grad_norm": 2.4517381628425547, + "learning_rate": 7.72835455212824e-06, + "loss": 0.4678424596786499, + "step": 4066 + }, + { + "epoch": 1.1892089486767072, + "grad_norm": 1.6156459518768798, + "learning_rate": 7.72364650742707e-06, + "loss": 0.5191294550895691, + "step": 4067 + }, + { + "epoch": 
1.1895013890919726, + "grad_norm": 1.4433917939096517, + "learning_rate": 7.718938994809685e-06, + "loss": 0.44018834829330444, + "step": 4068 + }, + { + "epoch": 1.1897938295072379, + "grad_norm": 1.3955169745603861, + "learning_rate": 7.714232015376442e-06, + "loss": 0.47852614521980286, + "step": 4069 + }, + { + "epoch": 1.1900862699225032, + "grad_norm": 1.523334975304476, + "learning_rate": 7.709525570227567e-06, + "loss": 0.5748994946479797, + "step": 4070 + }, + { + "epoch": 1.1903787103377688, + "grad_norm": 1.7714529908638612, + "learning_rate": 7.704819660463164e-06, + "loss": 0.5015645027160645, + "step": 4071 + }, + { + "epoch": 1.190671150753034, + "grad_norm": 1.8100962592275294, + "learning_rate": 7.70011428718321e-06, + "loss": 0.6200511455535889, + "step": 4072 + }, + { + "epoch": 1.1909635911682994, + "grad_norm": 1.531990990921369, + "learning_rate": 7.69540945148756e-06, + "loss": 0.6311289668083191, + "step": 4073 + }, + { + "epoch": 1.1912560315835647, + "grad_norm": 1.5403717728586237, + "learning_rate": 7.690705154475937e-06, + "loss": 0.5707247257232666, + "step": 4074 + }, + { + "epoch": 1.1915484719988303, + "grad_norm": 2.0693191702072107, + "learning_rate": 7.686001397247944e-06, + "loss": 0.5616360902786255, + "step": 4075 + }, + { + "epoch": 1.1918409124140956, + "grad_norm": 1.7144278887449431, + "learning_rate": 7.681298180903054e-06, + "loss": 0.5955555438995361, + "step": 4076 + }, + { + "epoch": 1.192133352829361, + "grad_norm": 1.6252826516162207, + "learning_rate": 7.676595506540615e-06, + "loss": 0.5057257413864136, + "step": 4077 + }, + { + "epoch": 1.1924257932446265, + "grad_norm": 1.8445544744897249, + "learning_rate": 7.671893375259854e-06, + "loss": 0.5795278549194336, + "step": 4078 + }, + { + "epoch": 1.1927182336598918, + "grad_norm": 1.5541021220011975, + "learning_rate": 7.66719178815986e-06, + "loss": 0.5213087797164917, + "step": 4079 + }, + { + "epoch": 1.1930106740751572, + "grad_norm": 1.5183242886274189, + 
"learning_rate": 7.662490746339601e-06, + "loss": 0.5333693027496338, + "step": 4080 + }, + { + "epoch": 1.1933031144904227, + "grad_norm": 1.5253876680230323, + "learning_rate": 7.657790250897916e-06, + "loss": 0.4705297648906708, + "step": 4081 + }, + { + "epoch": 1.193595554905688, + "grad_norm": 1.5875026444946445, + "learning_rate": 7.65309030293352e-06, + "loss": 0.5376054644584656, + "step": 4082 + }, + { + "epoch": 1.1938879953209534, + "grad_norm": 1.4103452849520708, + "learning_rate": 7.648390903544997e-06, + "loss": 0.47457355260849, + "step": 4083 + }, + { + "epoch": 1.1941804357362187, + "grad_norm": 1.5068528532277095, + "learning_rate": 7.6436920538308e-06, + "loss": 0.48752763867378235, + "step": 4084 + }, + { + "epoch": 1.1944728761514842, + "grad_norm": 1.5226531730849548, + "learning_rate": 7.63899375488926e-06, + "loss": 0.48227858543395996, + "step": 4085 + }, + { + "epoch": 1.1947653165667496, + "grad_norm": 1.4101996785965327, + "learning_rate": 7.634296007818576e-06, + "loss": 0.4294116497039795, + "step": 4086 + }, + { + "epoch": 1.1950577569820149, + "grad_norm": 1.24669252589954, + "learning_rate": 7.629598813716817e-06, + "loss": 0.5562552809715271, + "step": 4087 + }, + { + "epoch": 1.1953501973972802, + "grad_norm": 1.6628602240304204, + "learning_rate": 7.624902173681923e-06, + "loss": 0.6466431617736816, + "step": 4088 + }, + { + "epoch": 1.1956426378125458, + "grad_norm": 1.7596122427030323, + "learning_rate": 7.620206088811704e-06, + "loss": 0.7183903455734253, + "step": 4089 + }, + { + "epoch": 1.195935078227811, + "grad_norm": 1.666055880141139, + "learning_rate": 7.615510560203841e-06, + "loss": 0.5667496919631958, + "step": 4090 + }, + { + "epoch": 1.1962275186430764, + "grad_norm": 1.5232682591562918, + "learning_rate": 7.610815588955888e-06, + "loss": 0.5603050589561462, + "step": 4091 + }, + { + "epoch": 1.196519959058342, + "grad_norm": 1.6556784363331365, + "learning_rate": 7.606121176165267e-06, + "loss": 
0.5305474996566772, + "step": 4092 + }, + { + "epoch": 1.1968123994736073, + "grad_norm": 2.0140258709167163, + "learning_rate": 7.6014273229292625e-06, + "loss": 0.7321374416351318, + "step": 4093 + }, + { + "epoch": 1.1971048398888726, + "grad_norm": 1.5891169030075603, + "learning_rate": 7.5967340303450385e-06, + "loss": 0.44885972142219543, + "step": 4094 + }, + { + "epoch": 1.197397280304138, + "grad_norm": 1.7955283190373275, + "learning_rate": 7.592041299509624e-06, + "loss": 0.593859076499939, + "step": 4095 + }, + { + "epoch": 1.1976897207194035, + "grad_norm": 1.4553428657338656, + "learning_rate": 7.587349131519913e-06, + "loss": 0.6701182723045349, + "step": 4096 + }, + { + "epoch": 1.1979821611346688, + "grad_norm": 1.9268795339399152, + "learning_rate": 7.582657527472674e-06, + "loss": 0.6456711292266846, + "step": 4097 + }, + { + "epoch": 1.1982746015499341, + "grad_norm": 1.6602700214896833, + "learning_rate": 7.577966488464543e-06, + "loss": 0.5933864116668701, + "step": 4098 + }, + { + "epoch": 1.1985670419651995, + "grad_norm": 1.398305705152583, + "learning_rate": 7.5732760155920175e-06, + "loss": 0.4609876275062561, + "step": 4099 + }, + { + "epoch": 1.198859482380465, + "grad_norm": 1.4827488477589208, + "learning_rate": 7.568586109951468e-06, + "loss": 0.540961503982544, + "step": 4100 + }, + { + "epoch": 1.1991519227957304, + "grad_norm": 1.629963355664156, + "learning_rate": 7.563896772639132e-06, + "loss": 0.5522942543029785, + "step": 4101 + }, + { + "epoch": 1.1994443632109957, + "grad_norm": 1.739784480619601, + "learning_rate": 7.559208004751114e-06, + "loss": 0.483737587928772, + "step": 4102 + }, + { + "epoch": 1.1997368036262612, + "grad_norm": 1.2743684109876499, + "learning_rate": 7.554519807383384e-06, + "loss": 0.3760339915752411, + "step": 4103 + }, + { + "epoch": 1.2000292440415266, + "grad_norm": 1.494486709964621, + "learning_rate": 7.549832181631782e-06, + "loss": 0.5034801959991455, + "step": 4104 + }, + { + "epoch": 
1.2003216844567919, + "grad_norm": 1.6403057961263519, + "learning_rate": 7.545145128592009e-06, + "loss": 0.5605261325836182, + "step": 4105 + }, + { + "epoch": 1.2006141248720574, + "grad_norm": 1.4179033673825343, + "learning_rate": 7.540458649359637e-06, + "loss": 0.4724245071411133, + "step": 4106 + }, + { + "epoch": 1.2009065652873228, + "grad_norm": 1.609040907971216, + "learning_rate": 7.535772745030101e-06, + "loss": 0.564873218536377, + "step": 4107 + }, + { + "epoch": 1.201199005702588, + "grad_norm": 1.9431151220409157, + "learning_rate": 7.531087416698702e-06, + "loss": 0.699596643447876, + "step": 4108 + }, + { + "epoch": 1.2014914461178534, + "grad_norm": 1.5180492689699372, + "learning_rate": 7.526402665460612e-06, + "loss": 0.47448351979255676, + "step": 4109 + }, + { + "epoch": 1.201783886533119, + "grad_norm": 1.4606225624905942, + "learning_rate": 7.521718492410855e-06, + "loss": 0.4681323766708374, + "step": 4110 + }, + { + "epoch": 1.2020763269483843, + "grad_norm": 1.8550718864551587, + "learning_rate": 7.517034898644333e-06, + "loss": 0.6361842155456543, + "step": 4111 + }, + { + "epoch": 1.2023687673636496, + "grad_norm": 1.5211596606564617, + "learning_rate": 7.5123518852558075e-06, + "loss": 0.4732646942138672, + "step": 4112 + }, + { + "epoch": 1.202661207778915, + "grad_norm": 1.6512929892036816, + "learning_rate": 7.507669453339903e-06, + "loss": 0.57124263048172, + "step": 4113 + }, + { + "epoch": 1.2029536481941805, + "grad_norm": 1.758611342292707, + "learning_rate": 7.502987603991111e-06, + "loss": 0.5228173732757568, + "step": 4114 + }, + { + "epoch": 1.2032460886094458, + "grad_norm": 1.7352024129193708, + "learning_rate": 7.4983063383037864e-06, + "loss": 0.5501765012741089, + "step": 4115 + }, + { + "epoch": 1.2035385290247111, + "grad_norm": 1.6782467710972089, + "learning_rate": 7.493625657372141e-06, + "loss": 0.5062840580940247, + "step": 4116 + }, + { + "epoch": 1.2038309694399767, + "grad_norm": 1.6960273401585455, + 
"learning_rate": 7.4889455622902616e-06, + "loss": 0.7060763835906982, + "step": 4117 + }, + { + "epoch": 1.204123409855242, + "grad_norm": 1.478061987478783, + "learning_rate": 7.484266054152088e-06, + "loss": 0.42127668857574463, + "step": 4118 + }, + { + "epoch": 1.2044158502705073, + "grad_norm": 1.3574946815299211, + "learning_rate": 7.479587134051429e-06, + "loss": 0.490860253572464, + "step": 4119 + }, + { + "epoch": 1.204708290685773, + "grad_norm": 1.443033575116078, + "learning_rate": 7.474908803081955e-06, + "loss": 0.45786625146865845, + "step": 4120 + }, + { + "epoch": 1.2050007311010382, + "grad_norm": 1.810733388901398, + "learning_rate": 7.470231062337192e-06, + "loss": 0.5267277359962463, + "step": 4121 + }, + { + "epoch": 1.2052931715163036, + "grad_norm": 1.670838162040588, + "learning_rate": 7.465553912910539e-06, + "loss": 0.47834646701812744, + "step": 4122 + }, + { + "epoch": 1.2055856119315689, + "grad_norm": 1.4366745635956868, + "learning_rate": 7.460877355895249e-06, + "loss": 0.5348576903343201, + "step": 4123 + }, + { + "epoch": 1.2058780523468344, + "grad_norm": 1.7186674622129299, + "learning_rate": 7.456201392384437e-06, + "loss": 0.47992441058158875, + "step": 4124 + }, + { + "epoch": 1.2061704927620998, + "grad_norm": 1.6108537844876905, + "learning_rate": 7.451526023471085e-06, + "loss": 0.5693913698196411, + "step": 4125 + }, + { + "epoch": 1.206462933177365, + "grad_norm": 1.8995573488864546, + "learning_rate": 7.4468512502480305e-06, + "loss": 0.5165153741836548, + "step": 4126 + }, + { + "epoch": 1.2067553735926304, + "grad_norm": 1.4833110616884417, + "learning_rate": 7.442177073807973e-06, + "loss": 0.522534966468811, + "step": 4127 + }, + { + "epoch": 1.207047814007896, + "grad_norm": 2.0276890753098464, + "learning_rate": 7.43750349524347e-06, + "loss": 0.6298432946205139, + "step": 4128 + }, + { + "epoch": 1.2073402544231613, + "grad_norm": 1.5840348747117112, + "learning_rate": 7.432830515646947e-06, + "loss": 
0.5077394247055054, + "step": 4129 + }, + { + "epoch": 1.2076326948384266, + "grad_norm": 2.0187303897146682, + "learning_rate": 7.428158136110681e-06, + "loss": 0.6492841839790344, + "step": 4130 + }, + { + "epoch": 1.2079251352536922, + "grad_norm": 1.8651226738731277, + "learning_rate": 7.423486357726813e-06, + "loss": 0.5204535126686096, + "step": 4131 + }, + { + "epoch": 1.2082175756689575, + "grad_norm": 1.7208004693147547, + "learning_rate": 7.418815181587347e-06, + "loss": 0.56598961353302, + "step": 4132 + }, + { + "epoch": 1.2085100160842228, + "grad_norm": 1.7632065676998485, + "learning_rate": 7.4141446087841364e-06, + "loss": 0.486950159072876, + "step": 4133 + }, + { + "epoch": 1.2088024564994881, + "grad_norm": 1.8229002651567825, + "learning_rate": 7.4094746404089e-06, + "loss": 0.7218466997146606, + "step": 4134 + }, + { + "epoch": 1.2090948969147537, + "grad_norm": 1.715700034058204, + "learning_rate": 7.404805277553218e-06, + "loss": 0.6486172676086426, + "step": 4135 + }, + { + "epoch": 1.209387337330019, + "grad_norm": 1.8385918373460561, + "learning_rate": 7.400136521308521e-06, + "loss": 0.6160574555397034, + "step": 4136 + }, + { + "epoch": 1.2096797777452843, + "grad_norm": 1.6766631954981184, + "learning_rate": 7.395468372766107e-06, + "loss": 0.6184699535369873, + "step": 4137 + }, + { + "epoch": 1.2099722181605497, + "grad_norm": 1.6881704887676476, + "learning_rate": 7.390800833017124e-06, + "loss": 0.5795263051986694, + "step": 4138 + }, + { + "epoch": 1.2102646585758152, + "grad_norm": 1.6841718896097397, + "learning_rate": 7.386133903152581e-06, + "loss": 0.5409367084503174, + "step": 4139 + }, + { + "epoch": 1.2105570989910805, + "grad_norm": 1.678948206873695, + "learning_rate": 7.3814675842633465e-06, + "loss": 0.47924935817718506, + "step": 4140 + }, + { + "epoch": 1.2108495394063459, + "grad_norm": 1.6532202064740131, + "learning_rate": 7.376801877440143e-06, + "loss": 0.5737412571907043, + "step": 4141 + }, + { + "epoch": 
1.2111419798216114, + "grad_norm": 1.5307761286613382, + "learning_rate": 7.372136783773551e-06, + "loss": 0.538013219833374, + "step": 4142 + }, + { + "epoch": 1.2114344202368768, + "grad_norm": 1.4940902719253717, + "learning_rate": 7.367472304354011e-06, + "loss": 0.4523904323577881, + "step": 4143 + }, + { + "epoch": 1.211726860652142, + "grad_norm": 1.4793905716399964, + "learning_rate": 7.362808440271811e-06, + "loss": 0.5057293176651001, + "step": 4144 + }, + { + "epoch": 1.2120193010674076, + "grad_norm": 1.4373562566302274, + "learning_rate": 7.358145192617103e-06, + "loss": 0.4653171896934509, + "step": 4145 + }, + { + "epoch": 1.212311741482673, + "grad_norm": 1.6048946971271119, + "learning_rate": 7.353482562479896e-06, + "loss": 0.607070803642273, + "step": 4146 + }, + { + "epoch": 1.2126041818979383, + "grad_norm": 1.51939699208445, + "learning_rate": 7.348820550950047e-06, + "loss": 0.4721861481666565, + "step": 4147 + }, + { + "epoch": 1.2128966223132036, + "grad_norm": 1.8802239228266517, + "learning_rate": 7.3441591591172765e-06, + "loss": 0.6656746864318848, + "step": 4148 + }, + { + "epoch": 1.2131890627284692, + "grad_norm": 1.504596663567376, + "learning_rate": 7.339498388071154e-06, + "loss": 0.5231848359107971, + "step": 4149 + }, + { + "epoch": 1.2134815031437345, + "grad_norm": 1.4250712810936565, + "learning_rate": 7.334838238901106e-06, + "loss": 0.42241257429122925, + "step": 4150 + }, + { + "epoch": 1.2137739435589998, + "grad_norm": 1.4721862632309721, + "learning_rate": 7.3301787126964165e-06, + "loss": 0.427111953496933, + "step": 4151 + }, + { + "epoch": 1.2140663839742651, + "grad_norm": 2.038283523639075, + "learning_rate": 7.325519810546219e-06, + "loss": 0.6208339929580688, + "step": 4152 + }, + { + "epoch": 1.2143588243895307, + "grad_norm": 1.3970516014119925, + "learning_rate": 7.320861533539505e-06, + "loss": 0.5031273365020752, + "step": 4153 + }, + { + "epoch": 1.214651264804796, + "grad_norm": 1.5153681425347725, + 
"learning_rate": 7.3162038827651205e-06, + "loss": 0.5617444515228271, + "step": 4154 + }, + { + "epoch": 1.2149437052200613, + "grad_norm": 1.4855483785732004, + "learning_rate": 7.311546859311758e-06, + "loss": 0.4616255462169647, + "step": 4155 + }, + { + "epoch": 1.215236145635327, + "grad_norm": 1.5704453976932513, + "learning_rate": 7.306890464267972e-06, + "loss": 0.5799977779388428, + "step": 4156 + }, + { + "epoch": 1.2155285860505922, + "grad_norm": 1.521477491941422, + "learning_rate": 7.302234698722165e-06, + "loss": 0.5669786930084229, + "step": 4157 + }, + { + "epoch": 1.2158210264658575, + "grad_norm": 1.5325381791627977, + "learning_rate": 7.297579563762595e-06, + "loss": 0.5622642040252686, + "step": 4158 + }, + { + "epoch": 1.216113466881123, + "grad_norm": 1.8789411887268221, + "learning_rate": 7.292925060477367e-06, + "loss": 0.6896791458129883, + "step": 4159 + }, + { + "epoch": 1.2164059072963884, + "grad_norm": 1.5263918361022677, + "learning_rate": 7.288271189954451e-06, + "loss": 0.6704437136650085, + "step": 4160 + }, + { + "epoch": 1.2166983477116537, + "grad_norm": 1.6192057061391554, + "learning_rate": 7.2836179532816565e-06, + "loss": 0.6340646743774414, + "step": 4161 + }, + { + "epoch": 1.216990788126919, + "grad_norm": 1.4283430296516553, + "learning_rate": 7.278965351546648e-06, + "loss": 0.528992772102356, + "step": 4162 + }, + { + "epoch": 1.2172832285421846, + "grad_norm": 1.4842100691170903, + "learning_rate": 7.274313385836949e-06, + "loss": 0.45160621404647827, + "step": 4163 + }, + { + "epoch": 1.21757566895745, + "grad_norm": 1.3859373993268853, + "learning_rate": 7.269662057239919e-06, + "loss": 0.5398670434951782, + "step": 4164 + }, + { + "epoch": 1.2178681093727153, + "grad_norm": 1.7598892874276293, + "learning_rate": 7.265011366842785e-06, + "loss": 0.5174476504325867, + "step": 4165 + }, + { + "epoch": 1.2181605497879806, + "grad_norm": 1.663231631427072, + "learning_rate": 7.260361315732613e-06, + "loss": 
0.4830206632614136, + "step": 4166 + }, + { + "epoch": 1.2184529902032462, + "grad_norm": 1.4149457900973579, + "learning_rate": 7.2557119049963266e-06, + "loss": 0.42422181367874146, + "step": 4167 + }, + { + "epoch": 1.2187454306185115, + "grad_norm": 1.363467777836694, + "learning_rate": 7.251063135720699e-06, + "loss": 0.43544018268585205, + "step": 4168 + }, + { + "epoch": 1.2190378710337768, + "grad_norm": 1.4776092804767433, + "learning_rate": 7.2464150089923465e-06, + "loss": 0.5352005362510681, + "step": 4169 + }, + { + "epoch": 1.2193303114490424, + "grad_norm": 1.5459436268475357, + "learning_rate": 7.241767525897746e-06, + "loss": 0.4718678891658783, + "step": 4170 + }, + { + "epoch": 1.2196227518643077, + "grad_norm": 1.4994134423194976, + "learning_rate": 7.237120687523214e-06, + "loss": 0.618084192276001, + "step": 4171 + }, + { + "epoch": 1.219915192279573, + "grad_norm": 1.8137589794234399, + "learning_rate": 7.232474494954924e-06, + "loss": 0.625995397567749, + "step": 4172 + }, + { + "epoch": 1.2202076326948383, + "grad_norm": 1.4989590312422592, + "learning_rate": 7.227828949278894e-06, + "loss": 0.5382465124130249, + "step": 4173 + }, + { + "epoch": 1.220500073110104, + "grad_norm": 1.702878462884744, + "learning_rate": 7.223184051580992e-06, + "loss": 0.5299465656280518, + "step": 4174 + }, + { + "epoch": 1.2207925135253692, + "grad_norm": 1.7776293184889576, + "learning_rate": 7.218539802946934e-06, + "loss": 0.5899940729141235, + "step": 4175 + }, + { + "epoch": 1.2210849539406345, + "grad_norm": 1.9763552708522982, + "learning_rate": 7.213896204462286e-06, + "loss": 0.6126594543457031, + "step": 4176 + }, + { + "epoch": 1.2213773943558999, + "grad_norm": 1.6580044033592523, + "learning_rate": 7.20925325721246e-06, + "loss": 0.5576338768005371, + "step": 4177 + }, + { + "epoch": 1.2216698347711654, + "grad_norm": 1.5044012673537284, + "learning_rate": 7.204610962282717e-06, + "loss": 0.540515661239624, + "step": 4178 + }, + { + "epoch": 
1.2219622751864307, + "grad_norm": 1.5281012838641301, + "learning_rate": 7.1999693207581675e-06, + "loss": 0.5306440591812134, + "step": 4179 + }, + { + "epoch": 1.222254715601696, + "grad_norm": 2.02113466617051, + "learning_rate": 7.195328333723763e-06, + "loss": 0.6274853944778442, + "step": 4180 + }, + { + "epoch": 1.2225471560169616, + "grad_norm": 1.6954554706562375, + "learning_rate": 7.190688002264308e-06, + "loss": 0.5626333951950073, + "step": 4181 + }, + { + "epoch": 1.222839596432227, + "grad_norm": 1.6364457786315536, + "learning_rate": 7.18604832746445e-06, + "loss": 0.5938719511032104, + "step": 4182 + }, + { + "epoch": 1.2231320368474923, + "grad_norm": 1.4010331016668016, + "learning_rate": 7.181409310408688e-06, + "loss": 0.4599727988243103, + "step": 4183 + }, + { + "epoch": 1.2234244772627578, + "grad_norm": 1.516823379099723, + "learning_rate": 7.176770952181363e-06, + "loss": 0.5912302732467651, + "step": 4184 + }, + { + "epoch": 1.2237169176780232, + "grad_norm": 1.24563200951521, + "learning_rate": 7.172133253866662e-06, + "loss": 0.534631073474884, + "step": 4185 + }, + { + "epoch": 1.2240093580932885, + "grad_norm": 1.3825393422514298, + "learning_rate": 7.167496216548618e-06, + "loss": 0.5084418058395386, + "step": 4186 + }, + { + "epoch": 1.2243017985085538, + "grad_norm": 1.6343841724383257, + "learning_rate": 7.162859841311112e-06, + "loss": 0.6906956434249878, + "step": 4187 + }, + { + "epoch": 1.2245942389238194, + "grad_norm": 1.6583835426138527, + "learning_rate": 7.158224129237867e-06, + "loss": 0.5578658580780029, + "step": 4188 + }, + { + "epoch": 1.2248866793390847, + "grad_norm": 1.4116232043960963, + "learning_rate": 7.153589081412455e-06, + "loss": 0.4438907206058502, + "step": 4189 + }, + { + "epoch": 1.22517911975435, + "grad_norm": 1.9189119615156511, + "learning_rate": 7.148954698918289e-06, + "loss": 0.6366580724716187, + "step": 4190 + }, + { + "epoch": 1.2254715601696153, + "grad_norm": 1.674796821883658, + 
"learning_rate": 7.144320982838628e-06, + "loss": 0.5532524585723877, + "step": 4191 + }, + { + "epoch": 1.2257640005848809, + "grad_norm": 1.3678471530217577, + "learning_rate": 7.139687934256574e-06, + "loss": 0.4847594201564789, + "step": 4192 + }, + { + "epoch": 1.2260564410001462, + "grad_norm": 1.651582950772816, + "learning_rate": 7.135055554255073e-06, + "loss": 0.6273454427719116, + "step": 4193 + }, + { + "epoch": 1.2263488814154115, + "grad_norm": 1.826963047999446, + "learning_rate": 7.130423843916917e-06, + "loss": 0.6320512294769287, + "step": 4194 + }, + { + "epoch": 1.226641321830677, + "grad_norm": 1.9938752870068028, + "learning_rate": 7.125792804324741e-06, + "loss": 0.5499723553657532, + "step": 4195 + }, + { + "epoch": 1.2269337622459424, + "grad_norm": 1.531512294163018, + "learning_rate": 7.121162436561023e-06, + "loss": 0.5855484008789062, + "step": 4196 + }, + { + "epoch": 1.2272262026612077, + "grad_norm": 1.5092619418718032, + "learning_rate": 7.11653274170808e-06, + "loss": 0.5998305678367615, + "step": 4197 + }, + { + "epoch": 1.2275186430764733, + "grad_norm": 1.6613439290789596, + "learning_rate": 7.111903720848077e-06, + "loss": 0.6963703632354736, + "step": 4198 + }, + { + "epoch": 1.2278110834917386, + "grad_norm": 1.7273945695579416, + "learning_rate": 7.10727537506302e-06, + "loss": 0.5664974451065063, + "step": 4199 + }, + { + "epoch": 1.228103523907004, + "grad_norm": 1.8611907189119672, + "learning_rate": 7.102647705434755e-06, + "loss": 0.6502630710601807, + "step": 4200 + }, + { + "epoch": 1.2283959643222693, + "grad_norm": 1.5674599206950446, + "learning_rate": 7.098020713044973e-06, + "loss": 0.5727233290672302, + "step": 4201 + }, + { + "epoch": 1.2286884047375348, + "grad_norm": 1.4105219463780128, + "learning_rate": 7.093394398975206e-06, + "loss": 0.47885602712631226, + "step": 4202 + }, + { + "epoch": 1.2289808451528001, + "grad_norm": 1.62325320016664, + "learning_rate": 7.088768764306826e-06, + "loss": 
0.46089547872543335, + "step": 4203 + }, + { + "epoch": 1.2292732855680655, + "grad_norm": 1.3853973501267451, + "learning_rate": 7.084143810121044e-06, + "loss": 0.48920977115631104, + "step": 4204 + }, + { + "epoch": 1.2295657259833308, + "grad_norm": 1.4371671531095065, + "learning_rate": 7.07951953749892e-06, + "loss": 0.5320104956626892, + "step": 4205 + }, + { + "epoch": 1.2298581663985964, + "grad_norm": 1.693565977205871, + "learning_rate": 7.074895947521347e-06, + "loss": 0.6403206586837769, + "step": 4206 + }, + { + "epoch": 1.2301506068138617, + "grad_norm": 1.3774390509755927, + "learning_rate": 7.070273041269062e-06, + "loss": 0.5522217750549316, + "step": 4207 + }, + { + "epoch": 1.230443047229127, + "grad_norm": 1.644407790392686, + "learning_rate": 7.0656508198226405e-06, + "loss": 0.5235073566436768, + "step": 4208 + }, + { + "epoch": 1.2307354876443926, + "grad_norm": 1.9076552987416457, + "learning_rate": 7.061029284262497e-06, + "loss": 0.5972521305084229, + "step": 4209 + }, + { + "epoch": 1.2310279280596579, + "grad_norm": 1.7443828706372393, + "learning_rate": 7.0564084356688885e-06, + "loss": 0.5989280343055725, + "step": 4210 + }, + { + "epoch": 1.2313203684749232, + "grad_norm": 1.4723000244161777, + "learning_rate": 7.051788275121913e-06, + "loss": 0.5714213848114014, + "step": 4211 + }, + { + "epoch": 1.2316128088901885, + "grad_norm": 1.715005842824084, + "learning_rate": 7.047168803701502e-06, + "loss": 0.5588504076004028, + "step": 4212 + }, + { + "epoch": 1.231905249305454, + "grad_norm": 1.3648320017744335, + "learning_rate": 7.042550022487431e-06, + "loss": 0.47527533769607544, + "step": 4213 + }, + { + "epoch": 1.2321976897207194, + "grad_norm": 1.4838404108317171, + "learning_rate": 7.03793193255931e-06, + "loss": 0.5281137228012085, + "step": 4214 + }, + { + "epoch": 1.2324901301359847, + "grad_norm": 1.7839268972332825, + "learning_rate": 7.033314534996589e-06, + "loss": 0.5509631037712097, + "step": 4215 + }, + { + "epoch": 
1.23278257055125, + "grad_norm": 1.5991883103171023, + "learning_rate": 7.028697830878557e-06, + "loss": 0.5291438698768616, + "step": 4216 + }, + { + "epoch": 1.2330750109665156, + "grad_norm": 1.9253124571991533, + "learning_rate": 7.024081821284343e-06, + "loss": 0.5931780934333801, + "step": 4217 + }, + { + "epoch": 1.233367451381781, + "grad_norm": 1.5978832259158926, + "learning_rate": 7.019466507292908e-06, + "loss": 0.4883537292480469, + "step": 4218 + }, + { + "epoch": 1.2336598917970463, + "grad_norm": 1.5441369085427046, + "learning_rate": 7.014851889983058e-06, + "loss": 0.45155030488967896, + "step": 4219 + }, + { + "epoch": 1.2339523322123118, + "grad_norm": 1.7603110515675113, + "learning_rate": 7.010237970433426e-06, + "loss": 0.6107507944107056, + "step": 4220 + }, + { + "epoch": 1.2342447726275771, + "grad_norm": 1.4005214588133317, + "learning_rate": 7.0056247497224905e-06, + "loss": 0.41764840483665466, + "step": 4221 + }, + { + "epoch": 1.2345372130428425, + "grad_norm": 1.4727432689856292, + "learning_rate": 7.0010122289285635e-06, + "loss": 0.6786199808120728, + "step": 4222 + }, + { + "epoch": 1.234829653458108, + "grad_norm": 1.6328773458986388, + "learning_rate": 6.996400409129793e-06, + "loss": 0.5378292798995972, + "step": 4223 + }, + { + "epoch": 1.2351220938733734, + "grad_norm": 1.5470680329093456, + "learning_rate": 6.9917892914041685e-06, + "loss": 0.47646570205688477, + "step": 4224 + }, + { + "epoch": 1.2354145342886387, + "grad_norm": 1.6302332764801317, + "learning_rate": 6.987178876829503e-06, + "loss": 0.554225504398346, + "step": 4225 + }, + { + "epoch": 1.235706974703904, + "grad_norm": 1.8888599643549215, + "learning_rate": 6.982569166483459e-06, + "loss": 0.42614030838012695, + "step": 4226 + }, + { + "epoch": 1.2359994151191696, + "grad_norm": 1.523118498051214, + "learning_rate": 6.977960161443524e-06, + "loss": 0.5043676495552063, + "step": 4227 + }, + { + "epoch": 1.2362918555344349, + "grad_norm": 1.454372819437309, + 
"learning_rate": 6.973351862787029e-06, + "loss": 0.4905642569065094, + "step": 4228 + }, + { + "epoch": 1.2365842959497002, + "grad_norm": 1.6152329822736995, + "learning_rate": 6.9687442715911325e-06, + "loss": 0.5860332250595093, + "step": 4229 + }, + { + "epoch": 1.2368767363649655, + "grad_norm": 1.3841079659340747, + "learning_rate": 6.9641373889328345e-06, + "loss": 0.4900137782096863, + "step": 4230 + }, + { + "epoch": 1.237169176780231, + "grad_norm": 1.7249957815195471, + "learning_rate": 6.959531215888961e-06, + "loss": 0.5736855268478394, + "step": 4231 + }, + { + "epoch": 1.2374616171954964, + "grad_norm": 1.6635333389812996, + "learning_rate": 6.95492575353618e-06, + "loss": 0.6390400528907776, + "step": 4232 + }, + { + "epoch": 1.2377540576107617, + "grad_norm": 1.6623693676348965, + "learning_rate": 6.95032100295099e-06, + "loss": 0.6553822159767151, + "step": 4233 + }, + { + "epoch": 1.2380464980260273, + "grad_norm": 1.871056647578711, + "learning_rate": 6.945716965209723e-06, + "loss": 0.6685863733291626, + "step": 4234 + }, + { + "epoch": 1.2383389384412926, + "grad_norm": 1.7090289188063175, + "learning_rate": 6.941113641388542e-06, + "loss": 0.5172277688980103, + "step": 4235 + }, + { + "epoch": 1.238631378856558, + "grad_norm": 1.9648968097135298, + "learning_rate": 6.936511032563451e-06, + "loss": 0.6578007936477661, + "step": 4236 + }, + { + "epoch": 1.2389238192718235, + "grad_norm": 1.5304274814539944, + "learning_rate": 6.931909139810283e-06, + "loss": 0.5679500699043274, + "step": 4237 + }, + { + "epoch": 1.2392162596870888, + "grad_norm": 1.6592749019605815, + "learning_rate": 6.927307964204695e-06, + "loss": 0.49142318964004517, + "step": 4238 + }, + { + "epoch": 1.2395087001023541, + "grad_norm": 1.497996058585022, + "learning_rate": 6.9227075068221926e-06, + "loss": 0.5339487195014954, + "step": 4239 + }, + { + "epoch": 1.2398011405176195, + "grad_norm": 1.9993237065248757, + "learning_rate": 6.918107768738097e-06, + "loss": 
0.5845860242843628, + "step": 4240 + }, + { + "epoch": 1.240093580932885, + "grad_norm": 2.5543699126297823, + "learning_rate": 6.9135087510275735e-06, + "loss": 0.6767281889915466, + "step": 4241 + }, + { + "epoch": 1.2403860213481503, + "grad_norm": 1.850547226886836, + "learning_rate": 6.908910454765612e-06, + "loss": 0.6119472980499268, + "step": 4242 + }, + { + "epoch": 1.2406784617634157, + "grad_norm": 1.6013723709723773, + "learning_rate": 6.904312881027038e-06, + "loss": 0.6375409364700317, + "step": 4243 + }, + { + "epoch": 1.240970902178681, + "grad_norm": 1.9482571730059268, + "learning_rate": 6.899716030886508e-06, + "loss": 0.7059881687164307, + "step": 4244 + }, + { + "epoch": 1.2412633425939466, + "grad_norm": 1.9206862231453385, + "learning_rate": 6.895119905418504e-06, + "loss": 0.6463328003883362, + "step": 4245 + }, + { + "epoch": 1.2415557830092119, + "grad_norm": 1.5219372029025222, + "learning_rate": 6.890524505697345e-06, + "loss": 0.5374869108200073, + "step": 4246 + }, + { + "epoch": 1.2418482234244772, + "grad_norm": 1.625313205404651, + "learning_rate": 6.885929832797176e-06, + "loss": 0.5219276547431946, + "step": 4247 + }, + { + "epoch": 1.2421406638397428, + "grad_norm": 1.4315105659194174, + "learning_rate": 6.881335887791973e-06, + "loss": 0.4815624952316284, + "step": 4248 + }, + { + "epoch": 1.242433104255008, + "grad_norm": 1.318059168550072, + "learning_rate": 6.8767426717555475e-06, + "loss": 0.5111992955207825, + "step": 4249 + }, + { + "epoch": 1.2427255446702734, + "grad_norm": 1.6870166439076426, + "learning_rate": 6.872150185761533e-06, + "loss": 0.5331606268882751, + "step": 4250 + }, + { + "epoch": 1.2430179850855387, + "grad_norm": 1.5572023614320247, + "learning_rate": 6.867558430883393e-06, + "loss": 0.5375202894210815, + "step": 4251 + }, + { + "epoch": 1.2433104255008043, + "grad_norm": 1.495445158871636, + "learning_rate": 6.862967408194425e-06, + "loss": 0.5667152404785156, + "step": 4252 + }, + { + "epoch": 
1.2436028659160696, + "grad_norm": 2.036302557289267, + "learning_rate": 6.858377118767752e-06, + "loss": 0.5679255723953247, + "step": 4253 + }, + { + "epoch": 1.243895306331335, + "grad_norm": 1.7798647531094058, + "learning_rate": 6.853787563676324e-06, + "loss": 0.6097947359085083, + "step": 4254 + }, + { + "epoch": 1.2441877467466003, + "grad_norm": 1.458407608257313, + "learning_rate": 6.849198743992927e-06, + "loss": 0.41869044303894043, + "step": 4255 + }, + { + "epoch": 1.2444801871618658, + "grad_norm": 1.595586166137391, + "learning_rate": 6.8446106607901655e-06, + "loss": 0.6414821147918701, + "step": 4256 + }, + { + "epoch": 1.2447726275771311, + "grad_norm": 1.9180058965370612, + "learning_rate": 6.840023315140476e-06, + "loss": 0.5985021591186523, + "step": 4257 + }, + { + "epoch": 1.2450650679923965, + "grad_norm": 1.429348085027092, + "learning_rate": 6.8354367081161235e-06, + "loss": 0.4718092381954193, + "step": 4258 + }, + { + "epoch": 1.245357508407662, + "grad_norm": 1.374927912317877, + "learning_rate": 6.8308508407892e-06, + "loss": 0.46431800723075867, + "step": 4259 + }, + { + "epoch": 1.2456499488229273, + "grad_norm": 1.4906925043469428, + "learning_rate": 6.826265714231624e-06, + "loss": 0.5499997735023499, + "step": 4260 + }, + { + "epoch": 1.2459423892381927, + "grad_norm": 1.605653884930273, + "learning_rate": 6.8216813295151415e-06, + "loss": 0.6078206300735474, + "step": 4261 + }, + { + "epoch": 1.2462348296534582, + "grad_norm": 1.6116067904051048, + "learning_rate": 6.817097687711322e-06, + "loss": 0.5706520080566406, + "step": 4262 + }, + { + "epoch": 1.2465272700687235, + "grad_norm": 1.4579793726336556, + "learning_rate": 6.812514789891566e-06, + "loss": 0.5210137367248535, + "step": 4263 + }, + { + "epoch": 1.2468197104839889, + "grad_norm": 1.5969341972097826, + "learning_rate": 6.807932637127097e-06, + "loss": 0.42632028460502625, + "step": 4264 + }, + { + "epoch": 1.2471121508992542, + "grad_norm": 1.3281470644259092, + 
"learning_rate": 6.803351230488967e-06, + "loss": 0.49990004301071167, + "step": 4265 + }, + { + "epoch": 1.2474045913145198, + "grad_norm": 1.6439327542913937, + "learning_rate": 6.798770571048052e-06, + "loss": 0.557829737663269, + "step": 4266 + }, + { + "epoch": 1.247697031729785, + "grad_norm": 1.6838717466364301, + "learning_rate": 6.794190659875052e-06, + "loss": 0.4784187078475952, + "step": 4267 + }, + { + "epoch": 1.2479894721450504, + "grad_norm": 1.6243877795123443, + "learning_rate": 6.789611498040492e-06, + "loss": 0.4795057773590088, + "step": 4268 + }, + { + "epoch": 1.2482819125603157, + "grad_norm": 1.4149752899303223, + "learning_rate": 6.785033086614725e-06, + "loss": 0.415715754032135, + "step": 4269 + }, + { + "epoch": 1.2485743529755813, + "grad_norm": 1.4478921102692126, + "learning_rate": 6.7804554266679266e-06, + "loss": 0.49056607484817505, + "step": 4270 + }, + { + "epoch": 1.2488667933908466, + "grad_norm": 1.8227279880342706, + "learning_rate": 6.775878519270098e-06, + "loss": 0.5268200039863586, + "step": 4271 + }, + { + "epoch": 1.249159233806112, + "grad_norm": 1.5664194732567784, + "learning_rate": 6.771302365491064e-06, + "loss": 0.6250356435775757, + "step": 4272 + }, + { + "epoch": 1.2494516742213775, + "grad_norm": 1.5152208337758115, + "learning_rate": 6.76672696640047e-06, + "loss": 0.5403029918670654, + "step": 4273 + }, + { + "epoch": 1.2497441146366428, + "grad_norm": 1.6699524807174595, + "learning_rate": 6.762152323067787e-06, + "loss": 0.47006577253341675, + "step": 4274 + }, + { + "epoch": 1.2500365550519081, + "grad_norm": 1.7406248179582138, + "learning_rate": 6.7575784365623134e-06, + "loss": 0.5088232755661011, + "step": 4275 + }, + { + "epoch": 1.2503289954671737, + "grad_norm": 1.7598214720338152, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.5438642501831055, + "step": 4276 + }, + { + "epoch": 1.250621435882439, + "grad_norm": 1.4316922317447767, + "learning_rate": 6.748432938309286e-06, + "loss": 
0.45436567068099976, + "step": 4277 + }, + { + "epoch": 1.2509138762977043, + "grad_norm": 1.5793052704561465, + "learning_rate": 6.743861328699438e-06, + "loss": 0.5298944115638733, + "step": 4278 + }, + { + "epoch": 1.2512063167129697, + "grad_norm": 1.3504092629468785, + "learning_rate": 6.7392904801922055e-06, + "loss": 0.49393707513809204, + "step": 4279 + }, + { + "epoch": 1.251498757128235, + "grad_norm": 1.4852717426676887, + "learning_rate": 6.734720393855998e-06, + "loss": 0.5540947318077087, + "step": 4280 + }, + { + "epoch": 1.2517911975435005, + "grad_norm": 1.4330918355062934, + "learning_rate": 6.730151070759043e-06, + "loss": 0.47406166791915894, + "step": 4281 + }, + { + "epoch": 1.2520836379587659, + "grad_norm": 1.5653956712736337, + "learning_rate": 6.725582511969397e-06, + "loss": 0.46885907649993896, + "step": 4282 + }, + { + "epoch": 1.2523760783740312, + "grad_norm": 1.7710771095422673, + "learning_rate": 6.721014718554931e-06, + "loss": 0.537517786026001, + "step": 4283 + }, + { + "epoch": 1.2526685187892967, + "grad_norm": 1.5323701554592244, + "learning_rate": 6.716447691583336e-06, + "loss": 0.514340341091156, + "step": 4284 + }, + { + "epoch": 1.252960959204562, + "grad_norm": 1.6716715067641383, + "learning_rate": 6.711881432122129e-06, + "loss": 0.5696117281913757, + "step": 4285 + }, + { + "epoch": 1.2532533996198274, + "grad_norm": 1.632492076185155, + "learning_rate": 6.707315941238645e-06, + "loss": 0.5620799660682678, + "step": 4286 + }, + { + "epoch": 1.253545840035093, + "grad_norm": 1.7721487037647632, + "learning_rate": 6.702751220000039e-06, + "loss": 0.4832923412322998, + "step": 4287 + }, + { + "epoch": 1.2538382804503583, + "grad_norm": 1.7195688873272827, + "learning_rate": 6.698187269473289e-06, + "loss": 0.6608176231384277, + "step": 4288 + }, + { + "epoch": 1.2541307208656236, + "grad_norm": 1.67536250359078, + "learning_rate": 6.69362409072519e-06, + "loss": 0.6002779006958008, + "step": 4289 + }, + { + "epoch": 
1.2544231612808892, + "grad_norm": 1.5859756058231869, + "learning_rate": 6.689061684822357e-06, + "loss": 0.49898988008499146, + "step": 4290 + }, + { + "epoch": 1.2547156016961545, + "grad_norm": 1.908707186131175, + "learning_rate": 6.684500052831222e-06, + "loss": 0.5887055397033691, + "step": 4291 + }, + { + "epoch": 1.2550080421114198, + "grad_norm": 1.7680049519728702, + "learning_rate": 6.679939195818043e-06, + "loss": 0.6494714617729187, + "step": 4292 + }, + { + "epoch": 1.2553004825266851, + "grad_norm": 2.320887096811341, + "learning_rate": 6.67537911484889e-06, + "loss": 0.5708397626876831, + "step": 4293 + }, + { + "epoch": 1.2555929229419505, + "grad_norm": 1.4472817266256797, + "learning_rate": 6.670819810989656e-06, + "loss": 0.40412014722824097, + "step": 4294 + }, + { + "epoch": 1.255885363357216, + "grad_norm": 1.675200347061479, + "learning_rate": 6.666261285306048e-06, + "loss": 0.5141078233718872, + "step": 4295 + }, + { + "epoch": 1.2561778037724813, + "grad_norm": 1.8039877813287382, + "learning_rate": 6.661703538863595e-06, + "loss": 0.6463406085968018, + "step": 4296 + }, + { + "epoch": 1.2564702441877467, + "grad_norm": 1.5123528456732447, + "learning_rate": 6.657146572727643e-06, + "loss": 0.5809177160263062, + "step": 4297 + }, + { + "epoch": 1.2567626846030122, + "grad_norm": 1.6628802038143384, + "learning_rate": 6.652590387963354e-06, + "loss": 0.5124412775039673, + "step": 4298 + }, + { + "epoch": 1.2570551250182775, + "grad_norm": 1.8011842610745197, + "learning_rate": 6.64803498563571e-06, + "loss": 0.5399736762046814, + "step": 4299 + }, + { + "epoch": 1.2573475654335429, + "grad_norm": 1.4403786785249715, + "learning_rate": 6.6434803668095095e-06, + "loss": 0.548133373260498, + "step": 4300 + }, + { + "epoch": 1.2576400058488084, + "grad_norm": 1.7736401224051406, + "learning_rate": 6.638926532549364e-06, + "loss": 0.45056310296058655, + "step": 4301 + }, + { + "epoch": 1.2579324462640737, + "grad_norm": 2.434184879977136, + 
"learning_rate": 6.634373483919705e-06, + "loss": 0.5191814303398132, + "step": 4302 + }, + { + "epoch": 1.258224886679339, + "grad_norm": 1.4188278481806091, + "learning_rate": 6.62982122198478e-06, + "loss": 0.41939109563827515, + "step": 4303 + }, + { + "epoch": 1.2585173270946044, + "grad_norm": 1.6631261031278954, + "learning_rate": 6.625269747808655e-06, + "loss": 0.6535190939903259, + "step": 4304 + }, + { + "epoch": 1.2588097675098697, + "grad_norm": 1.7210614964326925, + "learning_rate": 6.620719062455207e-06, + "loss": 0.6282539367675781, + "step": 4305 + }, + { + "epoch": 1.2591022079251353, + "grad_norm": 1.5686327106153548, + "learning_rate": 6.616169166988133e-06, + "loss": 0.5378686189651489, + "step": 4306 + }, + { + "epoch": 1.2593946483404006, + "grad_norm": 1.80292094791683, + "learning_rate": 6.611620062470942e-06, + "loss": 0.5278643369674683, + "step": 4307 + }, + { + "epoch": 1.259687088755666, + "grad_norm": 1.5211478183195457, + "learning_rate": 6.607071749966958e-06, + "loss": 0.5578285455703735, + "step": 4308 + }, + { + "epoch": 1.2599795291709315, + "grad_norm": 1.7646090466366875, + "learning_rate": 6.602524230539324e-06, + "loss": 0.6452580094337463, + "step": 4309 + }, + { + "epoch": 1.2602719695861968, + "grad_norm": 1.7812547970338353, + "learning_rate": 6.597977505250992e-06, + "loss": 0.6133028268814087, + "step": 4310 + }, + { + "epoch": 1.2605644100014621, + "grad_norm": 1.552230597230507, + "learning_rate": 6.5934315751647345e-06, + "loss": 0.4930221140384674, + "step": 4311 + }, + { + "epoch": 1.2608568504167277, + "grad_norm": 2.197359143106273, + "learning_rate": 6.588886441343136e-06, + "loss": 0.48653531074523926, + "step": 4312 + }, + { + "epoch": 1.261149290831993, + "grad_norm": 1.851387133095935, + "learning_rate": 6.5843421048485915e-06, + "loss": 0.6594399213790894, + "step": 4313 + }, + { + "epoch": 1.2614417312472583, + "grad_norm": 1.66909694599425, + "learning_rate": 6.579798566743314e-06, + "loss": 
0.5164401531219482, + "step": 4314 + }, + { + "epoch": 1.2617341716625239, + "grad_norm": 1.7484363064869977, + "learning_rate": 6.5752558280893245e-06, + "loss": 0.6338971853256226, + "step": 4315 + }, + { + "epoch": 1.2620266120777892, + "grad_norm": 1.7526913055276123, + "learning_rate": 6.570713889948461e-06, + "loss": 0.5301859974861145, + "step": 4316 + }, + { + "epoch": 1.2623190524930545, + "grad_norm": 1.5016995868339762, + "learning_rate": 6.566172753382376e-06, + "loss": 0.4572887420654297, + "step": 4317 + }, + { + "epoch": 1.2626114929083199, + "grad_norm": 1.5874066468532555, + "learning_rate": 6.561632419452532e-06, + "loss": 0.5235984325408936, + "step": 4318 + }, + { + "epoch": 1.2629039333235852, + "grad_norm": 1.5456604836068861, + "learning_rate": 6.557092889220206e-06, + "loss": 0.586036205291748, + "step": 4319 + }, + { + "epoch": 1.2631963737388507, + "grad_norm": 1.6865403223453492, + "learning_rate": 6.5525541637464855e-06, + "loss": 0.4728356599807739, + "step": 4320 + }, + { + "epoch": 1.263488814154116, + "grad_norm": 1.5435862254535146, + "learning_rate": 6.548016244092265e-06, + "loss": 0.4932190179824829, + "step": 4321 + }, + { + "epoch": 1.2637812545693814, + "grad_norm": 1.6817765339416926, + "learning_rate": 6.543479131318259e-06, + "loss": 0.525676429271698, + "step": 4322 + }, + { + "epoch": 1.264073694984647, + "grad_norm": 1.4602981048339732, + "learning_rate": 6.538942826484991e-06, + "loss": 0.5462610721588135, + "step": 4323 + }, + { + "epoch": 1.2643661353999123, + "grad_norm": 1.6170865165049584, + "learning_rate": 6.534407330652792e-06, + "loss": 0.5391229391098022, + "step": 4324 + }, + { + "epoch": 1.2646585758151776, + "grad_norm": 1.7047610503615187, + "learning_rate": 6.529872644881811e-06, + "loss": 0.5361309051513672, + "step": 4325 + }, + { + "epoch": 1.2649510162304431, + "grad_norm": 1.7296167923882715, + "learning_rate": 6.525338770232001e-06, + "loss": 0.5692390203475952, + "step": 4326 + }, + { + "epoch": 
1.2652434566457085, + "grad_norm": 1.7314833561159049, + "learning_rate": 6.520805707763125e-06, + "loss": 0.5337555408477783, + "step": 4327 + }, + { + "epoch": 1.2655358970609738, + "grad_norm": 1.5538338127930955, + "learning_rate": 6.5162734585347605e-06, + "loss": 0.604168176651001, + "step": 4328 + }, + { + "epoch": 1.2658283374762394, + "grad_norm": 1.621069176676038, + "learning_rate": 6.5117420236062955e-06, + "loss": 0.5404821038246155, + "step": 4329 + }, + { + "epoch": 1.2661207778915047, + "grad_norm": 1.8779165644410452, + "learning_rate": 6.507211404036922e-06, + "loss": 0.6097038388252258, + "step": 4330 + }, + { + "epoch": 1.26641321830677, + "grad_norm": 1.41106750899854, + "learning_rate": 6.50268160088565e-06, + "loss": 0.44309180974960327, + "step": 4331 + }, + { + "epoch": 1.2667056587220353, + "grad_norm": 1.633689199912191, + "learning_rate": 6.498152615211286e-06, + "loss": 0.5703015923500061, + "step": 4332 + }, + { + "epoch": 1.2669980991373007, + "grad_norm": 1.9239494523704173, + "learning_rate": 6.4936244480724575e-06, + "loss": 0.5745347738265991, + "step": 4333 + }, + { + "epoch": 1.2672905395525662, + "grad_norm": 1.7558467932702122, + "learning_rate": 6.489097100527595e-06, + "loss": 0.6611922979354858, + "step": 4334 + }, + { + "epoch": 1.2675829799678315, + "grad_norm": 1.373367301388142, + "learning_rate": 6.484570573634939e-06, + "loss": 0.4560534358024597, + "step": 4335 + }, + { + "epoch": 1.2678754203830969, + "grad_norm": 1.3735982195225196, + "learning_rate": 6.480044868452535e-06, + "loss": 0.3765673041343689, + "step": 4336 + }, + { + "epoch": 1.2681678607983624, + "grad_norm": 1.631255659187599, + "learning_rate": 6.475519986038246e-06, + "loss": 0.6471004486083984, + "step": 4337 + }, + { + "epoch": 1.2684603012136277, + "grad_norm": 1.6199016829966775, + "learning_rate": 6.4709959274497284e-06, + "loss": 0.5639084577560425, + "step": 4338 + }, + { + "epoch": 1.268752741628893, + "grad_norm": 1.6880087227037737, + 
"learning_rate": 6.4664726937444545e-06, + "loss": 0.6367507576942444, + "step": 4339 + }, + { + "epoch": 1.2690451820441586, + "grad_norm": 2.0302420653268958, + "learning_rate": 6.4619502859797055e-06, + "loss": 0.6803586483001709, + "step": 4340 + }, + { + "epoch": 1.269337622459424, + "grad_norm": 1.7398101139995543, + "learning_rate": 6.457428705212565e-06, + "loss": 0.49068397283554077, + "step": 4341 + }, + { + "epoch": 1.2696300628746893, + "grad_norm": 1.8759736386903334, + "learning_rate": 6.4529079524999296e-06, + "loss": 0.616880476474762, + "step": 4342 + }, + { + "epoch": 1.2699225032899546, + "grad_norm": 1.3483643409763457, + "learning_rate": 6.448388028898489e-06, + "loss": 0.45614945888519287, + "step": 4343 + }, + { + "epoch": 1.27021494370522, + "grad_norm": 1.4554785032074153, + "learning_rate": 6.443868935464754e-06, + "loss": 0.49267178773880005, + "step": 4344 + }, + { + "epoch": 1.2705073841204855, + "grad_norm": 1.6269409722468795, + "learning_rate": 6.439350673255033e-06, + "loss": 0.5169225335121155, + "step": 4345 + }, + { + "epoch": 1.2707998245357508, + "grad_norm": 1.4955295461512919, + "learning_rate": 6.434833243325442e-06, + "loss": 0.4999169111251831, + "step": 4346 + }, + { + "epoch": 1.2710922649510161, + "grad_norm": 1.6243334237328435, + "learning_rate": 6.430316646731906e-06, + "loss": 0.6282567977905273, + "step": 4347 + }, + { + "epoch": 1.2713847053662817, + "grad_norm": 1.6085299245102849, + "learning_rate": 6.425800884530151e-06, + "loss": 0.5007494688034058, + "step": 4348 + }, + { + "epoch": 1.271677145781547, + "grad_norm": 1.656568917278449, + "learning_rate": 6.421285957775705e-06, + "loss": 0.5178118944168091, + "step": 4349 + }, + { + "epoch": 1.2719695861968123, + "grad_norm": 1.560370266514351, + "learning_rate": 6.4167718675239075e-06, + "loss": 0.5473636388778687, + "step": 4350 + }, + { + "epoch": 1.2722620266120779, + "grad_norm": 1.6953423126666767, + "learning_rate": 6.4122586148299004e-06, + "loss": 
0.5863620042800903, + "step": 4351 + }, + { + "epoch": 1.2725544670273432, + "grad_norm": 1.8607908969719156, + "learning_rate": 6.407746200748628e-06, + "loss": 0.5301654934883118, + "step": 4352 + }, + { + "epoch": 1.2728469074426085, + "grad_norm": 1.6932378497792755, + "learning_rate": 6.403234626334842e-06, + "loss": 0.5856075286865234, + "step": 4353 + }, + { + "epoch": 1.273139347857874, + "grad_norm": 1.678003179838639, + "learning_rate": 6.39872389264309e-06, + "loss": 0.49686455726623535, + "step": 4354 + }, + { + "epoch": 1.2734317882731394, + "grad_norm": 1.4854139308295418, + "learning_rate": 6.394214000727734e-06, + "loss": 0.5032684803009033, + "step": 4355 + }, + { + "epoch": 1.2737242286884047, + "grad_norm": 1.8801294667488437, + "learning_rate": 6.389704951642931e-06, + "loss": 0.6855330467224121, + "step": 4356 + }, + { + "epoch": 1.27401666910367, + "grad_norm": 1.479367610859775, + "learning_rate": 6.385196746442644e-06, + "loss": 0.5333864688873291, + "step": 4357 + }, + { + "epoch": 1.2743091095189354, + "grad_norm": 1.5944305875728124, + "learning_rate": 6.380689386180641e-06, + "loss": 0.5597629547119141, + "step": 4358 + }, + { + "epoch": 1.274601549934201, + "grad_norm": 1.467403558865203, + "learning_rate": 6.376182871910488e-06, + "loss": 0.4576488137245178, + "step": 4359 + }, + { + "epoch": 1.2748939903494663, + "grad_norm": 1.7247772731373485, + "learning_rate": 6.371677204685555e-06, + "loss": 0.45165061950683594, + "step": 4360 + }, + { + "epoch": 1.2751864307647316, + "grad_norm": 1.5415632861050979, + "learning_rate": 6.367172385559014e-06, + "loss": 0.5451514720916748, + "step": 4361 + }, + { + "epoch": 1.2754788711799971, + "grad_norm": 1.874618224476165, + "learning_rate": 6.362668415583841e-06, + "loss": 0.6141163110733032, + "step": 4362 + }, + { + "epoch": 1.2757713115952625, + "grad_norm": 1.6869879622469415, + "learning_rate": 6.358165295812809e-06, + "loss": 0.5156669020652771, + "step": 4363 + }, + { + "epoch": 
1.2760637520105278, + "grad_norm": 1.8328178355603366, + "learning_rate": 6.3536630272984974e-06, + "loss": 0.41485118865966797, + "step": 4364 + }, + { + "epoch": 1.2763561924257933, + "grad_norm": 1.546563271256682, + "learning_rate": 6.3491616110932845e-06, + "loss": 0.386514276266098, + "step": 4365 + }, + { + "epoch": 1.2766486328410587, + "grad_norm": 1.472426766767245, + "learning_rate": 6.344661048249345e-06, + "loss": 0.5620483160018921, + "step": 4366 + }, + { + "epoch": 1.276941073256324, + "grad_norm": 1.6328857080628636, + "learning_rate": 6.340161339818662e-06, + "loss": 0.4910007119178772, + "step": 4367 + }, + { + "epoch": 1.2772335136715895, + "grad_norm": 1.3312787841228058, + "learning_rate": 6.335662486853014e-06, + "loss": 0.4628123939037323, + "step": 4368 + }, + { + "epoch": 1.2775259540868549, + "grad_norm": 1.7576669653081538, + "learning_rate": 6.331164490403978e-06, + "loss": 0.5129125118255615, + "step": 4369 + }, + { + "epoch": 1.2778183945021202, + "grad_norm": 1.3282548492081792, + "learning_rate": 6.326667351522939e-06, + "loss": 0.45091521739959717, + "step": 4370 + }, + { + "epoch": 1.2781108349173855, + "grad_norm": 1.4312089210542207, + "learning_rate": 6.322171071261071e-06, + "loss": 0.4914324879646301, + "step": 4371 + }, + { + "epoch": 1.2784032753326509, + "grad_norm": 1.7409991660962885, + "learning_rate": 6.317675650669353e-06, + "loss": 0.6361461877822876, + "step": 4372 + }, + { + "epoch": 1.2786957157479164, + "grad_norm": 1.6196651007639755, + "learning_rate": 6.313181090798561e-06, + "loss": 0.4251636564731598, + "step": 4373 + }, + { + "epoch": 1.2789881561631817, + "grad_norm": 1.7204832108380748, + "learning_rate": 6.308687392699275e-06, + "loss": 0.5605714321136475, + "step": 4374 + }, + { + "epoch": 1.279280596578447, + "grad_norm": 1.5898129202606366, + "learning_rate": 6.304194557421867e-06, + "loss": 0.5366392731666565, + "step": 4375 + }, + { + "epoch": 1.2795730369937126, + "grad_norm": 1.9084263306328586, + 
"learning_rate": 6.299702586016512e-06, + "loss": 0.5501587986946106, + "step": 4376 + }, + { + "epoch": 1.279865477408978, + "grad_norm": 1.856477952130892, + "learning_rate": 6.295211479533177e-06, + "loss": 0.6145694851875305, + "step": 4377 + }, + { + "epoch": 1.2801579178242433, + "grad_norm": 1.9271512769721166, + "learning_rate": 6.2907212390216335e-06, + "loss": 0.5921984910964966, + "step": 4378 + }, + { + "epoch": 1.2804503582395088, + "grad_norm": 1.5061577707687395, + "learning_rate": 6.286231865531447e-06, + "loss": 0.4376833140850067, + "step": 4379 + }, + { + "epoch": 1.2807427986547741, + "grad_norm": 1.5348932565255202, + "learning_rate": 6.281743360111983e-06, + "loss": 0.5141662955284119, + "step": 4380 + }, + { + "epoch": 1.2810352390700395, + "grad_norm": 1.700541758244486, + "learning_rate": 6.2772557238124025e-06, + "loss": 0.7065848112106323, + "step": 4381 + }, + { + "epoch": 1.2813276794853048, + "grad_norm": 1.500203661604044, + "learning_rate": 6.272768957681659e-06, + "loss": 0.5662813186645508, + "step": 4382 + }, + { + "epoch": 1.2816201199005701, + "grad_norm": 1.5006210101215816, + "learning_rate": 6.268283062768512e-06, + "loss": 0.46340662240982056, + "step": 4383 + }, + { + "epoch": 1.2819125603158357, + "grad_norm": 1.5406586553103667, + "learning_rate": 6.263798040121508e-06, + "loss": 0.5258422493934631, + "step": 4384 + }, + { + "epoch": 1.282205000731101, + "grad_norm": 1.8313859097442655, + "learning_rate": 6.2593138907889965e-06, + "loss": 0.5586943030357361, + "step": 4385 + }, + { + "epoch": 1.2824974411463663, + "grad_norm": 1.707661958872181, + "learning_rate": 6.254830615819116e-06, + "loss": 0.5224723815917969, + "step": 4386 + }, + { + "epoch": 1.2827898815616319, + "grad_norm": 1.8755820352841006, + "learning_rate": 6.250348216259812e-06, + "loss": 0.6092125177383423, + "step": 4387 + }, + { + "epoch": 1.2830823219768972, + "grad_norm": 1.6601692047393128, + "learning_rate": 6.245866693158813e-06, + "loss": 
0.5582839250564575, + "step": 4388 + }, + { + "epoch": 1.2833747623921625, + "grad_norm": 1.529218817283274, + "learning_rate": 6.241386047563649e-06, + "loss": 0.6074620485305786, + "step": 4389 + }, + { + "epoch": 1.283667202807428, + "grad_norm": 1.3747332990929297, + "learning_rate": 6.236906280521646e-06, + "loss": 0.6247550845146179, + "step": 4390 + }, + { + "epoch": 1.2839596432226934, + "grad_norm": 1.6645308511195784, + "learning_rate": 6.232427393079919e-06, + "loss": 0.5325940847396851, + "step": 4391 + }, + { + "epoch": 1.2842520836379587, + "grad_norm": 1.5279900789464966, + "learning_rate": 6.227949386285379e-06, + "loss": 0.5082288980484009, + "step": 4392 + }, + { + "epoch": 1.2845445240532243, + "grad_norm": 1.587332587045442, + "learning_rate": 6.223472261184738e-06, + "loss": 0.5704036355018616, + "step": 4393 + }, + { + "epoch": 1.2848369644684896, + "grad_norm": 1.7646477307813349, + "learning_rate": 6.218996018824492e-06, + "loss": 0.5301543474197388, + "step": 4394 + }, + { + "epoch": 1.285129404883755, + "grad_norm": 1.6829663682000435, + "learning_rate": 6.21452066025094e-06, + "loss": 0.48660725355148315, + "step": 4395 + }, + { + "epoch": 1.2854218452990203, + "grad_norm": 1.7324467857194032, + "learning_rate": 6.210046186510168e-06, + "loss": 0.5744560956954956, + "step": 4396 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.6645302463411007, + "learning_rate": 6.205572598648055e-06, + "loss": 0.5714898109436035, + "step": 4397 + }, + { + "epoch": 1.2860067261295511, + "grad_norm": 1.8166911532739076, + "learning_rate": 6.201099897710277e-06, + "loss": 0.6616571545600891, + "step": 4398 + }, + { + "epoch": 1.2862991665448165, + "grad_norm": 1.75450880953695, + "learning_rate": 6.1966280847423e-06, + "loss": 0.5552959442138672, + "step": 4399 + }, + { + "epoch": 1.2865916069600818, + "grad_norm": 1.6738534376194054, + "learning_rate": 6.192157160789382e-06, + "loss": 0.5544919967651367, + "step": 4400 + }, + { + "epoch": 
1.2868840473753473, + "grad_norm": 1.6448049553355306, + "learning_rate": 6.18768712689658e-06, + "loss": 0.5914726853370667, + "step": 4401 + }, + { + "epoch": 1.2871764877906127, + "grad_norm": 1.76025336575331, + "learning_rate": 6.183217984108729e-06, + "loss": 0.47191259264945984, + "step": 4402 + }, + { + "epoch": 1.287468928205878, + "grad_norm": 1.690038062727397, + "learning_rate": 6.178749733470468e-06, + "loss": 0.6479181051254272, + "step": 4403 + }, + { + "epoch": 1.2877613686211435, + "grad_norm": 1.5093061541159978, + "learning_rate": 6.174282376026225e-06, + "loss": 0.42491137981414795, + "step": 4404 + }, + { + "epoch": 1.2880538090364089, + "grad_norm": 1.5952968160469727, + "learning_rate": 6.169815912820214e-06, + "loss": 0.6037728786468506, + "step": 4405 + }, + { + "epoch": 1.2883462494516742, + "grad_norm": 1.6035701682484467, + "learning_rate": 6.165350344896446e-06, + "loss": 0.4979787766933441, + "step": 4406 + }, + { + "epoch": 1.2886386898669397, + "grad_norm": 1.800062229580063, + "learning_rate": 6.160885673298722e-06, + "loss": 0.5863564014434814, + "step": 4407 + }, + { + "epoch": 1.288931130282205, + "grad_norm": 1.735193401842224, + "learning_rate": 6.156421899070628e-06, + "loss": 0.6516878008842468, + "step": 4408 + }, + { + "epoch": 1.2892235706974704, + "grad_norm": 1.3644068122534347, + "learning_rate": 6.151959023255545e-06, + "loss": 0.45655903220176697, + "step": 4409 + }, + { + "epoch": 1.2895160111127357, + "grad_norm": 1.5401566996811273, + "learning_rate": 6.147497046896644e-06, + "loss": 0.4751289486885071, + "step": 4410 + }, + { + "epoch": 1.289808451528001, + "grad_norm": 1.6902527178920421, + "learning_rate": 6.1430359710368845e-06, + "loss": 0.48472684621810913, + "step": 4411 + }, + { + "epoch": 1.2901008919432666, + "grad_norm": 1.5473669029252384, + "learning_rate": 6.138575796719017e-06, + "loss": 0.5014214515686035, + "step": 4412 + }, + { + "epoch": 1.290393332358532, + "grad_norm": 1.7827106404845192, + 
"learning_rate": 6.134116524985581e-06, + "loss": 0.5979991555213928, + "step": 4413 + }, + { + "epoch": 1.2906857727737973, + "grad_norm": 1.325839826079579, + "learning_rate": 6.129658156878899e-06, + "loss": 0.4651130437850952, + "step": 4414 + }, + { + "epoch": 1.2909782131890628, + "grad_norm": 1.7806648175874917, + "learning_rate": 6.125200693441092e-06, + "loss": 0.5938215255737305, + "step": 4415 + }, + { + "epoch": 1.2912706536043281, + "grad_norm": 1.5490961027602033, + "learning_rate": 6.1207441357140626e-06, + "loss": 0.4893927574157715, + "step": 4416 + }, + { + "epoch": 1.2915630940195935, + "grad_norm": 1.7524993955466766, + "learning_rate": 6.116288484739507e-06, + "loss": 0.5546435713768005, + "step": 4417 + }, + { + "epoch": 1.291855534434859, + "grad_norm": 1.8413981048239587, + "learning_rate": 6.111833741558905e-06, + "loss": 0.545367419719696, + "step": 4418 + }, + { + "epoch": 1.2921479748501243, + "grad_norm": 1.4120684443774227, + "learning_rate": 6.1073799072135245e-06, + "loss": 0.47479283809661865, + "step": 4419 + }, + { + "epoch": 1.2924404152653897, + "grad_norm": 1.6721044710471762, + "learning_rate": 6.102926982744423e-06, + "loss": 0.5109270215034485, + "step": 4420 + }, + { + "epoch": 1.292732855680655, + "grad_norm": 1.774842272860347, + "learning_rate": 6.098474969192445e-06, + "loss": 0.5862404108047485, + "step": 4421 + }, + { + "epoch": 1.2930252960959203, + "grad_norm": 1.5821200459355214, + "learning_rate": 6.09402386759822e-06, + "loss": 0.5031660795211792, + "step": 4422 + }, + { + "epoch": 1.2933177365111859, + "grad_norm": 1.7397846198854208, + "learning_rate": 6.089573679002168e-06, + "loss": 0.47179776430130005, + "step": 4423 + }, + { + "epoch": 1.2936101769264512, + "grad_norm": 1.5340233803824985, + "learning_rate": 6.085124404444495e-06, + "loss": 0.45889902114868164, + "step": 4424 + }, + { + "epoch": 1.2939026173417165, + "grad_norm": 1.5550814946749143, + "learning_rate": 6.080676044965188e-06, + "loss": 
0.49759042263031006, + "step": 4425 + }, + { + "epoch": 1.294195057756982, + "grad_norm": 1.9841525065569887, + "learning_rate": 6.076228601604024e-06, + "loss": 0.5980732440948486, + "step": 4426 + }, + { + "epoch": 1.2944874981722474, + "grad_norm": 1.6256180215634828, + "learning_rate": 6.07178207540057e-06, + "loss": 0.6167548894882202, + "step": 4427 + }, + { + "epoch": 1.2947799385875127, + "grad_norm": 1.7343822678821683, + "learning_rate": 6.067336467394169e-06, + "loss": 0.5632568597793579, + "step": 4428 + }, + { + "epoch": 1.2950723790027783, + "grad_norm": 1.713926568632917, + "learning_rate": 6.062891778623961e-06, + "loss": 0.5521456003189087, + "step": 4429 + }, + { + "epoch": 1.2953648194180436, + "grad_norm": 1.4514202434870498, + "learning_rate": 6.058448010128861e-06, + "loss": 0.5916576385498047, + "step": 4430 + }, + { + "epoch": 1.295657259833309, + "grad_norm": 1.4200773171635346, + "learning_rate": 6.054005162947571e-06, + "loss": 0.546825647354126, + "step": 4431 + }, + { + "epoch": 1.2959497002485745, + "grad_norm": 1.903586469303659, + "learning_rate": 6.049563238118584e-06, + "loss": 0.5704302787780762, + "step": 4432 + }, + { + "epoch": 1.2962421406638398, + "grad_norm": 1.6923235048512564, + "learning_rate": 6.0451222366801706e-06, + "loss": 0.5791710615158081, + "step": 4433 + }, + { + "epoch": 1.2965345810791051, + "grad_norm": 1.5242567102891653, + "learning_rate": 6.040682159670389e-06, + "loss": 0.41179752349853516, + "step": 4434 + }, + { + "epoch": 1.2968270214943705, + "grad_norm": 1.7120079687188825, + "learning_rate": 6.03624300812708e-06, + "loss": 0.5213680267333984, + "step": 4435 + }, + { + "epoch": 1.2971194619096358, + "grad_norm": 1.6198208396506975, + "learning_rate": 6.0318047830878675e-06, + "loss": 0.4917318522930145, + "step": 4436 + }, + { + "epoch": 1.2974119023249013, + "grad_norm": 1.9301576881874427, + "learning_rate": 6.027367485590159e-06, + "loss": 0.6347956657409668, + "step": 4437 + }, + { + "epoch": 
1.2977043427401667, + "grad_norm": 1.454096730257314, + "learning_rate": 6.022931116671147e-06, + "loss": 0.5263427495956421, + "step": 4438 + }, + { + "epoch": 1.297996783155432, + "grad_norm": 1.3982615348649814, + "learning_rate": 6.018495677367806e-06, + "loss": 0.5686784982681274, + "step": 4439 + }, + { + "epoch": 1.2982892235706975, + "grad_norm": 1.6986790860575087, + "learning_rate": 6.0140611687168934e-06, + "loss": 0.576974630355835, + "step": 4440 + }, + { + "epoch": 1.2985816639859629, + "grad_norm": 1.7183954732732796, + "learning_rate": 6.009627591754946e-06, + "loss": 0.5375877618789673, + "step": 4441 + }, + { + "epoch": 1.2988741044012282, + "grad_norm": 1.7026702794952187, + "learning_rate": 6.005194947518287e-06, + "loss": 0.6106576919555664, + "step": 4442 + }, + { + "epoch": 1.2991665448164937, + "grad_norm": 1.6076086367802058, + "learning_rate": 6.000763237043021e-06, + "loss": 0.475483238697052, + "step": 4443 + }, + { + "epoch": 1.299458985231759, + "grad_norm": 1.7568326021636087, + "learning_rate": 5.9963324613650335e-06, + "loss": 0.5819226503372192, + "step": 4444 + }, + { + "epoch": 1.2997514256470244, + "grad_norm": 1.6384408260054233, + "learning_rate": 5.991902621519988e-06, + "loss": 0.6394410133361816, + "step": 4445 + }, + { + "epoch": 1.30004386606229, + "grad_norm": 1.653615111391099, + "learning_rate": 5.987473718543338e-06, + "loss": 0.48502016067504883, + "step": 4446 + }, + { + "epoch": 1.3003363064775553, + "grad_norm": 1.5217151928427126, + "learning_rate": 5.983045753470308e-06, + "loss": 0.5782333612442017, + "step": 4447 + }, + { + "epoch": 1.3006287468928206, + "grad_norm": 1.8358895387455052, + "learning_rate": 5.97861872733591e-06, + "loss": 0.5498893857002258, + "step": 4448 + }, + { + "epoch": 1.300921187308086, + "grad_norm": 1.5773905938706185, + "learning_rate": 5.974192641174934e-06, + "loss": 0.47757571935653687, + "step": 4449 + }, + { + "epoch": 1.3012136277233513, + "grad_norm": 1.751650457738534, + 
"learning_rate": 5.96976749602195e-06, + "loss": 0.5401994585990906, + "step": 4450 + }, + { + "epoch": 1.3015060681386168, + "grad_norm": 1.7445816604225337, + "learning_rate": 5.965343292911309e-06, + "loss": 0.5818814635276794, + "step": 4451 + }, + { + "epoch": 1.3017985085538821, + "grad_norm": 1.890298335476633, + "learning_rate": 5.9609200328771465e-06, + "loss": 0.524645984172821, + "step": 4452 + }, + { + "epoch": 1.3020909489691475, + "grad_norm": 1.6124004265504417, + "learning_rate": 5.956497716953365e-06, + "loss": 0.46523183584213257, + "step": 4453 + }, + { + "epoch": 1.302383389384413, + "grad_norm": 1.6328139064911342, + "learning_rate": 5.952076346173657e-06, + "loss": 0.6066159009933472, + "step": 4454 + }, + { + "epoch": 1.3026758297996783, + "grad_norm": 1.5743831575113747, + "learning_rate": 5.947655921571491e-06, + "loss": 0.48635774850845337, + "step": 4455 + }, + { + "epoch": 1.3029682702149437, + "grad_norm": 1.7296441740948125, + "learning_rate": 5.943236444180116e-06, + "loss": 0.5159435868263245, + "step": 4456 + }, + { + "epoch": 1.3032607106302092, + "grad_norm": 1.43545214825073, + "learning_rate": 5.938817915032558e-06, + "loss": 0.5566878914833069, + "step": 4457 + }, + { + "epoch": 1.3035531510454745, + "grad_norm": 1.5736652583628634, + "learning_rate": 5.934400335161618e-06, + "loss": 0.46998029947280884, + "step": 4458 + }, + { + "epoch": 1.3038455914607399, + "grad_norm": 1.7808256717613173, + "learning_rate": 5.92998370559988e-06, + "loss": 0.5554553270339966, + "step": 4459 + }, + { + "epoch": 1.3041380318760052, + "grad_norm": 1.7335497855414168, + "learning_rate": 5.925568027379704e-06, + "loss": 0.5659651756286621, + "step": 4460 + }, + { + "epoch": 1.3044304722912705, + "grad_norm": 1.4784849199972236, + "learning_rate": 5.921153301533229e-06, + "loss": 0.5105445981025696, + "step": 4461 + }, + { + "epoch": 1.304722912706536, + "grad_norm": 1.6833489269681376, + "learning_rate": 5.91673952909237e-06, + "loss": 
0.5255740284919739, + "step": 4462 + }, + { + "epoch": 1.3050153531218014, + "grad_norm": 1.6388447853221406, + "learning_rate": 5.912326711088821e-06, + "loss": 0.5691270232200623, + "step": 4463 + }, + { + "epoch": 1.3053077935370667, + "grad_norm": 1.64945916767282, + "learning_rate": 5.907914848554048e-06, + "loss": 0.5783474445343018, + "step": 4464 + }, + { + "epoch": 1.3056002339523323, + "grad_norm": 1.631334603802349, + "learning_rate": 5.903503942519299e-06, + "loss": 0.6305002570152283, + "step": 4465 + }, + { + "epoch": 1.3058926743675976, + "grad_norm": 1.9357776829199835, + "learning_rate": 5.8990939940156e-06, + "loss": 0.6465631723403931, + "step": 4466 + }, + { + "epoch": 1.306185114782863, + "grad_norm": 1.8264406193491898, + "learning_rate": 5.8946850040737434e-06, + "loss": 0.4883456230163574, + "step": 4467 + }, + { + "epoch": 1.3064775551981285, + "grad_norm": 1.3902013367704193, + "learning_rate": 5.890276973724305e-06, + "loss": 0.4896056056022644, + "step": 4468 + }, + { + "epoch": 1.3067699956133938, + "grad_norm": 1.6292986861573446, + "learning_rate": 5.885869903997638e-06, + "loss": 0.603757917881012, + "step": 4469 + }, + { + "epoch": 1.3070624360286591, + "grad_norm": 1.6368879465310389, + "learning_rate": 5.881463795923866e-06, + "loss": 0.5412129163742065, + "step": 4470 + }, + { + "epoch": 1.3073548764439247, + "grad_norm": 1.576979548849775, + "learning_rate": 5.877058650532891e-06, + "loss": 0.5255335569381714, + "step": 4471 + }, + { + "epoch": 1.30764731685919, + "grad_norm": 1.861250264495057, + "learning_rate": 5.87265446885439e-06, + "loss": 0.5855039358139038, + "step": 4472 + }, + { + "epoch": 1.3079397572744553, + "grad_norm": 1.7387082626664492, + "learning_rate": 5.868251251917811e-06, + "loss": 0.5763603448867798, + "step": 4473 + }, + { + "epoch": 1.3082321976897207, + "grad_norm": 1.7494976398773932, + "learning_rate": 5.86384900075238e-06, + "loss": 0.5148910880088806, + "step": 4474 + }, + { + "epoch": 
1.308524638104986, + "grad_norm": 1.726220320494232, + "learning_rate": 5.859447716387097e-06, + "loss": 0.6387143135070801, + "step": 4475 + }, + { + "epoch": 1.3088170785202515, + "grad_norm": 1.6421362434800872, + "learning_rate": 5.855047399850735e-06, + "loss": 0.5492211580276489, + "step": 4476 + }, + { + "epoch": 1.3091095189355169, + "grad_norm": 1.748321310864673, + "learning_rate": 5.850648052171843e-06, + "loss": 0.5715115070343018, + "step": 4477 + }, + { + "epoch": 1.3094019593507822, + "grad_norm": 1.8948603499593957, + "learning_rate": 5.8462496743787385e-06, + "loss": 0.6295989155769348, + "step": 4478 + }, + { + "epoch": 1.3096943997660477, + "grad_norm": 1.6169983680834699, + "learning_rate": 5.841852267499518e-06, + "loss": 0.5843105316162109, + "step": 4479 + }, + { + "epoch": 1.309986840181313, + "grad_norm": 1.443044009123256, + "learning_rate": 5.837455832562049e-06, + "loss": 0.43283605575561523, + "step": 4480 + }, + { + "epoch": 1.3102792805965784, + "grad_norm": 1.6217104179487012, + "learning_rate": 5.8330603705939684e-06, + "loss": 0.6115404367446899, + "step": 4481 + }, + { + "epoch": 1.310571721011844, + "grad_norm": 1.2325386929467517, + "learning_rate": 5.828665882622692e-06, + "loss": 0.4274179935455322, + "step": 4482 + }, + { + "epoch": 1.3108641614271093, + "grad_norm": 1.3722363792161896, + "learning_rate": 5.824272369675403e-06, + "loss": 0.4385778307914734, + "step": 4483 + }, + { + "epoch": 1.3111566018423746, + "grad_norm": 1.939305382555819, + "learning_rate": 5.819879832779058e-06, + "loss": 0.6310205459594727, + "step": 4484 + }, + { + "epoch": 1.3114490422576401, + "grad_norm": 1.5511013635003787, + "learning_rate": 5.815488272960388e-06, + "loss": 0.6309192180633545, + "step": 4485 + }, + { + "epoch": 1.3117414826729055, + "grad_norm": 1.8051032087296774, + "learning_rate": 5.811097691245895e-06, + "loss": 0.4751497507095337, + "step": 4486 + }, + { + "epoch": 1.3120339230881708, + "grad_norm": 1.5897893613027336, + 
"learning_rate": 5.806708088661846e-06, + "loss": 0.5540175437927246, + "step": 4487 + }, + { + "epoch": 1.3123263635034361, + "grad_norm": 1.924801228279098, + "learning_rate": 5.802319466234283e-06, + "loss": 0.5533273816108704, + "step": 4488 + }, + { + "epoch": 1.3126188039187014, + "grad_norm": 1.5486991099512135, + "learning_rate": 5.797931824989023e-06, + "loss": 0.463643878698349, + "step": 4489 + }, + { + "epoch": 1.312911244333967, + "grad_norm": 1.9073169839874196, + "learning_rate": 5.79354516595165e-06, + "loss": 0.5990232229232788, + "step": 4490 + }, + { + "epoch": 1.3132036847492323, + "grad_norm": 1.7681103257151853, + "learning_rate": 5.789159490147518e-06, + "loss": 0.5569760799407959, + "step": 4491 + }, + { + "epoch": 1.3134961251644977, + "grad_norm": 1.598897244778613, + "learning_rate": 5.784774798601755e-06, + "loss": 0.5016749501228333, + "step": 4492 + }, + { + "epoch": 1.3137885655797632, + "grad_norm": 1.8830720070455038, + "learning_rate": 5.780391092339253e-06, + "loss": 0.5624934434890747, + "step": 4493 + }, + { + "epoch": 1.3140810059950285, + "grad_norm": 2.146444811832683, + "learning_rate": 5.776008372384676e-06, + "loss": 0.7445797920227051, + "step": 4494 + }, + { + "epoch": 1.3143734464102939, + "grad_norm": 1.9276650555591395, + "learning_rate": 5.771626639762461e-06, + "loss": 0.5849495530128479, + "step": 4495 + }, + { + "epoch": 1.3146658868255594, + "grad_norm": 1.6679644602081254, + "learning_rate": 5.767245895496809e-06, + "loss": 0.5672163367271423, + "step": 4496 + }, + { + "epoch": 1.3149583272408247, + "grad_norm": 1.4482015307125622, + "learning_rate": 5.762866140611698e-06, + "loss": 0.5278276801109314, + "step": 4497 + }, + { + "epoch": 1.31525076765609, + "grad_norm": 1.8273800354421317, + "learning_rate": 5.7584873761308615e-06, + "loss": 0.54908686876297, + "step": 4498 + }, + { + "epoch": 1.3155432080713554, + "grad_norm": 1.7592605115208164, + "learning_rate": 5.754109603077811e-06, + "loss": 
0.5257589817047119, + "step": 4499 + }, + { + "epoch": 1.3158356484866207, + "grad_norm": 1.4910358958486878, + "learning_rate": 5.749732822475825e-06, + "loss": 0.5744988918304443, + "step": 4500 + }, + { + "epoch": 1.3161280889018863, + "grad_norm": 1.4827754689170145, + "learning_rate": 5.74535703534795e-06, + "loss": 0.5186365246772766, + "step": 4501 + }, + { + "epoch": 1.3164205293171516, + "grad_norm": 1.6539527720112557, + "learning_rate": 5.740982242716999e-06, + "loss": 0.53574538230896, + "step": 4502 + }, + { + "epoch": 1.316712969732417, + "grad_norm": 1.5347054109635063, + "learning_rate": 5.736608445605555e-06, + "loss": 0.6087717413902283, + "step": 4503 + }, + { + "epoch": 1.3170054101476825, + "grad_norm": 1.5413257189374059, + "learning_rate": 5.732235645035964e-06, + "loss": 0.5132769346237183, + "step": 4504 + }, + { + "epoch": 1.3172978505629478, + "grad_norm": 1.6361856291197476, + "learning_rate": 5.727863842030342e-06, + "loss": 0.588458776473999, + "step": 4505 + }, + { + "epoch": 1.3175902909782131, + "grad_norm": 1.6129388653597692, + "learning_rate": 5.723493037610572e-06, + "loss": 0.5154894590377808, + "step": 4506 + }, + { + "epoch": 1.3178827313934787, + "grad_norm": 1.5507002889867831, + "learning_rate": 5.719123232798304e-06, + "loss": 0.586688220500946, + "step": 4507 + }, + { + "epoch": 1.318175171808744, + "grad_norm": 1.8125403251714918, + "learning_rate": 5.714754428614956e-06, + "loss": 0.4948856830596924, + "step": 4508 + }, + { + "epoch": 1.3184676122240093, + "grad_norm": 1.5128350944665496, + "learning_rate": 5.7103866260817005e-06, + "loss": 0.6179821491241455, + "step": 4509 + }, + { + "epoch": 1.3187600526392749, + "grad_norm": 1.876290206668384, + "learning_rate": 5.7060198262194914e-06, + "loss": 0.5865011811256409, + "step": 4510 + }, + { + "epoch": 1.3190524930545402, + "grad_norm": 1.660419141577327, + "learning_rate": 5.701654030049038e-06, + "loss": 0.519783079624176, + "step": 4511 + }, + { + "epoch": 
1.3193449334698055, + "grad_norm": 1.5035780556155738, + "learning_rate": 5.697289238590822e-06, + "loss": 0.4238147437572479, + "step": 4512 + }, + { + "epoch": 1.3196373738850709, + "grad_norm": 1.6350345014151721, + "learning_rate": 5.6929254528650855e-06, + "loss": 0.5931107997894287, + "step": 4513 + }, + { + "epoch": 1.3199298143003362, + "grad_norm": 1.7485415603348589, + "learning_rate": 5.688562673891837e-06, + "loss": 0.7454524040222168, + "step": 4514 + }, + { + "epoch": 1.3202222547156017, + "grad_norm": 1.6756127294636487, + "learning_rate": 5.684200902690848e-06, + "loss": 0.5909554362297058, + "step": 4515 + }, + { + "epoch": 1.320514695130867, + "grad_norm": 1.449068353866628, + "learning_rate": 5.67984014028166e-06, + "loss": 0.5059943199157715, + "step": 4516 + }, + { + "epoch": 1.3208071355461324, + "grad_norm": 1.3855018310443914, + "learning_rate": 5.675480387683572e-06, + "loss": 0.4387373924255371, + "step": 4517 + }, + { + "epoch": 1.321099575961398, + "grad_norm": 1.6368288915875209, + "learning_rate": 5.671121645915648e-06, + "loss": 0.6452310681343079, + "step": 4518 + }, + { + "epoch": 1.3213920163766633, + "grad_norm": 1.4569471180570228, + "learning_rate": 5.666763915996725e-06, + "loss": 0.5629088282585144, + "step": 4519 + }, + { + "epoch": 1.3216844567919286, + "grad_norm": 1.6108062624448902, + "learning_rate": 5.662407198945386e-06, + "loss": 0.6442849636077881, + "step": 4520 + }, + { + "epoch": 1.3219768972071941, + "grad_norm": 1.4707356833436183, + "learning_rate": 5.6580514957799894e-06, + "loss": 0.5330031514167786, + "step": 4521 + }, + { + "epoch": 1.3222693376224595, + "grad_norm": 1.4396348923376052, + "learning_rate": 5.6536968075186575e-06, + "loss": 0.471035361289978, + "step": 4522 + }, + { + "epoch": 1.3225617780377248, + "grad_norm": 1.5589169874424196, + "learning_rate": 5.649343135179271e-06, + "loss": 0.5675650835037231, + "step": 4523 + }, + { + "epoch": 1.3228542184529903, + "grad_norm": 1.6961906881686575, + 
"learning_rate": 5.644990479779473e-06, + "loss": 0.5458093881607056, + "step": 4524 + }, + { + "epoch": 1.3231466588682557, + "grad_norm": 1.5690712646364733, + "learning_rate": 5.640638842336672e-06, + "loss": 0.5625189542770386, + "step": 4525 + }, + { + "epoch": 1.323439099283521, + "grad_norm": 1.778677748743509, + "learning_rate": 5.636288223868038e-06, + "loss": 0.5868214964866638, + "step": 4526 + }, + { + "epoch": 1.3237315396987863, + "grad_norm": 1.6502123203157841, + "learning_rate": 5.631938625390498e-06, + "loss": 0.5340765714645386, + "step": 4527 + }, + { + "epoch": 1.3240239801140516, + "grad_norm": 1.4463169385647288, + "learning_rate": 5.627590047920747e-06, + "loss": 0.4487069845199585, + "step": 4528 + }, + { + "epoch": 1.3243164205293172, + "grad_norm": 1.5750183859940412, + "learning_rate": 5.623242492475237e-06, + "loss": 0.4246913194656372, + "step": 4529 + }, + { + "epoch": 1.3246088609445825, + "grad_norm": 1.6537085849345186, + "learning_rate": 5.618895960070188e-06, + "loss": 0.49904564023017883, + "step": 4530 + }, + { + "epoch": 1.3249013013598478, + "grad_norm": 1.6201874773916152, + "learning_rate": 5.614550451721566e-06, + "loss": 0.5506085157394409, + "step": 4531 + }, + { + "epoch": 1.3251937417751134, + "grad_norm": 1.6929750939693964, + "learning_rate": 5.610205968445111e-06, + "loss": 0.4861884117126465, + "step": 4532 + }, + { + "epoch": 1.3254861821903787, + "grad_norm": 1.5616728357477914, + "learning_rate": 5.605862511256322e-06, + "loss": 0.5639146566390991, + "step": 4533 + }, + { + "epoch": 1.325778622605644, + "grad_norm": 1.3747626231277423, + "learning_rate": 5.601520081170455e-06, + "loss": 0.43305879831314087, + "step": 4534 + }, + { + "epoch": 1.3260710630209096, + "grad_norm": 1.4728588464752952, + "learning_rate": 5.597178679202524e-06, + "loss": 0.4820408821105957, + "step": 4535 + }, + { + "epoch": 1.326363503436175, + "grad_norm": 2.092875019342334, + "learning_rate": 5.592838306367307e-06, + "loss": 
0.5601707100868225, + "step": 4536 + }, + { + "epoch": 1.3266559438514403, + "grad_norm": 1.6269012393440097, + "learning_rate": 5.588498963679339e-06, + "loss": 0.5655055046081543, + "step": 4537 + }, + { + "epoch": 1.3269483842667056, + "grad_norm": 1.871556737283143, + "learning_rate": 5.584160652152917e-06, + "loss": 0.5425975322723389, + "step": 4538 + }, + { + "epoch": 1.327240824681971, + "grad_norm": 1.5388263554547548, + "learning_rate": 5.579823372802098e-06, + "loss": 0.607103168964386, + "step": 4539 + }, + { + "epoch": 1.3275332650972365, + "grad_norm": 1.6396827179367406, + "learning_rate": 5.575487126640686e-06, + "loss": 0.6011538505554199, + "step": 4540 + }, + { + "epoch": 1.3278257055125018, + "grad_norm": 1.6364470669862505, + "learning_rate": 5.571151914682258e-06, + "loss": 0.5333601236343384, + "step": 4541 + }, + { + "epoch": 1.3281181459277671, + "grad_norm": 1.7756177203838306, + "learning_rate": 5.566817737940142e-06, + "loss": 0.576410174369812, + "step": 4542 + }, + { + "epoch": 1.3284105863430327, + "grad_norm": 1.8060302167235907, + "learning_rate": 5.562484597427425e-06, + "loss": 0.506458044052124, + "step": 4543 + }, + { + "epoch": 1.328703026758298, + "grad_norm": 2.0174061298696975, + "learning_rate": 5.558152494156955e-06, + "loss": 0.5893718004226685, + "step": 4544 + }, + { + "epoch": 1.3289954671735633, + "grad_norm": 1.6979483029237916, + "learning_rate": 5.55382142914133e-06, + "loss": 0.508120059967041, + "step": 4545 + }, + { + "epoch": 1.3292879075888289, + "grad_norm": 1.5737735987577735, + "learning_rate": 5.5494914033929126e-06, + "loss": 0.6103616952896118, + "step": 4546 + }, + { + "epoch": 1.3295803480040942, + "grad_norm": 1.7304904972315491, + "learning_rate": 5.545162417923822e-06, + "loss": 0.5290235280990601, + "step": 4547 + }, + { + "epoch": 1.3298727884193595, + "grad_norm": 1.5350904839753017, + "learning_rate": 5.540834473745929e-06, + "loss": 0.5729631185531616, + "step": 4548 + }, + { + "epoch": 
1.330165228834625, + "grad_norm": 1.5574358916011883, + "learning_rate": 5.536507571870866e-06, + "loss": 0.48720547556877136, + "step": 4549 + }, + { + "epoch": 1.3304576692498904, + "grad_norm": 1.5393587740053045, + "learning_rate": 5.532181713310023e-06, + "loss": 0.4987955689430237, + "step": 4550 + }, + { + "epoch": 1.3307501096651557, + "grad_norm": 1.3126988702980638, + "learning_rate": 5.527856899074536e-06, + "loss": 0.4002467393875122, + "step": 4551 + }, + { + "epoch": 1.331042550080421, + "grad_norm": 2.0947575410388866, + "learning_rate": 5.523533130175308e-06, + "loss": 0.7435724139213562, + "step": 4552 + }, + { + "epoch": 1.3313349904956864, + "grad_norm": 1.541726198150986, + "learning_rate": 5.519210407622993e-06, + "loss": 0.34711340069770813, + "step": 4553 + }, + { + "epoch": 1.331627430910952, + "grad_norm": 1.6396721749099359, + "learning_rate": 5.514888732428003e-06, + "loss": 0.4749720096588135, + "step": 4554 + }, + { + "epoch": 1.3319198713262173, + "grad_norm": 1.7586628740577253, + "learning_rate": 5.5105681056005e-06, + "loss": 0.5818741321563721, + "step": 4555 + }, + { + "epoch": 1.3322123117414826, + "grad_norm": 1.7095504305078453, + "learning_rate": 5.506248528150407e-06, + "loss": 0.5715004801750183, + "step": 4556 + }, + { + "epoch": 1.3325047521567481, + "grad_norm": 1.7722621684818736, + "learning_rate": 5.501930001087399e-06, + "loss": 0.5465661287307739, + "step": 4557 + }, + { + "epoch": 1.3327971925720135, + "grad_norm": 1.7620411170921917, + "learning_rate": 5.4976125254209035e-06, + "loss": 0.6324847936630249, + "step": 4558 + }, + { + "epoch": 1.3330896329872788, + "grad_norm": 1.4165701736936904, + "learning_rate": 5.493296102160105e-06, + "loss": 0.4616294503211975, + "step": 4559 + }, + { + "epoch": 1.3333820734025443, + "grad_norm": 1.6922106714814378, + "learning_rate": 5.488980732313942e-06, + "loss": 0.5187079310417175, + "step": 4560 + }, + { + "epoch": 1.3336745138178097, + "grad_norm": 1.8396067182286635, + 
"learning_rate": 5.484666416891109e-06, + "loss": 0.6120654344558716, + "step": 4561 + }, + { + "epoch": 1.333966954233075, + "grad_norm": 1.6878860661661148, + "learning_rate": 5.480353156900044e-06, + "loss": 0.6171379685401917, + "step": 4562 + }, + { + "epoch": 1.3342593946483405, + "grad_norm": 1.510636167770684, + "learning_rate": 5.4760409533489475e-06, + "loss": 0.4690072536468506, + "step": 4563 + }, + { + "epoch": 1.3345518350636059, + "grad_norm": 1.5961764389633983, + "learning_rate": 5.471729807245773e-06, + "loss": 0.511309802532196, + "step": 4564 + }, + { + "epoch": 1.3348442754788712, + "grad_norm": 1.6355911684199975, + "learning_rate": 5.467419719598223e-06, + "loss": 0.5657862424850464, + "step": 4565 + }, + { + "epoch": 1.3351367158941365, + "grad_norm": 1.7641189489668823, + "learning_rate": 5.4631106914137555e-06, + "loss": 0.4263400733470917, + "step": 4566 + }, + { + "epoch": 1.3354291563094018, + "grad_norm": 1.8179548841156754, + "learning_rate": 5.458802723699579e-06, + "loss": 0.6275177001953125, + "step": 4567 + }, + { + "epoch": 1.3357215967246674, + "grad_norm": 1.6668120373290058, + "learning_rate": 5.454495817462655e-06, + "loss": 0.3857421278953552, + "step": 4568 + }, + { + "epoch": 1.3360140371399327, + "grad_norm": 1.7165178528012586, + "learning_rate": 5.450189973709697e-06, + "loss": 0.5834560394287109, + "step": 4569 + }, + { + "epoch": 1.336306477555198, + "grad_norm": 1.6632572235317495, + "learning_rate": 5.445885193447169e-06, + "loss": 0.6165010929107666, + "step": 4570 + }, + { + "epoch": 1.3365989179704636, + "grad_norm": 1.7470412065212853, + "learning_rate": 5.441581477681288e-06, + "loss": 0.6034595966339111, + "step": 4571 + }, + { + "epoch": 1.336891358385729, + "grad_norm": 1.740024112758077, + "learning_rate": 5.43727882741802e-06, + "loss": 0.570164144039154, + "step": 4572 + }, + { + "epoch": 1.3371837988009942, + "grad_norm": 1.4917354928366209, + "learning_rate": 5.432977243663089e-06, + "loss": 
0.5369169116020203, + "step": 4573 + }, + { + "epoch": 1.3374762392162598, + "grad_norm": 1.7875464183853407, + "learning_rate": 5.428676727421954e-06, + "loss": 0.5624364614486694, + "step": 4574 + }, + { + "epoch": 1.3377686796315251, + "grad_norm": 1.517348885410251, + "learning_rate": 5.424377279699842e-06, + "loss": 0.5002127885818481, + "step": 4575 + }, + { + "epoch": 1.3380611200467905, + "grad_norm": 1.7071888960959534, + "learning_rate": 5.42007890150172e-06, + "loss": 0.5998499393463135, + "step": 4576 + }, + { + "epoch": 1.3383535604620558, + "grad_norm": 1.7074905497433162, + "learning_rate": 5.415781593832307e-06, + "loss": 0.5988572835922241, + "step": 4577 + }, + { + "epoch": 1.338646000877321, + "grad_norm": 1.6551550553396004, + "learning_rate": 5.411485357696075e-06, + "loss": 0.5202064514160156, + "step": 4578 + }, + { + "epoch": 1.3389384412925867, + "grad_norm": 2.519364812628366, + "learning_rate": 5.407190194097241e-06, + "loss": 0.5246714949607849, + "step": 4579 + }, + { + "epoch": 1.339230881707852, + "grad_norm": 1.5907571805696734, + "learning_rate": 5.4028961040397765e-06, + "loss": 0.5998588800430298, + "step": 4580 + }, + { + "epoch": 1.3395233221231173, + "grad_norm": 1.7851321190756844, + "learning_rate": 5.3986030885273945e-06, + "loss": 0.5971418023109436, + "step": 4581 + }, + { + "epoch": 1.3398157625383829, + "grad_norm": 1.5857061971181772, + "learning_rate": 5.3943111485635644e-06, + "loss": 0.4638952910900116, + "step": 4582 + }, + { + "epoch": 1.3401082029536482, + "grad_norm": 1.5981773831835344, + "learning_rate": 5.390020285151502e-06, + "loss": 0.5007182955741882, + "step": 4583 + }, + { + "epoch": 1.3404006433689135, + "grad_norm": 1.610643010141743, + "learning_rate": 5.385730499294171e-06, + "loss": 0.5013964772224426, + "step": 4584 + }, + { + "epoch": 1.340693083784179, + "grad_norm": 1.6360724667305655, + "learning_rate": 5.381441791994276e-06, + "loss": 0.5699980854988098, + "step": 4585 + }, + { + "epoch": 
1.3409855241994444, + "grad_norm": 1.6423818252193456, + "learning_rate": 5.377154164254283e-06, + "loss": 0.5326210260391235, + "step": 4586 + }, + { + "epoch": 1.3412779646147097, + "grad_norm": 1.5111806674915849, + "learning_rate": 5.372867617076395e-06, + "loss": 0.6065158843994141, + "step": 4587 + }, + { + "epoch": 1.3415704050299753, + "grad_norm": 1.356022290658006, + "learning_rate": 5.368582151462569e-06, + "loss": 0.48427143692970276, + "step": 4588 + }, + { + "epoch": 1.3418628454452406, + "grad_norm": 1.4868111001385538, + "learning_rate": 5.364297768414505e-06, + "loss": 0.5755994915962219, + "step": 4589 + }, + { + "epoch": 1.342155285860506, + "grad_norm": 1.4690268021295017, + "learning_rate": 5.360014468933652e-06, + "loss": 0.4959644377231598, + "step": 4590 + }, + { + "epoch": 1.3424477262757712, + "grad_norm": 1.5383458553689457, + "learning_rate": 5.355732254021205e-06, + "loss": 0.5374274253845215, + "step": 4591 + }, + { + "epoch": 1.3427401666910366, + "grad_norm": 1.6286753609495908, + "learning_rate": 5.351451124678106e-06, + "loss": 0.5875111818313599, + "step": 4592 + }, + { + "epoch": 1.3430326071063021, + "grad_norm": 1.7964496178319949, + "learning_rate": 5.347171081905045e-06, + "loss": 0.5230692028999329, + "step": 4593 + }, + { + "epoch": 1.3433250475215675, + "grad_norm": 1.424672908012482, + "learning_rate": 5.342892126702453e-06, + "loss": 0.4624518156051636, + "step": 4594 + }, + { + "epoch": 1.3436174879368328, + "grad_norm": 1.9140370650793175, + "learning_rate": 5.3386142600705134e-06, + "loss": 0.5141074061393738, + "step": 4595 + }, + { + "epoch": 1.3439099283520983, + "grad_norm": 1.6249918744835086, + "learning_rate": 5.334337483009147e-06, + "loss": 0.4655565023422241, + "step": 4596 + }, + { + "epoch": 1.3442023687673637, + "grad_norm": 1.6516547156710706, + "learning_rate": 5.330061796518025e-06, + "loss": 0.6135094165802002, + "step": 4597 + }, + { + "epoch": 1.344494809182629, + "grad_norm": 1.595543646054287, + 
"learning_rate": 5.325787201596563e-06, + "loss": 0.5865254402160645, + "step": 4598 + }, + { + "epoch": 1.3447872495978945, + "grad_norm": 1.8032344885262006, + "learning_rate": 5.321513699243924e-06, + "loss": 0.5290840268135071, + "step": 4599 + }, + { + "epoch": 1.3450796900131599, + "grad_norm": 1.5294052976370318, + "learning_rate": 5.317241290459012e-06, + "loss": 0.554675817489624, + "step": 4600 + }, + { + "epoch": 1.3453721304284252, + "grad_norm": 1.499219614332531, + "learning_rate": 5.312969976240479e-06, + "loss": 0.5033853650093079, + "step": 4601 + }, + { + "epoch": 1.3456645708436907, + "grad_norm": 1.8108264508032192, + "learning_rate": 5.308699757586713e-06, + "loss": 0.44666093587875366, + "step": 4602 + }, + { + "epoch": 1.345957011258956, + "grad_norm": 1.5332559280539126, + "learning_rate": 5.304430635495856e-06, + "loss": 0.5447900891304016, + "step": 4603 + }, + { + "epoch": 1.3462494516742214, + "grad_norm": 1.507503116151542, + "learning_rate": 5.30016261096579e-06, + "loss": 0.4425917863845825, + "step": 4604 + }, + { + "epoch": 1.3465418920894867, + "grad_norm": 1.508411296889156, + "learning_rate": 5.295895684994137e-06, + "loss": 0.4411497712135315, + "step": 4605 + }, + { + "epoch": 1.346834332504752, + "grad_norm": 1.537668383754579, + "learning_rate": 5.291629858578271e-06, + "loss": 0.5577414631843567, + "step": 4606 + }, + { + "epoch": 1.3471267729200176, + "grad_norm": 1.7128549715372505, + "learning_rate": 5.287365132715293e-06, + "loss": 0.4754186272621155, + "step": 4607 + }, + { + "epoch": 1.347419213335283, + "grad_norm": 1.6521724702121328, + "learning_rate": 5.283101508402063e-06, + "loss": 0.5582431554794312, + "step": 4608 + }, + { + "epoch": 1.3477116537505482, + "grad_norm": 1.7476811492664892, + "learning_rate": 5.2788389866351755e-06, + "loss": 0.5552654266357422, + "step": 4609 + }, + { + "epoch": 1.3480040941658138, + "grad_norm": 1.8662632335270106, + "learning_rate": 5.2745775684109705e-06, + "loss": 
0.5776556730270386, + "step": 4610 + }, + { + "epoch": 1.3482965345810791, + "grad_norm": 1.7735552141557176, + "learning_rate": 5.270317254725528e-06, + "loss": 0.5859286785125732, + "step": 4611 + }, + { + "epoch": 1.3485889749963444, + "grad_norm": 1.5182169678473143, + "learning_rate": 5.2660580465746694e-06, + "loss": 0.5914887189865112, + "step": 4612 + }, + { + "epoch": 1.34888141541161, + "grad_norm": 1.6371325039607922, + "learning_rate": 5.261799944953956e-06, + "loss": 0.43669426441192627, + "step": 4613 + }, + { + "epoch": 1.3491738558268753, + "grad_norm": 1.718792113074269, + "learning_rate": 5.2575429508587e-06, + "loss": 0.473773717880249, + "step": 4614 + }, + { + "epoch": 1.3494662962421407, + "grad_norm": 1.7451807781202082, + "learning_rate": 5.253287065283949e-06, + "loss": 0.5011228919029236, + "step": 4615 + }, + { + "epoch": 1.349758736657406, + "grad_norm": 1.6598931266775088, + "learning_rate": 5.249032289224483e-06, + "loss": 0.5839254856109619, + "step": 4616 + }, + { + "epoch": 1.3500511770726713, + "grad_norm": 1.7262514320572941, + "learning_rate": 5.244778623674831e-06, + "loss": 0.5375077128410339, + "step": 4617 + }, + { + "epoch": 1.3503436174879369, + "grad_norm": 1.4572654878782452, + "learning_rate": 5.240526069629265e-06, + "loss": 0.49445679783821106, + "step": 4618 + }, + { + "epoch": 1.3506360579032022, + "grad_norm": 1.5263979209526246, + "learning_rate": 5.236274628081792e-06, + "loss": 0.5369694828987122, + "step": 4619 + }, + { + "epoch": 1.3509284983184675, + "grad_norm": 1.8018674546255473, + "learning_rate": 5.23202430002616e-06, + "loss": 0.6017554402351379, + "step": 4620 + }, + { + "epoch": 1.351220938733733, + "grad_norm": 1.9428924144840352, + "learning_rate": 5.227775086455859e-06, + "loss": 0.5380403995513916, + "step": 4621 + }, + { + "epoch": 1.3515133791489984, + "grad_norm": 1.6665289001084298, + "learning_rate": 5.223526988364116e-06, + "loss": 0.5650593042373657, + "step": 4622 + }, + { + "epoch": 
1.3518058195642637, + "grad_norm": 1.5672489406384107, + "learning_rate": 5.219280006743897e-06, + "loss": 0.5572884678840637, + "step": 4623 + }, + { + "epoch": 1.3520982599795293, + "grad_norm": 1.839257774768153, + "learning_rate": 5.21503414258791e-06, + "loss": 0.5304458141326904, + "step": 4624 + }, + { + "epoch": 1.3523907003947946, + "grad_norm": 1.8264084905380675, + "learning_rate": 5.2107893968886005e-06, + "loss": 0.6702588796615601, + "step": 4625 + }, + { + "epoch": 1.35268314081006, + "grad_norm": 1.5301776431109881, + "learning_rate": 5.206545770638152e-06, + "loss": 0.4607279300689697, + "step": 4626 + }, + { + "epoch": 1.3529755812253255, + "grad_norm": 1.4702386368708713, + "learning_rate": 5.202303264828482e-06, + "loss": 0.5759040713310242, + "step": 4627 + }, + { + "epoch": 1.3532680216405908, + "grad_norm": 1.6340224609334149, + "learning_rate": 5.198061880451253e-06, + "loss": 0.446469783782959, + "step": 4628 + }, + { + "epoch": 1.3535604620558561, + "grad_norm": 1.6416831158378962, + "learning_rate": 5.193821618497864e-06, + "loss": 0.4869040846824646, + "step": 4629 + }, + { + "epoch": 1.3538529024711214, + "grad_norm": 1.59588454548975, + "learning_rate": 5.189582479959449e-06, + "loss": 0.5153477191925049, + "step": 4630 + }, + { + "epoch": 1.3541453428863868, + "grad_norm": 1.6964185114911852, + "learning_rate": 5.185344465826883e-06, + "loss": 0.4958652853965759, + "step": 4631 + }, + { + "epoch": 1.3544377833016523, + "grad_norm": 1.544404184800908, + "learning_rate": 5.1811075770907715e-06, + "loss": 0.5314347743988037, + "step": 4632 + }, + { + "epoch": 1.3547302237169176, + "grad_norm": 1.6488125019330604, + "learning_rate": 5.176871814741466e-06, + "loss": 0.5366088151931763, + "step": 4633 + }, + { + "epoch": 1.355022664132183, + "grad_norm": 1.7011582339400138, + "learning_rate": 5.172637179769049e-06, + "loss": 0.6239185929298401, + "step": 4634 + }, + { + "epoch": 1.3553151045474485, + "grad_norm": 1.8789833552926098, + 
"learning_rate": 5.168403673163341e-06, + "loss": 0.5516507625579834, + "step": 4635 + }, + { + "epoch": 1.3556075449627139, + "grad_norm": 1.6420696506744512, + "learning_rate": 5.164171295913898e-06, + "loss": 0.5859683156013489, + "step": 4636 + }, + { + "epoch": 1.3558999853779792, + "grad_norm": 1.6138084463921514, + "learning_rate": 5.159940049010015e-06, + "loss": 0.5913225412368774, + "step": 4637 + }, + { + "epoch": 1.3561924257932447, + "grad_norm": 1.690951404825549, + "learning_rate": 5.155709933440714e-06, + "loss": 0.650983989238739, + "step": 4638 + }, + { + "epoch": 1.35648486620851, + "grad_norm": 1.7360324268029201, + "learning_rate": 5.151480950194762e-06, + "loss": 0.5631625652313232, + "step": 4639 + }, + { + "epoch": 1.3567773066237754, + "grad_norm": 1.9305214623229574, + "learning_rate": 5.147253100260659e-06, + "loss": 0.48153650760650635, + "step": 4640 + }, + { + "epoch": 1.357069747039041, + "grad_norm": 1.382159174171422, + "learning_rate": 5.143026384626637e-06, + "loss": 0.43598422408103943, + "step": 4641 + }, + { + "epoch": 1.3573621874543063, + "grad_norm": 1.5586949144187017, + "learning_rate": 5.138800804280668e-06, + "loss": 0.5323987007141113, + "step": 4642 + }, + { + "epoch": 1.3576546278695716, + "grad_norm": 1.739858834969472, + "learning_rate": 5.134576360210454e-06, + "loss": 0.5386587977409363, + "step": 4643 + }, + { + "epoch": 1.357947068284837, + "grad_norm": 1.7229356194902612, + "learning_rate": 5.130353053403434e-06, + "loss": 0.4913867115974426, + "step": 4644 + }, + { + "epoch": 1.3582395087001022, + "grad_norm": 2.681042611993396, + "learning_rate": 5.12613088484678e-06, + "loss": 0.6516048908233643, + "step": 4645 + }, + { + "epoch": 1.3585319491153678, + "grad_norm": 1.7863407962771196, + "learning_rate": 5.121909855527398e-06, + "loss": 0.5290599465370178, + "step": 4646 + }, + { + "epoch": 1.3588243895306331, + "grad_norm": 1.992281323100596, + "learning_rate": 5.117689966431927e-06, + "loss": 
0.7909928560256958, + "step": 4647 + }, + { + "epoch": 1.3591168299458984, + "grad_norm": 1.7798386890797042, + "learning_rate": 5.113471218546746e-06, + "loss": 0.4751276969909668, + "step": 4648 + }, + { + "epoch": 1.359409270361164, + "grad_norm": 1.3934486662021524, + "learning_rate": 5.109253612857954e-06, + "loss": 0.4542301893234253, + "step": 4649 + }, + { + "epoch": 1.3597017107764293, + "grad_norm": 1.6724566490890436, + "learning_rate": 5.105037150351393e-06, + "loss": 0.5355349779129028, + "step": 4650 + }, + { + "epoch": 1.3599941511916946, + "grad_norm": 1.7131391763754547, + "learning_rate": 5.100821832012637e-06, + "loss": 0.4994719326496124, + "step": 4651 + }, + { + "epoch": 1.3602865916069602, + "grad_norm": 1.7061763475820229, + "learning_rate": 5.096607658826989e-06, + "loss": 0.6171674728393555, + "step": 4652 + }, + { + "epoch": 1.3605790320222255, + "grad_norm": 1.6851325839422124, + "learning_rate": 5.092394631779487e-06, + "loss": 0.5386878252029419, + "step": 4653 + }, + { + "epoch": 1.3608714724374908, + "grad_norm": 1.4863597978488459, + "learning_rate": 5.088182751854903e-06, + "loss": 0.4495810270309448, + "step": 4654 + }, + { + "epoch": 1.3611639128527562, + "grad_norm": 1.560829764762291, + "learning_rate": 5.083972020037735e-06, + "loss": 0.5540642142295837, + "step": 4655 + }, + { + "epoch": 1.3614563532680215, + "grad_norm": 1.7743988570673719, + "learning_rate": 5.079762437312219e-06, + "loss": 0.6020554900169373, + "step": 4656 + }, + { + "epoch": 1.361748793683287, + "grad_norm": 1.5410143370370128, + "learning_rate": 5.075554004662316e-06, + "loss": 0.47981250286102295, + "step": 4657 + }, + { + "epoch": 1.3620412340985524, + "grad_norm": 1.6809006565320033, + "learning_rate": 5.071346723071724e-06, + "loss": 0.6206443905830383, + "step": 4658 + }, + { + "epoch": 1.3623336745138177, + "grad_norm": 1.2946163710464256, + "learning_rate": 5.067140593523869e-06, + "loss": 0.46899446845054626, + "step": 4659 + }, + { + "epoch": 
1.3626261149290833, + "grad_norm": 1.3692435027739418, + "learning_rate": 5.062935617001912e-06, + "loss": 0.5695985555648804, + "step": 4660 + }, + { + "epoch": 1.3629185553443486, + "grad_norm": 1.5567765237338644, + "learning_rate": 5.058731794488732e-06, + "loss": 0.5524671077728271, + "step": 4661 + }, + { + "epoch": 1.363210995759614, + "grad_norm": 1.5953543121744755, + "learning_rate": 5.054529126966953e-06, + "loss": 0.4655245244503021, + "step": 4662 + }, + { + "epoch": 1.3635034361748795, + "grad_norm": 1.6197588686677031, + "learning_rate": 5.050327615418921e-06, + "loss": 0.5617693662643433, + "step": 4663 + }, + { + "epoch": 1.3637958765901448, + "grad_norm": 1.515126796303483, + "learning_rate": 5.046127260826714e-06, + "loss": 0.52044677734375, + "step": 4664 + }, + { + "epoch": 1.3640883170054101, + "grad_norm": 1.6797173356320934, + "learning_rate": 5.041928064172139e-06, + "loss": 0.4567520022392273, + "step": 4665 + }, + { + "epoch": 1.3643807574206757, + "grad_norm": 1.5794296901996336, + "learning_rate": 5.037730026436736e-06, + "loss": 0.5942729711532593, + "step": 4666 + }, + { + "epoch": 1.364673197835941, + "grad_norm": 1.6501244665537385, + "learning_rate": 5.033533148601766e-06, + "loss": 0.3824811279773712, + "step": 4667 + }, + { + "epoch": 1.3649656382512063, + "grad_norm": 1.4770402468740385, + "learning_rate": 5.029337431648227e-06, + "loss": 0.4710771441459656, + "step": 4668 + }, + { + "epoch": 1.3652580786664716, + "grad_norm": 1.5059979846835174, + "learning_rate": 5.02514287655684e-06, + "loss": 0.6617978811264038, + "step": 4669 + }, + { + "epoch": 1.365550519081737, + "grad_norm": 1.5829629132621983, + "learning_rate": 5.020949484308058e-06, + "loss": 0.5237355828285217, + "step": 4670 + }, + { + "epoch": 1.3658429594970025, + "grad_norm": 1.4158253094169178, + "learning_rate": 5.016757255882065e-06, + "loss": 0.4544803500175476, + "step": 4671 + }, + { + "epoch": 1.3661353999122678, + "grad_norm": 1.8761810485620272, + 
"learning_rate": 5.012566192258763e-06, + "loss": 0.5854490399360657, + "step": 4672 + }, + { + "epoch": 1.3664278403275332, + "grad_norm": 1.902502544434852, + "learning_rate": 5.008376294417787e-06, + "loss": 0.6275635361671448, + "step": 4673 + }, + { + "epoch": 1.3667202807427987, + "grad_norm": 1.6133596882151136, + "learning_rate": 5.004187563338504e-06, + "loss": 0.5160082578659058, + "step": 4674 + }, + { + "epoch": 1.367012721158064, + "grad_norm": 1.439845673979846, + "learning_rate": 5.000000000000003e-06, + "loss": 0.5203640460968018, + "step": 4675 + }, + { + "epoch": 1.3673051615733294, + "grad_norm": 2.025079516078861, + "learning_rate": 4.9958136053811e-06, + "loss": 0.6836066246032715, + "step": 4676 + }, + { + "epoch": 1.367597601988595, + "grad_norm": 1.5727820508513324, + "learning_rate": 4.991628380460343e-06, + "loss": 0.5566641092300415, + "step": 4677 + }, + { + "epoch": 1.3678900424038603, + "grad_norm": 1.643119627925769, + "learning_rate": 4.9874443262159984e-06, + "loss": 0.5618000030517578, + "step": 4678 + }, + { + "epoch": 1.3681824828191256, + "grad_norm": 1.4054605482949574, + "learning_rate": 4.983261443626068e-06, + "loss": 0.4605063796043396, + "step": 4679 + }, + { + "epoch": 1.3684749232343911, + "grad_norm": 1.7557732951775291, + "learning_rate": 4.97907973366827e-06, + "loss": 0.48282021284103394, + "step": 4680 + }, + { + "epoch": 1.3687673636496565, + "grad_norm": 1.467194830130128, + "learning_rate": 4.974899197320059e-06, + "loss": 0.42356133460998535, + "step": 4681 + }, + { + "epoch": 1.3690598040649218, + "grad_norm": 1.3266470239270218, + "learning_rate": 4.97071983555861e-06, + "loss": 0.459377646446228, + "step": 4682 + }, + { + "epoch": 1.369352244480187, + "grad_norm": 1.9278413810039654, + "learning_rate": 4.966541649360819e-06, + "loss": 0.5539775490760803, + "step": 4683 + }, + { + "epoch": 1.3696446848954524, + "grad_norm": 1.7014699336581571, + "learning_rate": 4.962364639703311e-06, + "loss": 
0.5593239068984985, + "step": 4684 + }, + { + "epoch": 1.369937125310718, + "grad_norm": 1.8333805174527635, + "learning_rate": 4.958188807562441e-06, + "loss": 0.5425251722335815, + "step": 4685 + }, + { + "epoch": 1.3702295657259833, + "grad_norm": 1.564182289934299, + "learning_rate": 4.954014153914282e-06, + "loss": 0.5183289051055908, + "step": 4686 + }, + { + "epoch": 1.3705220061412486, + "grad_norm": 1.6834251116472225, + "learning_rate": 4.9498406797346345e-06, + "loss": 0.5278980731964111, + "step": 4687 + }, + { + "epoch": 1.3708144465565142, + "grad_norm": 1.6861784833580373, + "learning_rate": 4.9456683859990185e-06, + "loss": 0.4857858419418335, + "step": 4688 + }, + { + "epoch": 1.3711068869717795, + "grad_norm": 1.4955733852507764, + "learning_rate": 4.94149727368269e-06, + "loss": 0.4889591336250305, + "step": 4689 + }, + { + "epoch": 1.3713993273870448, + "grad_norm": 2.1119376280699105, + "learning_rate": 4.937327343760617e-06, + "loss": 0.5475220680236816, + "step": 4690 + }, + { + "epoch": 1.3716917678023104, + "grad_norm": 1.8065068083746048, + "learning_rate": 4.933158597207501e-06, + "loss": 0.5794380903244019, + "step": 4691 + }, + { + "epoch": 1.3719842082175757, + "grad_norm": 1.5916906211687458, + "learning_rate": 4.928991034997752e-06, + "loss": 0.42212024331092834, + "step": 4692 + }, + { + "epoch": 1.372276648632841, + "grad_norm": 1.8447627986814241, + "learning_rate": 4.924824658105516e-06, + "loss": 0.6091631054878235, + "step": 4693 + }, + { + "epoch": 1.3725690890481064, + "grad_norm": 1.8839419484958528, + "learning_rate": 4.9206594675046595e-06, + "loss": 0.544279158115387, + "step": 4694 + }, + { + "epoch": 1.3728615294633717, + "grad_norm": 1.4361678658463186, + "learning_rate": 4.916495464168768e-06, + "loss": 0.46237099170684814, + "step": 4695 + }, + { + "epoch": 1.3731539698786372, + "grad_norm": 1.5990237040506552, + "learning_rate": 4.912332649071154e-06, + "loss": 0.5615352392196655, + "step": 4696 + }, + { + "epoch": 
1.3734464102939026, + "grad_norm": 1.7554295249178744, + "learning_rate": 4.90817102318485e-06, + "loss": 0.5552200078964233, + "step": 4697 + }, + { + "epoch": 1.373738850709168, + "grad_norm": 1.798510214490848, + "learning_rate": 4.904010587482612e-06, + "loss": 0.5466557741165161, + "step": 4698 + }, + { + "epoch": 1.3740312911244335, + "grad_norm": 1.8536275815794498, + "learning_rate": 4.8998513429369135e-06, + "loss": 0.6131544709205627, + "step": 4699 + }, + { + "epoch": 1.3743237315396988, + "grad_norm": 1.7671899353023186, + "learning_rate": 4.895693290519954e-06, + "loss": 0.5264796018600464, + "step": 4700 + }, + { + "epoch": 1.374616171954964, + "grad_norm": 1.6582809024037055, + "learning_rate": 4.891536431203653e-06, + "loss": 0.5179097652435303, + "step": 4701 + }, + { + "epoch": 1.3749086123702297, + "grad_norm": 1.7203915102871608, + "learning_rate": 4.887380765959655e-06, + "loss": 0.46007782220840454, + "step": 4702 + }, + { + "epoch": 1.375201052785495, + "grad_norm": 1.3949646851760964, + "learning_rate": 4.8832262957593145e-06, + "loss": 0.48182815313339233, + "step": 4703 + }, + { + "epoch": 1.3754934932007603, + "grad_norm": 1.6488295590740498, + "learning_rate": 4.879073021573717e-06, + "loss": 0.5334529280662537, + "step": 4704 + }, + { + "epoch": 1.3757859336160259, + "grad_norm": 1.824410831192183, + "learning_rate": 4.874920944373665e-06, + "loss": 0.5984899997711182, + "step": 4705 + }, + { + "epoch": 1.3760783740312912, + "grad_norm": 1.633539262172952, + "learning_rate": 4.870770065129681e-06, + "loss": 0.46676474809646606, + "step": 4706 + }, + { + "epoch": 1.3763708144465565, + "grad_norm": 1.6766360321424407, + "learning_rate": 4.866620384812008e-06, + "loss": 0.4608241617679596, + "step": 4707 + }, + { + "epoch": 1.3766632548618218, + "grad_norm": 1.6783484732888503, + "learning_rate": 4.862471904390609e-06, + "loss": 0.5877207517623901, + "step": 4708 + }, + { + "epoch": 1.3769556952770872, + "grad_norm": 1.9194747868225221, + 
"learning_rate": 4.858324624835164e-06, + "loss": 0.5243252515792847, + "step": 4709 + }, + { + "epoch": 1.3772481356923527, + "grad_norm": 1.7326979192308607, + "learning_rate": 4.854178547115078e-06, + "loss": 0.528606653213501, + "step": 4710 + }, + { + "epoch": 1.377540576107618, + "grad_norm": 1.761919042167513, + "learning_rate": 4.850033672199469e-06, + "loss": 0.46468549966812134, + "step": 4711 + }, + { + "epoch": 1.3778330165228834, + "grad_norm": 1.5919653348557072, + "learning_rate": 4.8458900010571765e-06, + "loss": 0.5368300676345825, + "step": 4712 + }, + { + "epoch": 1.378125456938149, + "grad_norm": 1.6462148743894651, + "learning_rate": 4.8417475346567635e-06, + "loss": 0.5156906843185425, + "step": 4713 + }, + { + "epoch": 1.3784178973534142, + "grad_norm": 1.718628393460986, + "learning_rate": 4.837606273966496e-06, + "loss": 0.5899196863174438, + "step": 4714 + }, + { + "epoch": 1.3787103377686796, + "grad_norm": 1.6725614455419595, + "learning_rate": 4.833466219954376e-06, + "loss": 0.5820844769477844, + "step": 4715 + }, + { + "epoch": 1.3790027781839451, + "grad_norm": 1.5883271974734077, + "learning_rate": 4.829327373588113e-06, + "loss": 0.4926246404647827, + "step": 4716 + }, + { + "epoch": 1.3792952185992104, + "grad_norm": 1.5404696535835014, + "learning_rate": 4.825189735835138e-06, + "loss": 0.5417006611824036, + "step": 4717 + }, + { + "epoch": 1.3795876590144758, + "grad_norm": 1.5296186550545692, + "learning_rate": 4.821053307662599e-06, + "loss": 0.4130229949951172, + "step": 4718 + }, + { + "epoch": 1.3798800994297413, + "grad_norm": 1.279729123751172, + "learning_rate": 4.8169180900373615e-06, + "loss": 0.4553627371788025, + "step": 4719 + }, + { + "epoch": 1.3801725398450067, + "grad_norm": 1.3535233614920503, + "learning_rate": 4.812784083926005e-06, + "loss": 0.523567259311676, + "step": 4720 + }, + { + "epoch": 1.380464980260272, + "grad_norm": 1.585136917164004, + "learning_rate": 4.808651290294832e-06, + "loss": 
0.4643239378929138, + "step": 4721 + }, + { + "epoch": 1.3807574206755373, + "grad_norm": 1.4443352165881056, + "learning_rate": 4.804519710109856e-06, + "loss": 0.4631537199020386, + "step": 4722 + }, + { + "epoch": 1.3810498610908026, + "grad_norm": 1.9168786498716517, + "learning_rate": 4.8003893443368075e-06, + "loss": 0.5304736495018005, + "step": 4723 + }, + { + "epoch": 1.3813423015060682, + "grad_norm": 1.7679231174871453, + "learning_rate": 4.79626019394114e-06, + "loss": 0.4357796907424927, + "step": 4724 + }, + { + "epoch": 1.3816347419213335, + "grad_norm": 1.9313439900637919, + "learning_rate": 4.7921322598880095e-06, + "loss": 0.6693407297134399, + "step": 4725 + }, + { + "epoch": 1.3819271823365988, + "grad_norm": 1.614277655310262, + "learning_rate": 4.788005543142299e-06, + "loss": 0.5333320498466492, + "step": 4726 + }, + { + "epoch": 1.3822196227518644, + "grad_norm": 1.900002017358812, + "learning_rate": 4.783880044668603e-06, + "loss": 0.5782167911529541, + "step": 4727 + }, + { + "epoch": 1.3825120631671297, + "grad_norm": 1.8216810622231216, + "learning_rate": 4.779755765431231e-06, + "loss": 0.581318199634552, + "step": 4728 + }, + { + "epoch": 1.382804503582395, + "grad_norm": 1.6899321824779212, + "learning_rate": 4.775632706394211e-06, + "loss": 0.5812945365905762, + "step": 4729 + }, + { + "epoch": 1.3830969439976606, + "grad_norm": 1.7981132988330288, + "learning_rate": 4.771510868521279e-06, + "loss": 0.460615873336792, + "step": 4730 + }, + { + "epoch": 1.383389384412926, + "grad_norm": 1.8316112888726737, + "learning_rate": 4.767390252775894e-06, + "loss": 0.5934186577796936, + "step": 4731 + }, + { + "epoch": 1.3836818248281912, + "grad_norm": 1.6355522234245776, + "learning_rate": 4.763270860121222e-06, + "loss": 0.4928584098815918, + "step": 4732 + }, + { + "epoch": 1.3839742652434566, + "grad_norm": 1.6231538800234695, + "learning_rate": 4.759152691520146e-06, + "loss": 0.505489706993103, + "step": 4733 + }, + { + "epoch": 
1.3842667056587221, + "grad_norm": 1.5771553081820557, + "learning_rate": 4.755035747935264e-06, + "loss": 0.5679354667663574, + "step": 4734 + }, + { + "epoch": 1.3845591460739874, + "grad_norm": 1.7096467723863036, + "learning_rate": 4.750920030328889e-06, + "loss": 0.5744746923446655, + "step": 4735 + }, + { + "epoch": 1.3848515864892528, + "grad_norm": 1.6483531613381477, + "learning_rate": 4.7468055396630395e-06, + "loss": 0.4953685402870178, + "step": 4736 + }, + { + "epoch": 1.385144026904518, + "grad_norm": 1.8803927120396235, + "learning_rate": 4.742692276899454e-06, + "loss": 0.6083461046218872, + "step": 4737 + }, + { + "epoch": 1.3854364673197836, + "grad_norm": 1.5633925902592396, + "learning_rate": 4.738580242999584e-06, + "loss": 0.4980735778808594, + "step": 4738 + }, + { + "epoch": 1.385728907735049, + "grad_norm": 1.4499409145464446, + "learning_rate": 4.734469438924594e-06, + "loss": 0.46363019943237305, + "step": 4739 + }, + { + "epoch": 1.3860213481503143, + "grad_norm": 1.818813219831182, + "learning_rate": 4.730359865635355e-06, + "loss": 0.5946298837661743, + "step": 4740 + }, + { + "epoch": 1.3863137885655799, + "grad_norm": 1.6327330611392554, + "learning_rate": 4.726251524092459e-06, + "loss": 0.5630123615264893, + "step": 4741 + }, + { + "epoch": 1.3866062289808452, + "grad_norm": 1.5382056004014089, + "learning_rate": 4.7221444152562045e-06, + "loss": 0.5353481769561768, + "step": 4742 + }, + { + "epoch": 1.3868986693961105, + "grad_norm": 1.7585652476725264, + "learning_rate": 4.718038540086602e-06, + "loss": 0.5170711874961853, + "step": 4743 + }, + { + "epoch": 1.387191109811376, + "grad_norm": 1.8043747351160766, + "learning_rate": 4.713933899543377e-06, + "loss": 0.600492000579834, + "step": 4744 + }, + { + "epoch": 1.3874835502266414, + "grad_norm": 1.5446435468278237, + "learning_rate": 4.709830494585962e-06, + "loss": 0.5291938781738281, + "step": 4745 + }, + { + "epoch": 1.3877759906419067, + "grad_norm": 1.658022225410227, + 
"learning_rate": 4.7057283261735055e-06, + "loss": 0.5664317011833191, + "step": 4746 + }, + { + "epoch": 1.388068431057172, + "grad_norm": 1.8477945736694077, + "learning_rate": 4.701627395264866e-06, + "loss": 0.606655478477478, + "step": 4747 + }, + { + "epoch": 1.3883608714724374, + "grad_norm": 1.5930247770190467, + "learning_rate": 4.697527702818604e-06, + "loss": 0.6160893440246582, + "step": 4748 + }, + { + "epoch": 1.388653311887703, + "grad_norm": 1.510283707012234, + "learning_rate": 4.693429249793002e-06, + "loss": 0.45944249629974365, + "step": 4749 + }, + { + "epoch": 1.3889457523029682, + "grad_norm": 1.7369442621234958, + "learning_rate": 4.689332037146049e-06, + "loss": 0.5737302303314209, + "step": 4750 + }, + { + "epoch": 1.3892381927182336, + "grad_norm": 1.7885159565933124, + "learning_rate": 4.685236065835443e-06, + "loss": 0.4075150787830353, + "step": 4751 + }, + { + "epoch": 1.3895306331334991, + "grad_norm": 1.7699683741602097, + "learning_rate": 4.681141336818592e-06, + "loss": 0.5832744836807251, + "step": 4752 + }, + { + "epoch": 1.3898230735487644, + "grad_norm": 1.6617741591328279, + "learning_rate": 4.6770478510526155e-06, + "loss": 0.5444560647010803, + "step": 4753 + }, + { + "epoch": 1.3901155139640298, + "grad_norm": 1.5343212819990357, + "learning_rate": 4.672955609494339e-06, + "loss": 0.6087433695793152, + "step": 4754 + }, + { + "epoch": 1.3904079543792953, + "grad_norm": 1.3783003966189016, + "learning_rate": 4.6688646131002995e-06, + "loss": 0.3781468868255615, + "step": 4755 + }, + { + "epoch": 1.3907003947945606, + "grad_norm": 2.0008130334792953, + "learning_rate": 4.664774862826742e-06, + "loss": 0.43719804286956787, + "step": 4756 + }, + { + "epoch": 1.390992835209826, + "grad_norm": 1.7926138812382992, + "learning_rate": 4.660686359629623e-06, + "loss": 0.550011932849884, + "step": 4757 + }, + { + "epoch": 1.3912852756250915, + "grad_norm": 1.670816081047031, + "learning_rate": 4.656599104464607e-06, + "loss": 
0.6060909032821655, + "step": 4758 + }, + { + "epoch": 1.3915777160403568, + "grad_norm": 1.727898538684726, + "learning_rate": 4.652513098287058e-06, + "loss": 0.5169791579246521, + "step": 4759 + }, + { + "epoch": 1.3918701564556222, + "grad_norm": 1.667801698839589, + "learning_rate": 4.6484283420520594e-06, + "loss": 0.43063026666641235, + "step": 4760 + }, + { + "epoch": 1.3921625968708875, + "grad_norm": 1.6770983664766483, + "learning_rate": 4.644344836714397e-06, + "loss": 0.5426993370056152, + "step": 4761 + }, + { + "epoch": 1.3924550372861528, + "grad_norm": 1.7220159777866155, + "learning_rate": 4.6402625832285665e-06, + "loss": 0.5260995030403137, + "step": 4762 + }, + { + "epoch": 1.3927474777014184, + "grad_norm": 1.791130103339175, + "learning_rate": 4.63618158254877e-06, + "loss": 0.5206680297851562, + "step": 4763 + }, + { + "epoch": 1.3930399181166837, + "grad_norm": 1.8800757395074672, + "learning_rate": 4.632101835628912e-06, + "loss": 0.5250430703163147, + "step": 4764 + }, + { + "epoch": 1.393332358531949, + "grad_norm": 1.5663601185417966, + "learning_rate": 4.628023343422616e-06, + "loss": 0.5409445762634277, + "step": 4765 + }, + { + "epoch": 1.3936247989472146, + "grad_norm": 1.6199099812994435, + "learning_rate": 4.6239461068832056e-06, + "loss": 0.4676284193992615, + "step": 4766 + }, + { + "epoch": 1.39391723936248, + "grad_norm": 1.6644750420264167, + "learning_rate": 4.6198701269637014e-06, + "loss": 0.6019079089164734, + "step": 4767 + }, + { + "epoch": 1.3942096797777452, + "grad_norm": 1.6721679687151758, + "learning_rate": 4.615795404616844e-06, + "loss": 0.5434615612030029, + "step": 4768 + }, + { + "epoch": 1.3945021201930108, + "grad_norm": 1.8615818009836036, + "learning_rate": 4.611721940795074e-06, + "loss": 0.5817157030105591, + "step": 4769 + }, + { + "epoch": 1.3947945606082761, + "grad_norm": 1.7318982025014367, + "learning_rate": 4.607649736450539e-06, + "loss": 0.5601100921630859, + "step": 4770 + }, + { + "epoch": 
1.3950870010235414, + "grad_norm": 1.8105361405271991, + "learning_rate": 4.6035787925350915e-06, + "loss": 0.5955039262771606, + "step": 4771 + }, + { + "epoch": 1.3953794414388068, + "grad_norm": 1.735716832820506, + "learning_rate": 4.5995091100002905e-06, + "loss": 0.47491732239723206, + "step": 4772 + }, + { + "epoch": 1.3956718818540723, + "grad_norm": 1.7916635810918338, + "learning_rate": 4.595440689797402e-06, + "loss": 0.5451281070709229, + "step": 4773 + }, + { + "epoch": 1.3959643222693376, + "grad_norm": 1.5652511418689858, + "learning_rate": 4.591373532877389e-06, + "loss": 0.3973035514354706, + "step": 4774 + }, + { + "epoch": 1.396256762684603, + "grad_norm": 1.6712606601404056, + "learning_rate": 4.587307640190929e-06, + "loss": 0.604694128036499, + "step": 4775 + }, + { + "epoch": 1.3965492030998683, + "grad_norm": 1.3684363761943823, + "learning_rate": 4.583243012688397e-06, + "loss": 0.4120032489299774, + "step": 4776 + }, + { + "epoch": 1.3968416435151338, + "grad_norm": 1.5200379644064634, + "learning_rate": 4.579179651319878e-06, + "loss": 0.4864089787006378, + "step": 4777 + }, + { + "epoch": 1.3971340839303992, + "grad_norm": 1.7660999886821023, + "learning_rate": 4.57511755703516e-06, + "loss": 0.5774982571601868, + "step": 4778 + }, + { + "epoch": 1.3974265243456645, + "grad_norm": 1.7243096372475708, + "learning_rate": 4.571056730783725e-06, + "loss": 0.48220688104629517, + "step": 4779 + }, + { + "epoch": 1.39771896476093, + "grad_norm": 1.4235878512993427, + "learning_rate": 4.566997173514771e-06, + "loss": 0.4636304974555969, + "step": 4780 + }, + { + "epoch": 1.3980114051761954, + "grad_norm": 1.3469561341500977, + "learning_rate": 4.562938886177194e-06, + "loss": 0.500522792339325, + "step": 4781 + }, + { + "epoch": 1.3983038455914607, + "grad_norm": 1.8391525606302594, + "learning_rate": 4.558881869719595e-06, + "loss": 0.5322657823562622, + "step": 4782 + }, + { + "epoch": 1.3985962860067263, + "grad_norm": 1.8673725266705359, + 
"learning_rate": 4.554826125090276e-06, + "loss": 0.5013759136199951, + "step": 4783 + }, + { + "epoch": 1.3988887264219916, + "grad_norm": 1.5888002392216285, + "learning_rate": 4.550771653237242e-06, + "loss": 0.4261836111545563, + "step": 4784 + }, + { + "epoch": 1.399181166837257, + "grad_norm": 1.6811392186782483, + "learning_rate": 4.546718455108205e-06, + "loss": 0.6181522607803345, + "step": 4785 + }, + { + "epoch": 1.3994736072525222, + "grad_norm": 1.7420663714537028, + "learning_rate": 4.54266653165057e-06, + "loss": 0.6267478466033936, + "step": 4786 + }, + { + "epoch": 1.3997660476677876, + "grad_norm": 1.841391700351839, + "learning_rate": 4.5386158838114535e-06, + "loss": 0.5382452607154846, + "step": 4787 + }, + { + "epoch": 1.400058488083053, + "grad_norm": 1.5361116059310378, + "learning_rate": 4.534566512537668e-06, + "loss": 0.5973625183105469, + "step": 4788 + }, + { + "epoch": 1.4003509284983184, + "grad_norm": 1.7115299901221885, + "learning_rate": 4.530518418775734e-06, + "loss": 0.57401442527771, + "step": 4789 + }, + { + "epoch": 1.4006433689135838, + "grad_norm": 1.7539136213830773, + "learning_rate": 4.52647160347186e-06, + "loss": 0.5712965726852417, + "step": 4790 + }, + { + "epoch": 1.4009358093288493, + "grad_norm": 1.7324506482257287, + "learning_rate": 4.52242606757197e-06, + "loss": 0.5678268671035767, + "step": 4791 + }, + { + "epoch": 1.4012282497441146, + "grad_norm": 1.8696367540913243, + "learning_rate": 4.518381812021682e-06, + "loss": 0.4798399806022644, + "step": 4792 + }, + { + "epoch": 1.40152069015938, + "grad_norm": 1.570253187142898, + "learning_rate": 4.514338837766317e-06, + "loss": 0.48918360471725464, + "step": 4793 + }, + { + "epoch": 1.4018131305746455, + "grad_norm": 1.4711408699123494, + "learning_rate": 4.510297145750894e-06, + "loss": 0.47836846113204956, + "step": 4794 + }, + { + "epoch": 1.4021055709899108, + "grad_norm": 1.6409652265079098, + "learning_rate": 4.506256736920136e-06, + "loss": 
0.4956067204475403, + "step": 4795 + }, + { + "epoch": 1.4023980114051762, + "grad_norm": 1.6571409914414528, + "learning_rate": 4.502217612218463e-06, + "loss": 0.39146924018859863, + "step": 4796 + }, + { + "epoch": 1.4026904518204417, + "grad_norm": 1.6190957574837974, + "learning_rate": 4.498179772589998e-06, + "loss": 0.46657800674438477, + "step": 4797 + }, + { + "epoch": 1.402982892235707, + "grad_norm": 1.5760103505209448, + "learning_rate": 4.4941432189785574e-06, + "loss": 0.4949738383293152, + "step": 4798 + }, + { + "epoch": 1.4032753326509724, + "grad_norm": 1.882895838026707, + "learning_rate": 4.490107952327663e-06, + "loss": 0.5256912708282471, + "step": 4799 + }, + { + "epoch": 1.4035677730662377, + "grad_norm": 1.7128737744359326, + "learning_rate": 4.486073973580539e-06, + "loss": 0.38139551877975464, + "step": 4800 + }, + { + "epoch": 1.403860213481503, + "grad_norm": 1.8140605273544137, + "learning_rate": 4.482041283680095e-06, + "loss": 0.5014597177505493, + "step": 4801 + }, + { + "epoch": 1.4041526538967686, + "grad_norm": 1.8595922924331247, + "learning_rate": 4.478009883568951e-06, + "loss": 0.5497276186943054, + "step": 4802 + }, + { + "epoch": 1.404445094312034, + "grad_norm": 2.0532585085438524, + "learning_rate": 4.473979774189422e-06, + "loss": 0.6098340749740601, + "step": 4803 + }, + { + "epoch": 1.4047375347272992, + "grad_norm": 1.7416135071315817, + "learning_rate": 4.469950956483522e-06, + "loss": 0.40206801891326904, + "step": 4804 + }, + { + "epoch": 1.4050299751425648, + "grad_norm": 1.5567497019384768, + "learning_rate": 4.465923431392962e-06, + "loss": 0.5362050533294678, + "step": 4805 + }, + { + "epoch": 1.40532241555783, + "grad_norm": 1.6896555289921489, + "learning_rate": 4.461897199859153e-06, + "loss": 0.5688962936401367, + "step": 4806 + }, + { + "epoch": 1.4056148559730954, + "grad_norm": 2.0519988466480723, + "learning_rate": 4.457872262823202e-06, + "loss": 0.5270779132843018, + "step": 4807 + }, + { + "epoch": 
1.405907296388361, + "grad_norm": 1.9613398978608871, + "learning_rate": 4.453848621225913e-06, + "loss": 0.5656974911689758, + "step": 4808 + }, + { + "epoch": 1.4061997368036263, + "grad_norm": 1.517853308784437, + "learning_rate": 4.449826276007786e-06, + "loss": 0.44072896242141724, + "step": 4809 + }, + { + "epoch": 1.4064921772188916, + "grad_norm": 1.642033723460973, + "learning_rate": 4.445805228109022e-06, + "loss": 0.5851765871047974, + "step": 4810 + }, + { + "epoch": 1.406784617634157, + "grad_norm": 1.71031586004946, + "learning_rate": 4.441785478469519e-06, + "loss": 0.6174030303955078, + "step": 4811 + }, + { + "epoch": 1.4070770580494225, + "grad_norm": 1.5609662983326855, + "learning_rate": 4.437767028028863e-06, + "loss": 0.542346715927124, + "step": 4812 + }, + { + "epoch": 1.4073694984646878, + "grad_norm": 1.855237193625426, + "learning_rate": 4.433749877726345e-06, + "loss": 0.4964073598384857, + "step": 4813 + }, + { + "epoch": 1.4076619388799532, + "grad_norm": 1.798693836443108, + "learning_rate": 4.429734028500951e-06, + "loss": 0.5309566259384155, + "step": 4814 + }, + { + "epoch": 1.4079543792952185, + "grad_norm": 1.7569401782763947, + "learning_rate": 4.425719481291359e-06, + "loss": 0.5799233913421631, + "step": 4815 + }, + { + "epoch": 1.408246819710484, + "grad_norm": 1.6640340310451727, + "learning_rate": 4.4217062370359456e-06, + "loss": 0.37344229221343994, + "step": 4816 + }, + { + "epoch": 1.4085392601257494, + "grad_norm": 1.9633336456325348, + "learning_rate": 4.417694296672783e-06, + "loss": 0.5752555727958679, + "step": 4817 + }, + { + "epoch": 1.4088317005410147, + "grad_norm": 1.8625982582112681, + "learning_rate": 4.413683661139638e-06, + "loss": 0.61701500415802, + "step": 4818 + }, + { + "epoch": 1.4091241409562802, + "grad_norm": 1.6641617857653193, + "learning_rate": 4.409674331373972e-06, + "loss": 0.4163259267807007, + "step": 4819 + }, + { + "epoch": 1.4094165813715456, + "grad_norm": 1.4025408210631873, + 
"learning_rate": 4.40566630831294e-06, + "loss": 0.46583253145217896, + "step": 4820 + }, + { + "epoch": 1.409709021786811, + "grad_norm": 1.739036857290848, + "learning_rate": 4.401659592893396e-06, + "loss": 0.5230617523193359, + "step": 4821 + }, + { + "epoch": 1.4100014622020764, + "grad_norm": 1.7435910389535008, + "learning_rate": 4.397654186051887e-06, + "loss": 0.6351375579833984, + "step": 4822 + }, + { + "epoch": 1.4102939026173418, + "grad_norm": 1.6526547277716674, + "learning_rate": 4.3936500887246445e-06, + "loss": 0.5895766615867615, + "step": 4823 + }, + { + "epoch": 1.410586343032607, + "grad_norm": 1.7357556256264726, + "learning_rate": 4.389647301847607e-06, + "loss": 0.49772539734840393, + "step": 4824 + }, + { + "epoch": 1.4108787834478724, + "grad_norm": 1.6867136550948763, + "learning_rate": 4.385645826356402e-06, + "loss": 0.593197226524353, + "step": 4825 + }, + { + "epoch": 1.4111712238631378, + "grad_norm": 1.497358571958903, + "learning_rate": 4.381645663186348e-06, + "loss": 0.4971385598182678, + "step": 4826 + }, + { + "epoch": 1.4114636642784033, + "grad_norm": 1.772016135609381, + "learning_rate": 4.3776468132724605e-06, + "loss": 0.5452263951301575, + "step": 4827 + }, + { + "epoch": 1.4117561046936686, + "grad_norm": 1.9896815505139207, + "learning_rate": 4.373649277549446e-06, + "loss": 0.6085976362228394, + "step": 4828 + }, + { + "epoch": 1.412048545108934, + "grad_norm": 1.4346670326917912, + "learning_rate": 4.369653056951705e-06, + "loss": 0.5594700574874878, + "step": 4829 + }, + { + "epoch": 1.4123409855241995, + "grad_norm": 1.6570477364640872, + "learning_rate": 4.365658152413328e-06, + "loss": 0.5099719166755676, + "step": 4830 + }, + { + "epoch": 1.4126334259394648, + "grad_norm": 1.557110878077197, + "learning_rate": 4.3616645648681e-06, + "loss": 0.5683532953262329, + "step": 4831 + }, + { + "epoch": 1.4129258663547302, + "grad_norm": 1.9307182018155977, + "learning_rate": 4.3576722952495e-06, + "loss": 
0.5311406850814819, + "step": 4832 + }, + { + "epoch": 1.4132183067699957, + "grad_norm": 1.6214149336480879, + "learning_rate": 4.353681344490693e-06, + "loss": 0.5299100875854492, + "step": 4833 + }, + { + "epoch": 1.413510747185261, + "grad_norm": 1.6883675181677418, + "learning_rate": 4.349691713524546e-06, + "loss": 0.5531362891197205, + "step": 4834 + }, + { + "epoch": 1.4138031876005264, + "grad_norm": 1.7469666557337236, + "learning_rate": 4.345703403283603e-06, + "loss": 0.5315259099006653, + "step": 4835 + }, + { + "epoch": 1.414095628015792, + "grad_norm": 2.0019997249517645, + "learning_rate": 4.341716414700112e-06, + "loss": 0.583083987236023, + "step": 4836 + }, + { + "epoch": 1.4143880684310572, + "grad_norm": 1.680867008867613, + "learning_rate": 4.337730748706005e-06, + "loss": 0.5273857116699219, + "step": 4837 + }, + { + "epoch": 1.4146805088463226, + "grad_norm": 1.6688598484210682, + "learning_rate": 4.333746406232908e-06, + "loss": 0.4903373718261719, + "step": 4838 + }, + { + "epoch": 1.414972949261588, + "grad_norm": 1.4926269811940354, + "learning_rate": 4.329763388212134e-06, + "loss": 0.5807479619979858, + "step": 4839 + }, + { + "epoch": 1.4152653896768532, + "grad_norm": 1.6552276273685866, + "learning_rate": 4.325781695574695e-06, + "loss": 0.5613743662834167, + "step": 4840 + }, + { + "epoch": 1.4155578300921188, + "grad_norm": 1.6028157865716284, + "learning_rate": 4.321801329251286e-06, + "loss": 0.5801016092300415, + "step": 4841 + }, + { + "epoch": 1.415850270507384, + "grad_norm": 1.6267997915866552, + "learning_rate": 4.3178222901722956e-06, + "loss": 0.6412584781646729, + "step": 4842 + }, + { + "epoch": 1.4161427109226494, + "grad_norm": 1.7251596479619187, + "learning_rate": 4.313844579267793e-06, + "loss": 0.5687737464904785, + "step": 4843 + }, + { + "epoch": 1.416435151337915, + "grad_norm": 1.6343964176323358, + "learning_rate": 4.309868197467548e-06, + "loss": 0.5668497085571289, + "step": 4844 + }, + { + "epoch": 
1.4167275917531803, + "grad_norm": 1.811368112437045, + "learning_rate": 4.305893145701015e-06, + "loss": 0.5814717411994934, + "step": 4845 + }, + { + "epoch": 1.4170200321684456, + "grad_norm": 1.9246707148702022, + "learning_rate": 4.301919424897339e-06, + "loss": 0.5974467992782593, + "step": 4846 + }, + { + "epoch": 1.4173124725837112, + "grad_norm": 1.5643373795961777, + "learning_rate": 4.297947035985351e-06, + "loss": 0.48333030939102173, + "step": 4847 + }, + { + "epoch": 1.4176049129989765, + "grad_norm": 1.7102352976297683, + "learning_rate": 4.293975979893576e-06, + "loss": 0.5851039886474609, + "step": 4848 + }, + { + "epoch": 1.4178973534142418, + "grad_norm": 1.4778659468844006, + "learning_rate": 4.290006257550221e-06, + "loss": 0.5510480403900146, + "step": 4849 + }, + { + "epoch": 1.4181897938295072, + "grad_norm": 1.6670833236483533, + "learning_rate": 4.286037869883187e-06, + "loss": 0.6053529977798462, + "step": 4850 + }, + { + "epoch": 1.4184822342447727, + "grad_norm": 1.5745047113214952, + "learning_rate": 4.282070817820059e-06, + "loss": 0.471671462059021, + "step": 4851 + }, + { + "epoch": 1.418774674660038, + "grad_norm": 1.6834167266574704, + "learning_rate": 4.278105102288113e-06, + "loss": 0.4864043593406677, + "step": 4852 + }, + { + "epoch": 1.4190671150753034, + "grad_norm": 1.7275065448049989, + "learning_rate": 4.274140724214311e-06, + "loss": 0.6283255815505981, + "step": 4853 + }, + { + "epoch": 1.4193595554905687, + "grad_norm": 1.7634272907173199, + "learning_rate": 4.270177684525299e-06, + "loss": 0.4990651607513428, + "step": 4854 + }, + { + "epoch": 1.4196519959058342, + "grad_norm": 1.6718595783894241, + "learning_rate": 4.2662159841474145e-06, + "loss": 0.6053239703178406, + "step": 4855 + }, + { + "epoch": 1.4199444363210996, + "grad_norm": 1.541217587678611, + "learning_rate": 4.262255624006683e-06, + "loss": 0.45790988206863403, + "step": 4856 + }, + { + "epoch": 1.420236876736365, + "grad_norm": 1.5408074963828202, + 
"learning_rate": 4.2582966050288125e-06, + "loss": 0.49944519996643066, + "step": 4857 + }, + { + "epoch": 1.4205293171516304, + "grad_norm": 1.7145691587216874, + "learning_rate": 4.2543389281392e-06, + "loss": 0.5365482568740845, + "step": 4858 + }, + { + "epoch": 1.4208217575668958, + "grad_norm": 1.709871732141181, + "learning_rate": 4.2503825942629285e-06, + "loss": 0.7763599157333374, + "step": 4859 + }, + { + "epoch": 1.421114197982161, + "grad_norm": 1.6376653647841246, + "learning_rate": 4.246427604324768e-06, + "loss": 0.6125203371047974, + "step": 4860 + }, + { + "epoch": 1.4214066383974266, + "grad_norm": 1.8190946758346407, + "learning_rate": 4.242473959249172e-06, + "loss": 0.6634939312934875, + "step": 4861 + }, + { + "epoch": 1.421699078812692, + "grad_norm": 1.607723662080485, + "learning_rate": 4.238521659960283e-06, + "loss": 0.5117735862731934, + "step": 4862 + }, + { + "epoch": 1.4219915192279573, + "grad_norm": 1.6860730867984624, + "learning_rate": 4.234570707381925e-06, + "loss": 0.5700962543487549, + "step": 4863 + }, + { + "epoch": 1.4222839596432226, + "grad_norm": 1.5634193566609638, + "learning_rate": 4.23062110243761e-06, + "loss": 0.5443791151046753, + "step": 4864 + }, + { + "epoch": 1.422576400058488, + "grad_norm": 1.4504951290152908, + "learning_rate": 4.226672846050538e-06, + "loss": 0.5474614500999451, + "step": 4865 + }, + { + "epoch": 1.4228688404737535, + "grad_norm": 1.9578528314343135, + "learning_rate": 4.222725939143582e-06, + "loss": 0.5938940048217773, + "step": 4866 + }, + { + "epoch": 1.4231612808890188, + "grad_norm": 1.720980371359197, + "learning_rate": 4.21878038263931e-06, + "loss": 0.5010229349136353, + "step": 4867 + }, + { + "epoch": 1.4234537213042842, + "grad_norm": 1.8142108741121714, + "learning_rate": 4.214836177459975e-06, + "loss": 0.5186876058578491, + "step": 4868 + }, + { + "epoch": 1.4237461617195497, + "grad_norm": 1.6608706852165134, + "learning_rate": 4.210893324527507e-06, + "loss": 
0.5998060703277588, + "step": 4869 + }, + { + "epoch": 1.424038602134815, + "grad_norm": 1.9807145100005583, + "learning_rate": 4.206951824763528e-06, + "loss": 0.5127147436141968, + "step": 4870 + }, + { + "epoch": 1.4243310425500804, + "grad_norm": 1.4194980170815183, + "learning_rate": 4.203011679089336e-06, + "loss": 0.5134439468383789, + "step": 4871 + }, + { + "epoch": 1.424623482965346, + "grad_norm": 1.728900083762804, + "learning_rate": 4.199072888425919e-06, + "loss": 0.6244111657142639, + "step": 4872 + }, + { + "epoch": 1.4249159233806112, + "grad_norm": 1.6442803911967188, + "learning_rate": 4.195135453693944e-06, + "loss": 0.4431127905845642, + "step": 4873 + }, + { + "epoch": 1.4252083637958766, + "grad_norm": 1.7030697753848931, + "learning_rate": 4.191199375813761e-06, + "loss": 0.6479794979095459, + "step": 4874 + }, + { + "epoch": 1.4255008042111421, + "grad_norm": 2.04011086867295, + "learning_rate": 4.187264655705407e-06, + "loss": 0.6386070847511292, + "step": 4875 + }, + { + "epoch": 1.4257932446264074, + "grad_norm": 1.6039579455905961, + "learning_rate": 4.183331294288603e-06, + "loss": 0.5201597213745117, + "step": 4876 + }, + { + "epoch": 1.4260856850416728, + "grad_norm": 1.7232164566002766, + "learning_rate": 4.179399292482737e-06, + "loss": 0.46355581283569336, + "step": 4877 + }, + { + "epoch": 1.426378125456938, + "grad_norm": 2.2615584884797975, + "learning_rate": 4.175468651206898e-06, + "loss": 0.5360985398292542, + "step": 4878 + }, + { + "epoch": 1.4266705658722034, + "grad_norm": 1.552480099700309, + "learning_rate": 4.171539371379847e-06, + "loss": 0.5545670390129089, + "step": 4879 + }, + { + "epoch": 1.426963006287469, + "grad_norm": 1.4276797255790008, + "learning_rate": 4.167611453920031e-06, + "loss": 0.445978581905365, + "step": 4880 + }, + { + "epoch": 1.4272554467027343, + "grad_norm": 1.7199888948749738, + "learning_rate": 4.163684899745576e-06, + "loss": 0.5242947340011597, + "step": 4881 + }, + { + "epoch": 
1.4275478871179996, + "grad_norm": 1.7383193525416518, + "learning_rate": 4.15975970977429e-06, + "loss": 0.5544728636741638, + "step": 4882 + }, + { + "epoch": 1.4278403275332652, + "grad_norm": 2.073499174067984, + "learning_rate": 4.1558358849236626e-06, + "loss": 0.5400837063789368, + "step": 4883 + }, + { + "epoch": 1.4281327679485305, + "grad_norm": 1.6385411261569034, + "learning_rate": 4.151913426110864e-06, + "loss": 0.5201395153999329, + "step": 4884 + }, + { + "epoch": 1.4284252083637958, + "grad_norm": 1.7888379069815619, + "learning_rate": 4.147992334252745e-06, + "loss": 0.4414210319519043, + "step": 4885 + }, + { + "epoch": 1.4287176487790614, + "grad_norm": 1.7818076981346203, + "learning_rate": 4.144072610265838e-06, + "loss": 0.6590272188186646, + "step": 4886 + }, + { + "epoch": 1.4290100891943267, + "grad_norm": 1.4800084296243576, + "learning_rate": 4.140154255066356e-06, + "loss": 0.4734429717063904, + "step": 4887 + }, + { + "epoch": 1.429302529609592, + "grad_norm": 1.5398179955798732, + "learning_rate": 4.136237269570186e-06, + "loss": 0.45204073190689087, + "step": 4888 + }, + { + "epoch": 1.4295949700248574, + "grad_norm": 1.6199970278575915, + "learning_rate": 4.132321654692901e-06, + "loss": 0.6570174694061279, + "step": 4889 + }, + { + "epoch": 1.429887410440123, + "grad_norm": 1.7926483421459931, + "learning_rate": 4.128407411349754e-06, + "loss": 0.5159077644348145, + "step": 4890 + }, + { + "epoch": 1.4301798508553882, + "grad_norm": 1.603963849008659, + "learning_rate": 4.124494540455674e-06, + "loss": 0.5778994560241699, + "step": 4891 + }, + { + "epoch": 1.4304722912706536, + "grad_norm": 1.4954754441376699, + "learning_rate": 4.120583042925273e-06, + "loss": 0.4740722179412842, + "step": 4892 + }, + { + "epoch": 1.430764731685919, + "grad_norm": 1.4416066465695618, + "learning_rate": 4.116672919672837e-06, + "loss": 0.5561014413833618, + "step": 4893 + }, + { + "epoch": 1.4310571721011844, + "grad_norm": 1.5040800316270475, + 
"learning_rate": 4.112764171612335e-06, + "loss": 0.4834856688976288, + "step": 4894 + }, + { + "epoch": 1.4313496125164498, + "grad_norm": 1.691313354112802, + "learning_rate": 4.108856799657412e-06, + "loss": 0.5565547943115234, + "step": 4895 + }, + { + "epoch": 1.431642052931715, + "grad_norm": 1.8883359305911547, + "learning_rate": 4.104950804721395e-06, + "loss": 0.5401065349578857, + "step": 4896 + }, + { + "epoch": 1.4319344933469806, + "grad_norm": 1.3793655379788223, + "learning_rate": 4.101046187717284e-06, + "loss": 0.4792686700820923, + "step": 4897 + }, + { + "epoch": 1.432226933762246, + "grad_norm": 1.5922549032476903, + "learning_rate": 4.097142949557764e-06, + "loss": 0.5255981683731079, + "step": 4898 + }, + { + "epoch": 1.4325193741775113, + "grad_norm": 1.614736024187036, + "learning_rate": 4.093241091155187e-06, + "loss": 0.5535293817520142, + "step": 4899 + }, + { + "epoch": 1.4328118145927768, + "grad_norm": 1.8976199736566215, + "learning_rate": 4.089340613421589e-06, + "loss": 0.5235373973846436, + "step": 4900 + }, + { + "epoch": 1.4331042550080422, + "grad_norm": 1.8120415147677507, + "learning_rate": 4.085441517268687e-06, + "loss": 0.5538134574890137, + "step": 4901 + }, + { + "epoch": 1.4333966954233075, + "grad_norm": 1.5442149105119904, + "learning_rate": 4.081543803607869e-06, + "loss": 0.5394395589828491, + "step": 4902 + }, + { + "epoch": 1.4336891358385728, + "grad_norm": 1.6068663887611208, + "learning_rate": 4.077647473350201e-06, + "loss": 0.522742509841919, + "step": 4903 + }, + { + "epoch": 1.4339815762538382, + "grad_norm": 1.6377229499845016, + "learning_rate": 4.073752527406429e-06, + "loss": 0.559830367565155, + "step": 4904 + }, + { + "epoch": 1.4342740166691037, + "grad_norm": 1.7578675965544384, + "learning_rate": 4.069858966686971e-06, + "loss": 0.42535799741744995, + "step": 4905 + }, + { + "epoch": 1.434566457084369, + "grad_norm": 1.7745987719575682, + "learning_rate": 4.065966792101924e-06, + "loss": 
0.6075177192687988, + "step": 4906 + }, + { + "epoch": 1.4348588974996344, + "grad_norm": 1.7444570198074862, + "learning_rate": 4.06207600456106e-06, + "loss": 0.5010570883750916, + "step": 4907 + }, + { + "epoch": 1.4351513379149, + "grad_norm": 1.621587467371749, + "learning_rate": 4.058186604973826e-06, + "loss": 0.571307897567749, + "step": 4908 + }, + { + "epoch": 1.4354437783301652, + "grad_norm": 1.643170818508206, + "learning_rate": 4.0542985942493505e-06, + "loss": 0.4918866455554962, + "step": 4909 + }, + { + "epoch": 1.4357362187454306, + "grad_norm": 1.8933520643034856, + "learning_rate": 4.050411973296425e-06, + "loss": 0.6588176488876343, + "step": 4910 + }, + { + "epoch": 1.436028659160696, + "grad_norm": 1.9180926902562168, + "learning_rate": 4.046526743023526e-06, + "loss": 0.7341527938842773, + "step": 4911 + }, + { + "epoch": 1.4363210995759614, + "grad_norm": 1.7782521784505012, + "learning_rate": 4.042642904338801e-06, + "loss": 0.5233849287033081, + "step": 4912 + }, + { + "epoch": 1.4366135399912268, + "grad_norm": 1.6182742405882007, + "learning_rate": 4.038760458150079e-06, + "loss": 0.5144373178482056, + "step": 4913 + }, + { + "epoch": 1.4369059804064923, + "grad_norm": 1.55901993468911, + "learning_rate": 4.034879405364853e-06, + "loss": 0.4520954489707947, + "step": 4914 + }, + { + "epoch": 1.4371984208217576, + "grad_norm": 1.6208081934978835, + "learning_rate": 4.030999746890295e-06, + "loss": 0.5632743835449219, + "step": 4915 + }, + { + "epoch": 1.437490861237023, + "grad_norm": 1.5950473237167822, + "learning_rate": 4.027121483633257e-06, + "loss": 0.49681180715560913, + "step": 4916 + }, + { + "epoch": 1.4377833016522883, + "grad_norm": 1.684721295445507, + "learning_rate": 4.023244616500257e-06, + "loss": 0.5182398557662964, + "step": 4917 + }, + { + "epoch": 1.4380757420675536, + "grad_norm": 1.6044294787301046, + "learning_rate": 4.019369146397493e-06, + "loss": 0.5686701536178589, + "step": 4918 + }, + { + "epoch": 
1.4383681824828192, + "grad_norm": 1.682926006912085, + "learning_rate": 4.015495074230823e-06, + "loss": 0.5668520927429199, + "step": 4919 + }, + { + "epoch": 1.4386606228980845, + "grad_norm": 1.556828511748538, + "learning_rate": 4.011622400905794e-06, + "loss": 0.4511116147041321, + "step": 4920 + }, + { + "epoch": 1.4389530633133498, + "grad_norm": 1.677757503686359, + "learning_rate": 4.007751127327618e-06, + "loss": 0.4736326336860657, + "step": 4921 + }, + { + "epoch": 1.4392455037286154, + "grad_norm": 1.68287466179835, + "learning_rate": 4.003881254401183e-06, + "loss": 0.5705248117446899, + "step": 4922 + }, + { + "epoch": 1.4395379441438807, + "grad_norm": 1.4732853876066263, + "learning_rate": 4.000012783031047e-06, + "loss": 0.45527490973472595, + "step": 4923 + }, + { + "epoch": 1.439830384559146, + "grad_norm": 1.5504418192282816, + "learning_rate": 3.996145714121444e-06, + "loss": 0.4926735758781433, + "step": 4924 + }, + { + "epoch": 1.4401228249744116, + "grad_norm": 1.523617382800049, + "learning_rate": 3.992280048576276e-06, + "loss": 0.42700374126434326, + "step": 4925 + }, + { + "epoch": 1.440415265389677, + "grad_norm": 1.6783270187790582, + "learning_rate": 3.988415787299118e-06, + "loss": 0.5833145976066589, + "step": 4926 + }, + { + "epoch": 1.4407077058049422, + "grad_norm": 1.70461399954195, + "learning_rate": 3.98455293119322e-06, + "loss": 0.5290282964706421, + "step": 4927 + }, + { + "epoch": 1.4410001462202076, + "grad_norm": 1.9146871710495363, + "learning_rate": 3.9806914811614984e-06, + "loss": 0.4489266872406006, + "step": 4928 + }, + { + "epoch": 1.441292586635473, + "grad_norm": 1.9109717939773812, + "learning_rate": 3.97683143810655e-06, + "loss": 0.5630865097045898, + "step": 4929 + }, + { + "epoch": 1.4415850270507384, + "grad_norm": 1.6030492821452516, + "learning_rate": 3.972972802930627e-06, + "loss": 0.5962105989456177, + "step": 4930 + }, + { + "epoch": 1.4418774674660038, + "grad_norm": 1.789368844700869, + 
"learning_rate": 3.9691155765356674e-06, + "loss": 0.6059410572052002, + "step": 4931 + }, + { + "epoch": 1.442169907881269, + "grad_norm": 1.6894490985884645, + "learning_rate": 3.965259759823272e-06, + "loss": 0.5476605296134949, + "step": 4932 + }, + { + "epoch": 1.4424623482965346, + "grad_norm": 1.7561171676767597, + "learning_rate": 3.961405353694716e-06, + "loss": 0.70278000831604, + "step": 4933 + }, + { + "epoch": 1.4427547887118, + "grad_norm": 1.6884311650773163, + "learning_rate": 3.9575523590509445e-06, + "loss": 0.5838963389396667, + "step": 4934 + }, + { + "epoch": 1.4430472291270653, + "grad_norm": 1.536536052995308, + "learning_rate": 3.95370077679257e-06, + "loss": 0.508273720741272, + "step": 4935 + }, + { + "epoch": 1.4433396695423308, + "grad_norm": 1.4692622152510404, + "learning_rate": 3.949850607819876e-06, + "loss": 0.5053583383560181, + "step": 4936 + }, + { + "epoch": 1.4436321099575962, + "grad_norm": 1.5754477318406401, + "learning_rate": 3.946001853032818e-06, + "loss": 0.5729954242706299, + "step": 4937 + }, + { + "epoch": 1.4439245503728615, + "grad_norm": 1.833619886253515, + "learning_rate": 3.942154513331018e-06, + "loss": 0.5261870622634888, + "step": 4938 + }, + { + "epoch": 1.444216990788127, + "grad_norm": 1.3956467871190747, + "learning_rate": 3.9383085896137675e-06, + "loss": 0.34802311658859253, + "step": 4939 + }, + { + "epoch": 1.4445094312033924, + "grad_norm": 1.8896307306874633, + "learning_rate": 3.934464082780032e-06, + "loss": 0.48302024602890015, + "step": 4940 + }, + { + "epoch": 1.4448018716186577, + "grad_norm": 1.8507631130251807, + "learning_rate": 3.930620993728434e-06, + "loss": 0.6649061441421509, + "step": 4941 + }, + { + "epoch": 1.445094312033923, + "grad_norm": 1.705526500334542, + "learning_rate": 3.926779323357278e-06, + "loss": 0.5945848822593689, + "step": 4942 + }, + { + "epoch": 1.4453867524491884, + "grad_norm": 1.5476382055190478, + "learning_rate": 3.922939072564528e-06, + "loss": 
0.4783032536506653, + "step": 4943 + }, + { + "epoch": 1.445679192864454, + "grad_norm": 1.6453487782833462, + "learning_rate": 3.919100242247821e-06, + "loss": 0.4619516134262085, + "step": 4944 + }, + { + "epoch": 1.4459716332797192, + "grad_norm": 1.5327149597771257, + "learning_rate": 3.915262833304461e-06, + "loss": 0.5652358531951904, + "step": 4945 + }, + { + "epoch": 1.4462640736949846, + "grad_norm": 1.4734419470243802, + "learning_rate": 3.911426846631416e-06, + "loss": 0.4523610472679138, + "step": 4946 + }, + { + "epoch": 1.44655651411025, + "grad_norm": 1.5670101583017915, + "learning_rate": 3.9075922831253276e-06, + "loss": 0.4914482831954956, + "step": 4947 + }, + { + "epoch": 1.4468489545255154, + "grad_norm": 1.7113071980283088, + "learning_rate": 3.9037591436825005e-06, + "loss": 0.4060005247592926, + "step": 4948 + }, + { + "epoch": 1.4471413949407808, + "grad_norm": 1.9320743237560347, + "learning_rate": 3.899927429198908e-06, + "loss": 0.49987125396728516, + "step": 4949 + }, + { + "epoch": 1.4474338353560463, + "grad_norm": 2.0596677045202036, + "learning_rate": 3.896097140570189e-06, + "loss": 0.6205358505249023, + "step": 4950 + }, + { + "epoch": 1.4477262757713116, + "grad_norm": 1.7670476784744638, + "learning_rate": 3.892268278691651e-06, + "loss": 0.5302955508232117, + "step": 4951 + }, + { + "epoch": 1.448018716186577, + "grad_norm": 1.7962585212488547, + "learning_rate": 3.888440844458272e-06, + "loss": 0.5225962400436401, + "step": 4952 + }, + { + "epoch": 1.4483111566018425, + "grad_norm": 1.8247561425410785, + "learning_rate": 3.884614838764682e-06, + "loss": 0.5030089616775513, + "step": 4953 + }, + { + "epoch": 1.4486035970171078, + "grad_norm": 1.8999355010605985, + "learning_rate": 3.880790262505192e-06, + "loss": 0.6060030460357666, + "step": 4954 + }, + { + "epoch": 1.4488960374323732, + "grad_norm": 1.8229751812699673, + "learning_rate": 3.8769671165737725e-06, + "loss": 0.5244846343994141, + "step": 4955 + }, + { + "epoch": 
1.4491884778476385, + "grad_norm": 1.4616444667042836, + "learning_rate": 3.873145401864061e-06, + "loss": 0.46979671716690063, + "step": 4956 + }, + { + "epoch": 1.4494809182629038, + "grad_norm": 1.8452052569073554, + "learning_rate": 3.8693251192693596e-06, + "loss": 0.5201131105422974, + "step": 4957 + }, + { + "epoch": 1.4497733586781694, + "grad_norm": 1.679443447217904, + "learning_rate": 3.865506269682638e-06, + "loss": 0.5124838352203369, + "step": 4958 + }, + { + "epoch": 1.4500657990934347, + "grad_norm": 1.830132365627518, + "learning_rate": 3.861688853996525e-06, + "loss": 0.5613473653793335, + "step": 4959 + }, + { + "epoch": 1.4503582395087, + "grad_norm": 1.5976816836472583, + "learning_rate": 3.857872873103322e-06, + "loss": 0.46196621656417847, + "step": 4960 + }, + { + "epoch": 1.4506506799239656, + "grad_norm": 1.9393165963504067, + "learning_rate": 3.8540583278949905e-06, + "loss": 0.6427509784698486, + "step": 4961 + }, + { + "epoch": 1.450943120339231, + "grad_norm": 1.7485862700938968, + "learning_rate": 3.850245219263157e-06, + "loss": 0.6306381821632385, + "step": 4962 + }, + { + "epoch": 1.4512355607544962, + "grad_norm": 1.5645194602237047, + "learning_rate": 3.846433548099114e-06, + "loss": 0.46638673543930054, + "step": 4963 + }, + { + "epoch": 1.4515280011697618, + "grad_norm": 1.5360842567610604, + "learning_rate": 3.842623315293814e-06, + "loss": 0.4950143098831177, + "step": 4964 + }, + { + "epoch": 1.451820441585027, + "grad_norm": 1.5810107141405056, + "learning_rate": 3.838814521737875e-06, + "loss": 0.45698249340057373, + "step": 4965 + }, + { + "epoch": 1.4521128820002924, + "grad_norm": 1.6457012436395508, + "learning_rate": 3.8350071683215814e-06, + "loss": 0.6068260669708252, + "step": 4966 + }, + { + "epoch": 1.4524053224155578, + "grad_norm": 1.8188775401166803, + "learning_rate": 3.831201255934879e-06, + "loss": 0.5264104008674622, + "step": 4967 + }, + { + "epoch": 1.4526977628308233, + "grad_norm": 1.6372667669239498, 
+ "learning_rate": 3.827396785467375e-06, + "loss": 0.5198315978050232, + "step": 4968 + }, + { + "epoch": 1.4529902032460886, + "grad_norm": 1.6294906688066837, + "learning_rate": 3.823593757808342e-06, + "loss": 0.504194438457489, + "step": 4969 + }, + { + "epoch": 1.453282643661354, + "grad_norm": 1.6016674444230832, + "learning_rate": 3.819792173846717e-06, + "loss": 0.5018986463546753, + "step": 4970 + }, + { + "epoch": 1.4535750840766193, + "grad_norm": 1.6893120935929504, + "learning_rate": 3.8159920344710936e-06, + "loss": 0.4847358465194702, + "step": 4971 + }, + { + "epoch": 1.4538675244918848, + "grad_norm": 1.6703483014148515, + "learning_rate": 3.812193340569733e-06, + "loss": 0.547623872756958, + "step": 4972 + }, + { + "epoch": 1.4541599649071502, + "grad_norm": 2.1389235560975615, + "learning_rate": 3.8083960930305562e-06, + "loss": 0.534354031085968, + "step": 4973 + }, + { + "epoch": 1.4544524053224155, + "grad_norm": 1.788418032061747, + "learning_rate": 3.8046002927411506e-06, + "loss": 0.6123033165931702, + "step": 4974 + }, + { + "epoch": 1.454744845737681, + "grad_norm": 1.6087574153138633, + "learning_rate": 3.8008059405887553e-06, + "loss": 0.5222622752189636, + "step": 4975 + }, + { + "epoch": 1.4550372861529464, + "grad_norm": 1.684901707974216, + "learning_rate": 3.7970130374602785e-06, + "loss": 0.5568759441375732, + "step": 4976 + }, + { + "epoch": 1.4553297265682117, + "grad_norm": 1.7459991230210548, + "learning_rate": 3.7932215842422903e-06, + "loss": 0.5458661317825317, + "step": 4977 + }, + { + "epoch": 1.4556221669834772, + "grad_norm": 1.6216302867008319, + "learning_rate": 3.789431581821019e-06, + "loss": 0.48293566703796387, + "step": 4978 + }, + { + "epoch": 1.4559146073987426, + "grad_norm": 1.893470262052562, + "learning_rate": 3.7856430310823546e-06, + "loss": 0.647431492805481, + "step": 4979 + }, + { + "epoch": 1.456207047814008, + "grad_norm": 1.6735249045743477, + "learning_rate": 3.7818559329118475e-06, + "loss": 
0.48039543628692627, + "step": 4980 + }, + { + "epoch": 1.4564994882292732, + "grad_norm": 1.6704036620696165, + "learning_rate": 3.7780702881947084e-06, + "loss": 0.6705803871154785, + "step": 4981 + }, + { + "epoch": 1.4567919286445385, + "grad_norm": 1.7404901320645014, + "learning_rate": 3.7742860978158103e-06, + "loss": 0.564405083656311, + "step": 4982 + }, + { + "epoch": 1.457084369059804, + "grad_norm": 1.7081222209997355, + "learning_rate": 3.7705033626596844e-06, + "loss": 0.5208612084388733, + "step": 4983 + }, + { + "epoch": 1.4573768094750694, + "grad_norm": 1.909829427679328, + "learning_rate": 3.766722083610521e-06, + "loss": 0.6230732202529907, + "step": 4984 + }, + { + "epoch": 1.4576692498903348, + "grad_norm": 1.6601663066885601, + "learning_rate": 3.7629422615521747e-06, + "loss": 0.5741504430770874, + "step": 4985 + }, + { + "epoch": 1.4579616903056003, + "grad_norm": 1.584208244849031, + "learning_rate": 3.75916389736815e-06, + "loss": 0.5321571826934814, + "step": 4986 + }, + { + "epoch": 1.4582541307208656, + "grad_norm": 1.95685306597155, + "learning_rate": 3.7553869919416186e-06, + "loss": 0.6367009878158569, + "step": 4987 + }, + { + "epoch": 1.458546571136131, + "grad_norm": 1.5904913997392975, + "learning_rate": 3.75161154615541e-06, + "loss": 0.5736235976219177, + "step": 4988 + }, + { + "epoch": 1.4588390115513965, + "grad_norm": 2.0157501917439866, + "learning_rate": 3.7478375608920127e-06, + "loss": 0.5799358487129211, + "step": 4989 + }, + { + "epoch": 1.4591314519666618, + "grad_norm": 1.7515991790236536, + "learning_rate": 3.7440650370335675e-06, + "loss": 0.6065561771392822, + "step": 4990 + }, + { + "epoch": 1.4594238923819272, + "grad_norm": 1.4583944256149548, + "learning_rate": 3.740293975461886e-06, + "loss": 0.5182442665100098, + "step": 4991 + }, + { + "epoch": 1.4597163327971927, + "grad_norm": 1.6877116508095484, + "learning_rate": 3.736524377058429e-06, + "loss": 0.5065605640411377, + "step": 4992 + }, + { + "epoch": 
1.460008773212458, + "grad_norm": 1.5024812411134352, + "learning_rate": 3.7327562427043163e-06, + "loss": 0.44326460361480713, + "step": 4993 + }, + { + "epoch": 1.4603012136277234, + "grad_norm": 1.9166701258714811, + "learning_rate": 3.7289895732803306e-06, + "loss": 0.6192547082901001, + "step": 4994 + }, + { + "epoch": 1.4605936540429887, + "grad_norm": 1.794387571688338, + "learning_rate": 3.725224369666899e-06, + "loss": 0.5487738847732544, + "step": 4995 + }, + { + "epoch": 1.460886094458254, + "grad_norm": 1.922772286834415, + "learning_rate": 3.7214606327441203e-06, + "loss": 0.558982253074646, + "step": 4996 + }, + { + "epoch": 1.4611785348735196, + "grad_norm": 1.770836311904495, + "learning_rate": 3.717698363391744e-06, + "loss": 0.5277853012084961, + "step": 4997 + }, + { + "epoch": 1.461470975288785, + "grad_norm": 1.7748123557502546, + "learning_rate": 3.7139375624891795e-06, + "loss": 0.6561184525489807, + "step": 4998 + }, + { + "epoch": 1.4617634157040502, + "grad_norm": 1.5647900159041126, + "learning_rate": 3.710178230915489e-06, + "loss": 0.46555888652801514, + "step": 4999 + }, + { + "epoch": 1.4620558561193158, + "grad_norm": 1.7414970962586886, + "learning_rate": 3.706420369549394e-06, + "loss": 0.5808060765266418, + "step": 5000 + }, + { + "epoch": 1.462348296534581, + "grad_norm": 1.442227314234909, + "learning_rate": 3.7026639792692722e-06, + "loss": 0.5407893061637878, + "step": 5001 + }, + { + "epoch": 1.4626407369498464, + "grad_norm": 2.580423891920115, + "learning_rate": 3.6989090609531574e-06, + "loss": 0.538393497467041, + "step": 5002 + }, + { + "epoch": 1.462933177365112, + "grad_norm": 1.8751864874321293, + "learning_rate": 3.6951556154787373e-06, + "loss": 0.530704140663147, + "step": 5003 + }, + { + "epoch": 1.4632256177803773, + "grad_norm": 1.4470439364888814, + "learning_rate": 3.691403643723359e-06, + "loss": 0.43352627754211426, + "step": 5004 + }, + { + "epoch": 1.4635180581956426, + "grad_norm": 1.6573279039642985, + 
"learning_rate": 3.687653146564025e-06, + "loss": 0.6047205924987793, + "step": 5005 + }, + { + "epoch": 1.463810498610908, + "grad_norm": 1.6556697002732312, + "learning_rate": 3.6839041248773857e-06, + "loss": 0.44708865880966187, + "step": 5006 + }, + { + "epoch": 1.4641029390261735, + "grad_norm": 1.6445747944839355, + "learning_rate": 3.680156579539753e-06, + "loss": 0.5653451681137085, + "step": 5007 + }, + { + "epoch": 1.4643953794414388, + "grad_norm": 1.750839565103172, + "learning_rate": 3.6764105114270966e-06, + "loss": 0.49293750524520874, + "step": 5008 + }, + { + "epoch": 1.4646878198567042, + "grad_norm": 1.7691390827672615, + "learning_rate": 3.672665921415034e-06, + "loss": 0.5761851072311401, + "step": 5009 + }, + { + "epoch": 1.4649802602719695, + "grad_norm": 1.7025752756263197, + "learning_rate": 3.668922810378841e-06, + "loss": 0.5188437700271606, + "step": 5010 + }, + { + "epoch": 1.465272700687235, + "grad_norm": 1.7765263620108804, + "learning_rate": 3.6651811791934476e-06, + "loss": 0.5159400701522827, + "step": 5011 + }, + { + "epoch": 1.4655651411025004, + "grad_norm": 1.4463295265937102, + "learning_rate": 3.6614410287334377e-06, + "loss": 0.478866845369339, + "step": 5012 + }, + { + "epoch": 1.4658575815177657, + "grad_norm": 1.6006806590634375, + "learning_rate": 3.6577023598730486e-06, + "loss": 0.5509926080703735, + "step": 5013 + }, + { + "epoch": 1.4661500219330312, + "grad_norm": 1.5613591503777215, + "learning_rate": 3.6539651734861705e-06, + "loss": 0.4872981309890747, + "step": 5014 + }, + { + "epoch": 1.4664424623482966, + "grad_norm": 1.4569843282992687, + "learning_rate": 3.6502294704463493e-06, + "loss": 0.47478264570236206, + "step": 5015 + }, + { + "epoch": 1.4667349027635619, + "grad_norm": 1.765955621655722, + "learning_rate": 3.646495251626785e-06, + "loss": 0.5140335559844971, + "step": 5016 + }, + { + "epoch": 1.4670273431788274, + "grad_norm": 1.5785594027919339, + "learning_rate": 3.6427625179003223e-06, + "loss": 
0.41033172607421875, + "step": 5017 + }, + { + "epoch": 1.4673197835940928, + "grad_norm": 1.7731644033346952, + "learning_rate": 3.639031270139468e-06, + "loss": 0.4290558099746704, + "step": 5018 + }, + { + "epoch": 1.467612224009358, + "grad_norm": 1.8964888989060893, + "learning_rate": 3.635301509216379e-06, + "loss": 0.5903435349464417, + "step": 5019 + }, + { + "epoch": 1.4679046644246234, + "grad_norm": 1.7302589846174075, + "learning_rate": 3.6315732360028655e-06, + "loss": 0.6410748958587646, + "step": 5020 + }, + { + "epoch": 1.4681971048398887, + "grad_norm": 1.584781169707585, + "learning_rate": 3.6278464513703858e-06, + "loss": 0.5499910712242126, + "step": 5021 + }, + { + "epoch": 1.4684895452551543, + "grad_norm": 1.4876234400926511, + "learning_rate": 3.624121156190056e-06, + "loss": 0.4980154037475586, + "step": 5022 + }, + { + "epoch": 1.4687819856704196, + "grad_norm": 1.7622618315552074, + "learning_rate": 3.6203973513326395e-06, + "loss": 0.5910995006561279, + "step": 5023 + }, + { + "epoch": 1.469074426085685, + "grad_norm": 1.837302229581672, + "learning_rate": 3.6166750376685534e-06, + "loss": 0.6003058552742004, + "step": 5024 + }, + { + "epoch": 1.4693668665009505, + "grad_norm": 2.0086634437416215, + "learning_rate": 3.6129542160678655e-06, + "loss": 0.5655561685562134, + "step": 5025 + }, + { + "epoch": 1.4696593069162158, + "grad_norm": 1.6720399704395428, + "learning_rate": 3.609234887400297e-06, + "loss": 0.713152289390564, + "step": 5026 + }, + { + "epoch": 1.4699517473314812, + "grad_norm": 1.3619130802184511, + "learning_rate": 3.605517052535219e-06, + "loss": 0.41018784046173096, + "step": 5027 + }, + { + "epoch": 1.4702441877467467, + "grad_norm": 1.7429761856148576, + "learning_rate": 3.6018007123416486e-06, + "loss": 0.5852759480476379, + "step": 5028 + }, + { + "epoch": 1.470536628162012, + "grad_norm": 1.6763203292398523, + "learning_rate": 3.598085867688259e-06, + "loss": 0.5942279696464539, + "step": 5029 + }, + { + 
"epoch": 1.4708290685772774, + "grad_norm": 1.5957062749275768, + "learning_rate": 3.594372519443374e-06, + "loss": 0.6265639662742615, + "step": 5030 + }, + { + "epoch": 1.471121508992543, + "grad_norm": 1.6944518172910965, + "learning_rate": 3.5906606684749668e-06, + "loss": 0.4539163112640381, + "step": 5031 + }, + { + "epoch": 1.4714139494078082, + "grad_norm": 1.8810670575321342, + "learning_rate": 3.586950315650658e-06, + "loss": 0.5682815909385681, + "step": 5032 + }, + { + "epoch": 1.4717063898230736, + "grad_norm": 1.5382985580447415, + "learning_rate": 3.583241461837721e-06, + "loss": 0.5188582539558411, + "step": 5033 + }, + { + "epoch": 1.4719988302383389, + "grad_norm": 1.923705094705072, + "learning_rate": 3.5795341079030777e-06, + "loss": 0.501958966255188, + "step": 5034 + }, + { + "epoch": 1.4722912706536042, + "grad_norm": 1.769758245215022, + "learning_rate": 3.5758282547132995e-06, + "loss": 0.5748735666275024, + "step": 5035 + }, + { + "epoch": 1.4725837110688698, + "grad_norm": 1.720811530645175, + "learning_rate": 3.5721239031346067e-06, + "loss": 0.5796875357627869, + "step": 5036 + }, + { + "epoch": 1.472876151484135, + "grad_norm": 1.7760443740240528, + "learning_rate": 3.56842105403287e-06, + "loss": 0.457103431224823, + "step": 5037 + }, + { + "epoch": 1.4731685918994004, + "grad_norm": 1.607843165834991, + "learning_rate": 3.564719708273607e-06, + "loss": 0.5300487875938416, + "step": 5038 + }, + { + "epoch": 1.473461032314666, + "grad_norm": 1.7877129065541937, + "learning_rate": 3.5610198667219886e-06, + "loss": 0.48143619298934937, + "step": 5039 + }, + { + "epoch": 1.4737534727299313, + "grad_norm": 1.9171325817627416, + "learning_rate": 3.557321530242824e-06, + "loss": 0.5523685216903687, + "step": 5040 + }, + { + "epoch": 1.4740459131451966, + "grad_norm": 1.7367077785146405, + "learning_rate": 3.5536246997005785e-06, + "loss": 0.5820931196212769, + "step": 5041 + }, + { + "epoch": 1.4743383535604622, + "grad_norm": 
1.6717570524697325, + "learning_rate": 3.5499293759593656e-06, + "loss": 0.6287394762039185, + "step": 5042 + }, + { + "epoch": 1.4746307939757275, + "grad_norm": 1.737914835396703, + "learning_rate": 3.5462355598829433e-06, + "loss": 0.4621254801750183, + "step": 5043 + }, + { + "epoch": 1.4749232343909928, + "grad_norm": 1.687652415457897, + "learning_rate": 3.5425432523347205e-06, + "loss": 0.5571160316467285, + "step": 5044 + }, + { + "epoch": 1.4752156748062581, + "grad_norm": 1.716802557057107, + "learning_rate": 3.5388524541777492e-06, + "loss": 0.4135715365409851, + "step": 5045 + }, + { + "epoch": 1.4755081152215237, + "grad_norm": 1.868527213017395, + "learning_rate": 3.535163166274733e-06, + "loss": 0.524153470993042, + "step": 5046 + }, + { + "epoch": 1.475800555636789, + "grad_norm": 1.9441558365554423, + "learning_rate": 3.5314753894880205e-06, + "loss": 0.6330267786979675, + "step": 5047 + }, + { + "epoch": 1.4760929960520544, + "grad_norm": 1.7270524835767156, + "learning_rate": 3.527789124679605e-06, + "loss": 0.46210330724716187, + "step": 5048 + }, + { + "epoch": 1.4763854364673197, + "grad_norm": 1.8799684878196978, + "learning_rate": 3.524104372711131e-06, + "loss": 0.49293309450149536, + "step": 5049 + }, + { + "epoch": 1.4766778768825852, + "grad_norm": 1.7601042593478657, + "learning_rate": 3.520421134443889e-06, + "loss": 0.6196815967559814, + "step": 5050 + }, + { + "epoch": 1.4769703172978506, + "grad_norm": 1.568738566408146, + "learning_rate": 3.5167394107388064e-06, + "loss": 0.42622530460357666, + "step": 5051 + }, + { + "epoch": 1.4772627577131159, + "grad_norm": 1.6087834768838942, + "learning_rate": 3.513059202456468e-06, + "loss": 0.4475107491016388, + "step": 5052 + }, + { + "epoch": 1.4775551981283814, + "grad_norm": 1.549049360877832, + "learning_rate": 3.5093805104571e-06, + "loss": 0.4295683205127716, + "step": 5053 + }, + { + "epoch": 1.4778476385436468, + "grad_norm": 1.512499491264911, + "learning_rate": 
3.505703335600573e-06, + "loss": 0.5331642627716064, + "step": 5054 + }, + { + "epoch": 1.478140078958912, + "grad_norm": 1.7125050045051866, + "learning_rate": 3.5020276787464058e-06, + "loss": 0.5615599155426025, + "step": 5055 + }, + { + "epoch": 1.4784325193741776, + "grad_norm": 1.470462641632426, + "learning_rate": 3.4983535407537618e-06, + "loss": 0.5611366033554077, + "step": 5056 + }, + { + "epoch": 1.478724959789443, + "grad_norm": 2.0861134690908325, + "learning_rate": 3.494680922481445e-06, + "loss": 0.5891577005386353, + "step": 5057 + }, + { + "epoch": 1.4790174002047083, + "grad_norm": 1.981139638659905, + "learning_rate": 3.491009824787911e-06, + "loss": 0.5583761930465698, + "step": 5058 + }, + { + "epoch": 1.4793098406199736, + "grad_norm": 1.5020288470897978, + "learning_rate": 3.4873402485312548e-06, + "loss": 0.5001339912414551, + "step": 5059 + }, + { + "epoch": 1.479602281035239, + "grad_norm": 1.445341864944132, + "learning_rate": 3.4836721945692175e-06, + "loss": 0.5050641894340515, + "step": 5060 + }, + { + "epoch": 1.4798947214505045, + "grad_norm": 1.5825314066620513, + "learning_rate": 3.4800056637591885e-06, + "loss": 0.5377815365791321, + "step": 5061 + }, + { + "epoch": 1.4801871618657698, + "grad_norm": 1.6490614330323619, + "learning_rate": 3.4763406569581892e-06, + "loss": 0.5517662763595581, + "step": 5062 + }, + { + "epoch": 1.4804796022810351, + "grad_norm": 1.7535356829599726, + "learning_rate": 3.4726771750228984e-06, + "loss": 0.5908320546150208, + "step": 5063 + }, + { + "epoch": 1.4807720426963007, + "grad_norm": 1.640782634903257, + "learning_rate": 3.4690152188096293e-06, + "loss": 0.5169299840927124, + "step": 5064 + }, + { + "epoch": 1.481064483111566, + "grad_norm": 1.5566091974805318, + "learning_rate": 3.4653547891743457e-06, + "loss": 0.6198064088821411, + "step": 5065 + }, + { + "epoch": 1.4813569235268313, + "grad_norm": 1.7822104060368598, + "learning_rate": 3.4616958869726436e-06, + "loss": 0.4971558153629303, 
+ "step": 5066 + }, + { + "epoch": 1.481649363942097, + "grad_norm": 1.8117473020924466, + "learning_rate": 3.4580385130597794e-06, + "loss": 0.556640088558197, + "step": 5067 + }, + { + "epoch": 1.4819418043573622, + "grad_norm": 1.7297037385384992, + "learning_rate": 3.4543826682906358e-06, + "loss": 0.5336956977844238, + "step": 5068 + }, + { + "epoch": 1.4822342447726276, + "grad_norm": 1.8723627634024749, + "learning_rate": 3.4507283535197454e-06, + "loss": 0.5185145735740662, + "step": 5069 + }, + { + "epoch": 1.482526685187893, + "grad_norm": 1.5962927751585108, + "learning_rate": 3.447075569601287e-06, + "loss": 0.5460748672485352, + "step": 5070 + }, + { + "epoch": 1.4828191256031584, + "grad_norm": 1.7486536420516579, + "learning_rate": 3.4434243173890667e-06, + "loss": 0.5860699415206909, + "step": 5071 + }, + { + "epoch": 1.4831115660184238, + "grad_norm": 1.5377337582646984, + "learning_rate": 3.4397745977365482e-06, + "loss": 0.5818450450897217, + "step": 5072 + }, + { + "epoch": 1.483404006433689, + "grad_norm": 1.6591511763241749, + "learning_rate": 3.4361264114968316e-06, + "loss": 0.4205876588821411, + "step": 5073 + }, + { + "epoch": 1.4836964468489544, + "grad_norm": 1.6097740909701606, + "learning_rate": 3.4324797595226567e-06, + "loss": 0.5503501892089844, + "step": 5074 + }, + { + "epoch": 1.48398888726422, + "grad_norm": 1.7613851561474803, + "learning_rate": 3.4288346426664063e-06, + "loss": 0.5388503074645996, + "step": 5075 + }, + { + "epoch": 1.4842813276794853, + "grad_norm": 1.5726280695427581, + "learning_rate": 3.4251910617801054e-06, + "loss": 0.5866841673851013, + "step": 5076 + }, + { + "epoch": 1.4845737680947506, + "grad_norm": 1.7063663913828162, + "learning_rate": 3.4215490177154176e-06, + "loss": 0.5377970337867737, + "step": 5077 + }, + { + "epoch": 1.4848662085100162, + "grad_norm": 2.013961516297246, + "learning_rate": 3.41790851132365e-06, + "loss": 0.6311028003692627, + "step": 5078 + }, + { + "epoch": 
1.4851586489252815, + "grad_norm": 1.7100175604987324, + "learning_rate": 3.414269543455747e-06, + "loss": 0.5226441621780396, + "step": 5079 + }, + { + "epoch": 1.4854510893405468, + "grad_norm": 1.73285658375087, + "learning_rate": 3.410632114962298e-06, + "loss": 0.6306775212287903, + "step": 5080 + }, + { + "epoch": 1.4857435297558124, + "grad_norm": 1.8061194998201888, + "learning_rate": 3.406996226693531e-06, + "loss": 0.5432136058807373, + "step": 5081 + }, + { + "epoch": 1.4860359701710777, + "grad_norm": 1.564250952291821, + "learning_rate": 3.403361879499305e-06, + "loss": 0.4218754470348358, + "step": 5082 + }, + { + "epoch": 1.486328410586343, + "grad_norm": 1.7436245532279955, + "learning_rate": 3.3997290742291335e-06, + "loss": 0.5121650099754333, + "step": 5083 + }, + { + "epoch": 1.4866208510016083, + "grad_norm": 1.713174617853516, + "learning_rate": 3.39609781173216e-06, + "loss": 0.5489382743835449, + "step": 5084 + }, + { + "epoch": 1.486913291416874, + "grad_norm": 1.7492646537049668, + "learning_rate": 3.3924680928571694e-06, + "loss": 0.4190993309020996, + "step": 5085 + }, + { + "epoch": 1.4872057318321392, + "grad_norm": 2.012504952292692, + "learning_rate": 3.388839918452589e-06, + "loss": 0.5927796363830566, + "step": 5086 + }, + { + "epoch": 1.4874981722474045, + "grad_norm": 1.5385674447124333, + "learning_rate": 3.3852132893664803e-06, + "loss": 0.43746429681777954, + "step": 5087 + }, + { + "epoch": 1.4877906126626699, + "grad_norm": 1.592965785800762, + "learning_rate": 3.381588206446548e-06, + "loss": 0.41599413752555847, + "step": 5088 + }, + { + "epoch": 1.4880830530779354, + "grad_norm": 1.640030018717508, + "learning_rate": 3.3779646705401305e-06, + "loss": 0.5803484320640564, + "step": 5089 + }, + { + "epoch": 1.4883754934932008, + "grad_norm": 1.6162932555816476, + "learning_rate": 3.3743426824942082e-06, + "loss": 0.5277384519577026, + "step": 5090 + }, + { + "epoch": 1.488667933908466, + "grad_norm": 1.5149011711130314, + 
"learning_rate": 3.370722243155401e-06, + "loss": 0.5842317342758179, + "step": 5091 + }, + { + "epoch": 1.4889603743237316, + "grad_norm": 1.8602157485440332, + "learning_rate": 3.367103353369965e-06, + "loss": 0.5394416451454163, + "step": 5092 + }, + { + "epoch": 1.489252814738997, + "grad_norm": 1.6652727466684587, + "learning_rate": 3.3634860139837877e-06, + "loss": 0.5457144975662231, + "step": 5093 + }, + { + "epoch": 1.4895452551542623, + "grad_norm": 1.6270719194791377, + "learning_rate": 3.3598702258424044e-06, + "loss": 0.49552473425865173, + "step": 5094 + }, + { + "epoch": 1.4898376955695278, + "grad_norm": 1.8756044563450258, + "learning_rate": 3.3562559897909842e-06, + "loss": 0.5922214984893799, + "step": 5095 + }, + { + "epoch": 1.4901301359847932, + "grad_norm": 1.6902952443841357, + "learning_rate": 3.35264330667433e-06, + "loss": 0.5844507217407227, + "step": 5096 + }, + { + "epoch": 1.4904225764000585, + "grad_norm": 1.6441848915551236, + "learning_rate": 3.3490321773368872e-06, + "loss": 0.5096029043197632, + "step": 5097 + }, + { + "epoch": 1.4907150168153238, + "grad_norm": 1.8296617417124132, + "learning_rate": 3.345422602622734e-06, + "loss": 0.6343984603881836, + "step": 5098 + }, + { + "epoch": 1.4910074572305891, + "grad_norm": 1.7032992920741425, + "learning_rate": 3.3418145833755875e-06, + "loss": 0.5319832563400269, + "step": 5099 + }, + { + "epoch": 1.4912998976458547, + "grad_norm": 1.8127365107062148, + "learning_rate": 3.3382081204388006e-06, + "loss": 0.6453676819801331, + "step": 5100 + }, + { + "epoch": 1.49159233806112, + "grad_norm": 1.7068058578414038, + "learning_rate": 3.33460321465536e-06, + "loss": 0.5129305720329285, + "step": 5101 + }, + { + "epoch": 1.4918847784763853, + "grad_norm": 1.7103748262888143, + "learning_rate": 3.3309998668678912e-06, + "loss": 0.5680958032608032, + "step": 5102 + }, + { + "epoch": 1.492177218891651, + "grad_norm": 1.654140366409291, + "learning_rate": 3.32739807791866e-06, + "loss": 
0.5959445834159851, + "step": 5103 + }, + { + "epoch": 1.4924696593069162, + "grad_norm": 1.5546485584978795, + "learning_rate": 3.3237978486495536e-06, + "loss": 0.5549102425575256, + "step": 5104 + }, + { + "epoch": 1.4927620997221815, + "grad_norm": 1.5522771682213525, + "learning_rate": 3.3201991799021084e-06, + "loss": 0.4219816327095032, + "step": 5105 + }, + { + "epoch": 1.493054540137447, + "grad_norm": 1.8150814493123832, + "learning_rate": 3.3166020725174906e-06, + "loss": 0.46013522148132324, + "step": 5106 + }, + { + "epoch": 1.4933469805527124, + "grad_norm": 2.2057259724068885, + "learning_rate": 3.3130065273365033e-06, + "loss": 0.6013174057006836, + "step": 5107 + }, + { + "epoch": 1.4936394209679777, + "grad_norm": 1.9081850485789635, + "learning_rate": 3.3094125451995827e-06, + "loss": 0.7097996473312378, + "step": 5108 + }, + { + "epoch": 1.4939318613832433, + "grad_norm": 1.6725604100107134, + "learning_rate": 3.305820126946799e-06, + "loss": 0.6704884767532349, + "step": 5109 + }, + { + "epoch": 1.4942243017985086, + "grad_norm": 1.735486744932862, + "learning_rate": 3.3022292734178605e-06, + "loss": 0.5211119651794434, + "step": 5110 + }, + { + "epoch": 1.494516742213774, + "grad_norm": 1.7718418689676594, + "learning_rate": 3.2986399854521065e-06, + "loss": 0.5830427408218384, + "step": 5111 + }, + { + "epoch": 1.4948091826290393, + "grad_norm": 1.574048881929475, + "learning_rate": 3.2950522638885106e-06, + "loss": 0.5647883415222168, + "step": 5112 + }, + { + "epoch": 1.4951016230443046, + "grad_norm": 1.3783682279274316, + "learning_rate": 3.2914661095656807e-06, + "loss": 0.46678125858306885, + "step": 5113 + }, + { + "epoch": 1.4953940634595702, + "grad_norm": 1.768460226758459, + "learning_rate": 3.287881523321863e-06, + "loss": 0.5391934514045715, + "step": 5114 + }, + { + "epoch": 1.4956865038748355, + "grad_norm": 1.532723290545503, + "learning_rate": 3.284298505994926e-06, + "loss": 0.4039243459701538, + "step": 5115 + }, + { + 
"epoch": 1.4959789442901008, + "grad_norm": 1.8718379114919181, + "learning_rate": 3.2807170584223802e-06, + "loss": 0.6187412738800049, + "step": 5116 + }, + { + "epoch": 1.4962713847053664, + "grad_norm": 1.730072311160077, + "learning_rate": 3.277137181441369e-06, + "loss": 0.5165137648582458, + "step": 5117 + }, + { + "epoch": 1.4965638251206317, + "grad_norm": 1.7402216150888872, + "learning_rate": 3.273558875888665e-06, + "loss": 0.6315420866012573, + "step": 5118 + }, + { + "epoch": 1.496856265535897, + "grad_norm": 1.6811341442796868, + "learning_rate": 3.269982142600677e-06, + "loss": 0.5522993206977844, + "step": 5119 + }, + { + "epoch": 1.4971487059511626, + "grad_norm": 1.8103742244487522, + "learning_rate": 3.266406982413444e-06, + "loss": 0.5751636028289795, + "step": 5120 + }, + { + "epoch": 1.4974411463664279, + "grad_norm": 1.8346826868047423, + "learning_rate": 3.262833396162637e-06, + "loss": 0.5552358031272888, + "step": 5121 + }, + { + "epoch": 1.4977335867816932, + "grad_norm": 1.4553347230926987, + "learning_rate": 3.259261384683562e-06, + "loss": 0.4971257150173187, + "step": 5122 + }, + { + "epoch": 1.4980260271969585, + "grad_norm": 1.7328825599332134, + "learning_rate": 3.2556909488111533e-06, + "loss": 0.3803454637527466, + "step": 5123 + }, + { + "epoch": 1.498318467612224, + "grad_norm": 1.7448185442015292, + "learning_rate": 3.25212208937998e-06, + "loss": 0.45348531007766724, + "step": 5124 + }, + { + "epoch": 1.4986109080274894, + "grad_norm": 1.6593501166731528, + "learning_rate": 3.2485548072242403e-06, + "loss": 0.4839708209037781, + "step": 5125 + }, + { + "epoch": 1.4989033484427547, + "grad_norm": 1.7004886969570365, + "learning_rate": 3.244989103177768e-06, + "loss": 0.4743500351905823, + "step": 5126 + }, + { + "epoch": 1.49919578885802, + "grad_norm": 1.7042585723205583, + "learning_rate": 3.241424978074018e-06, + "loss": 0.558182954788208, + "step": 5127 + }, + { + "epoch": 1.4994882292732856, + "grad_norm": 
1.5886443982701122, + "learning_rate": 3.2378624327460874e-06, + "loss": 0.41309911012649536, + "step": 5128 + }, + { + "epoch": 1.499780669688551, + "grad_norm": 1.7452725700601364, + "learning_rate": 3.2343014680266984e-06, + "loss": 0.5627751350402832, + "step": 5129 + }, + { + "epoch": 1.5000731101038163, + "grad_norm": 1.8911076385977756, + "learning_rate": 3.230742084748204e-06, + "loss": 0.5374714732170105, + "step": 5130 + }, + { + "epoch": 1.5003655505190818, + "grad_norm": 1.7659792305895352, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.4264039993286133, + "step": 5131 + }, + { + "epoch": 1.5006579909343472, + "grad_norm": 1.8312136055327797, + "learning_rate": 3.223628065841472e-06, + "loss": 0.44204217195510864, + "step": 5132 + }, + { + "epoch": 1.5009504313496125, + "grad_norm": 1.6892686547824762, + "learning_rate": 3.220073431876092e-06, + "loss": 0.5322041511535645, + "step": 5133 + }, + { + "epoch": 1.501242871764878, + "grad_norm": 1.6801975106342348, + "learning_rate": 3.216520382677324e-06, + "loss": 0.4741417169570923, + "step": 5134 + }, + { + "epoch": 1.5015353121801431, + "grad_norm": 1.9712166683153383, + "learning_rate": 3.212968919075672e-06, + "loss": 0.7069851756095886, + "step": 5135 + }, + { + "epoch": 1.5018277525954087, + "grad_norm": 1.6644566597906936, + "learning_rate": 3.2094190419012694e-06, + "loss": 0.6049044132232666, + "step": 5136 + }, + { + "epoch": 1.5021201930106742, + "grad_norm": 1.6420500389509403, + "learning_rate": 3.2058707519838817e-06, + "loss": 0.556586503982544, + "step": 5137 + }, + { + "epoch": 1.5024126334259393, + "grad_norm": 1.4612168804015682, + "learning_rate": 3.202324050152894e-06, + "loss": 0.46489936113357544, + "step": 5138 + }, + { + "epoch": 1.5027050738412049, + "grad_norm": 1.6808104719845611, + "learning_rate": 3.1987789372373292e-06, + "loss": 0.5332333445549011, + "step": 5139 + }, + { + "epoch": 1.5029975142564702, + "grad_norm": 1.5897163584111842, + "learning_rate": 
3.1952354140658346e-06, + "loss": 0.5547586679458618, + "step": 5140 + }, + { + "epoch": 1.5032899546717355, + "grad_norm": 1.7343008366786887, + "learning_rate": 3.1916934814666858e-06, + "loss": 0.5500372648239136, + "step": 5141 + }, + { + "epoch": 1.503582395087001, + "grad_norm": 1.6657659858957796, + "learning_rate": 3.1881531402677934e-06, + "loss": 0.5065571069717407, + "step": 5142 + }, + { + "epoch": 1.5038748355022664, + "grad_norm": 2.106659003681642, + "learning_rate": 3.1846143912966887e-06, + "loss": 0.5942833423614502, + "step": 5143 + }, + { + "epoch": 1.5041672759175317, + "grad_norm": 1.5318136638727409, + "learning_rate": 3.181077235380531e-06, + "loss": 0.4089720547199249, + "step": 5144 + }, + { + "epoch": 1.5044597163327973, + "grad_norm": 1.959628279475518, + "learning_rate": 3.1775416733461107e-06, + "loss": 0.5360317230224609, + "step": 5145 + }, + { + "epoch": 1.5047521567480626, + "grad_norm": 1.8497642502339247, + "learning_rate": 3.174007706019845e-06, + "loss": 0.5403856635093689, + "step": 5146 + }, + { + "epoch": 1.505044597163328, + "grad_norm": 1.583723666722825, + "learning_rate": 3.1704753342277727e-06, + "loss": 0.5377147197723389, + "step": 5147 + }, + { + "epoch": 1.5053370375785935, + "grad_norm": 1.908833197627838, + "learning_rate": 3.166944558795567e-06, + "loss": 0.49888312816619873, + "step": 5148 + }, + { + "epoch": 1.5056294779938586, + "grad_norm": 1.604723023798687, + "learning_rate": 3.1634153805485245e-06, + "loss": 0.5105957984924316, + "step": 5149 + }, + { + "epoch": 1.5059219184091241, + "grad_norm": 1.530550544138999, + "learning_rate": 3.1598878003115694e-06, + "loss": 0.5653882026672363, + "step": 5150 + }, + { + "epoch": 1.5062143588243895, + "grad_norm": 1.7528922447010231, + "learning_rate": 3.1563618189092536e-06, + "loss": 0.5293145179748535, + "step": 5151 + }, + { + "epoch": 1.5065067992396548, + "grad_norm": 1.7049104339852403, + "learning_rate": 3.1528374371657524e-06, + "loss": 0.5852463841438293, 
+ "step": 5152 + }, + { + "epoch": 1.5067992396549204, + "grad_norm": 1.7074372465536334, + "learning_rate": 3.1493146559048683e-06, + "loss": 0.5986759662628174, + "step": 5153 + }, + { + "epoch": 1.5070916800701857, + "grad_norm": 1.8410699226798701, + "learning_rate": 3.1457934759500298e-06, + "loss": 0.6363133788108826, + "step": 5154 + }, + { + "epoch": 1.507384120485451, + "grad_norm": 1.7703119171725752, + "learning_rate": 3.1422738981242927e-06, + "loss": 0.4757901430130005, + "step": 5155 + }, + { + "epoch": 1.5076765609007166, + "grad_norm": 1.8042941675603332, + "learning_rate": 3.1387559232503374e-06, + "loss": 0.7614980936050415, + "step": 5156 + }, + { + "epoch": 1.5079690013159819, + "grad_norm": 1.8353916940267578, + "learning_rate": 3.13523955215047e-06, + "loss": 0.5739883184432983, + "step": 5157 + }, + { + "epoch": 1.5082614417312472, + "grad_norm": 1.6405466984899346, + "learning_rate": 3.131724785646616e-06, + "loss": 0.5893388390541077, + "step": 5158 + }, + { + "epoch": 1.5085538821465128, + "grad_norm": 1.4613031069188664, + "learning_rate": 3.1282116245603333e-06, + "loss": 0.5809957981109619, + "step": 5159 + }, + { + "epoch": 1.508846322561778, + "grad_norm": 1.558509757762028, + "learning_rate": 3.124700069712803e-06, + "loss": 0.5651090741157532, + "step": 5160 + }, + { + "epoch": 1.5091387629770434, + "grad_norm": 1.5870160926102073, + "learning_rate": 3.1211901219248273e-06, + "loss": 0.3736303448677063, + "step": 5161 + }, + { + "epoch": 1.509431203392309, + "grad_norm": 1.744264206007829, + "learning_rate": 3.117681782016838e-06, + "loss": 0.5501068234443665, + "step": 5162 + }, + { + "epoch": 1.509723643807574, + "grad_norm": 1.7377852819958348, + "learning_rate": 3.1141750508088865e-06, + "loss": 0.6210630536079407, + "step": 5163 + }, + { + "epoch": 1.5100160842228396, + "grad_norm": 1.5741938339988393, + "learning_rate": 3.110669929120651e-06, + "loss": 0.5722042322158813, + "step": 5164 + }, + { + "epoch": 1.510308524638105, + 
"grad_norm": 1.617906406413033, + "learning_rate": 3.107166417771431e-06, + "loss": 0.5813776254653931, + "step": 5165 + }, + { + "epoch": 1.5106009650533703, + "grad_norm": 1.5816945478856634, + "learning_rate": 3.1036645175801515e-06, + "loss": 0.4911368787288666, + "step": 5166 + }, + { + "epoch": 1.5108934054686358, + "grad_norm": 1.5812988749732655, + "learning_rate": 3.100164229365361e-06, + "loss": 0.5136172771453857, + "step": 5167 + }, + { + "epoch": 1.5111858458839011, + "grad_norm": 1.7202185949801794, + "learning_rate": 3.096665553945234e-06, + "loss": 0.5746543407440186, + "step": 5168 + }, + { + "epoch": 1.5114782862991665, + "grad_norm": 1.8577610332100818, + "learning_rate": 3.0931684921375572e-06, + "loss": 0.4949193000793457, + "step": 5169 + }, + { + "epoch": 1.511770726714432, + "grad_norm": 1.6744220879324234, + "learning_rate": 3.089673044759751e-06, + "loss": 0.5732932090759277, + "step": 5170 + }, + { + "epoch": 1.5120631671296973, + "grad_norm": 1.5865659073822531, + "learning_rate": 3.086179212628855e-06, + "loss": 0.5329696536064148, + "step": 5171 + }, + { + "epoch": 1.5123556075449627, + "grad_norm": 1.7970382860153173, + "learning_rate": 3.082686996561531e-06, + "loss": 0.631770670413971, + "step": 5172 + }, + { + "epoch": 1.5126480479602282, + "grad_norm": 1.5998021767601671, + "learning_rate": 3.0791963973740646e-06, + "loss": 0.5183405876159668, + "step": 5173 + }, + { + "epoch": 1.5129404883754933, + "grad_norm": 1.7133603210505308, + "learning_rate": 3.075707415882361e-06, + "loss": 0.5616034269332886, + "step": 5174 + }, + { + "epoch": 1.5132329287907589, + "grad_norm": 1.5912245556380846, + "learning_rate": 3.0722200529019477e-06, + "loss": 0.48513877391815186, + "step": 5175 + }, + { + "epoch": 1.5135253692060244, + "grad_norm": 1.574805808870548, + "learning_rate": 3.068734309247976e-06, + "loss": 0.5226399898529053, + "step": 5176 + }, + { + "epoch": 1.5138178096212895, + "grad_norm": 1.592402045128277, + "learning_rate": 
3.0652501857352167e-06, + "loss": 0.48817533254623413, + "step": 5177 + }, + { + "epoch": 1.514110250036555, + "grad_norm": 1.5523305292465257, + "learning_rate": 3.061767683178063e-06, + "loss": 0.4163327217102051, + "step": 5178 + }, + { + "epoch": 1.5144026904518204, + "grad_norm": 1.6254224030737643, + "learning_rate": 3.058286802390531e-06, + "loss": 0.5984256267547607, + "step": 5179 + }, + { + "epoch": 1.5146951308670857, + "grad_norm": 1.8006518354372911, + "learning_rate": 3.054807544186249e-06, + "loss": 0.47233515977859497, + "step": 5180 + }, + { + "epoch": 1.5149875712823513, + "grad_norm": 1.6896342506826862, + "learning_rate": 3.0513299093784766e-06, + "loss": 0.5545482635498047, + "step": 5181 + }, + { + "epoch": 1.5152800116976166, + "grad_norm": 1.5925171354605219, + "learning_rate": 3.047853898780089e-06, + "loss": 0.46200019121170044, + "step": 5182 + }, + { + "epoch": 1.515572452112882, + "grad_norm": 1.7986358499610187, + "learning_rate": 3.0443795132035824e-06, + "loss": 0.6146235466003418, + "step": 5183 + }, + { + "epoch": 1.5158648925281475, + "grad_norm": 1.6180210942837954, + "learning_rate": 3.040906753461075e-06, + "loss": 0.5653461217880249, + "step": 5184 + }, + { + "epoch": 1.5161573329434128, + "grad_norm": 1.7782122645526974, + "learning_rate": 3.0374356203643008e-06, + "loss": 0.6514929533004761, + "step": 5185 + }, + { + "epoch": 1.5164497733586781, + "grad_norm": 1.6488410817366923, + "learning_rate": 3.033966114724618e-06, + "loss": 0.48213401436805725, + "step": 5186 + }, + { + "epoch": 1.5167422137739437, + "grad_norm": 1.8810893536328739, + "learning_rate": 3.0304982373530013e-06, + "loss": 0.4935530424118042, + "step": 5187 + }, + { + "epoch": 1.5170346541892088, + "grad_norm": 1.9406636249591702, + "learning_rate": 3.0270319890600465e-06, + "loss": 0.6435343027114868, + "step": 5188 + }, + { + "epoch": 1.5173270946044743, + "grad_norm": 1.4722259236044228, + "learning_rate": 3.0235673706559675e-06, + "loss": 
0.49350717663764954, + "step": 5189 + }, + { + "epoch": 1.5176195350197397, + "grad_norm": 1.636152242750681, + "learning_rate": 3.0201043829506015e-06, + "loss": 0.4745938777923584, + "step": 5190 + }, + { + "epoch": 1.517911975435005, + "grad_norm": 1.747247707841839, + "learning_rate": 3.0166430267533944e-06, + "loss": 0.5867031812667847, + "step": 5191 + }, + { + "epoch": 1.5182044158502705, + "grad_norm": 2.0836038611604275, + "learning_rate": 3.01318330287342e-06, + "loss": 0.5477231740951538, + "step": 5192 + }, + { + "epoch": 1.5184968562655359, + "grad_norm": 1.5825293698408722, + "learning_rate": 3.0097252121193687e-06, + "loss": 0.5788818597793579, + "step": 5193 + }, + { + "epoch": 1.5187892966808012, + "grad_norm": 1.5819522244244852, + "learning_rate": 3.0062687552995475e-06, + "loss": 0.4967714548110962, + "step": 5194 + }, + { + "epoch": 1.5190817370960668, + "grad_norm": 1.810354148695448, + "learning_rate": 3.002813933221882e-06, + "loss": 0.6427319645881653, + "step": 5195 + }, + { + "epoch": 1.519374177511332, + "grad_norm": 1.7324283900525337, + "learning_rate": 2.999360746693916e-06, + "loss": 0.5615307688713074, + "step": 5196 + }, + { + "epoch": 1.5196666179265974, + "grad_norm": 1.8017068269121923, + "learning_rate": 2.9959091965228102e-06, + "loss": 0.6646313667297363, + "step": 5197 + }, + { + "epoch": 1.519959058341863, + "grad_norm": 1.4648905848591907, + "learning_rate": 2.9924592835153454e-06, + "loss": 0.47536247968673706, + "step": 5198 + }, + { + "epoch": 1.5202514987571283, + "grad_norm": 1.701001149097395, + "learning_rate": 2.9890110084779157e-06, + "loss": 0.5850256681442261, + "step": 5199 + }, + { + "epoch": 1.5205439391723936, + "grad_norm": 1.6650942638342863, + "learning_rate": 2.985564372216536e-06, + "loss": 0.5724887251853943, + "step": 5200 + }, + { + "epoch": 1.5208363795876592, + "grad_norm": 1.6379341688791944, + "learning_rate": 2.9821193755368383e-06, + "loss": 0.5052510499954224, + "step": 5201 + }, + { + 
"epoch": 1.5211288200029243, + "grad_norm": 1.5270508750040293, + "learning_rate": 2.9786760192440644e-06, + "loss": 0.439144492149353, + "step": 5202 + }, + { + "epoch": 1.5214212604181898, + "grad_norm": 1.624134940512823, + "learning_rate": 2.97523430414308e-06, + "loss": 0.4560511112213135, + "step": 5203 + }, + { + "epoch": 1.5217137008334551, + "grad_norm": 1.9447169329839864, + "learning_rate": 2.9717942310383664e-06, + "loss": 0.6848068237304688, + "step": 5204 + }, + { + "epoch": 1.5220061412487205, + "grad_norm": 1.5338251170475576, + "learning_rate": 2.9683558007340184e-06, + "loss": 0.5541313886642456, + "step": 5205 + }, + { + "epoch": 1.522298581663986, + "grad_norm": 1.4921475223936211, + "learning_rate": 2.964919014033749e-06, + "loss": 0.5117338299751282, + "step": 5206 + }, + { + "epoch": 1.5225910220792513, + "grad_norm": 1.8454970950489444, + "learning_rate": 2.9614838717408866e-06, + "loss": 0.5164151191711426, + "step": 5207 + }, + { + "epoch": 1.5228834624945167, + "grad_norm": 1.6612213438595136, + "learning_rate": 2.9580503746583744e-06, + "loss": 0.5461020469665527, + "step": 5208 + }, + { + "epoch": 1.5231759029097822, + "grad_norm": 1.580589085309813, + "learning_rate": 2.9546185235887705e-06, + "loss": 0.4265401065349579, + "step": 5209 + }, + { + "epoch": 1.5234683433250475, + "grad_norm": 1.822483254200033, + "learning_rate": 2.9511883193342505e-06, + "loss": 0.47372496128082275, + "step": 5210 + }, + { + "epoch": 1.5237607837403129, + "grad_norm": 1.5409548150660597, + "learning_rate": 2.9477597626966036e-06, + "loss": 0.43951019644737244, + "step": 5211 + }, + { + "epoch": 1.5240532241555784, + "grad_norm": 2.1038432849237862, + "learning_rate": 2.9443328544772343e-06, + "loss": 0.6514073610305786, + "step": 5212 + }, + { + "epoch": 1.5243456645708435, + "grad_norm": 1.6794879789857167, + "learning_rate": 2.940907595477164e-06, + "loss": 0.523013710975647, + "step": 5213 + }, + { + "epoch": 1.524638104986109, + "grad_norm": 
1.6399154124434079, + "learning_rate": 2.9374839864970194e-06, + "loss": 0.4945281744003296, + "step": 5214 + }, + { + "epoch": 1.5249305454013746, + "grad_norm": 1.83414324289986, + "learning_rate": 2.9340620283370525e-06, + "loss": 0.5768609046936035, + "step": 5215 + }, + { + "epoch": 1.5252229858166397, + "grad_norm": 1.7611799606025424, + "learning_rate": 2.930641721797125e-06, + "loss": 0.45644205808639526, + "step": 5216 + }, + { + "epoch": 1.5255154262319053, + "grad_norm": 1.5932175762441756, + "learning_rate": 2.92722306767671e-06, + "loss": 0.590227484703064, + "step": 5217 + }, + { + "epoch": 1.5258078666471706, + "grad_norm": 1.8078838529845034, + "learning_rate": 2.9238060667749014e-06, + "loss": 0.5618122816085815, + "step": 5218 + }, + { + "epoch": 1.526100307062436, + "grad_norm": 1.9135498575527394, + "learning_rate": 2.9203907198904027e-06, + "loss": 0.6431877613067627, + "step": 5219 + }, + { + "epoch": 1.5263927474777015, + "grad_norm": 1.5548470750003383, + "learning_rate": 2.916977027821527e-06, + "loss": 0.5019941329956055, + "step": 5220 + }, + { + "epoch": 1.5266851878929668, + "grad_norm": 1.9013308084843434, + "learning_rate": 2.913564991366209e-06, + "loss": 0.5413016080856323, + "step": 5221 + }, + { + "epoch": 1.5269776283082321, + "grad_norm": 1.6880920277336984, + "learning_rate": 2.9101546113219846e-06, + "loss": 0.6546905636787415, + "step": 5222 + }, + { + "epoch": 1.5272700687234977, + "grad_norm": 1.7013707157233615, + "learning_rate": 2.906745888486013e-06, + "loss": 0.5689815878868103, + "step": 5223 + }, + { + "epoch": 1.527562509138763, + "grad_norm": 1.8369848799419313, + "learning_rate": 2.9033388236550632e-06, + "loss": 0.5134810209274292, + "step": 5224 + }, + { + "epoch": 1.5278549495540283, + "grad_norm": 1.4280052174004847, + "learning_rate": 2.8999334176255143e-06, + "loss": 0.4880787134170532, + "step": 5225 + }, + { + "epoch": 1.528147389969294, + "grad_norm": 1.8292283637694566, + "learning_rate": 
2.89652967119336e-06, + "loss": 0.4345950782299042, + "step": 5226 + }, + { + "epoch": 1.528439830384559, + "grad_norm": 1.724451812949585, + "learning_rate": 2.893127585154205e-06, + "loss": 0.43327242136001587, + "step": 5227 + }, + { + "epoch": 1.5287322707998245, + "grad_norm": 1.780345207484487, + "learning_rate": 2.889727160303266e-06, + "loss": 0.6423674821853638, + "step": 5228 + }, + { + "epoch": 1.5290247112150899, + "grad_norm": 1.5540524492201802, + "learning_rate": 2.886328397435374e-06, + "loss": 0.5263554453849792, + "step": 5229 + }, + { + "epoch": 1.5293171516303552, + "grad_norm": 1.6433428703006638, + "learning_rate": 2.882931297344965e-06, + "loss": 0.4111948013305664, + "step": 5230 + }, + { + "epoch": 1.5296095920456207, + "grad_norm": 1.804627326985323, + "learning_rate": 2.8795358608260936e-06, + "loss": 0.43803131580352783, + "step": 5231 + }, + { + "epoch": 1.529902032460886, + "grad_norm": 1.5504311785369362, + "learning_rate": 2.8761420886724223e-06, + "loss": 0.4708956778049469, + "step": 5232 + }, + { + "epoch": 1.5301944728761514, + "grad_norm": 1.7185936460565197, + "learning_rate": 2.8727499816772265e-06, + "loss": 0.5268635749816895, + "step": 5233 + }, + { + "epoch": 1.530486913291417, + "grad_norm": 1.6977720322438927, + "learning_rate": 2.869359540633385e-06, + "loss": 0.5092788934707642, + "step": 5234 + }, + { + "epoch": 1.5307793537066823, + "grad_norm": 1.630735809850627, + "learning_rate": 2.8659707663333958e-06, + "loss": 0.4603293836116791, + "step": 5235 + }, + { + "epoch": 1.5310717941219476, + "grad_norm": 1.7857705195277582, + "learning_rate": 2.8625836595693646e-06, + "loss": 0.545462965965271, + "step": 5236 + }, + { + "epoch": 1.5313642345372132, + "grad_norm": 1.6146415057105645, + "learning_rate": 2.8591982211330073e-06, + "loss": 0.511603832244873, + "step": 5237 + }, + { + "epoch": 1.5316566749524785, + "grad_norm": 1.7935851159627383, + "learning_rate": 2.8558144518156485e-06, + "loss": 0.5076707601547241, + 
"step": 5238 + }, + { + "epoch": 1.5319491153677438, + "grad_norm": 1.7012818042378361, + "learning_rate": 2.852432352408224e-06, + "loss": 0.5923745632171631, + "step": 5239 + }, + { + "epoch": 1.5322415557830094, + "grad_norm": 1.8251553548092714, + "learning_rate": 2.849051923701279e-06, + "loss": 0.5588465332984924, + "step": 5240 + }, + { + "epoch": 1.5325339961982745, + "grad_norm": 1.6493521356208132, + "learning_rate": 2.845673166484969e-06, + "loss": 0.6681923270225525, + "step": 5241 + }, + { + "epoch": 1.53282643661354, + "grad_norm": 1.8683876960783266, + "learning_rate": 2.8422960815490564e-06, + "loss": 0.5702543258666992, + "step": 5242 + }, + { + "epoch": 1.5331188770288053, + "grad_norm": 1.8090012581479555, + "learning_rate": 2.8389206696829165e-06, + "loss": 0.5401744842529297, + "step": 5243 + }, + { + "epoch": 1.5334113174440707, + "grad_norm": 1.6641276436242072, + "learning_rate": 2.8355469316755324e-06, + "loss": 0.43371906876564026, + "step": 5244 + }, + { + "epoch": 1.5337037578593362, + "grad_norm": 1.6323739542625777, + "learning_rate": 2.8321748683154893e-06, + "loss": 0.5598163604736328, + "step": 5245 + }, + { + "epoch": 1.5339961982746015, + "grad_norm": 1.8330291281030966, + "learning_rate": 2.8288044803909896e-06, + "loss": 0.5836831331253052, + "step": 5246 + }, + { + "epoch": 1.5342886386898669, + "grad_norm": 1.6637462764959579, + "learning_rate": 2.8254357686898404e-06, + "loss": 0.5308898687362671, + "step": 5247 + }, + { + "epoch": 1.5345810791051324, + "grad_norm": 1.7589253104867197, + "learning_rate": 2.822068733999459e-06, + "loss": 0.6104828119277954, + "step": 5248 + }, + { + "epoch": 1.5348735195203977, + "grad_norm": 1.9266285032289332, + "learning_rate": 2.8187033771068685e-06, + "loss": 0.48373985290527344, + "step": 5249 + }, + { + "epoch": 1.535165959935663, + "grad_norm": 1.745809860715047, + "learning_rate": 2.8153396987987e-06, + "loss": 0.5213532447814941, + "step": 5250 + }, + { + "epoch": 1.5354584003509286, 
+ "grad_norm": 1.7052291407432676, + "learning_rate": 2.811977699861195e-06, + "loss": 0.5241051912307739, + "step": 5251 + }, + { + "epoch": 1.5357508407661937, + "grad_norm": 1.54399807563896, + "learning_rate": 2.8086173810801974e-06, + "loss": 0.48321712017059326, + "step": 5252 + }, + { + "epoch": 1.5360432811814593, + "grad_norm": 1.831716416150244, + "learning_rate": 2.8052587432411626e-06, + "loss": 0.5352765917778015, + "step": 5253 + }, + { + "epoch": 1.5363357215967248, + "grad_norm": 1.7051244593885417, + "learning_rate": 2.8019017871291522e-06, + "loss": 0.5402188301086426, + "step": 5254 + }, + { + "epoch": 1.53662816201199, + "grad_norm": 1.5780940900489064, + "learning_rate": 2.798546513528837e-06, + "loss": 0.4398813545703888, + "step": 5255 + }, + { + "epoch": 1.5369206024272555, + "grad_norm": 1.6682503262337565, + "learning_rate": 2.7951929232244855e-06, + "loss": 0.5661803483963013, + "step": 5256 + }, + { + "epoch": 1.5372130428425208, + "grad_norm": 1.9389870116334766, + "learning_rate": 2.791841016999982e-06, + "loss": 0.5051732063293457, + "step": 5257 + }, + { + "epoch": 1.5375054832577861, + "grad_norm": 1.7323475801875265, + "learning_rate": 2.788490795638815e-06, + "loss": 0.5712389945983887, + "step": 5258 + }, + { + "epoch": 1.5377979236730517, + "grad_norm": 1.7189716580722423, + "learning_rate": 2.7851422599240773e-06, + "loss": 0.6257319450378418, + "step": 5259 + }, + { + "epoch": 1.538090364088317, + "grad_norm": 1.7862483931054027, + "learning_rate": 2.7817954106384704e-06, + "loss": 0.5788396596908569, + "step": 5260 + }, + { + "epoch": 1.5383828045035823, + "grad_norm": 1.508089974245087, + "learning_rate": 2.7784502485642985e-06, + "loss": 0.37253260612487793, + "step": 5261 + }, + { + "epoch": 1.5386752449188479, + "grad_norm": 2.206166372523085, + "learning_rate": 2.7751067744834726e-06, + "loss": 0.6547001004219055, + "step": 5262 + }, + { + "epoch": 1.5389676853341132, + "grad_norm": 1.551783656656575, + "learning_rate": 
2.77176498917751e-06, + "loss": 0.510914146900177, + "step": 5263 + }, + { + "epoch": 1.5392601257493785, + "grad_norm": 1.731638922465708, + "learning_rate": 2.7684248934275327e-06, + "loss": 0.4387754201889038, + "step": 5264 + }, + { + "epoch": 1.539552566164644, + "grad_norm": 1.573259655998941, + "learning_rate": 2.765086488014268e-06, + "loss": 0.5640195608139038, + "step": 5265 + }, + { + "epoch": 1.5398450065799092, + "grad_norm": 2.3327619392306684, + "learning_rate": 2.7617497737180508e-06, + "loss": 0.5780993103981018, + "step": 5266 + }, + { + "epoch": 1.5401374469951747, + "grad_norm": 1.7296077762304434, + "learning_rate": 2.758414751318813e-06, + "loss": 0.5190057158470154, + "step": 5267 + }, + { + "epoch": 1.54042988741044, + "grad_norm": 1.6180118608432006, + "learning_rate": 2.7550814215960964e-06, + "loss": 0.4204869270324707, + "step": 5268 + }, + { + "epoch": 1.5407223278257054, + "grad_norm": 1.5345717637092124, + "learning_rate": 2.7517497853290477e-06, + "loss": 0.5649294853210449, + "step": 5269 + }, + { + "epoch": 1.541014768240971, + "grad_norm": 1.8541084629609554, + "learning_rate": 2.748419843296416e-06, + "loss": 0.49142545461654663, + "step": 5270 + }, + { + "epoch": 1.5413072086562363, + "grad_norm": 2.006144774477858, + "learning_rate": 2.745091596276557e-06, + "loss": 0.483539879322052, + "step": 5271 + }, + { + "epoch": 1.5415996490715016, + "grad_norm": 1.8772157933692841, + "learning_rate": 2.7417650450474253e-06, + "loss": 0.5400283336639404, + "step": 5272 + }, + { + "epoch": 1.5418920894867671, + "grad_norm": 1.6915167892784866, + "learning_rate": 2.7384401903865844e-06, + "loss": 0.5490765571594238, + "step": 5273 + }, + { + "epoch": 1.5421845299020325, + "grad_norm": 2.267512124400057, + "learning_rate": 2.7351170330711975e-06, + "loss": 0.5434873700141907, + "step": 5274 + }, + { + "epoch": 1.5424769703172978, + "grad_norm": 1.8064402200670897, + "learning_rate": 2.7317955738780333e-06, + "loss": 0.6195025444030762, + 
"step": 5275 + }, + { + "epoch": 1.5427694107325634, + "grad_norm": 1.6751288499310806, + "learning_rate": 2.728475813583462e-06, + "loss": 0.5552260875701904, + "step": 5276 + }, + { + "epoch": 1.5430618511478287, + "grad_norm": 1.8146552227089312, + "learning_rate": 2.725157752963461e-06, + "loss": 0.5430501699447632, + "step": 5277 + }, + { + "epoch": 1.543354291563094, + "grad_norm": 2.1339271947469047, + "learning_rate": 2.7218413927936006e-06, + "loss": 0.633337676525116, + "step": 5278 + }, + { + "epoch": 1.5436467319783596, + "grad_norm": 1.6483089945499043, + "learning_rate": 2.718526733849062e-06, + "loss": 0.4974183738231659, + "step": 5279 + }, + { + "epoch": 1.5439391723936247, + "grad_norm": 2.06701718299293, + "learning_rate": 2.715213776904628e-06, + "loss": 0.5840449929237366, + "step": 5280 + }, + { + "epoch": 1.5442316128088902, + "grad_norm": 1.480832016038464, + "learning_rate": 2.7119025227346807e-06, + "loss": 0.4684101343154907, + "step": 5281 + }, + { + "epoch": 1.5445240532241555, + "grad_norm": 1.5849030043466241, + "learning_rate": 2.7085929721132078e-06, + "loss": 0.48402637243270874, + "step": 5282 + }, + { + "epoch": 1.5448164936394209, + "grad_norm": 1.6449199299919448, + "learning_rate": 2.7052851258137936e-06, + "loss": 0.6122831106185913, + "step": 5283 + }, + { + "epoch": 1.5451089340546864, + "grad_norm": 1.6951661547391625, + "learning_rate": 2.701978984609629e-06, + "loss": 0.5731217861175537, + "step": 5284 + }, + { + "epoch": 1.5454013744699517, + "grad_norm": 1.869052563685483, + "learning_rate": 2.6986745492735044e-06, + "loss": 0.5610803961753845, + "step": 5285 + }, + { + "epoch": 1.545693814885217, + "grad_norm": 1.4190791359210344, + "learning_rate": 2.695371820577811e-06, + "loss": 0.46112626791000366, + "step": 5286 + }, + { + "epoch": 1.5459862553004826, + "grad_norm": 2.1150576387004247, + "learning_rate": 2.692070799294542e-06, + "loss": 0.5368741154670715, + "step": 5287 + }, + { + "epoch": 1.546278695715748, + 
"grad_norm": 1.905327182706658, + "learning_rate": 2.688771486195293e-06, + "loss": 0.5991438627243042, + "step": 5288 + }, + { + "epoch": 1.5465711361310133, + "grad_norm": 1.9084615434749013, + "learning_rate": 2.685473882051254e-06, + "loss": 0.5751149654388428, + "step": 5289 + }, + { + "epoch": 1.5468635765462788, + "grad_norm": 2.0751264575493247, + "learning_rate": 2.682177987633221e-06, + "loss": 0.6055437326431274, + "step": 5290 + }, + { + "epoch": 1.547156016961544, + "grad_norm": 1.8883429200709412, + "learning_rate": 2.6788838037115916e-06, + "loss": 0.6009221076965332, + "step": 5291 + }, + { + "epoch": 1.5474484573768095, + "grad_norm": 1.8170478309101001, + "learning_rate": 2.6755913310563585e-06, + "loss": 0.6071531772613525, + "step": 5292 + }, + { + "epoch": 1.547740897792075, + "grad_norm": 1.4851824864906211, + "learning_rate": 2.6723005704371164e-06, + "loss": 0.4102080464363098, + "step": 5293 + }, + { + "epoch": 1.5480333382073401, + "grad_norm": 1.861843061560023, + "learning_rate": 2.6690115226230663e-06, + "loss": 0.48021870851516724, + "step": 5294 + }, + { + "epoch": 1.5483257786226057, + "grad_norm": 1.916351154521063, + "learning_rate": 2.665724188382999e-06, + "loss": 0.4893236458301544, + "step": 5295 + }, + { + "epoch": 1.548618219037871, + "grad_norm": 1.611822755629755, + "learning_rate": 2.6624385684853095e-06, + "loss": 0.6365019083023071, + "step": 5296 + }, + { + "epoch": 1.5489106594531363, + "grad_norm": 1.8901541843584413, + "learning_rate": 2.659154663697995e-06, + "loss": 0.46510767936706543, + "step": 5297 + }, + { + "epoch": 1.5492030998684019, + "grad_norm": 1.4887188273793392, + "learning_rate": 2.655872474788641e-06, + "loss": 0.4355175495147705, + "step": 5298 + }, + { + "epoch": 1.5494955402836672, + "grad_norm": 1.3536753107928572, + "learning_rate": 2.6525920025244432e-06, + "loss": 0.5180836915969849, + "step": 5299 + }, + { + "epoch": 1.5497879806989325, + "grad_norm": 1.9072335806805663, + "learning_rate": 
2.6493132476721927e-06, + "loss": 0.5597968101501465, + "step": 5300 + }, + { + "epoch": 1.550080421114198, + "grad_norm": 1.7134796878533993, + "learning_rate": 2.646036210998276e-06, + "loss": 0.6581016778945923, + "step": 5301 + }, + { + "epoch": 1.5503728615294634, + "grad_norm": 1.8671635537156963, + "learning_rate": 2.642760893268684e-06, + "loss": 0.4875848889350891, + "step": 5302 + }, + { + "epoch": 1.5506653019447287, + "grad_norm": 1.571897962721608, + "learning_rate": 2.639487295248999e-06, + "loss": 0.4410843253135681, + "step": 5303 + }, + { + "epoch": 1.5509577423599943, + "grad_norm": 1.8113376757557438, + "learning_rate": 2.6362154177044076e-06, + "loss": 0.5829580426216125, + "step": 5304 + }, + { + "epoch": 1.5512501827752594, + "grad_norm": 1.6979805053981243, + "learning_rate": 2.6329452613996886e-06, + "loss": 0.6281459927558899, + "step": 5305 + }, + { + "epoch": 1.551542623190525, + "grad_norm": 1.6778942363253981, + "learning_rate": 2.629676827099222e-06, + "loss": 0.525640606880188, + "step": 5306 + }, + { + "epoch": 1.5518350636057903, + "grad_norm": 1.710219412838542, + "learning_rate": 2.626410115566985e-06, + "loss": 0.5219406485557556, + "step": 5307 + }, + { + "epoch": 1.5521275040210556, + "grad_norm": 1.7812622188686809, + "learning_rate": 2.623145127566555e-06, + "loss": 0.5120927691459656, + "step": 5308 + }, + { + "epoch": 1.5524199444363211, + "grad_norm": 1.856533490372594, + "learning_rate": 2.6198818638610967e-06, + "loss": 0.586410641670227, + "step": 5309 + }, + { + "epoch": 1.5527123848515865, + "grad_norm": 1.726189213717832, + "learning_rate": 2.6166203252133803e-06, + "loss": 0.5014485120773315, + "step": 5310 + }, + { + "epoch": 1.5530048252668518, + "grad_norm": 1.7251785105103856, + "learning_rate": 2.6133605123857707e-06, + "loss": 0.5087070465087891, + "step": 5311 + }, + { + "epoch": 1.5532972656821173, + "grad_norm": 1.9411711444593984, + "learning_rate": 2.610102426140231e-06, + "loss": 0.5829774737358093, + 
"step": 5312 + }, + { + "epoch": 1.5535897060973827, + "grad_norm": 1.9403338817582965, + "learning_rate": 2.6068460672383166e-06, + "loss": 0.5273870229721069, + "step": 5313 + }, + { + "epoch": 1.553882146512648, + "grad_norm": 1.6781304796241345, + "learning_rate": 2.603591436441183e-06, + "loss": 0.528778076171875, + "step": 5314 + }, + { + "epoch": 1.5541745869279135, + "grad_norm": 1.6477790459502455, + "learning_rate": 2.600338534509581e-06, + "loss": 0.4914259612560272, + "step": 5315 + }, + { + "epoch": 1.5544670273431789, + "grad_norm": 1.5838952242674544, + "learning_rate": 2.597087362203855e-06, + "loss": 0.48063480854034424, + "step": 5316 + }, + { + "epoch": 1.5547594677584442, + "grad_norm": 1.6948007690415343, + "learning_rate": 2.593837920283949e-06, + "loss": 0.4406088888645172, + "step": 5317 + }, + { + "epoch": 1.5550519081737098, + "grad_norm": 1.5839061375343884, + "learning_rate": 2.590590209509398e-06, + "loss": 0.5027159452438354, + "step": 5318 + }, + { + "epoch": 1.5553443485889749, + "grad_norm": 1.447462212774582, + "learning_rate": 2.5873442306393357e-06, + "loss": 0.3894188404083252, + "step": 5319 + }, + { + "epoch": 1.5556367890042404, + "grad_norm": 1.8834380096125083, + "learning_rate": 2.584099984432492e-06, + "loss": 0.5393104553222656, + "step": 5320 + }, + { + "epoch": 1.5559292294195057, + "grad_norm": 1.640256381642302, + "learning_rate": 2.580857471647186e-06, + "loss": 0.5701737999916077, + "step": 5321 + }, + { + "epoch": 1.556221669834771, + "grad_norm": 1.9050066043706444, + "learning_rate": 2.577616693041336e-06, + "loss": 0.6173145174980164, + "step": 5322 + }, + { + "epoch": 1.5565141102500366, + "grad_norm": 1.718666562714064, + "learning_rate": 2.5743776493724548e-06, + "loss": 0.534600555896759, + "step": 5323 + }, + { + "epoch": 1.556806550665302, + "grad_norm": 1.7258193752543447, + "learning_rate": 2.571140341397651e-06, + "loss": 0.5205268859863281, + "step": 5324 + }, + { + "epoch": 1.5570989910805673, + 
"grad_norm": 1.9160383524514086, + "learning_rate": 2.5679047698736224e-06, + "loss": 0.5631835460662842, + "step": 5325 + }, + { + "epoch": 1.5573914314958328, + "grad_norm": 1.786367865175988, + "learning_rate": 2.564670935556667e-06, + "loss": 0.5855015516281128, + "step": 5326 + }, + { + "epoch": 1.5576838719110981, + "grad_norm": 1.538967985462843, + "learning_rate": 2.5614388392026735e-06, + "loss": 0.5219928026199341, + "step": 5327 + }, + { + "epoch": 1.5579763123263635, + "grad_norm": 1.6118392863192783, + "learning_rate": 2.5582084815671225e-06, + "loss": 0.50178462266922, + "step": 5328 + }, + { + "epoch": 1.558268752741629, + "grad_norm": 1.65351304969076, + "learning_rate": 2.554979863405094e-06, + "loss": 0.643866777420044, + "step": 5329 + }, + { + "epoch": 1.5585611931568941, + "grad_norm": 1.6117676019433484, + "learning_rate": 2.5517529854712543e-06, + "loss": 0.4976714849472046, + "step": 5330 + }, + { + "epoch": 1.5588536335721597, + "grad_norm": 1.6012275122207043, + "learning_rate": 2.5485278485198716e-06, + "loss": 0.47352612018585205, + "step": 5331 + }, + { + "epoch": 1.5591460739874252, + "grad_norm": 1.5967917267320113, + "learning_rate": 2.5453044533047955e-06, + "loss": 0.6319230794906616, + "step": 5332 + }, + { + "epoch": 1.5594385144026903, + "grad_norm": 1.9005541524381997, + "learning_rate": 2.5420828005794786e-06, + "loss": 0.724555253982544, + "step": 5333 + }, + { + "epoch": 1.5597309548179559, + "grad_norm": 1.987695201205215, + "learning_rate": 2.5388628910969625e-06, + "loss": 0.6235928535461426, + "step": 5334 + }, + { + "epoch": 1.5600233952332212, + "grad_norm": 1.9501926966829706, + "learning_rate": 2.5356447256098805e-06, + "loss": 0.47880417108535767, + "step": 5335 + }, + { + "epoch": 1.5603158356484865, + "grad_norm": 1.451114547860928, + "learning_rate": 2.53242830487046e-06, + "loss": 0.3986828029155731, + "step": 5336 + }, + { + "epoch": 1.560608276063752, + "grad_norm": 1.747029246487311, + "learning_rate": 
2.529213629630519e-06, + "loss": 0.515389084815979, + "step": 5337 + }, + { + "epoch": 1.5609007164790174, + "grad_norm": 1.4773319281213657, + "learning_rate": 2.52600070064147e-06, + "loss": 0.611845076084137, + "step": 5338 + }, + { + "epoch": 1.5611931568942827, + "grad_norm": 1.4758258492307896, + "learning_rate": 2.522789518654314e-06, + "loss": 0.4417461156845093, + "step": 5339 + }, + { + "epoch": 1.5614855973095483, + "grad_norm": 1.819505142519117, + "learning_rate": 2.519580084419646e-06, + "loss": 0.5082979798316956, + "step": 5340 + }, + { + "epoch": 1.5617780377248136, + "grad_norm": 1.6547823991622836, + "learning_rate": 2.516372398687652e-06, + "loss": 0.4535973072052002, + "step": 5341 + }, + { + "epoch": 1.562070478140079, + "grad_norm": 1.5836674832459754, + "learning_rate": 2.513166462208111e-06, + "loss": 0.5528950095176697, + "step": 5342 + }, + { + "epoch": 1.5623629185553445, + "grad_norm": 1.9642626952112248, + "learning_rate": 2.5099622757303865e-06, + "loss": 0.6272662281990051, + "step": 5343 + }, + { + "epoch": 1.5626553589706096, + "grad_norm": 1.6065246572629583, + "learning_rate": 2.506759840003439e-06, + "loss": 0.602135181427002, + "step": 5344 + }, + { + "epoch": 1.5629477993858751, + "grad_norm": 1.6289588222907745, + "learning_rate": 2.5035591557758197e-06, + "loss": 0.6336733102798462, + "step": 5345 + }, + { + "epoch": 1.5632402398011405, + "grad_norm": 1.6487862192612195, + "learning_rate": 2.500360223795668e-06, + "loss": 0.5819063186645508, + "step": 5346 + }, + { + "epoch": 1.5635326802164058, + "grad_norm": 1.9625665043715836, + "learning_rate": 2.4971630448107166e-06, + "loss": 0.6384624242782593, + "step": 5347 + }, + { + "epoch": 1.5638251206316713, + "grad_norm": 1.7408709214756897, + "learning_rate": 2.493967619568285e-06, + "loss": 0.5495754480361938, + "step": 5348 + }, + { + "epoch": 1.5641175610469367, + "grad_norm": 1.7544921790911043, + "learning_rate": 2.490773948815284e-06, + "loss": 0.5661545395851135, + 
"step": 5349 + }, + { + "epoch": 1.564410001462202, + "grad_norm": 1.6122536544450556, + "learning_rate": 2.487582033298217e-06, + "loss": 0.47731083631515503, + "step": 5350 + }, + { + "epoch": 1.5647024418774675, + "grad_norm": 1.6660059461046859, + "learning_rate": 2.4843918737631724e-06, + "loss": 0.5081999897956848, + "step": 5351 + }, + { + "epoch": 1.5649948822927329, + "grad_norm": 1.7409567692793637, + "learning_rate": 2.481203470955832e-06, + "loss": 0.4803314208984375, + "step": 5352 + }, + { + "epoch": 1.5652873227079982, + "grad_norm": 1.5751543533365946, + "learning_rate": 2.4780168256214687e-06, + "loss": 0.5049692392349243, + "step": 5353 + }, + { + "epoch": 1.5655797631232637, + "grad_norm": 1.5980094392584046, + "learning_rate": 2.4748319385049346e-06, + "loss": 0.46404945850372314, + "step": 5354 + }, + { + "epoch": 1.565872203538529, + "grad_norm": 1.8809652221147528, + "learning_rate": 2.471648810350681e-06, + "loss": 0.426737904548645, + "step": 5355 + }, + { + "epoch": 1.5661646439537944, + "grad_norm": 1.8658447876398343, + "learning_rate": 2.4684674419027445e-06, + "loss": 0.511459231376648, + "step": 5356 + }, + { + "epoch": 1.56645708436906, + "grad_norm": 1.6030611377734088, + "learning_rate": 2.4652878339047516e-06, + "loss": 0.5199254155158997, + "step": 5357 + }, + { + "epoch": 1.566749524784325, + "grad_norm": 1.8647690278368902, + "learning_rate": 2.4621099870999156e-06, + "loss": 0.6220999360084534, + "step": 5358 + }, + { + "epoch": 1.5670419651995906, + "grad_norm": 1.6243824818203765, + "learning_rate": 2.4589339022310386e-06, + "loss": 0.598499059677124, + "step": 5359 + }, + { + "epoch": 1.567334405614856, + "grad_norm": 1.6070369897776633, + "learning_rate": 2.455759580040512e-06, + "loss": 0.4726351499557495, + "step": 5360 + }, + { + "epoch": 1.5676268460301213, + "grad_norm": 1.5276631939356082, + "learning_rate": 2.452587021270314e-06, + "loss": 0.4492379426956177, + "step": 5361 + }, + { + "epoch": 1.5679192864453868, + 
"grad_norm": 1.5322598639207448, + "learning_rate": 2.4494162266620105e-06, + "loss": 0.46546655893325806, + "step": 5362 + }, + { + "epoch": 1.5682117268606521, + "grad_norm": 1.5784589531224524, + "learning_rate": 2.446247196956756e-06, + "loss": 0.45048198103904724, + "step": 5363 + }, + { + "epoch": 1.5685041672759175, + "grad_norm": 1.7001549698958467, + "learning_rate": 2.4430799328952935e-06, + "loss": 0.543383002281189, + "step": 5364 + }, + { + "epoch": 1.568796607691183, + "grad_norm": 1.881054972907132, + "learning_rate": 2.4399144352179484e-06, + "loss": 0.560661256313324, + "step": 5365 + }, + { + "epoch": 1.5690890481064483, + "grad_norm": 1.7380225532335671, + "learning_rate": 2.4367507046646367e-06, + "loss": 0.4915887117385864, + "step": 5366 + }, + { + "epoch": 1.5693814885217137, + "grad_norm": 3.6756946542988396, + "learning_rate": 2.433588741974863e-06, + "loss": 0.576668918132782, + "step": 5367 + }, + { + "epoch": 1.5696739289369792, + "grad_norm": 1.9696979271734443, + "learning_rate": 2.4304285478877134e-06, + "loss": 0.615422248840332, + "step": 5368 + }, + { + "epoch": 1.5699663693522443, + "grad_norm": 1.7262412669866045, + "learning_rate": 2.4272701231418706e-06, + "loss": 0.505649209022522, + "step": 5369 + }, + { + "epoch": 1.5702588097675099, + "grad_norm": 1.6721925296757776, + "learning_rate": 2.424113468475593e-06, + "loss": 0.4803265929222107, + "step": 5370 + }, + { + "epoch": 1.5705512501827754, + "grad_norm": 1.5546849518292136, + "learning_rate": 2.4209585846267293e-06, + "loss": 0.43251073360443115, + "step": 5371 + }, + { + "epoch": 1.5708436905980405, + "grad_norm": 1.517432850414526, + "learning_rate": 2.417805472332716e-06, + "loss": 0.6021081209182739, + "step": 5372 + }, + { + "epoch": 1.571136131013306, + "grad_norm": 1.5438721648404399, + "learning_rate": 2.414654132330575e-06, + "loss": 0.5236715078353882, + "step": 5373 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 1.7272971424194805, + "learning_rate": 
2.4115045653569092e-06, + "loss": 0.45632290840148926, + "step": 5374 + }, + { + "epoch": 1.5717210118438367, + "grad_norm": 1.51681371819029, + "learning_rate": 2.408356772147912e-06, + "loss": 0.5745086669921875, + "step": 5375 + }, + { + "epoch": 1.5720134522591023, + "grad_norm": 1.7235832219181546, + "learning_rate": 2.405210753439361e-06, + "loss": 0.6032901406288147, + "step": 5376 + }, + { + "epoch": 1.5723058926743676, + "grad_norm": 1.9887425059975659, + "learning_rate": 2.40206650996662e-06, + "loss": 0.579899787902832, + "step": 5377 + }, + { + "epoch": 1.572598333089633, + "grad_norm": 1.84593228973349, + "learning_rate": 2.3989240424646355e-06, + "loss": 0.5920897722244263, + "step": 5378 + }, + { + "epoch": 1.5728907735048985, + "grad_norm": 1.6814027292095717, + "learning_rate": 2.395783351667941e-06, + "loss": 0.5080469846725464, + "step": 5379 + }, + { + "epoch": 1.5731832139201638, + "grad_norm": 1.6852885660534134, + "learning_rate": 2.392644438310654e-06, + "loss": 0.6438730955123901, + "step": 5380 + }, + { + "epoch": 1.5734756543354291, + "grad_norm": 1.5835392817230642, + "learning_rate": 2.389507303126475e-06, + "loss": 0.6496621370315552, + "step": 5381 + }, + { + "epoch": 1.5737680947506947, + "grad_norm": 2.056471050614057, + "learning_rate": 2.3863719468486925e-06, + "loss": 0.5780459642410278, + "step": 5382 + }, + { + "epoch": 1.5740605351659598, + "grad_norm": 1.6854861118133662, + "learning_rate": 2.3832383702101747e-06, + "loss": 0.47817176580429077, + "step": 5383 + }, + { + "epoch": 1.5743529755812253, + "grad_norm": 1.8294128359408837, + "learning_rate": 2.3801065739433816e-06, + "loss": 0.565629243850708, + "step": 5384 + }, + { + "epoch": 1.5746454159964907, + "grad_norm": 1.6612699899563574, + "learning_rate": 2.376976558780343e-06, + "loss": 0.6291453838348389, + "step": 5385 + }, + { + "epoch": 1.574937856411756, + "grad_norm": 1.538236610732314, + "learning_rate": 2.3738483254526856e-06, + "loss": 0.5309170484542847, + 
"step": 5386 + }, + { + "epoch": 1.5752302968270215, + "grad_norm": 1.5901478294831086, + "learning_rate": 2.370721874691614e-06, + "loss": 0.36860692501068115, + "step": 5387 + }, + { + "epoch": 1.5755227372422869, + "grad_norm": 1.4970687777761233, + "learning_rate": 2.3675972072279172e-06, + "loss": 0.4871997833251953, + "step": 5388 + }, + { + "epoch": 1.5758151776575522, + "grad_norm": 1.7243858787556505, + "learning_rate": 2.3644743237919674e-06, + "loss": 0.5318939685821533, + "step": 5389 + }, + { + "epoch": 1.5761076180728177, + "grad_norm": 1.6509311118620078, + "learning_rate": 2.3613532251137205e-06, + "loss": 0.5851289629936218, + "step": 5390 + }, + { + "epoch": 1.576400058488083, + "grad_norm": 1.7554122423009038, + "learning_rate": 2.358233911922713e-06, + "loss": 0.5535321235656738, + "step": 5391 + }, + { + "epoch": 1.5766924989033484, + "grad_norm": 1.6614076147074466, + "learning_rate": 2.3551163849480664e-06, + "loss": 0.5443980693817139, + "step": 5392 + }, + { + "epoch": 1.576984939318614, + "grad_norm": 1.7236213464789372, + "learning_rate": 2.352000644918483e-06, + "loss": 0.6381241083145142, + "step": 5393 + }, + { + "epoch": 1.5772773797338793, + "grad_norm": 1.7284545309348427, + "learning_rate": 2.348886692562248e-06, + "loss": 0.5710772275924683, + "step": 5394 + }, + { + "epoch": 1.5775698201491446, + "grad_norm": 1.5430684665624785, + "learning_rate": 2.3457745286072307e-06, + "loss": 0.5507428050041199, + "step": 5395 + }, + { + "epoch": 1.5778622605644101, + "grad_norm": 1.4206197407713899, + "learning_rate": 2.342664153780878e-06, + "loss": 0.4475744366645813, + "step": 5396 + }, + { + "epoch": 1.5781547009796753, + "grad_norm": 1.636583588423456, + "learning_rate": 2.339555568810221e-06, + "loss": 0.5237560868263245, + "step": 5397 + }, + { + "epoch": 1.5784471413949408, + "grad_norm": 1.8224385271688819, + "learning_rate": 2.3364487744218735e-06, + "loss": 0.513353705406189, + "step": 5398 + }, + { + "epoch": 1.5787395818102061, 
+ "grad_norm": 1.7286392562782233, + "learning_rate": 2.3333437713420305e-06, + "loss": 0.5986731052398682, + "step": 5399 + }, + { + "epoch": 1.5790320222254715, + "grad_norm": 1.5907081834202914, + "learning_rate": 2.330240560296466e-06, + "loss": 0.5834506750106812, + "step": 5400 + }, + { + "epoch": 1.579324462640737, + "grad_norm": 1.4316449017872799, + "learning_rate": 2.3271391420105384e-06, + "loss": 0.4756021499633789, + "step": 5401 + }, + { + "epoch": 1.5796169030560023, + "grad_norm": 1.828748410964233, + "learning_rate": 2.3240395172091847e-06, + "loss": 0.5524263978004456, + "step": 5402 + }, + { + "epoch": 1.5799093434712677, + "grad_norm": 1.7797701447484084, + "learning_rate": 2.320941686616922e-06, + "loss": 0.5689926743507385, + "step": 5403 + }, + { + "epoch": 1.5802017838865332, + "grad_norm": 2.079791124123793, + "learning_rate": 2.317845650957852e-06, + "loss": 0.5737600326538086, + "step": 5404 + }, + { + "epoch": 1.5804942243017985, + "grad_norm": 2.1591480990218406, + "learning_rate": 2.314751410955652e-06, + "loss": 0.585626482963562, + "step": 5405 + }, + { + "epoch": 1.5807866647170639, + "grad_norm": 1.3475179143489473, + "learning_rate": 2.3116589673335833e-06, + "loss": 0.4410518407821655, + "step": 5406 + }, + { + "epoch": 1.5810791051323294, + "grad_norm": 1.4002471500541231, + "learning_rate": 2.308568320814487e-06, + "loss": 0.49071764945983887, + "step": 5407 + }, + { + "epoch": 1.5813715455475945, + "grad_norm": 1.7384943405251394, + "learning_rate": 2.3054794721207796e-06, + "loss": 0.5332186818122864, + "step": 5408 + }, + { + "epoch": 1.58166398596286, + "grad_norm": 1.672632129609112, + "learning_rate": 2.3023924219744607e-06, + "loss": 0.4655637741088867, + "step": 5409 + }, + { + "epoch": 1.5819564263781256, + "grad_norm": 1.8700821530052487, + "learning_rate": 2.2993071710971115e-06, + "loss": 0.4226027727127075, + "step": 5410 + }, + { + "epoch": 1.5822488667933907, + "grad_norm": 1.662889108823369, + "learning_rate": 
2.2962237202098903e-06, + "loss": 0.5582948923110962, + "step": 5411 + }, + { + "epoch": 1.5825413072086563, + "grad_norm": 1.9177043486104604, + "learning_rate": 2.293142070033535e-06, + "loss": 0.6695314645767212, + "step": 5412 + }, + { + "epoch": 1.5828337476239216, + "grad_norm": 1.3346239854361734, + "learning_rate": 2.2900622212883617e-06, + "loss": 0.39315858483314514, + "step": 5413 + }, + { + "epoch": 1.583126188039187, + "grad_norm": 1.6781692583647863, + "learning_rate": 2.2869841746942666e-06, + "loss": 0.5034759044647217, + "step": 5414 + }, + { + "epoch": 1.5834186284544525, + "grad_norm": 1.9091862181504, + "learning_rate": 2.2839079309707256e-06, + "loss": 0.6739548444747925, + "step": 5415 + }, + { + "epoch": 1.5837110688697178, + "grad_norm": 1.700292089346711, + "learning_rate": 2.2808334908367914e-06, + "loss": 0.4091438949108124, + "step": 5416 + }, + { + "epoch": 1.5840035092849831, + "grad_norm": 1.9132208987373394, + "learning_rate": 2.277760855011094e-06, + "loss": 0.5543409585952759, + "step": 5417 + }, + { + "epoch": 1.5842959497002487, + "grad_norm": 1.5448108643055853, + "learning_rate": 2.2746900242118487e-06, + "loss": 0.44680702686309814, + "step": 5418 + }, + { + "epoch": 1.584588390115514, + "grad_norm": 1.812422444695138, + "learning_rate": 2.271620999156837e-06, + "loss": 0.604156494140625, + "step": 5419 + }, + { + "epoch": 1.5848808305307793, + "grad_norm": 1.7746704953171426, + "learning_rate": 2.268553780563427e-06, + "loss": 0.6055774688720703, + "step": 5420 + }, + { + "epoch": 1.5851732709460449, + "grad_norm": 1.6413153541100303, + "learning_rate": 2.265488369148563e-06, + "loss": 0.5826502442359924, + "step": 5421 + }, + { + "epoch": 1.58546571136131, + "grad_norm": 1.6438604610732335, + "learning_rate": 2.2624247656287658e-06, + "loss": 0.61782306432724, + "step": 5422 + }, + { + "epoch": 1.5857581517765755, + "grad_norm": 1.6412325546038886, + "learning_rate": 2.2593629707201348e-06, + "loss": 0.5561526417732239, + 
"step": 5423 + }, + { + "epoch": 1.5860505921918409, + "grad_norm": 1.934339107757701, + "learning_rate": 2.2563029851383447e-06, + "loss": 0.6122138500213623, + "step": 5424 + }, + { + "epoch": 1.5863430326071062, + "grad_norm": 1.7721974769204, + "learning_rate": 2.2532448095986504e-06, + "loss": 0.5694067478179932, + "step": 5425 + }, + { + "epoch": 1.5866354730223717, + "grad_norm": 2.0424311158796145, + "learning_rate": 2.2501884448158804e-06, + "loss": 0.5243874788284302, + "step": 5426 + }, + { + "epoch": 1.586927913437637, + "grad_norm": 1.8166715080001115, + "learning_rate": 2.2471338915044414e-06, + "loss": 0.5144485831260681, + "step": 5427 + }, + { + "epoch": 1.5872203538529024, + "grad_norm": 1.853424108367526, + "learning_rate": 2.244081150378318e-06, + "loss": 0.5013881325721741, + "step": 5428 + }, + { + "epoch": 1.587512794268168, + "grad_norm": 1.7554305935150418, + "learning_rate": 2.2410302221510704e-06, + "loss": 0.45199382305145264, + "step": 5429 + }, + { + "epoch": 1.5878052346834333, + "grad_norm": 1.7321007114143003, + "learning_rate": 2.2379811075358315e-06, + "loss": 0.4699060022830963, + "step": 5430 + }, + { + "epoch": 1.5880976750986986, + "grad_norm": 1.6542253790144112, + "learning_rate": 2.234933807245314e-06, + "loss": 0.6530928611755371, + "step": 5431 + }, + { + "epoch": 1.5883901155139641, + "grad_norm": 2.1734435533671337, + "learning_rate": 2.2318883219918075e-06, + "loss": 0.653563380241394, + "step": 5432 + }, + { + "epoch": 1.5886825559292295, + "grad_norm": 1.6977334736027891, + "learning_rate": 2.2288446524871743e-06, + "loss": 0.5283595323562622, + "step": 5433 + }, + { + "epoch": 1.5889749963444948, + "grad_norm": 1.8947978942641126, + "learning_rate": 2.2258027994428543e-06, + "loss": 0.4382442831993103, + "step": 5434 + }, + { + "epoch": 1.5892674367597603, + "grad_norm": 1.5530659953902877, + "learning_rate": 2.2227627635698624e-06, + "loss": 0.427448570728302, + "step": 5435 + }, + { + "epoch": 1.5895598771750254, 
+ "grad_norm": 1.9154573086486193, + "learning_rate": 2.2197245455787875e-06, + "loss": 0.5794345140457153, + "step": 5436 + }, + { + "epoch": 1.589852317590291, + "grad_norm": 1.7112908574959096, + "learning_rate": 2.2166881461797953e-06, + "loss": 0.4996277987957001, + "step": 5437 + }, + { + "epoch": 1.5901447580055563, + "grad_norm": 1.6191576283665394, + "learning_rate": 2.213653566082625e-06, + "loss": 0.580248236656189, + "step": 5438 + }, + { + "epoch": 1.5904371984208217, + "grad_norm": 1.7775881602556973, + "learning_rate": 2.210620805996594e-06, + "loss": 0.5173758864402771, + "step": 5439 + }, + { + "epoch": 1.5907296388360872, + "grad_norm": 1.9899233221127093, + "learning_rate": 2.2075898666305908e-06, + "loss": 0.5336873531341553, + "step": 5440 + }, + { + "epoch": 1.5910220792513525, + "grad_norm": 1.6076505919691177, + "learning_rate": 2.204560748693074e-06, + "loss": 0.44921910762786865, + "step": 5441 + }, + { + "epoch": 1.5913145196666179, + "grad_norm": 1.6059480320951056, + "learning_rate": 2.201533452892086e-06, + "loss": 0.46475526690483093, + "step": 5442 + }, + { + "epoch": 1.5916069600818834, + "grad_norm": 1.9029311381102771, + "learning_rate": 2.1985079799352383e-06, + "loss": 0.6213991045951843, + "step": 5443 + }, + { + "epoch": 1.5918994004971487, + "grad_norm": 1.7215123021526133, + "learning_rate": 2.1954843305297138e-06, + "loss": 0.5271334648132324, + "step": 5444 + }, + { + "epoch": 1.592191840912414, + "grad_norm": 2.0224569757299333, + "learning_rate": 2.192462505382277e-06, + "loss": 0.6957610249519348, + "step": 5445 + }, + { + "epoch": 1.5924842813276796, + "grad_norm": 1.474394106198892, + "learning_rate": 2.1894425051992587e-06, + "loss": 0.4935681223869324, + "step": 5446 + }, + { + "epoch": 1.5927767217429447, + "grad_norm": 1.8779887346615283, + "learning_rate": 2.1864243306865663e-06, + "loss": 0.7389976978302002, + "step": 5447 + }, + { + "epoch": 1.5930691621582103, + "grad_norm": 1.6663547791548505, + 
"learning_rate": 2.183407982549679e-06, + "loss": 0.4711039662361145, + "step": 5448 + }, + { + "epoch": 1.5933616025734758, + "grad_norm": 1.7966119865723598, + "learning_rate": 2.180393461493654e-06, + "loss": 0.5640024542808533, + "step": 5449 + }, + { + "epoch": 1.593654042988741, + "grad_norm": 1.8836976245237465, + "learning_rate": 2.1773807682231095e-06, + "loss": 0.5471343994140625, + "step": 5450 + }, + { + "epoch": 1.5939464834040065, + "grad_norm": 1.948314000978572, + "learning_rate": 2.1743699034422483e-06, + "loss": 0.4971361756324768, + "step": 5451 + }, + { + "epoch": 1.5942389238192718, + "grad_norm": 1.816359724345797, + "learning_rate": 2.1713608678548414e-06, + "loss": 0.6338681578636169, + "step": 5452 + }, + { + "epoch": 1.5945313642345371, + "grad_norm": 1.5944974938870278, + "learning_rate": 2.168353662164233e-06, + "loss": 0.5218038558959961, + "step": 5453 + }, + { + "epoch": 1.5948238046498027, + "grad_norm": 1.4835669226996004, + "learning_rate": 2.165348287073339e-06, + "loss": 0.44414108991622925, + "step": 5454 + }, + { + "epoch": 1.595116245065068, + "grad_norm": 1.741912524884312, + "learning_rate": 2.162344743284647e-06, + "loss": 0.5994665622711182, + "step": 5455 + }, + { + "epoch": 1.5954086854803333, + "grad_norm": 1.8002543813503216, + "learning_rate": 2.159343031500217e-06, + "loss": 0.6745023727416992, + "step": 5456 + }, + { + "epoch": 1.5957011258955989, + "grad_norm": 1.8583415901166598, + "learning_rate": 2.1563431524216825e-06, + "loss": 0.4678364396095276, + "step": 5457 + }, + { + "epoch": 1.5959935663108642, + "grad_norm": 1.5733048792098263, + "learning_rate": 2.1533451067502464e-06, + "loss": 0.5792031288146973, + "step": 5458 + }, + { + "epoch": 1.5962860067261295, + "grad_norm": 1.575360514250564, + "learning_rate": 2.1503488951866822e-06, + "loss": 0.48152512311935425, + "step": 5459 + }, + { + "epoch": 1.596578447141395, + "grad_norm": 1.6753593421486697, + "learning_rate": 2.147354518431339e-06, + "loss": 
0.4407780170440674, + "step": 5460 + }, + { + "epoch": 1.5968708875566602, + "grad_norm": 1.5845476508430212, + "learning_rate": 2.1443619771841308e-06, + "loss": 0.41062241792678833, + "step": 5461 + }, + { + "epoch": 1.5971633279719257, + "grad_norm": 1.6329985009235597, + "learning_rate": 2.1413712721445478e-06, + "loss": 0.4564778208732605, + "step": 5462 + }, + { + "epoch": 1.597455768387191, + "grad_norm": 1.7819738842734478, + "learning_rate": 2.1383824040116474e-06, + "loss": 0.4347888231277466, + "step": 5463 + }, + { + "epoch": 1.5977482088024564, + "grad_norm": 1.7547086253653914, + "learning_rate": 2.1353953734840615e-06, + "loss": 0.574216902256012, + "step": 5464 + }, + { + "epoch": 1.598040649217722, + "grad_norm": 1.5449681232026575, + "learning_rate": 2.1324101812599884e-06, + "loss": 0.46540650725364685, + "step": 5465 + }, + { + "epoch": 1.5983330896329873, + "grad_norm": 1.7330971380509632, + "learning_rate": 2.129426828037201e-06, + "loss": 0.5446870923042297, + "step": 5466 + }, + { + "epoch": 1.5986255300482526, + "grad_norm": 1.5387720739202952, + "learning_rate": 2.126445314513038e-06, + "loss": 0.5442406535148621, + "step": 5467 + }, + { + "epoch": 1.5989179704635181, + "grad_norm": 1.5552673745283687, + "learning_rate": 2.1234656413844114e-06, + "loss": 0.48960334062576294, + "step": 5468 + }, + { + "epoch": 1.5992104108787835, + "grad_norm": 1.6554781479614895, + "learning_rate": 2.1204878093477998e-06, + "loss": 0.5053935647010803, + "step": 5469 + }, + { + "epoch": 1.5995028512940488, + "grad_norm": 1.9853825289751812, + "learning_rate": 2.117511819099256e-06, + "loss": 0.5984711647033691, + "step": 5470 + }, + { + "epoch": 1.5997952917093143, + "grad_norm": 1.7887732493049897, + "learning_rate": 2.1145376713344e-06, + "loss": 0.6060935258865356, + "step": 5471 + }, + { + "epoch": 1.6000877321245797, + "grad_norm": 1.7731884284372257, + "learning_rate": 2.111565366748416e-06, + "loss": 0.5640311241149902, + "step": 5472 + }, + { + 
"epoch": 1.600380172539845, + "grad_norm": 1.4780823569090165, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.5127131342887878, + "step": 5473 + }, + { + "epoch": 1.6006726129551105, + "grad_norm": 1.7137118890776333, + "learning_rate": 2.1056262898916747e-06, + "loss": 0.5630159378051758, + "step": 5474 + }, + { + "epoch": 1.6009650533703756, + "grad_norm": 1.6419339983794916, + "learning_rate": 2.1026595190091403e-06, + "loss": 0.4511195421218872, + "step": 5475 + }, + { + "epoch": 1.6012574937856412, + "grad_norm": 1.5933389134682139, + "learning_rate": 2.099694594081927e-06, + "loss": 0.47073638439178467, + "step": 5476 + }, + { + "epoch": 1.6015499342009065, + "grad_norm": 1.7678159005173808, + "learning_rate": 2.0967315158030675e-06, + "loss": 0.47757452726364136, + "step": 5477 + }, + { + "epoch": 1.6018423746161718, + "grad_norm": 1.612539233178663, + "learning_rate": 2.093770284865164e-06, + "loss": 0.4703200161457062, + "step": 5478 + }, + { + "epoch": 1.6021348150314374, + "grad_norm": 1.7112390228319339, + "learning_rate": 2.090810901960385e-06, + "loss": 0.47457796335220337, + "step": 5479 + }, + { + "epoch": 1.6024272554467027, + "grad_norm": 1.6069409002673796, + "learning_rate": 2.087853367780469e-06, + "loss": 0.4907105267047882, + "step": 5480 + }, + { + "epoch": 1.602719695861968, + "grad_norm": 1.8859078577608002, + "learning_rate": 2.0848976830167224e-06, + "loss": 0.5329782962799072, + "step": 5481 + }, + { + "epoch": 1.6030121362772336, + "grad_norm": 1.8407304692969428, + "learning_rate": 2.0819438483600197e-06, + "loss": 0.45858579874038696, + "step": 5482 + }, + { + "epoch": 1.603304576692499, + "grad_norm": 1.7103287599993058, + "learning_rate": 2.0789918645007977e-06, + "loss": 0.47545814514160156, + "step": 5483 + }, + { + "epoch": 1.6035970171077643, + "grad_norm": 1.7521375813446352, + "learning_rate": 2.076041732129066e-06, + "loss": 0.5482660531997681, + "step": 5484 + }, + { + "epoch": 1.6038894575230298, + "grad_norm": 
1.650951498750666, + "learning_rate": 2.0730934519344025e-06, + "loss": 0.5252633094787598, + "step": 5485 + }, + { + "epoch": 1.604181897938295, + "grad_norm": 2.7727108215969882, + "learning_rate": 2.0701470246059472e-06, + "loss": 0.5400367379188538, + "step": 5486 + }, + { + "epoch": 1.6044743383535605, + "grad_norm": 1.5423948281806983, + "learning_rate": 2.0672024508324107e-06, + "loss": 0.4788953363895416, + "step": 5487 + }, + { + "epoch": 1.604766778768826, + "grad_norm": 1.6092306606930025, + "learning_rate": 2.0642597313020685e-06, + "loss": 0.5430850982666016, + "step": 5488 + }, + { + "epoch": 1.6050592191840911, + "grad_norm": 1.8683302543522238, + "learning_rate": 2.061318866702765e-06, + "loss": 0.5833520293235779, + "step": 5489 + }, + { + "epoch": 1.6053516595993567, + "grad_norm": 1.7369107165445012, + "learning_rate": 2.058379857721908e-06, + "loss": 0.5854958295822144, + "step": 5490 + }, + { + "epoch": 1.605644100014622, + "grad_norm": 1.6603772170749127, + "learning_rate": 2.0554427050464742e-06, + "loss": 0.5577352643013, + "step": 5491 + }, + { + "epoch": 1.6059365404298873, + "grad_norm": 1.6757677840410201, + "learning_rate": 2.052507409363004e-06, + "loss": 0.5328816175460815, + "step": 5492 + }, + { + "epoch": 1.6062289808451529, + "grad_norm": 1.7643397031335737, + "learning_rate": 2.0495739713576046e-06, + "loss": 0.5606744289398193, + "step": 5493 + }, + { + "epoch": 1.6065214212604182, + "grad_norm": 1.7836115172074085, + "learning_rate": 2.0466423917159526e-06, + "loss": 0.541358470916748, + "step": 5494 + }, + { + "epoch": 1.6068138616756835, + "grad_norm": 2.1455011977132714, + "learning_rate": 2.0437126711232826e-06, + "loss": 0.6578946709632874, + "step": 5495 + }, + { + "epoch": 1.607106302090949, + "grad_norm": 1.9512378226148355, + "learning_rate": 2.0407848102644002e-06, + "loss": 0.5967978239059448, + "step": 5496 + }, + { + "epoch": 1.6073987425062144, + "grad_norm": 1.623105883994405, + "learning_rate": 
2.037858809823675e-06, + "loss": 0.46947693824768066, + "step": 5497 + }, + { + "epoch": 1.6076911829214797, + "grad_norm": 1.5763151196056784, + "learning_rate": 2.0349346704850436e-06, + "loss": 0.5014760494232178, + "step": 5498 + }, + { + "epoch": 1.6079836233367453, + "grad_norm": 1.5417734514532708, + "learning_rate": 2.0320123929320033e-06, + "loss": 0.4399675726890564, + "step": 5499 + }, + { + "epoch": 1.6082760637520104, + "grad_norm": 1.8719036359624468, + "learning_rate": 2.0290919778476214e-06, + "loss": 0.4729107618331909, + "step": 5500 + }, + { + "epoch": 1.608568504167276, + "grad_norm": 1.5894079730285777, + "learning_rate": 2.0261734259145248e-06, + "loss": 0.5669134259223938, + "step": 5501 + }, + { + "epoch": 1.6088609445825413, + "grad_norm": 1.554035864612711, + "learning_rate": 2.0232567378149082e-06, + "loss": 0.4200817942619324, + "step": 5502 + }, + { + "epoch": 1.6091533849978066, + "grad_norm": 1.8154865090092227, + "learning_rate": 2.0203419142305303e-06, + "loss": 0.6057849526405334, + "step": 5503 + }, + { + "epoch": 1.6094458254130721, + "grad_norm": 1.7156552575659618, + "learning_rate": 2.017428955842713e-06, + "loss": 0.5644170045852661, + "step": 5504 + }, + { + "epoch": 1.6097382658283375, + "grad_norm": 1.9102243104698693, + "learning_rate": 2.014517863332345e-06, + "loss": 0.6368730068206787, + "step": 5505 + }, + { + "epoch": 1.6100307062436028, + "grad_norm": 1.5712918255487374, + "learning_rate": 2.0116086373798704e-06, + "loss": 0.4829355478286743, + "step": 5506 + }, + { + "epoch": 1.6103231466588683, + "grad_norm": 1.642541904242283, + "learning_rate": 2.0087012786653072e-06, + "loss": 0.5604796409606934, + "step": 5507 + }, + { + "epoch": 1.6106155870741337, + "grad_norm": 1.8591393596163848, + "learning_rate": 2.005795787868232e-06, + "loss": 0.5594274997711182, + "step": 5508 + }, + { + "epoch": 1.610908027489399, + "grad_norm": 1.607362999733334, + "learning_rate": 2.0028921656677857e-06, + "loss": 
0.5553449988365173, + "step": 5509 + }, + { + "epoch": 1.6112004679046645, + "grad_norm": 1.7968941470299316, + "learning_rate": 1.999990412742673e-06, + "loss": 0.5056631565093994, + "step": 5510 + }, + { + "epoch": 1.6114929083199299, + "grad_norm": 1.5654499452702673, + "learning_rate": 1.9970905297711606e-06, + "loss": 0.432037353515625, + "step": 5511 + }, + { + "epoch": 1.6117853487351952, + "grad_norm": 1.6991047972494284, + "learning_rate": 1.9941925174310773e-06, + "loss": 0.5152974128723145, + "step": 5512 + }, + { + "epoch": 1.6120777891504607, + "grad_norm": 2.1530610582321015, + "learning_rate": 1.9912963763998185e-06, + "loss": 0.59015291929245, + "step": 5513 + }, + { + "epoch": 1.6123702295657258, + "grad_norm": 2.024675130869183, + "learning_rate": 1.9884021073543368e-06, + "loss": 0.564031720161438, + "step": 5514 + }, + { + "epoch": 1.6126626699809914, + "grad_norm": 1.560415326953441, + "learning_rate": 1.985509710971152e-06, + "loss": 0.5930228233337402, + "step": 5515 + }, + { + "epoch": 1.6129551103962567, + "grad_norm": 1.6853261691368011, + "learning_rate": 1.9826191879263446e-06, + "loss": 0.540229082107544, + "step": 5516 + }, + { + "epoch": 1.613247550811522, + "grad_norm": 1.3918983021829734, + "learning_rate": 1.9797305388955547e-06, + "loss": 0.5473166704177856, + "step": 5517 + }, + { + "epoch": 1.6135399912267876, + "grad_norm": 1.888180196247059, + "learning_rate": 1.976843764553986e-06, + "loss": 0.5814535617828369, + "step": 5518 + }, + { + "epoch": 1.613832431642053, + "grad_norm": 1.556089571926902, + "learning_rate": 1.973958865576403e-06, + "loss": 0.4892576038837433, + "step": 5519 + }, + { + "epoch": 1.6141248720573182, + "grad_norm": 2.0461002845877454, + "learning_rate": 1.97107584263714e-06, + "loss": 0.5416869521141052, + "step": 5520 + }, + { + "epoch": 1.6144173124725838, + "grad_norm": 1.9685372161480885, + "learning_rate": 1.9681946964100807e-06, + "loss": 0.5956105589866638, + "step": 5521 + }, + { + "epoch": 
1.6147097528878491, + "grad_norm": 1.7885384988170376, + "learning_rate": 1.9653154275686782e-06, + "loss": 0.5722565650939941, + "step": 5522 + }, + { + "epoch": 1.6150021933031145, + "grad_norm": 1.7917880328936266, + "learning_rate": 1.962438036785942e-06, + "loss": 0.3984888195991516, + "step": 5523 + }, + { + "epoch": 1.61529463371838, + "grad_norm": 1.6334267618118792, + "learning_rate": 1.959562524734445e-06, + "loss": 0.601211428642273, + "step": 5524 + }, + { + "epoch": 1.615587074133645, + "grad_norm": 1.8080265301577823, + "learning_rate": 1.9566888920863247e-06, + "loss": 0.4803691506385803, + "step": 5525 + }, + { + "epoch": 1.6158795145489107, + "grad_norm": 1.7017865626810558, + "learning_rate": 1.9538171395132688e-06, + "loss": 0.6914256811141968, + "step": 5526 + }, + { + "epoch": 1.6161719549641762, + "grad_norm": 1.6511977253132817, + "learning_rate": 1.950947267686536e-06, + "loss": 0.49076569080352783, + "step": 5527 + }, + { + "epoch": 1.6164643953794413, + "grad_norm": 1.747888743558531, + "learning_rate": 1.9480792772769384e-06, + "loss": 0.45781368017196655, + "step": 5528 + }, + { + "epoch": 1.6167568357947069, + "grad_norm": 1.6564602036044371, + "learning_rate": 1.9452131689548547e-06, + "loss": 0.5257985591888428, + "step": 5529 + }, + { + "epoch": 1.6170492762099722, + "grad_norm": 1.6440311298220422, + "learning_rate": 1.9423489433902186e-06, + "loss": 0.4170517921447754, + "step": 5530 + }, + { + "epoch": 1.6173417166252375, + "grad_norm": 2.0566981290938386, + "learning_rate": 1.939486601252525e-06, + "loss": 0.5612319707870483, + "step": 5531 + }, + { + "epoch": 1.617634157040503, + "grad_norm": 1.6971941543602107, + "learning_rate": 1.93662614321083e-06, + "loss": 0.4543185234069824, + "step": 5532 + }, + { + "epoch": 1.6179265974557684, + "grad_norm": 1.8366998724664239, + "learning_rate": 1.933767569933749e-06, + "loss": 0.5506256222724915, + "step": 5533 + }, + { + "epoch": 1.6182190378710337, + "grad_norm": 1.9975995427991473, 
+ "learning_rate": 1.930910882089454e-06, + "loss": 0.5411139130592346, + "step": 5534 + }, + { + "epoch": 1.6185114782862993, + "grad_norm": 1.5549343206880035, + "learning_rate": 1.9280560803456794e-06, + "loss": 0.5332196950912476, + "step": 5535 + }, + { + "epoch": 1.6188039187015646, + "grad_norm": 1.6015028384804206, + "learning_rate": 1.92520316536972e-06, + "loss": 0.5159808993339539, + "step": 5536 + }, + { + "epoch": 1.61909635911683, + "grad_norm": 1.6182793083642761, + "learning_rate": 1.9223521378284227e-06, + "loss": 0.5483378767967224, + "step": 5537 + }, + { + "epoch": 1.6193887995320955, + "grad_norm": 1.748287896704832, + "learning_rate": 1.9195029983882008e-06, + "loss": 0.6451961994171143, + "step": 5538 + }, + { + "epoch": 1.6196812399473606, + "grad_norm": 1.799346834276764, + "learning_rate": 1.9166557477150227e-06, + "loss": 0.5904289484024048, + "step": 5539 + }, + { + "epoch": 1.6199736803626261, + "grad_norm": 1.7892510148499787, + "learning_rate": 1.9138103864744164e-06, + "loss": 0.6688845753669739, + "step": 5540 + }, + { + "epoch": 1.6202661207778914, + "grad_norm": 2.050448659373089, + "learning_rate": 1.910966915331467e-06, + "loss": 0.6299211382865906, + "step": 5541 + }, + { + "epoch": 1.6205585611931568, + "grad_norm": 1.7426964698819027, + "learning_rate": 1.908125334950819e-06, + "loss": 0.5502864122390747, + "step": 5542 + }, + { + "epoch": 1.6208510016084223, + "grad_norm": 1.7235241145346871, + "learning_rate": 1.905285645996674e-06, + "loss": 0.5332865118980408, + "step": 5543 + }, + { + "epoch": 1.6211434420236877, + "grad_norm": 1.6383658575716402, + "learning_rate": 1.9024478491327936e-06, + "loss": 0.43371304869651794, + "step": 5544 + }, + { + "epoch": 1.621435882438953, + "grad_norm": 1.6217362880484214, + "learning_rate": 1.8996119450224936e-06, + "loss": 0.6992501616477966, + "step": 5545 + }, + { + "epoch": 1.6217283228542185, + "grad_norm": 1.6128570910751827, + "learning_rate": 1.8967779343286507e-06, + "loss": 
0.46558254957199097, + "step": 5546 + }, + { + "epoch": 1.6220207632694839, + "grad_norm": 1.8944422030763228, + "learning_rate": 1.8939458177136994e-06, + "loss": 0.48943620920181274, + "step": 5547 + }, + { + "epoch": 1.6223132036847492, + "grad_norm": 1.7707340805275773, + "learning_rate": 1.8911155958396256e-06, + "loss": 0.6041419506072998, + "step": 5548 + }, + { + "epoch": 1.6226056441000147, + "grad_norm": 1.9134744412177116, + "learning_rate": 1.8882872693679787e-06, + "loss": 0.5695589780807495, + "step": 5549 + }, + { + "epoch": 1.62289808451528, + "grad_norm": 1.5970321508292495, + "learning_rate": 1.8854608389598616e-06, + "loss": 0.45147764682769775, + "step": 5550 + }, + { + "epoch": 1.6231905249305454, + "grad_norm": 1.6145559649024994, + "learning_rate": 1.8826363052759367e-06, + "loss": 0.45560893416404724, + "step": 5551 + }, + { + "epoch": 1.623482965345811, + "grad_norm": 2.0039561405471855, + "learning_rate": 1.8798136689764213e-06, + "loss": 0.5714661478996277, + "step": 5552 + }, + { + "epoch": 1.623775405761076, + "grad_norm": 1.963169578207157, + "learning_rate": 1.8769929307210889e-06, + "loss": 0.6074325442314148, + "step": 5553 + }, + { + "epoch": 1.6240678461763416, + "grad_norm": 1.8912554250379197, + "learning_rate": 1.8741740911692708e-06, + "loss": 0.5406322479248047, + "step": 5554 + }, + { + "epoch": 1.624360286591607, + "grad_norm": 1.6201303288999382, + "learning_rate": 1.8713571509798524e-06, + "loss": 0.6098664999008179, + "step": 5555 + }, + { + "epoch": 1.6246527270068722, + "grad_norm": 1.5939995677707521, + "learning_rate": 1.8685421108112778e-06, + "loss": 0.4424859881401062, + "step": 5556 + }, + { + "epoch": 1.6249451674221378, + "grad_norm": 1.939602482231334, + "learning_rate": 1.8657289713215442e-06, + "loss": 0.5893913507461548, + "step": 5557 + }, + { + "epoch": 1.6252376078374031, + "grad_norm": 1.7158163961672592, + "learning_rate": 1.862917733168208e-06, + "loss": 0.5462610125541687, + "step": 5558 + }, + { + 
"epoch": 1.6255300482526684, + "grad_norm": 1.6886650692922842, + "learning_rate": 1.8601083970083766e-06, + "loss": 0.6044303178787231, + "step": 5559 + }, + { + "epoch": 1.625822488667934, + "grad_norm": 1.9164005939081288, + "learning_rate": 1.857300963498715e-06, + "loss": 0.4110264778137207, + "step": 5560 + }, + { + "epoch": 1.6261149290831993, + "grad_norm": 1.863152431496286, + "learning_rate": 1.8544954332954445e-06, + "loss": 0.5640783309936523, + "step": 5561 + }, + { + "epoch": 1.6264073694984647, + "grad_norm": 1.7604141548514003, + "learning_rate": 1.851691807054342e-06, + "loss": 0.43247851729393005, + "step": 5562 + }, + { + "epoch": 1.6266998099137302, + "grad_norm": 1.5534888737518595, + "learning_rate": 1.8488900854307367e-06, + "loss": 0.4909735918045044, + "step": 5563 + }, + { + "epoch": 1.6269922503289953, + "grad_norm": 2.0368143734269983, + "learning_rate": 1.8460902690795135e-06, + "loss": 0.5705426335334778, + "step": 5564 + }, + { + "epoch": 1.6272846907442609, + "grad_norm": 1.7974229709801126, + "learning_rate": 1.8432923586551144e-06, + "loss": 0.6455575823783875, + "step": 5565 + }, + { + "epoch": 1.6275771311595264, + "grad_norm": 1.5854644304225498, + "learning_rate": 1.8404963548115318e-06, + "loss": 0.4156351089477539, + "step": 5566 + }, + { + "epoch": 1.6278695715747915, + "grad_norm": 1.5858505218040218, + "learning_rate": 1.8377022582023163e-06, + "loss": 0.5497896075248718, + "step": 5567 + }, + { + "epoch": 1.628162011990057, + "grad_norm": 1.5247094519796704, + "learning_rate": 1.8349100694805711e-06, + "loss": 0.5237758159637451, + "step": 5568 + }, + { + "epoch": 1.6284544524053224, + "grad_norm": 1.8340347816856337, + "learning_rate": 1.832119789298954e-06, + "loss": 0.5140771865844727, + "step": 5569 + }, + { + "epoch": 1.6287468928205877, + "grad_norm": 2.04051717357991, + "learning_rate": 1.8293314183096721e-06, + "loss": 0.5942349433898926, + "step": 5570 + }, + { + "epoch": 1.6290393332358533, + "grad_norm": 
1.7868670881272706, + "learning_rate": 1.8265449571644933e-06, + "loss": 0.6316613554954529, + "step": 5571 + }, + { + "epoch": 1.6293317736511186, + "grad_norm": 1.7168155291178147, + "learning_rate": 1.823760406514735e-06, + "loss": 0.4789954423904419, + "step": 5572 + }, + { + "epoch": 1.629624214066384, + "grad_norm": 1.67674259516067, + "learning_rate": 1.8209777670112706e-06, + "loss": 0.596744179725647, + "step": 5573 + }, + { + "epoch": 1.6299166544816495, + "grad_norm": 1.7162317239554103, + "learning_rate": 1.8181970393045223e-06, + "loss": 0.5785890817642212, + "step": 5574 + }, + { + "epoch": 1.6302090948969148, + "grad_norm": 1.72408279785472, + "learning_rate": 1.8154182240444706e-06, + "loss": 0.5399461388587952, + "step": 5575 + }, + { + "epoch": 1.6305015353121801, + "grad_norm": 1.789842505433769, + "learning_rate": 1.812641321880645e-06, + "loss": 0.5251961946487427, + "step": 5576 + }, + { + "epoch": 1.6307939757274457, + "grad_norm": 1.5658430659550284, + "learning_rate": 1.8098663334621314e-06, + "loss": 0.6094855070114136, + "step": 5577 + }, + { + "epoch": 1.6310864161427108, + "grad_norm": 1.7839781318616403, + "learning_rate": 1.8070932594375656e-06, + "loss": 0.5586157441139221, + "step": 5578 + }, + { + "epoch": 1.6313788565579763, + "grad_norm": 1.6074136925381057, + "learning_rate": 1.804322100455136e-06, + "loss": 0.5572035312652588, + "step": 5579 + }, + { + "epoch": 1.6316712969732416, + "grad_norm": 1.8419935059375991, + "learning_rate": 1.801552857162585e-06, + "loss": 0.5567929148674011, + "step": 5580 + }, + { + "epoch": 1.631963737388507, + "grad_norm": 1.6633256712541593, + "learning_rate": 1.79878553020721e-06, + "loss": 0.4823629558086395, + "step": 5581 + }, + { + "epoch": 1.6322561778037725, + "grad_norm": 1.6367496398860508, + "learning_rate": 1.7960201202358495e-06, + "loss": 0.52935791015625, + "step": 5582 + }, + { + "epoch": 1.6325486182190379, + "grad_norm": 1.612437469487566, + "learning_rate": 
1.7932566278949049e-06, + "loss": 0.5486055016517639, + "step": 5583 + }, + { + "epoch": 1.6328410586343032, + "grad_norm": 1.638977663987494, + "learning_rate": 1.7904950538303256e-06, + "loss": 0.5606030225753784, + "step": 5584 + }, + { + "epoch": 1.6331334990495687, + "grad_norm": 1.5693957426770746, + "learning_rate": 1.7877353986876134e-06, + "loss": 0.5394873023033142, + "step": 5585 + }, + { + "epoch": 1.633425939464834, + "grad_norm": 1.7113121312436326, + "learning_rate": 1.7849776631118198e-06, + "loss": 0.6015416383743286, + "step": 5586 + }, + { + "epoch": 1.6337183798800994, + "grad_norm": 1.308708455891742, + "learning_rate": 1.7822218477475496e-06, + "loss": 0.3476119041442871, + "step": 5587 + }, + { + "epoch": 1.634010820295365, + "grad_norm": 1.6872606261874499, + "learning_rate": 1.7794679532389569e-06, + "loss": 0.43062901496887207, + "step": 5588 + }, + { + "epoch": 1.6343032607106303, + "grad_norm": 1.5715829289628913, + "learning_rate": 1.7767159802297497e-06, + "loss": 0.5267136096954346, + "step": 5589 + }, + { + "epoch": 1.6345957011258956, + "grad_norm": 1.5843839322860915, + "learning_rate": 1.7739659293631828e-06, + "loss": 0.40477365255355835, + "step": 5590 + }, + { + "epoch": 1.6348881415411611, + "grad_norm": 1.5464703907052304, + "learning_rate": 1.7712178012820657e-06, + "loss": 0.5166594386100769, + "step": 5591 + }, + { + "epoch": 1.6351805819564262, + "grad_norm": 1.8725681057880097, + "learning_rate": 1.768471596628757e-06, + "loss": 0.577332615852356, + "step": 5592 + }, + { + "epoch": 1.6354730223716918, + "grad_norm": 1.9361068668488919, + "learning_rate": 1.7657273160451626e-06, + "loss": 0.6265558004379272, + "step": 5593 + }, + { + "epoch": 1.6357654627869571, + "grad_norm": 1.359788014623014, + "learning_rate": 1.7629849601727422e-06, + "loss": 0.46483689546585083, + "step": 5594 + }, + { + "epoch": 1.6360579032022224, + "grad_norm": 1.763922790196176, + "learning_rate": 1.760244529652504e-06, + "loss": 
0.5217114090919495, + "step": 5595 + }, + { + "epoch": 1.636350343617488, + "grad_norm": 1.6050490395737056, + "learning_rate": 1.7575060251250098e-06, + "loss": 0.40754032135009766, + "step": 5596 + }, + { + "epoch": 1.6366427840327533, + "grad_norm": 1.8321306870013994, + "learning_rate": 1.7547694472303677e-06, + "loss": 0.5153856873512268, + "step": 5597 + }, + { + "epoch": 1.6369352244480186, + "grad_norm": 1.7719174136737381, + "learning_rate": 1.7520347966082352e-06, + "loss": 0.47374534606933594, + "step": 5598 + }, + { + "epoch": 1.6372276648632842, + "grad_norm": 1.85303064846871, + "learning_rate": 1.7493020738978205e-06, + "loss": 0.375232070684433, + "step": 5599 + }, + { + "epoch": 1.6375201052785495, + "grad_norm": 1.6844665277996391, + "learning_rate": 1.746571279737884e-06, + "loss": 0.5731218457221985, + "step": 5600 + }, + { + "epoch": 1.6378125456938148, + "grad_norm": 1.7604017420749336, + "learning_rate": 1.7438424147667267e-06, + "loss": 0.4908478260040283, + "step": 5601 + }, + { + "epoch": 1.6381049861090804, + "grad_norm": 1.759771030770569, + "learning_rate": 1.741115479622205e-06, + "loss": 0.6114420890808105, + "step": 5602 + }, + { + "epoch": 1.6383974265243455, + "grad_norm": 1.6860969538693165, + "learning_rate": 1.738390474941727e-06, + "loss": 0.6207842826843262, + "step": 5603 + }, + { + "epoch": 1.638689866939611, + "grad_norm": 1.8627303036453442, + "learning_rate": 1.7356674013622431e-06, + "loss": 0.4745057225227356, + "step": 5604 + }, + { + "epoch": 1.6389823073548766, + "grad_norm": 1.7448279838579288, + "learning_rate": 1.7329462595202573e-06, + "loss": 0.5501791834831238, + "step": 5605 + }, + { + "epoch": 1.6392747477701417, + "grad_norm": 1.5723514930569527, + "learning_rate": 1.7302270500518181e-06, + "loss": 0.5497169494628906, + "step": 5606 + }, + { + "epoch": 1.6395671881854073, + "grad_norm": 1.5582550082102102, + "learning_rate": 1.7275097735925239e-06, + "loss": 0.4439499080181122, + "step": 5607 + }, + { + 
"epoch": 1.6398596286006726, + "grad_norm": 1.5421642594165323, + "learning_rate": 1.7247944307775245e-06, + "loss": 0.5869239568710327, + "step": 5608 + }, + { + "epoch": 1.640152069015938, + "grad_norm": 1.8811101308859866, + "learning_rate": 1.722081022241512e-06, + "loss": 0.6979252099990845, + "step": 5609 + }, + { + "epoch": 1.6404445094312035, + "grad_norm": 1.6052357503320651, + "learning_rate": 1.719369548618729e-06, + "loss": 0.43291550874710083, + "step": 5610 + }, + { + "epoch": 1.6407369498464688, + "grad_norm": 1.9414841639869573, + "learning_rate": 1.7166600105429676e-06, + "loss": 0.5670255422592163, + "step": 5611 + }, + { + "epoch": 1.6410293902617341, + "grad_norm": 1.8236286685742322, + "learning_rate": 1.7139524086475679e-06, + "loss": 0.5956759452819824, + "step": 5612 + }, + { + "epoch": 1.6413218306769997, + "grad_norm": 1.8379778243010318, + "learning_rate": 1.71124674356541e-06, + "loss": 0.624202311038971, + "step": 5613 + }, + { + "epoch": 1.641614271092265, + "grad_norm": 1.546976601945301, + "learning_rate": 1.7085430159289295e-06, + "loss": 0.5394845604896545, + "step": 5614 + }, + { + "epoch": 1.6419067115075303, + "grad_norm": 1.689395226298913, + "learning_rate": 1.7058412263701063e-06, + "loss": 0.5320364236831665, + "step": 5615 + }, + { + "epoch": 1.6421991519227959, + "grad_norm": 1.719103506089404, + "learning_rate": 1.7031413755204673e-06, + "loss": 0.5889087915420532, + "step": 5616 + }, + { + "epoch": 1.642491592338061, + "grad_norm": 2.1274597082343103, + "learning_rate": 1.7004434640110857e-06, + "loss": 0.63529372215271, + "step": 5617 + }, + { + "epoch": 1.6427840327533265, + "grad_norm": 1.4153766033649497, + "learning_rate": 1.6977474924725823e-06, + "loss": 0.48696887493133545, + "step": 5618 + }, + { + "epoch": 1.6430764731685918, + "grad_norm": 1.592412158520241, + "learning_rate": 1.6950534615351234e-06, + "loss": 0.5998564958572388, + "step": 5619 + }, + { + "epoch": 1.6433689135838572, + "grad_norm": 
2.0156836237169142, + "learning_rate": 1.6923613718284237e-06, + "loss": 0.5256673693656921, + "step": 5620 + }, + { + "epoch": 1.6436613539991227, + "grad_norm": 1.6125826678096948, + "learning_rate": 1.6896712239817425e-06, + "loss": 0.4609792232513428, + "step": 5621 + }, + { + "epoch": 1.643953794414388, + "grad_norm": 1.6601641389435113, + "learning_rate": 1.6869830186238846e-06, + "loss": 0.6816249489784241, + "step": 5622 + }, + { + "epoch": 1.6442462348296534, + "grad_norm": 1.7917988570441037, + "learning_rate": 1.6842967563832036e-06, + "loss": 0.4622993767261505, + "step": 5623 + }, + { + "epoch": 1.644538675244919, + "grad_norm": 1.7039192593388794, + "learning_rate": 1.6816124378875942e-06, + "loss": 0.5089092254638672, + "step": 5624 + }, + { + "epoch": 1.6448311156601843, + "grad_norm": 1.804753905417491, + "learning_rate": 1.6789300637645e-06, + "loss": 0.49178463220596313, + "step": 5625 + }, + { + "epoch": 1.6451235560754496, + "grad_norm": 1.5823189990427826, + "learning_rate": 1.676249634640912e-06, + "loss": 0.4943847358226776, + "step": 5626 + }, + { + "epoch": 1.6454159964907151, + "grad_norm": 1.368767643177251, + "learning_rate": 1.6735711511433606e-06, + "loss": 0.38509243726730347, + "step": 5627 + }, + { + "epoch": 1.6457084369059805, + "grad_norm": 1.8368901340386043, + "learning_rate": 1.6708946138979288e-06, + "loss": 0.4765651822090149, + "step": 5628 + }, + { + "epoch": 1.6460008773212458, + "grad_norm": 1.9577136986762462, + "learning_rate": 1.6682200235302383e-06, + "loss": 0.5667406916618347, + "step": 5629 + }, + { + "epoch": 1.6462933177365113, + "grad_norm": 1.7936789387136831, + "learning_rate": 1.66554738066546e-06, + "loss": 0.702905535697937, + "step": 5630 + }, + { + "epoch": 1.6465857581517764, + "grad_norm": 1.8218045279879265, + "learning_rate": 1.6628766859283064e-06, + "loss": 0.5056663155555725, + "step": 5631 + }, + { + "epoch": 1.646878198567042, + "grad_norm": 1.5059989898819282, + "learning_rate": 
1.660207939943037e-06, + "loss": 0.3949700593948364, + "step": 5632 + }, + { + "epoch": 1.6471706389823073, + "grad_norm": 1.8365180821647582, + "learning_rate": 1.6575411433334553e-06, + "loss": 0.5562522411346436, + "step": 5633 + }, + { + "epoch": 1.6474630793975726, + "grad_norm": 1.853282636299631, + "learning_rate": 1.6548762967229104e-06, + "loss": 0.5046012997627258, + "step": 5634 + }, + { + "epoch": 1.6477555198128382, + "grad_norm": 1.873405273649113, + "learning_rate": 1.6522134007342894e-06, + "loss": 0.510586678981781, + "step": 5635 + }, + { + "epoch": 1.6480479602281035, + "grad_norm": 1.7244709678320052, + "learning_rate": 1.649552455990031e-06, + "loss": 0.5587502717971802, + "step": 5636 + }, + { + "epoch": 1.6483404006433688, + "grad_norm": 1.4244703013642708, + "learning_rate": 1.6468934631121147e-06, + "loss": 0.4774302840232849, + "step": 5637 + }, + { + "epoch": 1.6486328410586344, + "grad_norm": 1.7840577383362874, + "learning_rate": 1.644236422722063e-06, + "loss": 0.5969966650009155, + "step": 5638 + }, + { + "epoch": 1.6489252814738997, + "grad_norm": 1.9166026144170052, + "learning_rate": 1.6415813354409438e-06, + "loss": 0.6344267129898071, + "step": 5639 + }, + { + "epoch": 1.649217721889165, + "grad_norm": 1.6455866581497667, + "learning_rate": 1.638928201889367e-06, + "loss": 0.5252394676208496, + "step": 5640 + }, + { + "epoch": 1.6495101623044306, + "grad_norm": 1.7211145798255698, + "learning_rate": 1.636277022687488e-06, + "loss": 0.5092496871948242, + "step": 5641 + }, + { + "epoch": 1.6498026027196957, + "grad_norm": 1.797167898340461, + "learning_rate": 1.633627798455002e-06, + "loss": 0.6530938148498535, + "step": 5642 + }, + { + "epoch": 1.6500950431349612, + "grad_norm": 1.9009398203220143, + "learning_rate": 1.6309805298111492e-06, + "loss": 0.5152128338813782, + "step": 5643 + }, + { + "epoch": 1.6503874835502268, + "grad_norm": 1.4233567646508596, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.4670040011405945, 
+ "step": 5644 + }, + { + "epoch": 1.650679923965492, + "grad_norm": 1.718468492793745, + "learning_rate": 1.625691861764024e-06, + "loss": 0.47373896837234497, + "step": 5645 + }, + { + "epoch": 1.6509723643807575, + "grad_norm": 1.7609405530750961, + "learning_rate": 1.6230504635969413e-06, + "loss": 0.44277217984199524, + "step": 5646 + }, + { + "epoch": 1.6512648047960228, + "grad_norm": 1.9781946361433098, + "learning_rate": 1.6204110234908798e-06, + "loss": 0.6000313758850098, + "step": 5647 + }, + { + "epoch": 1.651557245211288, + "grad_norm": 1.7535547863968097, + "learning_rate": 1.6177735420627939e-06, + "loss": 0.5950880646705627, + "step": 5648 + }, + { + "epoch": 1.6518496856265537, + "grad_norm": 1.6566653852646755, + "learning_rate": 1.6151380199291767e-06, + "loss": 0.6308536529541016, + "step": 5649 + }, + { + "epoch": 1.652142126041819, + "grad_norm": 1.6807098070808562, + "learning_rate": 1.6125044577060667e-06, + "loss": 0.619708240032196, + "step": 5650 + }, + { + "epoch": 1.6524345664570843, + "grad_norm": 1.5330415165514617, + "learning_rate": 1.6098728560090438e-06, + "loss": 0.4912105202674866, + "step": 5651 + }, + { + "epoch": 1.6527270068723499, + "grad_norm": 1.6147329529882195, + "learning_rate": 1.607243215453227e-06, + "loss": 0.5078046917915344, + "step": 5652 + }, + { + "epoch": 1.6530194472876152, + "grad_norm": 1.562057881587638, + "learning_rate": 1.60461553665328e-06, + "loss": 0.4845188856124878, + "step": 5653 + }, + { + "epoch": 1.6533118877028805, + "grad_norm": 1.7846716530712952, + "learning_rate": 1.6019898202234075e-06, + "loss": 0.6323055028915405, + "step": 5654 + }, + { + "epoch": 1.653604328118146, + "grad_norm": 1.6998274797888833, + "learning_rate": 1.5993660667773524e-06, + "loss": 0.5700039863586426, + "step": 5655 + }, + { + "epoch": 1.6538967685334112, + "grad_norm": 1.567032216624363, + "learning_rate": 1.596744276928406e-06, + "loss": 0.5415322780609131, + "step": 5656 + }, + { + "epoch": 1.6541892089486767, 
+ "grad_norm": 1.5705725451812174, + "learning_rate": 1.5941244512893894e-06, + "loss": 0.47339457273483276, + "step": 5657 + }, + { + "epoch": 1.654481649363942, + "grad_norm": 1.7467509306260278, + "learning_rate": 1.5915065904726735e-06, + "loss": 0.5391967296600342, + "step": 5658 + }, + { + "epoch": 1.6547740897792074, + "grad_norm": 1.4866752002795596, + "learning_rate": 1.5888906950901683e-06, + "loss": 0.4832335114479065, + "step": 5659 + }, + { + "epoch": 1.655066530194473, + "grad_norm": 1.6779111992749078, + "learning_rate": 1.5862767657533217e-06, + "loss": 0.4539526104927063, + "step": 5660 + }, + { + "epoch": 1.6553589706097382, + "grad_norm": 1.6528644114250834, + "learning_rate": 1.583664803073125e-06, + "loss": 0.5261383056640625, + "step": 5661 + }, + { + "epoch": 1.6556514110250036, + "grad_norm": 1.497445031852123, + "learning_rate": 1.5810548076601096e-06, + "loss": 0.44060665369033813, + "step": 5662 + }, + { + "epoch": 1.6559438514402691, + "grad_norm": 1.5492116356252563, + "learning_rate": 1.578446780124344e-06, + "loss": 0.4202715754508972, + "step": 5663 + }, + { + "epoch": 1.6562362918555344, + "grad_norm": 1.6539664834530166, + "learning_rate": 1.57584072107544e-06, + "loss": 0.4736124873161316, + "step": 5664 + }, + { + "epoch": 1.6565287322707998, + "grad_norm": 1.71312353367257, + "learning_rate": 1.5732366311225466e-06, + "loss": 0.46696585416793823, + "step": 5665 + }, + { + "epoch": 1.6568211726860653, + "grad_norm": 1.9879711761174368, + "learning_rate": 1.570634510874356e-06, + "loss": 0.566236138343811, + "step": 5666 + }, + { + "epoch": 1.6571136131013307, + "grad_norm": 1.8521270783851422, + "learning_rate": 1.568034360939098e-06, + "loss": 0.5486587882041931, + "step": 5667 + }, + { + "epoch": 1.657406053516596, + "grad_norm": 1.7248593346342498, + "learning_rate": 1.5654361819245423e-06, + "loss": 0.49735748767852783, + "step": 5668 + }, + { + "epoch": 1.6576984939318615, + "grad_norm": 1.7146086057727925, + 
"learning_rate": 1.562839974437993e-06, + "loss": 0.6306062340736389, + "step": 5669 + }, + { + "epoch": 1.6579909343471266, + "grad_norm": 1.524921725608077, + "learning_rate": 1.5602457390863e-06, + "loss": 0.5062750577926636, + "step": 5670 + }, + { + "epoch": 1.6582833747623922, + "grad_norm": 1.511094812843301, + "learning_rate": 1.5576534764758522e-06, + "loss": 0.5037271976470947, + "step": 5671 + }, + { + "epoch": 1.6585758151776575, + "grad_norm": 1.5731242321067331, + "learning_rate": 1.5550631872125743e-06, + "loss": 0.5749099254608154, + "step": 5672 + }, + { + "epoch": 1.6588682555929228, + "grad_norm": 1.7434864310127793, + "learning_rate": 1.5524748719019312e-06, + "loss": 0.5241814255714417, + "step": 5673 + }, + { + "epoch": 1.6591606960081884, + "grad_norm": 1.6599956977784314, + "learning_rate": 1.5498885311489243e-06, + "loss": 0.5410301685333252, + "step": 5674 + }, + { + "epoch": 1.6594531364234537, + "grad_norm": 1.6011154372945764, + "learning_rate": 1.5473041655580956e-06, + "loss": 0.6363968253135681, + "step": 5675 + }, + { + "epoch": 1.659745576838719, + "grad_norm": 1.7262614943682328, + "learning_rate": 1.5447217757335264e-06, + "loss": 0.5476758480072021, + "step": 5676 + }, + { + "epoch": 1.6600380172539846, + "grad_norm": 2.1890795829733465, + "learning_rate": 1.5421413622788328e-06, + "loss": 0.5375553369522095, + "step": 5677 + }, + { + "epoch": 1.66033045766925, + "grad_norm": 1.6882335328441256, + "learning_rate": 1.53956292579717e-06, + "loss": 0.6227232217788696, + "step": 5678 + }, + { + "epoch": 1.6606228980845152, + "grad_norm": 1.6718724900526085, + "learning_rate": 1.5369864668912327e-06, + "loss": 0.5981070399284363, + "step": 5679 + }, + { + "epoch": 1.6609153384997808, + "grad_norm": 1.6935493861773532, + "learning_rate": 1.5344119861632535e-06, + "loss": 0.5535466074943542, + "step": 5680 + }, + { + "epoch": 1.661207778915046, + "grad_norm": 1.5610811166933365, + "learning_rate": 1.5318394842150009e-06, + "loss": 
0.5562780499458313, + "step": 5681 + }, + { + "epoch": 1.6615002193303114, + "grad_norm": 2.8725866349660683, + "learning_rate": 1.5292689616477808e-06, + "loss": 0.5937552452087402, + "step": 5682 + }, + { + "epoch": 1.661792659745577, + "grad_norm": 1.578580766715999, + "learning_rate": 1.526700419062439e-06, + "loss": 0.5154576301574707, + "step": 5683 + }, + { + "epoch": 1.662085100160842, + "grad_norm": 1.691867883182686, + "learning_rate": 1.5241338570593557e-06, + "loss": 0.6887973546981812, + "step": 5684 + }, + { + "epoch": 1.6623775405761076, + "grad_norm": 1.6575847736482525, + "learning_rate": 1.5215692762384481e-06, + "loss": 0.5365385413169861, + "step": 5685 + }, + { + "epoch": 1.662669980991373, + "grad_norm": 1.8209681086307343, + "learning_rate": 1.519006677199173e-06, + "loss": 0.5906165838241577, + "step": 5686 + }, + { + "epoch": 1.6629624214066383, + "grad_norm": 1.5861180854035228, + "learning_rate": 1.5164460605405252e-06, + "loss": 0.5752634406089783, + "step": 5687 + }, + { + "epoch": 1.6632548618219039, + "grad_norm": 1.5319119666926662, + "learning_rate": 1.5138874268610259e-06, + "loss": 0.6265667676925659, + "step": 5688 + }, + { + "epoch": 1.6635473022371692, + "grad_norm": 1.9588164368468703, + "learning_rate": 1.5113307767587449e-06, + "loss": 0.5032769441604614, + "step": 5689 + }, + { + "epoch": 1.6638397426524345, + "grad_norm": 1.799708728422752, + "learning_rate": 1.5087761108312837e-06, + "loss": 0.5740037560462952, + "step": 5690 + }, + { + "epoch": 1.6641321830677, + "grad_norm": 1.7236751091410876, + "learning_rate": 1.5062234296757782e-06, + "loss": 0.5745523571968079, + "step": 5691 + }, + { + "epoch": 1.6644246234829654, + "grad_norm": 1.4898534105858117, + "learning_rate": 1.5036727338889035e-06, + "loss": 0.4448510408401489, + "step": 5692 + }, + { + "epoch": 1.6647170638982307, + "grad_norm": 1.7482222251428332, + "learning_rate": 1.5011240240668678e-06, + "loss": 0.5142196416854858, + "step": 5693 + }, + { + "epoch": 
1.6650095043134963, + "grad_norm": 1.484675519827308, + "learning_rate": 1.4985773008054184e-06, + "loss": 0.3317479193210602, + "step": 5694 + }, + { + "epoch": 1.6653019447287614, + "grad_norm": 1.9471413048062423, + "learning_rate": 1.4960325646998353e-06, + "loss": 0.5721619129180908, + "step": 5695 + }, + { + "epoch": 1.665594385144027, + "grad_norm": 1.7546330993452042, + "learning_rate": 1.4934898163449341e-06, + "loss": 0.4937021732330322, + "step": 5696 + }, + { + "epoch": 1.6658868255592922, + "grad_norm": 1.9665727915679332, + "learning_rate": 1.4909490563350694e-06, + "loss": 0.6414870023727417, + "step": 5697 + }, + { + "epoch": 1.6661792659745576, + "grad_norm": 2.144217430639584, + "learning_rate": 1.4884102852641258e-06, + "loss": 0.6265281438827515, + "step": 5698 + }, + { + "epoch": 1.6664717063898231, + "grad_norm": 1.9171094003176723, + "learning_rate": 1.48587350372553e-06, + "loss": 0.5908917784690857, + "step": 5699 + }, + { + "epoch": 1.6667641468050884, + "grad_norm": 1.5794126433874063, + "learning_rate": 1.4833387123122334e-06, + "loss": 0.6098382472991943, + "step": 5700 + }, + { + "epoch": 1.6670565872203538, + "grad_norm": 1.7930907914682574, + "learning_rate": 1.4808059116167306e-06, + "loss": 0.5106536746025085, + "step": 5701 + }, + { + "epoch": 1.6673490276356193, + "grad_norm": 1.9054848074745216, + "learning_rate": 1.4782751022310481e-06, + "loss": 0.5548620820045471, + "step": 5702 + }, + { + "epoch": 1.6676414680508846, + "grad_norm": 1.5332665904029121, + "learning_rate": 1.4757462847467475e-06, + "loss": 0.4596245288848877, + "step": 5703 + }, + { + "epoch": 1.66793390846615, + "grad_norm": 1.98056012220508, + "learning_rate": 1.4732194597549244e-06, + "loss": 0.6000612378120422, + "step": 5704 + }, + { + "epoch": 1.6682263488814155, + "grad_norm": 1.53003821533968, + "learning_rate": 1.4706946278462097e-06, + "loss": 0.5522277355194092, + "step": 5705 + }, + { + "epoch": 1.6685187892966808, + "grad_norm": 1.741978737228361, 
+ "learning_rate": 1.468171789610766e-06, + "loss": 0.4765724837779999, + "step": 5706 + }, + { + "epoch": 1.6688112297119462, + "grad_norm": 1.737640693413614, + "learning_rate": 1.4656509456382927e-06, + "loss": 0.564188539981842, + "step": 5707 + }, + { + "epoch": 1.6691036701272117, + "grad_norm": 1.8586603218062736, + "learning_rate": 1.4631320965180208e-06, + "loss": 0.4910390377044678, + "step": 5708 + }, + { + "epoch": 1.6693961105424768, + "grad_norm": 1.8232002468100077, + "learning_rate": 1.4606152428387166e-06, + "loss": 0.5992041826248169, + "step": 5709 + }, + { + "epoch": 1.6696885509577424, + "grad_norm": 1.7676126822410316, + "learning_rate": 1.4581003851886811e-06, + "loss": 0.4873291850090027, + "step": 5710 + }, + { + "epoch": 1.6699809913730077, + "grad_norm": 1.7366762629360202, + "learning_rate": 1.4555875241557426e-06, + "loss": 0.6487013101577759, + "step": 5711 + }, + { + "epoch": 1.670273431788273, + "grad_norm": 1.5436242943423213, + "learning_rate": 1.4530766603272695e-06, + "loss": 0.4624609351158142, + "step": 5712 + }, + { + "epoch": 1.6705658722035386, + "grad_norm": 1.8279691880026145, + "learning_rate": 1.4505677942901609e-06, + "loss": 0.5765592455863953, + "step": 5713 + }, + { + "epoch": 1.670858312618804, + "grad_norm": 1.6171369345434061, + "learning_rate": 1.4480609266308488e-06, + "loss": 0.6730339527130127, + "step": 5714 + }, + { + "epoch": 1.6711507530340692, + "grad_norm": 1.7616383708984635, + "learning_rate": 1.445556057935299e-06, + "loss": 0.6381770372390747, + "step": 5715 + }, + { + "epoch": 1.6714431934493348, + "grad_norm": 1.8270298461203718, + "learning_rate": 1.4430531887890076e-06, + "loss": 0.6236029863357544, + "step": 5716 + }, + { + "epoch": 1.6717356338646001, + "grad_norm": 1.8837930420569144, + "learning_rate": 1.4405523197770076e-06, + "loss": 0.521639347076416, + "step": 5717 + }, + { + "epoch": 1.6720280742798654, + "grad_norm": 1.651464369232987, + "learning_rate": 1.4380534514838596e-06, + 
"loss": 0.5912468433380127, + "step": 5718 + }, + { + "epoch": 1.672320514695131, + "grad_norm": 1.7062190862435904, + "learning_rate": 1.4355565844936602e-06, + "loss": 0.5533329248428345, + "step": 5719 + }, + { + "epoch": 1.672612955110396, + "grad_norm": 1.8368834191923704, + "learning_rate": 1.4330617193900365e-06, + "loss": 0.5901006460189819, + "step": 5720 + }, + { + "epoch": 1.6729053955256616, + "grad_norm": 1.7501848609248272, + "learning_rate": 1.4305688567561503e-06, + "loss": 0.5083344578742981, + "step": 5721 + }, + { + "epoch": 1.6731978359409272, + "grad_norm": 1.359271864269329, + "learning_rate": 1.4280779971746894e-06, + "loss": 0.4443317651748657, + "step": 5722 + }, + { + "epoch": 1.6734902763561923, + "grad_norm": 1.931591797384203, + "learning_rate": 1.4255891412278778e-06, + "loss": 0.6355078220367432, + "step": 5723 + }, + { + "epoch": 1.6737827167714578, + "grad_norm": 1.751894451134603, + "learning_rate": 1.423102289497471e-06, + "loss": 0.5403381586074829, + "step": 5724 + }, + { + "epoch": 1.6740751571867232, + "grad_norm": 1.7703719298346523, + "learning_rate": 1.4206174425647556e-06, + "loss": 0.5272151231765747, + "step": 5725 + }, + { + "epoch": 1.6743675976019885, + "grad_norm": 1.6453865684399975, + "learning_rate": 1.41813460101055e-06, + "loss": 0.6750346422195435, + "step": 5726 + }, + { + "epoch": 1.674660038017254, + "grad_norm": 1.7471006401236766, + "learning_rate": 1.4156537654152026e-06, + "loss": 0.552655816078186, + "step": 5727 + }, + { + "epoch": 1.6749524784325194, + "grad_norm": 1.5245393452927156, + "learning_rate": 1.4131749363585933e-06, + "loss": 0.3947732448577881, + "step": 5728 + }, + { + "epoch": 1.6752449188477847, + "grad_norm": 1.4115886314708204, + "learning_rate": 1.4106981144201337e-06, + "loss": 0.4910270571708679, + "step": 5729 + }, + { + "epoch": 1.6755373592630503, + "grad_norm": 1.9268335112778272, + "learning_rate": 1.408223300178767e-06, + "loss": 0.5717943906784058, + "step": 5730 + }, + { + 
"epoch": 1.6758297996783156, + "grad_norm": 1.6782914146067396, + "learning_rate": 1.4057504942129652e-06, + "loss": 0.4993055462837219, + "step": 5731 + }, + { + "epoch": 1.676122240093581, + "grad_norm": 1.851203153701759, + "learning_rate": 1.4032796971007322e-06, + "loss": 0.4772619605064392, + "step": 5732 + }, + { + "epoch": 1.6764146805088465, + "grad_norm": 1.6250346930838577, + "learning_rate": 1.400810909419601e-06, + "loss": 0.3824518322944641, + "step": 5733 + }, + { + "epoch": 1.6767071209241116, + "grad_norm": 1.815105841906862, + "learning_rate": 1.398344131746634e-06, + "loss": 0.5302368402481079, + "step": 5734 + }, + { + "epoch": 1.676999561339377, + "grad_norm": 1.738439871277175, + "learning_rate": 1.3958793646584279e-06, + "loss": 0.5776697397232056, + "step": 5735 + }, + { + "epoch": 1.6772920017546424, + "grad_norm": 1.7943869884408015, + "learning_rate": 1.3934166087311063e-06, + "loss": 0.53890061378479, + "step": 5736 + }, + { + "epoch": 1.6775844421699078, + "grad_norm": 1.71821325954837, + "learning_rate": 1.3909558645403243e-06, + "loss": 0.47210827469825745, + "step": 5737 + }, + { + "epoch": 1.6778768825851733, + "grad_norm": 1.8689211559459666, + "learning_rate": 1.388497132661264e-06, + "loss": 0.6020913124084473, + "step": 5738 + }, + { + "epoch": 1.6781693230004386, + "grad_norm": 1.8612626444994878, + "learning_rate": 1.3860404136686411e-06, + "loss": 0.4244590997695923, + "step": 5739 + }, + { + "epoch": 1.678461763415704, + "grad_norm": 1.8281285744352933, + "learning_rate": 1.3835857081366965e-06, + "loss": 0.5969624519348145, + "step": 5740 + }, + { + "epoch": 1.6787542038309695, + "grad_norm": 1.774375495611947, + "learning_rate": 1.3811330166392057e-06, + "loss": 0.6573030352592468, + "step": 5741 + }, + { + "epoch": 1.6790466442462348, + "grad_norm": 1.641770470616675, + "learning_rate": 1.3786823397494675e-06, + "loss": 0.4251132905483246, + "step": 5742 + }, + { + "epoch": 1.6793390846615002, + "grad_norm": 
1.504067842347657, + "learning_rate": 1.3762336780403163e-06, + "loss": 0.5555700659751892, + "step": 5743 + }, + { + "epoch": 1.6796315250767657, + "grad_norm": 2.1042602241156128, + "learning_rate": 1.3737870320841073e-06, + "loss": 0.5651364326477051, + "step": 5744 + }, + { + "epoch": 1.679923965492031, + "grad_norm": 1.7135873981198582, + "learning_rate": 1.371342402452731e-06, + "loss": 0.6283698678016663, + "step": 5745 + }, + { + "epoch": 1.6802164059072964, + "grad_norm": 1.4720135811876174, + "learning_rate": 1.3688997897176037e-06, + "loss": 0.47864413261413574, + "step": 5746 + }, + { + "epoch": 1.680508846322562, + "grad_norm": 1.9268157997034314, + "learning_rate": 1.366459194449674e-06, + "loss": 0.6254131197929382, + "step": 5747 + }, + { + "epoch": 1.680801286737827, + "grad_norm": 1.4626288934383243, + "learning_rate": 1.364020617219415e-06, + "loss": 0.35147637128829956, + "step": 5748 + }, + { + "epoch": 1.6810937271530926, + "grad_norm": 1.9627536321629568, + "learning_rate": 1.3615840585968287e-06, + "loss": 0.6126410961151123, + "step": 5749 + }, + { + "epoch": 1.681386167568358, + "grad_norm": 1.7732443093164585, + "learning_rate": 1.359149519151447e-06, + "loss": 0.5807974338531494, + "step": 5750 + }, + { + "epoch": 1.6816786079836232, + "grad_norm": 1.5024396656291241, + "learning_rate": 1.3567169994523277e-06, + "loss": 0.5033349990844727, + "step": 5751 + }, + { + "epoch": 1.6819710483988888, + "grad_norm": 1.5694630419560385, + "learning_rate": 1.3542865000680604e-06, + "loss": 0.47656023502349854, + "step": 5752 + }, + { + "epoch": 1.682263488814154, + "grad_norm": 1.6495545571688441, + "learning_rate": 1.3518580215667542e-06, + "loss": 0.5137293338775635, + "step": 5753 + }, + { + "epoch": 1.6825559292294194, + "grad_norm": 1.5950800812601422, + "learning_rate": 1.3494315645160539e-06, + "loss": 0.4636800289154053, + "step": 5754 + }, + { + "epoch": 1.682848369644685, + "grad_norm": 1.937366310273075, + "learning_rate": 
1.3470071294831289e-06, + "loss": 0.5825523138046265, + "step": 5755 + }, + { + "epoch": 1.6831408100599503, + "grad_norm": 1.798274160020649, + "learning_rate": 1.344584717034677e-06, + "loss": 0.49282288551330566, + "step": 5756 + }, + { + "epoch": 1.6834332504752156, + "grad_norm": 1.6393172330125654, + "learning_rate": 1.3421643277369211e-06, + "loss": 0.5551935434341431, + "step": 5757 + }, + { + "epoch": 1.6837256908904812, + "grad_norm": 1.7844394306187494, + "learning_rate": 1.339745962155613e-06, + "loss": 0.6423832178115845, + "step": 5758 + }, + { + "epoch": 1.6840181313057463, + "grad_norm": 1.7061841923170233, + "learning_rate": 1.3373296208560316e-06, + "loss": 0.6178075671195984, + "step": 5759 + }, + { + "epoch": 1.6843105717210118, + "grad_norm": 1.7099687312543272, + "learning_rate": 1.3349153044029816e-06, + "loss": 0.6781176328659058, + "step": 5760 + }, + { + "epoch": 1.6846030121362774, + "grad_norm": 1.837996192806761, + "learning_rate": 1.332503013360794e-06, + "loss": 0.6511910557746887, + "step": 5761 + }, + { + "epoch": 1.6848954525515425, + "grad_norm": 1.3861294384859772, + "learning_rate": 1.3300927482933279e-06, + "loss": 0.4980696141719818, + "step": 5762 + }, + { + "epoch": 1.685187892966808, + "grad_norm": 1.6100202697936232, + "learning_rate": 1.3276845097639702e-06, + "loss": 0.49176928400993347, + "step": 5763 + }, + { + "epoch": 1.6854803333820734, + "grad_norm": 1.893755486996651, + "learning_rate": 1.3252782983356272e-06, + "loss": 0.5198799967765808, + "step": 5764 + }, + { + "epoch": 1.6857727737973387, + "grad_norm": 1.4629269004624288, + "learning_rate": 1.322874114570739e-06, + "loss": 0.5058869123458862, + "step": 5765 + }, + { + "epoch": 1.6860652142126042, + "grad_norm": 1.7429625548536576, + "learning_rate": 1.3204719590312698e-06, + "loss": 0.46573105454444885, + "step": 5766 + }, + { + "epoch": 1.6863576546278696, + "grad_norm": 1.702952537068074, + "learning_rate": 1.3180718322787067e-06, + "loss": 
0.5033260583877563, + "step": 5767 + }, + { + "epoch": 1.686650095043135, + "grad_norm": 1.4816614067920655, + "learning_rate": 1.3156737348740655e-06, + "loss": 0.5306515693664551, + "step": 5768 + }, + { + "epoch": 1.6869425354584004, + "grad_norm": 1.5781671263541353, + "learning_rate": 1.313277667377888e-06, + "loss": 0.45660221576690674, + "step": 5769 + }, + { + "epoch": 1.6872349758736658, + "grad_norm": 1.8450654821638361, + "learning_rate": 1.3108836303502392e-06, + "loss": 0.5353757739067078, + "step": 5770 + }, + { + "epoch": 1.687527416288931, + "grad_norm": 1.719850536675987, + "learning_rate": 1.3084916243507118e-06, + "loss": 0.5415239930152893, + "step": 5771 + }, + { + "epoch": 1.6878198567041967, + "grad_norm": 1.7512787251419633, + "learning_rate": 1.3061016499384217e-06, + "loss": 0.5860229730606079, + "step": 5772 + }, + { + "epoch": 1.6881122971194618, + "grad_norm": 1.7755386406909037, + "learning_rate": 1.3037137076720107e-06, + "loss": 0.5524891018867493, + "step": 5773 + }, + { + "epoch": 1.6884047375347273, + "grad_norm": 1.9368287931198411, + "learning_rate": 1.3013277981096484e-06, + "loss": 0.5557498931884766, + "step": 5774 + }, + { + "epoch": 1.6886971779499926, + "grad_norm": 1.6092314723070207, + "learning_rate": 1.2989439218090227e-06, + "loss": 0.45877397060394287, + "step": 5775 + }, + { + "epoch": 1.688989618365258, + "grad_norm": 1.587163397973365, + "learning_rate": 1.2965620793273515e-06, + "loss": 0.5310335159301758, + "step": 5776 + }, + { + "epoch": 1.6892820587805235, + "grad_norm": 1.4561579735469703, + "learning_rate": 1.294182271221377e-06, + "loss": 0.4855915904045105, + "step": 5777 + }, + { + "epoch": 1.6895744991957888, + "grad_norm": 1.5707049211364605, + "learning_rate": 1.2918044980473643e-06, + "loss": 0.6070747971534729, + "step": 5778 + }, + { + "epoch": 1.6898669396110542, + "grad_norm": 1.6739979529118527, + "learning_rate": 1.2894287603611033e-06, + "loss": 0.5108609795570374, + "step": 5779 + }, + { + 
"epoch": 1.6901593800263197, + "grad_norm": 1.7647615756485302, + "learning_rate": 1.2870550587179087e-06, + "loss": 0.49141189455986023, + "step": 5780 + }, + { + "epoch": 1.690451820441585, + "grad_norm": 1.4176033732152467, + "learning_rate": 1.2846833936726178e-06, + "loss": 0.4239678382873535, + "step": 5781 + }, + { + "epoch": 1.6907442608568504, + "grad_norm": 1.8431022697656632, + "learning_rate": 1.2823137657795948e-06, + "loss": 0.6348937153816223, + "step": 5782 + }, + { + "epoch": 1.691036701272116, + "grad_norm": 1.4853353146024342, + "learning_rate": 1.2799461755927233e-06, + "loss": 0.4561845064163208, + "step": 5783 + }, + { + "epoch": 1.6913291416873812, + "grad_norm": 2.1521785942560197, + "learning_rate": 1.2775806236654153e-06, + "loss": 0.5663880109786987, + "step": 5784 + }, + { + "epoch": 1.6916215821026466, + "grad_norm": 1.5729874297711008, + "learning_rate": 1.275217110550604e-06, + "loss": 0.5200550556182861, + "step": 5785 + }, + { + "epoch": 1.6919140225179121, + "grad_norm": 1.7072937541006934, + "learning_rate": 1.2728556368007461e-06, + "loss": 0.5401214361190796, + "step": 5786 + }, + { + "epoch": 1.6922064629331772, + "grad_norm": 2.0089316276908917, + "learning_rate": 1.2704962029678202e-06, + "loss": 0.5409752130508423, + "step": 5787 + }, + { + "epoch": 1.6924989033484428, + "grad_norm": 1.6551832796501305, + "learning_rate": 1.2681388096033298e-06, + "loss": 0.46215158700942993, + "step": 5788 + }, + { + "epoch": 1.692791343763708, + "grad_norm": 1.508586050733543, + "learning_rate": 1.2657834572583027e-06, + "loss": 0.44687867164611816, + "step": 5789 + }, + { + "epoch": 1.6930837841789734, + "grad_norm": 1.5112474922130816, + "learning_rate": 1.2634301464832877e-06, + "loss": 0.47882723808288574, + "step": 5790 + }, + { + "epoch": 1.693376224594239, + "grad_norm": 2.0838428918534264, + "learning_rate": 1.2610788778283567e-06, + "loss": 0.6108201742172241, + "step": 5791 + }, + { + "epoch": 1.6936686650095043, + "grad_norm": 
1.4370335670353505, + "learning_rate": 1.2587296518431036e-06, + "loss": 0.45024657249450684, + "step": 5792 + }, + { + "epoch": 1.6939611054247696, + "grad_norm": 2.263053324487421, + "learning_rate": 1.256382469076648e-06, + "loss": 0.6746254563331604, + "step": 5793 + }, + { + "epoch": 1.6942535458400352, + "grad_norm": 1.7423805800598553, + "learning_rate": 1.2540373300776264e-06, + "loss": 0.6439248323440552, + "step": 5794 + }, + { + "epoch": 1.6945459862553005, + "grad_norm": 1.5101648188878154, + "learning_rate": 1.251694235394204e-06, + "loss": 0.467510461807251, + "step": 5795 + }, + { + "epoch": 1.6948384266705658, + "grad_norm": 2.0083904845815117, + "learning_rate": 1.2493531855740626e-06, + "loss": 0.5509516596794128, + "step": 5796 + }, + { + "epoch": 1.6951308670858314, + "grad_norm": 1.6291523574406077, + "learning_rate": 1.247014181164412e-06, + "loss": 0.49178194999694824, + "step": 5797 + }, + { + "epoch": 1.6954233075010965, + "grad_norm": 1.6626228068208797, + "learning_rate": 1.2446772227119753e-06, + "loss": 0.4825005531311035, + "step": 5798 + }, + { + "epoch": 1.695715747916362, + "grad_norm": 1.7609982762736733, + "learning_rate": 1.242342310763005e-06, + "loss": 0.7441064715385437, + "step": 5799 + }, + { + "epoch": 1.6960081883316276, + "grad_norm": 1.6166055740202077, + "learning_rate": 1.2400094458632717e-06, + "loss": 0.5020110011100769, + "step": 5800 + }, + { + "epoch": 1.6963006287468927, + "grad_norm": 1.6328086702132818, + "learning_rate": 1.237678628558069e-06, + "loss": 0.5439830422401428, + "step": 5801 + }, + { + "epoch": 1.6965930691621582, + "grad_norm": 2.0549338843530136, + "learning_rate": 1.235349859392211e-06, + "loss": 0.6235179901123047, + "step": 5802 + }, + { + "epoch": 1.6968855095774236, + "grad_norm": 1.7141848290041162, + "learning_rate": 1.2330231389100323e-06, + "loss": 0.6176612377166748, + "step": 5803 + }, + { + "epoch": 1.697177949992689, + "grad_norm": 1.7381769122607003, + "learning_rate": 
1.2306984676553924e-06, + "loss": 0.5956840515136719, + "step": 5804 + }, + { + "epoch": 1.6974703904079544, + "grad_norm": 1.6857909163061566, + "learning_rate": 1.2283758461716667e-06, + "loss": 0.5025947690010071, + "step": 5805 + }, + { + "epoch": 1.6977628308232198, + "grad_norm": 1.507035347865144, + "learning_rate": 1.2260552750017551e-06, + "loss": 0.5772436857223511, + "step": 5806 + }, + { + "epoch": 1.698055271238485, + "grad_norm": 1.6665432076063584, + "learning_rate": 1.223736754688075e-06, + "loss": 0.4336615204811096, + "step": 5807 + }, + { + "epoch": 1.6983477116537506, + "grad_norm": 1.695081220374435, + "learning_rate": 1.221420285772572e-06, + "loss": 0.5697668790817261, + "step": 5808 + }, + { + "epoch": 1.698640152069016, + "grad_norm": 1.8545475442236217, + "learning_rate": 1.2191058687966995e-06, + "loss": 0.4966861605644226, + "step": 5809 + }, + { + "epoch": 1.6989325924842813, + "grad_norm": 1.777484506048346, + "learning_rate": 1.2167935043014411e-06, + "loss": 0.5805951952934265, + "step": 5810 + }, + { + "epoch": 1.6992250328995469, + "grad_norm": 1.6055305498040644, + "learning_rate": 1.2144831928272994e-06, + "loss": 0.4669906497001648, + "step": 5811 + }, + { + "epoch": 1.699517473314812, + "grad_norm": 1.7730179282571827, + "learning_rate": 1.212174934914294e-06, + "loss": 0.5630965828895569, + "step": 5812 + }, + { + "epoch": 1.6998099137300775, + "grad_norm": 1.7272395334456936, + "learning_rate": 1.2098687311019663e-06, + "loss": 0.5345104932785034, + "step": 5813 + }, + { + "epoch": 1.7001023541453428, + "grad_norm": 1.9547814584710963, + "learning_rate": 1.207564581929378e-06, + "loss": 0.5760249495506287, + "step": 5814 + }, + { + "epoch": 1.7003947945606082, + "grad_norm": 1.563397994600299, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.506635308265686, + "step": 5815 + }, + { + "epoch": 1.7006872349758737, + "grad_norm": 1.792775034126629, + "learning_rate": 1.2029624496572622e-06, + "loss": 0.5107032656669617, + 
"step": 5816 + }, + { + "epoch": 1.700979675391139, + "grad_norm": 1.5891211780153636, + "learning_rate": 1.2006644676334557e-06, + "loss": 0.5888187885284424, + "step": 5817 + }, + { + "epoch": 1.7012721158064044, + "grad_norm": 1.8008314810247776, + "learning_rate": 1.1983685424008285e-06, + "loss": 0.5326075553894043, + "step": 5818 + }, + { + "epoch": 1.70156455622167, + "grad_norm": 1.6515493940564925, + "learning_rate": 1.1960746744960417e-06, + "loss": 0.5097993612289429, + "step": 5819 + }, + { + "epoch": 1.7018569966369352, + "grad_norm": 1.6532256911128915, + "learning_rate": 1.1937828644552696e-06, + "loss": 0.6001093983650208, + "step": 5820 + }, + { + "epoch": 1.7021494370522006, + "grad_norm": 1.7728326525757572, + "learning_rate": 1.1914931128142072e-06, + "loss": 0.513684093952179, + "step": 5821 + }, + { + "epoch": 1.7024418774674661, + "grad_norm": 1.6118848482453871, + "learning_rate": 1.189205420108076e-06, + "loss": 0.4688597321510315, + "step": 5822 + }, + { + "epoch": 1.7027343178827314, + "grad_norm": 1.6755720349462948, + "learning_rate": 1.1869197868716075e-06, + "loss": 0.4537498354911804, + "step": 5823 + }, + { + "epoch": 1.7030267582979968, + "grad_norm": 1.625108439053771, + "learning_rate": 1.1846362136390531e-06, + "loss": 0.43031078577041626, + "step": 5824 + }, + { + "epoch": 1.7033191987132623, + "grad_norm": 1.9244406734438975, + "learning_rate": 1.182354700944187e-06, + "loss": 0.5139330625534058, + "step": 5825 + }, + { + "epoch": 1.7036116391285274, + "grad_norm": 1.7897651312393703, + "learning_rate": 1.180075249320296e-06, + "loss": 0.6542010307312012, + "step": 5826 + }, + { + "epoch": 1.703904079543793, + "grad_norm": 1.6365189888188503, + "learning_rate": 1.1777978593001903e-06, + "loss": 0.5371676087379456, + "step": 5827 + }, + { + "epoch": 1.7041965199590583, + "grad_norm": 1.6793659914593386, + "learning_rate": 1.1755225314161967e-06, + "loss": 0.47583359479904175, + "step": 5828 + }, + { + "epoch": 
1.7044889603743236, + "grad_norm": 1.7363884838234833, + "learning_rate": 1.173249266200156e-06, + "loss": 0.5471247434616089, + "step": 5829 + }, + { + "epoch": 1.7047814007895892, + "grad_norm": 1.850508925320166, + "learning_rate": 1.1709780641834323e-06, + "loss": 0.5095713138580322, + "step": 5830 + }, + { + "epoch": 1.7050738412048545, + "grad_norm": 1.5373790027628114, + "learning_rate": 1.1687089258969041e-06, + "loss": 0.41944777965545654, + "step": 5831 + }, + { + "epoch": 1.7053662816201198, + "grad_norm": 1.5434472143224902, + "learning_rate": 1.1664418518709697e-06, + "loss": 0.42380404472351074, + "step": 5832 + }, + { + "epoch": 1.7056587220353854, + "grad_norm": 1.8798510100106, + "learning_rate": 1.1641768426355427e-06, + "loss": 0.5688038468360901, + "step": 5833 + }, + { + "epoch": 1.7059511624506507, + "grad_norm": 1.6396391570153137, + "learning_rate": 1.1619138987200562e-06, + "loss": 0.5432788133621216, + "step": 5834 + }, + { + "epoch": 1.706243602865916, + "grad_norm": 1.699260651340017, + "learning_rate": 1.1596530206534606e-06, + "loss": 0.5408512949943542, + "step": 5835 + }, + { + "epoch": 1.7065360432811816, + "grad_norm": 1.5364052920051108, + "learning_rate": 1.1573942089642198e-06, + "loss": 0.5149247646331787, + "step": 5836 + }, + { + "epoch": 1.7068284836964467, + "grad_norm": 1.6490213140214325, + "learning_rate": 1.1551374641803193e-06, + "loss": 0.36905592679977417, + "step": 5837 + }, + { + "epoch": 1.7071209241117122, + "grad_norm": 1.7960598101415164, + "learning_rate": 1.152882786829259e-06, + "loss": 0.5370720624923706, + "step": 5838 + }, + { + "epoch": 1.7074133645269778, + "grad_norm": 1.5874644037104577, + "learning_rate": 1.1506301774380578e-06, + "loss": 0.4535629153251648, + "step": 5839 + }, + { + "epoch": 1.7077058049422429, + "grad_norm": 1.79916689116012, + "learning_rate": 1.1483796365332455e-06, + "loss": 0.5456075668334961, + "step": 5840 + }, + { + "epoch": 1.7079982453575084, + "grad_norm": 
1.4286640626946725, + "learning_rate": 1.1461311646408756e-06, + "loss": 0.5884554386138916, + "step": 5841 + }, + { + "epoch": 1.7082906857727738, + "grad_norm": 1.6397329737807809, + "learning_rate": 1.1438847622865125e-06, + "loss": 0.605168879032135, + "step": 5842 + }, + { + "epoch": 1.708583126188039, + "grad_norm": 1.5178839829112376, + "learning_rate": 1.14164042999524e-06, + "loss": 0.43739163875579834, + "step": 5843 + }, + { + "epoch": 1.7088755666033046, + "grad_norm": 1.46949260133067, + "learning_rate": 1.1393981682916578e-06, + "loss": 0.4508574306964874, + "step": 5844 + }, + { + "epoch": 1.70916800701857, + "grad_norm": 1.822138537734332, + "learning_rate": 1.1371579776998798e-06, + "loss": 0.5918034315109253, + "step": 5845 + }, + { + "epoch": 1.7094604474338353, + "grad_norm": 2.0746386130567873, + "learning_rate": 1.1349198587435373e-06, + "loss": 0.5668582320213318, + "step": 5846 + }, + { + "epoch": 1.7097528878491008, + "grad_norm": 1.9282537614980426, + "learning_rate": 1.1326838119457784e-06, + "loss": 0.6374846696853638, + "step": 5847 + }, + { + "epoch": 1.7100453282643662, + "grad_norm": 1.8903891011788552, + "learning_rate": 1.130449837829264e-06, + "loss": 0.5074985027313232, + "step": 5848 + }, + { + "epoch": 1.7103377686796315, + "grad_norm": 1.5190057242638555, + "learning_rate": 1.1282179369161717e-06, + "loss": 0.5012484788894653, + "step": 5849 + }, + { + "epoch": 1.710630209094897, + "grad_norm": 1.632090745734556, + "learning_rate": 1.1259881097281977e-06, + "loss": 0.4417869746685028, + "step": 5850 + }, + { + "epoch": 1.7109226495101622, + "grad_norm": 1.8294483106085377, + "learning_rate": 1.1237603567865452e-06, + "loss": 0.6032637357711792, + "step": 5851 + }, + { + "epoch": 1.7112150899254277, + "grad_norm": 1.688609377749929, + "learning_rate": 1.121534678611942e-06, + "loss": 0.5790234804153442, + "step": 5852 + }, + { + "epoch": 1.711507530340693, + "grad_norm": 1.591889646615377, + "learning_rate": 
1.1193110757246251e-06, + "loss": 0.5436397194862366, + "step": 5853 + }, + { + "epoch": 1.7117999707559584, + "grad_norm": 1.7117652881589365, + "learning_rate": 1.11708954864435e-06, + "loss": 0.5088083744049072, + "step": 5854 + }, + { + "epoch": 1.712092411171224, + "grad_norm": 1.889980799223528, + "learning_rate": 1.1148700978903826e-06, + "loss": 0.5907719135284424, + "step": 5855 + }, + { + "epoch": 1.7123848515864892, + "grad_norm": 1.567722853126729, + "learning_rate": 1.1126527239815078e-06, + "loss": 0.4744384288787842, + "step": 5856 + }, + { + "epoch": 1.7126772920017546, + "grad_norm": 1.9312865174889629, + "learning_rate": 1.110437427436023e-06, + "loss": 0.6644346714019775, + "step": 5857 + }, + { + "epoch": 1.71296973241702, + "grad_norm": 1.6765623554239069, + "learning_rate": 1.10822420877174e-06, + "loss": 0.4926042854785919, + "step": 5858 + }, + { + "epoch": 1.7132621728322854, + "grad_norm": 1.6722485452227753, + "learning_rate": 1.1060130685059845e-06, + "loss": 0.47684335708618164, + "step": 5859 + }, + { + "epoch": 1.7135546132475508, + "grad_norm": 1.8519560247307543, + "learning_rate": 1.1038040071555988e-06, + "loss": 0.5574014186859131, + "step": 5860 + }, + { + "epoch": 1.7138470536628163, + "grad_norm": 1.737717748392033, + "learning_rate": 1.101597025236939e-06, + "loss": 0.6276485323905945, + "step": 5861 + }, + { + "epoch": 1.7141394940780816, + "grad_norm": 1.7853097232505406, + "learning_rate": 1.099392123265869e-06, + "loss": 0.558611273765564, + "step": 5862 + }, + { + "epoch": 1.714431934493347, + "grad_norm": 1.8318989515664625, + "learning_rate": 1.097189301757773e-06, + "loss": 0.5561566948890686, + "step": 5863 + }, + { + "epoch": 1.7147243749086125, + "grad_norm": 1.7772127580066208, + "learning_rate": 1.094988561227548e-06, + "loss": 0.5360273122787476, + "step": 5864 + }, + { + "epoch": 1.7150168153238776, + "grad_norm": 1.9869672499266697, + "learning_rate": 1.0927899021896038e-06, + "loss": 0.5572026968002319, + 
"step": 5865 + }, + { + "epoch": 1.7153092557391432, + "grad_norm": 1.696631763346783, + "learning_rate": 1.0905933251578626e-06, + "loss": 0.4593105912208557, + "step": 5866 + }, + { + "epoch": 1.7156016961544085, + "grad_norm": 1.7954251083275348, + "learning_rate": 1.0883988306457627e-06, + "loss": 0.5017558336257935, + "step": 5867 + }, + { + "epoch": 1.7158941365696738, + "grad_norm": 1.6294086582619267, + "learning_rate": 1.0862064191662524e-06, + "loss": 0.4982030391693115, + "step": 5868 + }, + { + "epoch": 1.7161865769849394, + "grad_norm": 1.5832146918310879, + "learning_rate": 1.0840160912317943e-06, + "loss": 0.5563114881515503, + "step": 5869 + }, + { + "epoch": 1.7164790174002047, + "grad_norm": 1.6522408781609719, + "learning_rate": 1.0818278473543652e-06, + "loss": 0.4817348122596741, + "step": 5870 + }, + { + "epoch": 1.71677145781547, + "grad_norm": 1.6923338540004815, + "learning_rate": 1.079641688045453e-06, + "loss": 0.47907108068466187, + "step": 5871 + }, + { + "epoch": 1.7170638982307356, + "grad_norm": 1.985278987997586, + "learning_rate": 1.0774576138160596e-06, + "loss": 0.6158252954483032, + "step": 5872 + }, + { + "epoch": 1.717356338646001, + "grad_norm": 1.712800633970283, + "learning_rate": 1.0752756251767015e-06, + "loss": 0.5336505174636841, + "step": 5873 + }, + { + "epoch": 1.7176487790612662, + "grad_norm": 1.6889119428738892, + "learning_rate": 1.0730957226374006e-06, + "loss": 0.5806115865707397, + "step": 5874 + }, + { + "epoch": 1.7179412194765318, + "grad_norm": 1.7163109676688793, + "learning_rate": 1.070917906707698e-06, + "loss": 0.3701411485671997, + "step": 5875 + }, + { + "epoch": 1.7182336598917969, + "grad_norm": 1.5519162070562529, + "learning_rate": 1.0687421778966445e-06, + "loss": 0.5779517292976379, + "step": 5876 + }, + { + "epoch": 1.7185261003070624, + "grad_norm": 1.5444011974555767, + "learning_rate": 1.0665685367128041e-06, + "loss": 0.43965232372283936, + "step": 5877 + }, + { + "epoch": 
1.718818540722328, + "grad_norm": 1.7154722678485648, + "learning_rate": 1.064396983664253e-06, + "loss": 0.4768058657646179, + "step": 5878 + }, + { + "epoch": 1.719110981137593, + "grad_norm": 1.6286437020829267, + "learning_rate": 1.0622275192585773e-06, + "loss": 0.5331600904464722, + "step": 5879 + }, + { + "epoch": 1.7194034215528586, + "grad_norm": 1.6603687606186237, + "learning_rate": 1.0600601440028758e-06, + "loss": 0.5495625734329224, + "step": 5880 + }, + { + "epoch": 1.719695861968124, + "grad_norm": 1.6915455937474744, + "learning_rate": 1.0578948584037608e-06, + "loss": 0.4244312345981598, + "step": 5881 + }, + { + "epoch": 1.7199883023833893, + "grad_norm": 1.7562786480710206, + "learning_rate": 1.0557316629673531e-06, + "loss": 0.4618447721004486, + "step": 5882 + }, + { + "epoch": 1.7202807427986548, + "grad_norm": 1.3835850144546908, + "learning_rate": 1.0535705581992873e-06, + "loss": 0.4226785898208618, + "step": 5883 + }, + { + "epoch": 1.7205731832139202, + "grad_norm": 1.8373576265806915, + "learning_rate": 1.0514115446047101e-06, + "loss": 0.5813404321670532, + "step": 5884 + }, + { + "epoch": 1.7208656236291855, + "grad_norm": 1.774672318962678, + "learning_rate": 1.0492546226882738e-06, + "loss": 0.6700260639190674, + "step": 5885 + }, + { + "epoch": 1.721158064044451, + "grad_norm": 1.8100136828076652, + "learning_rate": 1.0470997929541494e-06, + "loss": 0.6024131178855896, + "step": 5886 + }, + { + "epoch": 1.7214505044597164, + "grad_norm": 1.8033126749427817, + "learning_rate": 1.0449470559060125e-06, + "loss": 0.6015123724937439, + "step": 5887 + }, + { + "epoch": 1.7217429448749817, + "grad_norm": 1.714487906410119, + "learning_rate": 1.0427964120470534e-06, + "loss": 0.6631267070770264, + "step": 5888 + }, + { + "epoch": 1.7220353852902472, + "grad_norm": 1.7445362923992234, + "learning_rate": 1.0406478618799731e-06, + "loss": 0.5267488956451416, + "step": 5889 + }, + { + "epoch": 1.7223278257055123, + "grad_norm": 
1.6683876570881706, + "learning_rate": 1.038501405906982e-06, + "loss": 0.5190263986587524, + "step": 5890 + }, + { + "epoch": 1.722620266120778, + "grad_norm": 1.6678272928853268, + "learning_rate": 1.0363570446297999e-06, + "loss": 0.5253189206123352, + "step": 5891 + }, + { + "epoch": 1.7229127065360432, + "grad_norm": 1.6306770585402846, + "learning_rate": 1.0342147785496581e-06, + "loss": 0.5271278619766235, + "step": 5892 + }, + { + "epoch": 1.7232051469513086, + "grad_norm": 1.7373442044536598, + "learning_rate": 1.0320746081672994e-06, + "loss": 0.5284109711647034, + "step": 5893 + }, + { + "epoch": 1.723497587366574, + "grad_norm": 1.764049872395232, + "learning_rate": 1.0299365339829747e-06, + "loss": 0.6119050979614258, + "step": 5894 + }, + { + "epoch": 1.7237900277818394, + "grad_norm": 1.583925980020329, + "learning_rate": 1.0278005564964488e-06, + "loss": 0.42297711968421936, + "step": 5895 + }, + { + "epoch": 1.7240824681971048, + "grad_norm": 1.7105013452989373, + "learning_rate": 1.02566667620699e-06, + "loss": 0.5923792123794556, + "step": 5896 + }, + { + "epoch": 1.7243749086123703, + "grad_norm": 1.6831938137571334, + "learning_rate": 1.023534893613377e-06, + "loss": 0.4999189078807831, + "step": 5897 + }, + { + "epoch": 1.7246673490276356, + "grad_norm": 1.6907699986400666, + "learning_rate": 1.0214052092139082e-06, + "loss": 0.49083560705184937, + "step": 5898 + }, + { + "epoch": 1.724959789442901, + "grad_norm": 3.9391609144586437, + "learning_rate": 1.0192776235063795e-06, + "loss": 0.6001632213592529, + "step": 5899 + }, + { + "epoch": 1.7252522298581665, + "grad_norm": 1.473933103211581, + "learning_rate": 1.0171521369881044e-06, + "loss": 0.4897228479385376, + "step": 5900 + }, + { + "epoch": 1.7255446702734318, + "grad_norm": 1.6762179044603425, + "learning_rate": 1.0150287501558997e-06, + "loss": 0.44784292578697205, + "step": 5901 + }, + { + "epoch": 1.7258371106886972, + "grad_norm": 1.542625612657722, + "learning_rate": 
1.0129074635060943e-06, + "loss": 0.46105432510375977, + "step": 5902 + }, + { + "epoch": 1.7261295511039627, + "grad_norm": 1.9028079699425045, + "learning_rate": 1.0107882775345278e-06, + "loss": 0.5805546045303345, + "step": 5903 + }, + { + "epoch": 1.7264219915192278, + "grad_norm": 1.719859761694945, + "learning_rate": 1.0086711927365488e-06, + "loss": 0.560761570930481, + "step": 5904 + }, + { + "epoch": 1.7267144319344934, + "grad_norm": 1.475103420661766, + "learning_rate": 1.006556209607007e-06, + "loss": 0.533979058265686, + "step": 5905 + }, + { + "epoch": 1.7270068723497587, + "grad_norm": 1.7039894712110264, + "learning_rate": 1.004443328640271e-06, + "loss": 0.5742807984352112, + "step": 5906 + }, + { + "epoch": 1.727299312765024, + "grad_norm": 1.9394101910903232, + "learning_rate": 1.0023325503302129e-06, + "loss": 0.5617523789405823, + "step": 5907 + }, + { + "epoch": 1.7275917531802896, + "grad_norm": 1.620137966655423, + "learning_rate": 1.0002238751702143e-06, + "loss": 0.45596855878829956, + "step": 5908 + }, + { + "epoch": 1.727884193595555, + "grad_norm": 1.523715810181856, + "learning_rate": 9.981173036531655e-07, + "loss": 0.4917908012866974, + "step": 5909 + }, + { + "epoch": 1.7281766340108202, + "grad_norm": 1.8089323806924067, + "learning_rate": 9.960128362714637e-07, + "loss": 0.6204911470413208, + "step": 5910 + }, + { + "epoch": 1.7284690744260858, + "grad_norm": 1.615074466715287, + "learning_rate": 9.93910473517018e-07, + "loss": 0.47288352251052856, + "step": 5911 + }, + { + "epoch": 1.728761514841351, + "grad_norm": 1.9414111913595387, + "learning_rate": 9.918102158812404e-07, + "loss": 0.48668670654296875, + "step": 5912 + }, + { + "epoch": 1.7290539552566164, + "grad_norm": 1.723740686191889, + "learning_rate": 9.89712063855054e-07, + "loss": 0.43311381340026855, + "step": 5913 + }, + { + "epoch": 1.729346395671882, + "grad_norm": 1.748275288399291, + "learning_rate": 9.876160179288886e-07, + "loss": 0.5066087245941162, + 
"step": 5914 + }, + { + "epoch": 1.729638836087147, + "grad_norm": 1.6099318260759374, + "learning_rate": 9.855220785926856e-07, + "loss": 0.6022528409957886, + "step": 5915 + }, + { + "epoch": 1.7299312765024126, + "grad_norm": 1.6809686879748886, + "learning_rate": 9.834302463358858e-07, + "loss": 0.5288707613945007, + "step": 5916 + }, + { + "epoch": 1.7302237169176782, + "grad_norm": 1.7087060764928856, + "learning_rate": 9.813405216474436e-07, + "loss": 0.6150302290916443, + "step": 5917 + }, + { + "epoch": 1.7305161573329433, + "grad_norm": 1.7234099983807605, + "learning_rate": 9.792529050158218e-07, + "loss": 0.5431156158447266, + "step": 5918 + }, + { + "epoch": 1.7308085977482088, + "grad_norm": 1.7871856102017598, + "learning_rate": 9.771673969289851e-07, + "loss": 0.6844080686569214, + "step": 5919 + }, + { + "epoch": 1.7311010381634742, + "grad_norm": 1.9483136158091563, + "learning_rate": 9.750839978744098e-07, + "loss": 0.4778372049331665, + "step": 5920 + }, + { + "epoch": 1.7313934785787395, + "grad_norm": 1.803034120095624, + "learning_rate": 9.73002708339077e-07, + "loss": 0.6913809776306152, + "step": 5921 + }, + { + "epoch": 1.731685918994005, + "grad_norm": 1.5934425203745812, + "learning_rate": 9.709235288094765e-07, + "loss": 0.6289864778518677, + "step": 5922 + }, + { + "epoch": 1.7319783594092704, + "grad_norm": 1.7803434049533318, + "learning_rate": 9.68846459771604e-07, + "loss": 0.4735794961452484, + "step": 5923 + }, + { + "epoch": 1.7322707998245357, + "grad_norm": 1.7329775832839742, + "learning_rate": 9.667715017109614e-07, + "loss": 0.53554767370224, + "step": 5924 + }, + { + "epoch": 1.7325632402398012, + "grad_norm": 1.9726609824515038, + "learning_rate": 9.64698655112557e-07, + "loss": 0.5118460655212402, + "step": 5925 + }, + { + "epoch": 1.7328556806550666, + "grad_norm": 1.819236864509276, + "learning_rate": 9.626279204609079e-07, + "loss": 0.5739814043045044, + "step": 5926 + }, + { + "epoch": 1.733148121070332, + 
"grad_norm": 1.6784156679062403, + "learning_rate": 9.605592982400325e-07, + "loss": 0.5716123580932617, + "step": 5927 + }, + { + "epoch": 1.7334405614855974, + "grad_norm": 1.7916971306174196, + "learning_rate": 9.584927889334605e-07, + "loss": 0.5091898441314697, + "step": 5928 + }, + { + "epoch": 1.7337330019008625, + "grad_norm": 1.6267503374739263, + "learning_rate": 9.564283930242258e-07, + "loss": 0.46946650743484497, + "step": 5929 + }, + { + "epoch": 1.734025442316128, + "grad_norm": 1.6625890698419732, + "learning_rate": 9.543661109948688e-07, + "loss": 0.6238217353820801, + "step": 5930 + }, + { + "epoch": 1.7343178827313934, + "grad_norm": 1.8870256552743607, + "learning_rate": 9.52305943327434e-07, + "loss": 0.7464175224304199, + "step": 5931 + }, + { + "epoch": 1.7346103231466588, + "grad_norm": 2.417177332317345, + "learning_rate": 9.502478905034751e-07, + "loss": 0.6064578294754028, + "step": 5932 + }, + { + "epoch": 1.7349027635619243, + "grad_norm": 1.5711166860907437, + "learning_rate": 9.481919530040484e-07, + "loss": 0.5703303813934326, + "step": 5933 + }, + { + "epoch": 1.7351952039771896, + "grad_norm": 1.773413757718004, + "learning_rate": 9.461381313097162e-07, + "loss": 0.5570278167724609, + "step": 5934 + }, + { + "epoch": 1.735487644392455, + "grad_norm": 1.795987369299435, + "learning_rate": 9.440864259005477e-07, + "loss": 0.54972243309021, + "step": 5935 + }, + { + "epoch": 1.7357800848077205, + "grad_norm": 1.6140356285907533, + "learning_rate": 9.420368372561161e-07, + "loss": 0.5670010447502136, + "step": 5936 + }, + { + "epoch": 1.7360725252229858, + "grad_norm": 1.642113144044588, + "learning_rate": 9.399893658555026e-07, + "loss": 0.5306927561759949, + "step": 5937 + }, + { + "epoch": 1.7363649656382512, + "grad_norm": 1.5565759572639428, + "learning_rate": 9.379440121772876e-07, + "loss": 0.5080308318138123, + "step": 5938 + }, + { + "epoch": 1.7366574060535167, + "grad_norm": 1.5576076668453387, + "learning_rate": 
9.359007766995609e-07, + "loss": 0.5444519519805908, + "step": 5939 + }, + { + "epoch": 1.736949846468782, + "grad_norm": 1.737287044912212, + "learning_rate": 9.338596598999172e-07, + "loss": 0.5353262424468994, + "step": 5940 + }, + { + "epoch": 1.7372422868840474, + "grad_norm": 1.5405646785157867, + "learning_rate": 9.318206622554549e-07, + "loss": 0.4766794443130493, + "step": 5941 + }, + { + "epoch": 1.737534727299313, + "grad_norm": 1.5818937282065444, + "learning_rate": 9.29783784242777e-07, + "loss": 0.4913482666015625, + "step": 5942 + }, + { + "epoch": 1.737827167714578, + "grad_norm": 1.5030657740252151, + "learning_rate": 9.277490263379918e-07, + "loss": 0.47637009620666504, + "step": 5943 + }, + { + "epoch": 1.7381196081298436, + "grad_norm": 1.8131560819786492, + "learning_rate": 9.25716389016712e-07, + "loss": 0.5122126936912537, + "step": 5944 + }, + { + "epoch": 1.738412048545109, + "grad_norm": 1.51801570238093, + "learning_rate": 9.236858727540543e-07, + "loss": 0.5263532400131226, + "step": 5945 + }, + { + "epoch": 1.7387044889603742, + "grad_norm": 1.607505719698849, + "learning_rate": 9.216574780246379e-07, + "loss": 0.5214182734489441, + "step": 5946 + }, + { + "epoch": 1.7389969293756398, + "grad_norm": 1.877073258708154, + "learning_rate": 9.196312053025891e-07, + "loss": 0.5955429077148438, + "step": 5947 + }, + { + "epoch": 1.739289369790905, + "grad_norm": 1.6543213511410424, + "learning_rate": 9.176070550615379e-07, + "loss": 0.4809807538986206, + "step": 5948 + }, + { + "epoch": 1.7395818102061704, + "grad_norm": 1.882804975326707, + "learning_rate": 9.155850277746148e-07, + "loss": 0.4769969582557678, + "step": 5949 + }, + { + "epoch": 1.739874250621436, + "grad_norm": 2.4545379886365954, + "learning_rate": 9.135651239144561e-07, + "loss": 0.48527538776397705, + "step": 5950 + }, + { + "epoch": 1.7401666910367013, + "grad_norm": 1.696389032166004, + "learning_rate": 9.115473439532041e-07, + "loss": 0.6703393459320068, + "step": 5951 
+ }, + { + "epoch": 1.7404591314519666, + "grad_norm": 1.807366721076005, + "learning_rate": 9.095316883625016e-07, + "loss": 0.5742951035499573, + "step": 5952 + }, + { + "epoch": 1.7407515718672322, + "grad_norm": 1.9552666950039521, + "learning_rate": 9.075181576134961e-07, + "loss": 0.6285614967346191, + "step": 5953 + }, + { + "epoch": 1.7410440122824973, + "grad_norm": 1.6961990538831806, + "learning_rate": 9.055067521768379e-07, + "loss": 0.5872488021850586, + "step": 5954 + }, + { + "epoch": 1.7413364526977628, + "grad_norm": 1.6900638369260592, + "learning_rate": 9.034974725226808e-07, + "loss": 0.5483776330947876, + "step": 5955 + }, + { + "epoch": 1.7416288931130284, + "grad_norm": 1.838843211951185, + "learning_rate": 9.014903191206825e-07, + "loss": 0.4913061261177063, + "step": 5956 + }, + { + "epoch": 1.7419213335282935, + "grad_norm": 1.6413412279440867, + "learning_rate": 8.994852924400022e-07, + "loss": 0.5431212186813354, + "step": 5957 + }, + { + "epoch": 1.742213773943559, + "grad_norm": 1.735940615294129, + "learning_rate": 8.974823929493015e-07, + "loss": 0.5391141176223755, + "step": 5958 + }, + { + "epoch": 1.7425062143588244, + "grad_norm": 1.455007956070738, + "learning_rate": 8.954816211167483e-07, + "loss": 0.48980265855789185, + "step": 5959 + }, + { + "epoch": 1.7427986547740897, + "grad_norm": 1.6465722416646151, + "learning_rate": 8.934829774100118e-07, + "loss": 0.6747336387634277, + "step": 5960 + }, + { + "epoch": 1.7430910951893552, + "grad_norm": 2.048914745001018, + "learning_rate": 8.914864622962582e-07, + "loss": 0.4911282956600189, + "step": 5961 + }, + { + "epoch": 1.7433835356046206, + "grad_norm": 1.6999465895023511, + "learning_rate": 8.894920762421644e-07, + "loss": 0.5863965153694153, + "step": 5962 + }, + { + "epoch": 1.7436759760198859, + "grad_norm": 1.6964011957158196, + "learning_rate": 8.87499819713904e-07, + "loss": 0.5413792729377747, + "step": 5963 + }, + { + "epoch": 1.7439684164351514, + "grad_norm": 
1.8650590121272839, + "learning_rate": 8.855096931771568e-07, + "loss": 0.5288723707199097, + "step": 5964 + }, + { + "epoch": 1.7442608568504168, + "grad_norm": 1.72339918808855, + "learning_rate": 8.835216970971006e-07, + "loss": 0.5129783749580383, + "step": 5965 + }, + { + "epoch": 1.744553297265682, + "grad_norm": 1.7489856693904517, + "learning_rate": 8.815358319384193e-07, + "loss": 0.5606918334960938, + "step": 5966 + }, + { + "epoch": 1.7448457376809476, + "grad_norm": 1.9036006380739827, + "learning_rate": 8.79552098165296e-07, + "loss": 0.6277288198471069, + "step": 5967 + }, + { + "epoch": 1.7451381780962127, + "grad_norm": 1.7432749923566282, + "learning_rate": 8.775704962414167e-07, + "loss": 0.5390176773071289, + "step": 5968 + }, + { + "epoch": 1.7454306185114783, + "grad_norm": 1.8645943677337757, + "learning_rate": 8.755910266299684e-07, + "loss": 0.680462121963501, + "step": 5969 + }, + { + "epoch": 1.7457230589267436, + "grad_norm": 1.678166381653785, + "learning_rate": 8.736136897936398e-07, + "loss": 0.5134397149085999, + "step": 5970 + }, + { + "epoch": 1.746015499342009, + "grad_norm": 1.796274905651791, + "learning_rate": 8.716384861946248e-07, + "loss": 0.6280460357666016, + "step": 5971 + }, + { + "epoch": 1.7463079397572745, + "grad_norm": 1.8396010080181593, + "learning_rate": 8.696654162946094e-07, + "loss": 0.5425370931625366, + "step": 5972 + }, + { + "epoch": 1.7466003801725398, + "grad_norm": 1.7657169836698012, + "learning_rate": 8.676944805547882e-07, + "loss": 0.5831055045127869, + "step": 5973 + }, + { + "epoch": 1.7468928205878052, + "grad_norm": 1.3865571188404813, + "learning_rate": 8.657256794358592e-07, + "loss": 0.4615570306777954, + "step": 5974 + }, + { + "epoch": 1.7471852610030707, + "grad_norm": 1.6631260131171646, + "learning_rate": 8.637590133980145e-07, + "loss": 0.5727440118789673, + "step": 5975 + }, + { + "epoch": 1.747477701418336, + "grad_norm": 1.6981377401436724, + "learning_rate": 8.617944829009517e-07, + 
"loss": 0.5652801990509033, + "step": 5976 + }, + { + "epoch": 1.7477701418336014, + "grad_norm": 1.6704888560345945, + "learning_rate": 8.59832088403868e-07, + "loss": 0.42455562949180603, + "step": 5977 + }, + { + "epoch": 1.748062582248867, + "grad_norm": 1.8565352683598422, + "learning_rate": 8.578718303654588e-07, + "loss": 0.526951789855957, + "step": 5978 + }, + { + "epoch": 1.7483550226641322, + "grad_norm": 1.5113931171346078, + "learning_rate": 8.559137092439252e-07, + "loss": 0.45547354221343994, + "step": 5979 + }, + { + "epoch": 1.7486474630793976, + "grad_norm": 1.843493314178274, + "learning_rate": 8.539577254969667e-07, + "loss": 0.5470790863037109, + "step": 5980 + }, + { + "epoch": 1.748939903494663, + "grad_norm": 1.6766357010415907, + "learning_rate": 8.520038795817798e-07, + "loss": 0.5608032941818237, + "step": 5981 + }, + { + "epoch": 1.7492323439099282, + "grad_norm": 1.7251948475523264, + "learning_rate": 8.500521719550648e-07, + "loss": 0.5243809223175049, + "step": 5982 + }, + { + "epoch": 1.7495247843251938, + "grad_norm": 1.7843504248813373, + "learning_rate": 8.481026030730222e-07, + "loss": 0.5040958523750305, + "step": 5983 + }, + { + "epoch": 1.749817224740459, + "grad_norm": 1.9016982717852353, + "learning_rate": 8.461551733913509e-07, + "loss": 0.5026291012763977, + "step": 5984 + }, + { + "epoch": 1.7501096651557244, + "grad_norm": 1.578287817505696, + "learning_rate": 8.442098833652523e-07, + "loss": 0.5273059010505676, + "step": 5985 + }, + { + "epoch": 1.75040210557099, + "grad_norm": 1.7872787423587175, + "learning_rate": 8.42266733449425e-07, + "loss": 0.5811910629272461, + "step": 5986 + }, + { + "epoch": 1.7506945459862553, + "grad_norm": 1.9383664928260165, + "learning_rate": 8.4032572409807e-07, + "loss": 0.6078274250030518, + "step": 5987 + }, + { + "epoch": 1.7509869864015206, + "grad_norm": 1.8956639494069205, + "learning_rate": 8.383868557648833e-07, + "loss": 0.5214031338691711, + "step": 5988 + }, + { + "epoch": 
1.7512794268167862, + "grad_norm": 1.8686899180431094, + "learning_rate": 8.364501289030669e-07, + "loss": 0.5464918613433838, + "step": 5989 + }, + { + "epoch": 1.7515718672320515, + "grad_norm": 1.9577387999849984, + "learning_rate": 8.345155439653175e-07, + "loss": 0.48296278715133667, + "step": 5990 + }, + { + "epoch": 1.7518643076473168, + "grad_norm": 1.6097156536359971, + "learning_rate": 8.325831014038344e-07, + "loss": 0.5441919565200806, + "step": 5991 + }, + { + "epoch": 1.7521567480625824, + "grad_norm": 1.524249865256617, + "learning_rate": 8.306528016703097e-07, + "loss": 0.4623905420303345, + "step": 5992 + }, + { + "epoch": 1.7524491884778475, + "grad_norm": 1.7850630013083288, + "learning_rate": 8.287246452159437e-07, + "loss": 0.5671495795249939, + "step": 5993 + }, + { + "epoch": 1.752741628893113, + "grad_norm": 1.7371249179959158, + "learning_rate": 8.267986324914278e-07, + "loss": 0.5400685667991638, + "step": 5994 + }, + { + "epoch": 1.7530340693083786, + "grad_norm": 1.7239850907759944, + "learning_rate": 8.24874763946959e-07, + "loss": 0.4362148642539978, + "step": 5995 + }, + { + "epoch": 1.7533265097236437, + "grad_norm": 1.7548276097653166, + "learning_rate": 8.229530400322283e-07, + "loss": 0.554877519607544, + "step": 5996 + }, + { + "epoch": 1.7536189501389092, + "grad_norm": 1.6421753593232726, + "learning_rate": 8.210334611964266e-07, + "loss": 0.5239896774291992, + "step": 5997 + }, + { + "epoch": 1.7539113905541746, + "grad_norm": 1.9442998633220852, + "learning_rate": 8.191160278882438e-07, + "loss": 0.4729669988155365, + "step": 5998 + }, + { + "epoch": 1.7542038309694399, + "grad_norm": 1.5789777380903094, + "learning_rate": 8.172007405558702e-07, + "loss": 0.5449322462081909, + "step": 5999 + }, + { + "epoch": 1.7544962713847054, + "grad_norm": 1.6329056939447448, + "learning_rate": 8.1528759964699e-07, + "loss": 0.5422194600105286, + "step": 6000 + }, + { + "epoch": 1.7547887117999708, + "grad_norm": 1.4174806038648198, + 
"learning_rate": 8.1337660560879e-07, + "loss": 0.3854302763938904, + "step": 6001 + }, + { + "epoch": 1.755081152215236, + "grad_norm": 1.7209418471597917, + "learning_rate": 8.114677588879549e-07, + "loss": 0.4678449034690857, + "step": 6002 + }, + { + "epoch": 1.7553735926305016, + "grad_norm": 1.5464176931987315, + "learning_rate": 8.095610599306614e-07, + "loss": 0.5462471842765808, + "step": 6003 + }, + { + "epoch": 1.755666033045767, + "grad_norm": 2.204727836372247, + "learning_rate": 8.076565091825916e-07, + "loss": 0.6314511299133301, + "step": 6004 + }, + { + "epoch": 1.7559584734610323, + "grad_norm": 1.5748030564701405, + "learning_rate": 8.057541070889229e-07, + "loss": 0.6373077630996704, + "step": 6005 + }, + { + "epoch": 1.7562509138762978, + "grad_norm": 1.3531361411828478, + "learning_rate": 8.038538540943297e-07, + "loss": 0.5279273986816406, + "step": 6006 + }, + { + "epoch": 1.756543354291563, + "grad_norm": 1.8888532901747122, + "learning_rate": 8.019557506429843e-07, + "loss": 0.5645443201065063, + "step": 6007 + }, + { + "epoch": 1.7568357947068285, + "grad_norm": 1.776791412383221, + "learning_rate": 8.000597971785573e-07, + "loss": 0.552385151386261, + "step": 6008 + }, + { + "epoch": 1.757128235122094, + "grad_norm": 1.8476397874412334, + "learning_rate": 7.981659941442154e-07, + "loss": 0.5790541172027588, + "step": 6009 + }, + { + "epoch": 1.7574206755373591, + "grad_norm": 1.8909444917759248, + "learning_rate": 7.962743419826247e-07, + "loss": 0.550809383392334, + "step": 6010 + }, + { + "epoch": 1.7577131159526247, + "grad_norm": 1.845124979293289, + "learning_rate": 7.943848411359479e-07, + "loss": 0.4659814238548279, + "step": 6011 + }, + { + "epoch": 1.75800555636789, + "grad_norm": 1.8856668900422473, + "learning_rate": 7.924974920458428e-07, + "loss": 0.5099040269851685, + "step": 6012 + }, + { + "epoch": 1.7582979967831553, + "grad_norm": 1.8389637809818669, + "learning_rate": 7.906122951534678e-07, + "loss": 
0.4819038510322571, + "step": 6013 + }, + { + "epoch": 1.758590437198421, + "grad_norm": 1.6198962208116707, + "learning_rate": 7.887292508994737e-07, + "loss": 0.4033840298652649, + "step": 6014 + }, + { + "epoch": 1.7588828776136862, + "grad_norm": 2.0694906070649397, + "learning_rate": 7.868483597240117e-07, + "loss": 0.6316418647766113, + "step": 6015 + }, + { + "epoch": 1.7591753180289516, + "grad_norm": 1.574018695954754, + "learning_rate": 7.84969622066728e-07, + "loss": 0.5141040682792664, + "step": 6016 + }, + { + "epoch": 1.759467758444217, + "grad_norm": 1.625714616568423, + "learning_rate": 7.830930383667668e-07, + "loss": 0.44808077812194824, + "step": 6017 + }, + { + "epoch": 1.7597601988594824, + "grad_norm": 1.6060850378753004, + "learning_rate": 7.812186090627694e-07, + "loss": 0.5661089420318604, + "step": 6018 + }, + { + "epoch": 1.7600526392747478, + "grad_norm": 1.553528332849771, + "learning_rate": 7.793463345928697e-07, + "loss": 0.487590491771698, + "step": 6019 + }, + { + "epoch": 1.7603450796900133, + "grad_norm": 1.9699234516767667, + "learning_rate": 7.774762153947024e-07, + "loss": 0.5775022506713867, + "step": 6020 + }, + { + "epoch": 1.7606375201052784, + "grad_norm": 1.7091892859281639, + "learning_rate": 7.756082519053965e-07, + "loss": 0.5714563131332397, + "step": 6021 + }, + { + "epoch": 1.760929960520544, + "grad_norm": 1.8764497127243964, + "learning_rate": 7.73742444561576e-07, + "loss": 0.6063593626022339, + "step": 6022 + }, + { + "epoch": 1.7612224009358093, + "grad_norm": 1.7254357706950765, + "learning_rate": 7.718787937993622e-07, + "loss": 0.48034632205963135, + "step": 6023 + }, + { + "epoch": 1.7615148413510746, + "grad_norm": 1.4591503666123415, + "learning_rate": 7.700173000543742e-07, + "loss": 0.6003588438034058, + "step": 6024 + }, + { + "epoch": 1.7618072817663402, + "grad_norm": 1.6378303717233282, + "learning_rate": 7.681579637617209e-07, + "loss": 0.42842140793800354, + "step": 6025 + }, + { + "epoch": 
1.7620997221816055, + "grad_norm": 1.502196803812996, + "learning_rate": 7.663007853560145e-07, + "loss": 0.5235859155654907, + "step": 6026 + }, + { + "epoch": 1.7623921625968708, + "grad_norm": 1.6904284507093605, + "learning_rate": 7.644457652713566e-07, + "loss": 0.47140365839004517, + "step": 6027 + }, + { + "epoch": 1.7626846030121364, + "grad_norm": 1.6014343948293654, + "learning_rate": 7.625929039413483e-07, + "loss": 0.53680419921875, + "step": 6028 + }, + { + "epoch": 1.7629770434274017, + "grad_norm": 1.6173156649426792, + "learning_rate": 7.60742201799084e-07, + "loss": 0.5280998349189758, + "step": 6029 + }, + { + "epoch": 1.763269483842667, + "grad_norm": 1.58299525140219, + "learning_rate": 7.588936592771545e-07, + "loss": 0.49653276801109314, + "step": 6030 + }, + { + "epoch": 1.7635619242579326, + "grad_norm": 1.6031325431493386, + "learning_rate": 7.570472768076464e-07, + "loss": 0.511070966720581, + "step": 6031 + }, + { + "epoch": 1.7638543646731977, + "grad_norm": 1.9173967106238505, + "learning_rate": 7.552030548221379e-07, + "loss": 0.6601030826568604, + "step": 6032 + }, + { + "epoch": 1.7641468050884632, + "grad_norm": 1.7630822043129881, + "learning_rate": 7.533609937517072e-07, + "loss": 0.6216480731964111, + "step": 6033 + }, + { + "epoch": 1.7644392455037288, + "grad_norm": 2.1444282721386765, + "learning_rate": 7.515210940269224e-07, + "loss": 0.7237618565559387, + "step": 6034 + }, + { + "epoch": 1.7647316859189939, + "grad_norm": 1.9895778147794236, + "learning_rate": 7.496833560778527e-07, + "loss": 0.4979498088359833, + "step": 6035 + }, + { + "epoch": 1.7650241263342594, + "grad_norm": 1.5816967377469986, + "learning_rate": 7.478477803340533e-07, + "loss": 0.49408191442489624, + "step": 6036 + }, + { + "epoch": 1.7653165667495248, + "grad_norm": 1.8439349693338256, + "learning_rate": 7.460143672245823e-07, + "loss": 0.524259626865387, + "step": 6037 + }, + { + "epoch": 1.76560900716479, + "grad_norm": 1.8574151410796558, + 
"learning_rate": 7.441831171779878e-07, + "loss": 0.625320315361023, + "step": 6038 + }, + { + "epoch": 1.7659014475800556, + "grad_norm": 1.7217980866482836, + "learning_rate": 7.42354030622312e-07, + "loss": 0.5971028804779053, + "step": 6039 + }, + { + "epoch": 1.766193887995321, + "grad_norm": 1.5069481360511938, + "learning_rate": 7.405271079850951e-07, + "loss": 0.48935002088546753, + "step": 6040 + }, + { + "epoch": 1.7664863284105863, + "grad_norm": 1.7616973297205794, + "learning_rate": 7.387023496933687e-07, + "loss": 0.46346336603164673, + "step": 6041 + }, + { + "epoch": 1.7667787688258518, + "grad_norm": 1.5425066644175864, + "learning_rate": 7.368797561736574e-07, + "loss": 0.5135314464569092, + "step": 6042 + }, + { + "epoch": 1.7670712092411172, + "grad_norm": 1.7938719309176694, + "learning_rate": 7.350593278519824e-07, + "loss": 0.45815128087997437, + "step": 6043 + }, + { + "epoch": 1.7673636496563825, + "grad_norm": 1.8253657375894647, + "learning_rate": 7.332410651538591e-07, + "loss": 0.5663015246391296, + "step": 6044 + }, + { + "epoch": 1.767656090071648, + "grad_norm": 1.6737365706300193, + "learning_rate": 7.314249685042929e-07, + "loss": 0.5323490500450134, + "step": 6045 + }, + { + "epoch": 1.7679485304869131, + "grad_norm": 1.8380863614801877, + "learning_rate": 7.296110383277866e-07, + "loss": 0.5489768981933594, + "step": 6046 + }, + { + "epoch": 1.7682409709021787, + "grad_norm": 1.867533811207324, + "learning_rate": 7.277992750483364e-07, + "loss": 0.5951086282730103, + "step": 6047 + }, + { + "epoch": 1.7685334113174442, + "grad_norm": 1.6688539257267474, + "learning_rate": 7.259896790894271e-07, + "loss": 0.48228102922439575, + "step": 6048 + }, + { + "epoch": 1.7688258517327093, + "grad_norm": 1.7579049817410466, + "learning_rate": 7.241822508740448e-07, + "loss": 0.6318891644477844, + "step": 6049 + }, + { + "epoch": 1.769118292147975, + "grad_norm": 1.967894881109258, + "learning_rate": 7.223769908246636e-07, + "loss": 
0.4966656267642975, + "step": 6050 + }, + { + "epoch": 1.7694107325632402, + "grad_norm": 1.7465352091582635, + "learning_rate": 7.205738993632516e-07, + "loss": 0.5645290613174438, + "step": 6051 + }, + { + "epoch": 1.7697031729785055, + "grad_norm": 1.8324400656837103, + "learning_rate": 7.187729769112717e-07, + "loss": 0.560075044631958, + "step": 6052 + }, + { + "epoch": 1.769995613393771, + "grad_norm": 1.658346896913261, + "learning_rate": 7.169742238896771e-07, + "loss": 0.6375163793563843, + "step": 6053 + }, + { + "epoch": 1.7702880538090364, + "grad_norm": 1.9991114191844357, + "learning_rate": 7.15177640718916e-07, + "loss": 0.5620392560958862, + "step": 6054 + }, + { + "epoch": 1.7705804942243017, + "grad_norm": 1.7885795694198106, + "learning_rate": 7.133832278189301e-07, + "loss": 0.5382653474807739, + "step": 6055 + }, + { + "epoch": 1.7708729346395673, + "grad_norm": 1.954649524899457, + "learning_rate": 7.115909856091497e-07, + "loss": 0.502597451210022, + "step": 6056 + }, + { + "epoch": 1.7711653750548326, + "grad_norm": 1.782753780230982, + "learning_rate": 7.098009145085016e-07, + "loss": 0.5876599550247192, + "step": 6057 + }, + { + "epoch": 1.771457815470098, + "grad_norm": 1.7624219528533958, + "learning_rate": 7.080130149354048e-07, + "loss": 0.5164280533790588, + "step": 6058 + }, + { + "epoch": 1.7717502558853635, + "grad_norm": 1.7004652166347358, + "learning_rate": 7.062272873077691e-07, + "loss": 0.5192137360572815, + "step": 6059 + }, + { + "epoch": 1.7720426963006286, + "grad_norm": 1.6924472823946135, + "learning_rate": 7.044437320429987e-07, + "loss": 0.5298370122909546, + "step": 6060 + }, + { + "epoch": 1.7723351367158942, + "grad_norm": 1.671988873461514, + "learning_rate": 7.026623495579876e-07, + "loss": 0.5099462270736694, + "step": 6061 + }, + { + "epoch": 1.7726275771311595, + "grad_norm": 1.8314661737989666, + "learning_rate": 7.00883140269123e-07, + "loss": 0.6061269640922546, + "step": 6062 + }, + { + "epoch": 
1.7729200175464248, + "grad_norm": 1.9189229950794147, + "learning_rate": 6.991061045922854e-07, + "loss": 0.683641254901886, + "step": 6063 + }, + { + "epoch": 1.7732124579616904, + "grad_norm": 2.089118565246571, + "learning_rate": 6.973312429428458e-07, + "loss": 0.6294830441474915, + "step": 6064 + }, + { + "epoch": 1.7735048983769557, + "grad_norm": 1.6252098698149335, + "learning_rate": 6.95558555735667e-07, + "loss": 0.40493613481521606, + "step": 6065 + }, + { + "epoch": 1.773797338792221, + "grad_norm": 1.7745752298261492, + "learning_rate": 6.93788043385103e-07, + "loss": 0.501255452632904, + "step": 6066 + }, + { + "epoch": 1.7740897792074866, + "grad_norm": 1.7883463098117711, + "learning_rate": 6.920197063050038e-07, + "loss": 0.6004104614257812, + "step": 6067 + }, + { + "epoch": 1.7743822196227519, + "grad_norm": 1.5939834110995985, + "learning_rate": 6.902535449087023e-07, + "loss": 0.48683321475982666, + "step": 6068 + }, + { + "epoch": 1.7746746600380172, + "grad_norm": 1.7279814402431617, + "learning_rate": 6.884895596090302e-07, + "loss": 0.6048111319541931, + "step": 6069 + }, + { + "epoch": 1.7749671004532828, + "grad_norm": 1.8759604993064984, + "learning_rate": 6.867277508183101e-07, + "loss": 0.5532732009887695, + "step": 6070 + }, + { + "epoch": 1.7752595408685479, + "grad_norm": 2.066556008321799, + "learning_rate": 6.849681189483515e-07, + "loss": 0.544552206993103, + "step": 6071 + }, + { + "epoch": 1.7755519812838134, + "grad_norm": 1.9161876673278242, + "learning_rate": 6.832106644104586e-07, + "loss": 0.5114158391952515, + "step": 6072 + }, + { + "epoch": 1.775844421699079, + "grad_norm": 1.6996182780694216, + "learning_rate": 6.814553876154273e-07, + "loss": 0.45777493715286255, + "step": 6073 + }, + { + "epoch": 1.776136862114344, + "grad_norm": 1.6209289540377791, + "learning_rate": 6.797022889735405e-07, + "loss": 0.5449005365371704, + "step": 6074 + }, + { + "epoch": 1.7764293025296096, + "grad_norm": 1.8749070330960134, + 
"learning_rate": 6.779513688945749e-07, + "loss": 0.6308485865592957, + "step": 6075 + }, + { + "epoch": 1.776721742944875, + "grad_norm": 1.951122544814841, + "learning_rate": 6.762026277877986e-07, + "loss": 0.5904842019081116, + "step": 6076 + }, + { + "epoch": 1.7770141833601403, + "grad_norm": 1.8358819377761475, + "learning_rate": 6.744560660619681e-07, + "loss": 0.6681115627288818, + "step": 6077 + }, + { + "epoch": 1.7773066237754058, + "grad_norm": 1.7337774705028348, + "learning_rate": 6.727116841253334e-07, + "loss": 0.5084429979324341, + "step": 6078 + }, + { + "epoch": 1.7775990641906712, + "grad_norm": 1.706737040250044, + "learning_rate": 6.709694823856305e-07, + "loss": 0.5705291032791138, + "step": 6079 + }, + { + "epoch": 1.7778915046059365, + "grad_norm": 1.541912819246542, + "learning_rate": 6.692294612500894e-07, + "loss": 0.6481744050979614, + "step": 6080 + }, + { + "epoch": 1.778183945021202, + "grad_norm": 1.5164317234096627, + "learning_rate": 6.67491621125429e-07, + "loss": 0.5236573815345764, + "step": 6081 + }, + { + "epoch": 1.7784763854364674, + "grad_norm": 1.761941770239031, + "learning_rate": 6.657559624178611e-07, + "loss": 0.5169326663017273, + "step": 6082 + }, + { + "epoch": 1.7787688258517327, + "grad_norm": 1.7653960525219785, + "learning_rate": 6.640224855330824e-07, + "loss": 0.5304254293441772, + "step": 6083 + }, + { + "epoch": 1.7790612662669982, + "grad_norm": 1.7073706399680681, + "learning_rate": 6.622911908762852e-07, + "loss": 0.457882285118103, + "step": 6084 + }, + { + "epoch": 1.7793537066822633, + "grad_norm": 1.4459810475641077, + "learning_rate": 6.605620788521472e-07, + "loss": 0.48427796363830566, + "step": 6085 + }, + { + "epoch": 1.7796461470975289, + "grad_norm": 1.7511368613506917, + "learning_rate": 6.588351498648382e-07, + "loss": 0.598512589931488, + "step": 6086 + }, + { + "epoch": 1.7799385875127944, + "grad_norm": 1.6445184894388314, + "learning_rate": 6.571104043180188e-07, + "loss": 
0.5065094232559204, + "step": 6087 + }, + { + "epoch": 1.7802310279280595, + "grad_norm": 1.7505635404599922, + "learning_rate": 6.553878426148364e-07, + "loss": 0.5493142008781433, + "step": 6088 + }, + { + "epoch": 1.780523468343325, + "grad_norm": 1.5236545905427594, + "learning_rate": 6.5366746515793e-07, + "loss": 0.40520578622817993, + "step": 6089 + }, + { + "epoch": 1.7808159087585904, + "grad_norm": 1.6562045226817075, + "learning_rate": 6.51949272349427e-07, + "loss": 0.5416547656059265, + "step": 6090 + }, + { + "epoch": 1.7811083491738557, + "grad_norm": 1.5389792406208165, + "learning_rate": 6.502332645909438e-07, + "loss": 0.4531989097595215, + "step": 6091 + }, + { + "epoch": 1.7814007895891213, + "grad_norm": 1.9811412419033423, + "learning_rate": 6.485194422835872e-07, + "loss": 0.6385304927825928, + "step": 6092 + }, + { + "epoch": 1.7816932300043866, + "grad_norm": 1.631678357707061, + "learning_rate": 6.468078058279537e-07, + "loss": 0.5503095388412476, + "step": 6093 + }, + { + "epoch": 1.781985670419652, + "grad_norm": 1.810992666384156, + "learning_rate": 6.450983556241264e-07, + "loss": 0.5184366703033447, + "step": 6094 + }, + { + "epoch": 1.7822781108349175, + "grad_norm": 1.8021498649724184, + "learning_rate": 6.433910920716813e-07, + "loss": 0.5211689472198486, + "step": 6095 + }, + { + "epoch": 1.7825705512501828, + "grad_norm": 1.5495698877916986, + "learning_rate": 6.416860155696781e-07, + "loss": 0.7357909679412842, + "step": 6096 + }, + { + "epoch": 1.7828629916654481, + "grad_norm": 1.6814949660424658, + "learning_rate": 6.399831265166689e-07, + "loss": 0.6283953189849854, + "step": 6097 + }, + { + "epoch": 1.7831554320807137, + "grad_norm": 1.7274003515879492, + "learning_rate": 6.382824253106945e-07, + "loss": 0.45040953159332275, + "step": 6098 + }, + { + "epoch": 1.7834478724959788, + "grad_norm": 1.9179221464776945, + "learning_rate": 6.365839123492834e-07, + "loss": 0.5056609511375427, + "step": 6099 + }, + { + "epoch": 
1.7837403129112444, + "grad_norm": 1.4295507016254647, + "learning_rate": 6.348875880294536e-07, + "loss": 0.4940416216850281, + "step": 6100 + }, + { + "epoch": 1.7840327533265097, + "grad_norm": 1.487738102541406, + "learning_rate": 6.33193452747708e-07, + "loss": 0.45796072483062744, + "step": 6101 + }, + { + "epoch": 1.784325193741775, + "grad_norm": 1.5314389713015535, + "learning_rate": 6.315015069000408e-07, + "loss": 0.4828432500362396, + "step": 6102 + }, + { + "epoch": 1.7846176341570406, + "grad_norm": 1.7652995666195541, + "learning_rate": 6.298117508819357e-07, + "loss": 0.5564515590667725, + "step": 6103 + }, + { + "epoch": 1.7849100745723059, + "grad_norm": 1.7672116497467336, + "learning_rate": 6.281241850883624e-07, + "loss": 0.5160977840423584, + "step": 6104 + }, + { + "epoch": 1.7852025149875712, + "grad_norm": 1.6835388368372863, + "learning_rate": 6.264388099137775e-07, + "loss": 0.585543155670166, + "step": 6105 + }, + { + "epoch": 1.7854949554028368, + "grad_norm": 1.9025389414417693, + "learning_rate": 6.247556257521303e-07, + "loss": 0.5377194881439209, + "step": 6106 + }, + { + "epoch": 1.785787395818102, + "grad_norm": 1.6124331818311004, + "learning_rate": 6.230746329968518e-07, + "loss": 0.46788060665130615, + "step": 6107 + }, + { + "epoch": 1.7860798362333674, + "grad_norm": 1.481941465563148, + "learning_rate": 6.213958320408664e-07, + "loss": 0.511722207069397, + "step": 6108 + }, + { + "epoch": 1.786372276648633, + "grad_norm": 1.7380505303184415, + "learning_rate": 6.197192232765814e-07, + "loss": 0.5609079599380493, + "step": 6109 + }, + { + "epoch": 1.786664717063898, + "grad_norm": 1.5715739237199864, + "learning_rate": 6.180448070958955e-07, + "loss": 0.47641855478286743, + "step": 6110 + }, + { + "epoch": 1.7869571574791636, + "grad_norm": 1.4072609352957208, + "learning_rate": 6.163725838901946e-07, + "loss": 0.4209919273853302, + "step": 6111 + }, + { + "epoch": 1.7872495978944292, + "grad_norm": 1.7120783337900378, + 
"learning_rate": 6.147025540503459e-07, + "loss": 0.6012829542160034, + "step": 6112 + }, + { + "epoch": 1.7875420383096943, + "grad_norm": 1.8789998564305304, + "learning_rate": 6.130347179667129e-07, + "loss": 0.6112918853759766, + "step": 6113 + }, + { + "epoch": 1.7878344787249598, + "grad_norm": 1.8641199827985835, + "learning_rate": 6.113690760291402e-07, + "loss": 0.6370030641555786, + "step": 6114 + }, + { + "epoch": 1.7881269191402251, + "grad_norm": 1.837749741108103, + "learning_rate": 6.097056286269631e-07, + "loss": 0.5385129451751709, + "step": 6115 + }, + { + "epoch": 1.7884193595554905, + "grad_norm": 1.7733960362556163, + "learning_rate": 6.080443761490007e-07, + "loss": 0.4707196354866028, + "step": 6116 + }, + { + "epoch": 1.788711799970756, + "grad_norm": 1.8302621423982353, + "learning_rate": 6.063853189835611e-07, + "loss": 0.5361602306365967, + "step": 6117 + }, + { + "epoch": 1.7890042403860213, + "grad_norm": 1.592603561791519, + "learning_rate": 6.047284575184398e-07, + "loss": 0.48841261863708496, + "step": 6118 + }, + { + "epoch": 1.7892966808012867, + "grad_norm": 1.6413123655048356, + "learning_rate": 6.030737921409169e-07, + "loss": 0.47491973638534546, + "step": 6119 + }, + { + "epoch": 1.7895891212165522, + "grad_norm": 1.608045516338794, + "learning_rate": 6.014213232377608e-07, + "loss": 0.4579542875289917, + "step": 6120 + }, + { + "epoch": 1.7898815616318176, + "grad_norm": 1.7739986275669979, + "learning_rate": 5.997710511952259e-07, + "loss": 0.4517485499382019, + "step": 6121 + }, + { + "epoch": 1.7901740020470829, + "grad_norm": 1.61243285020885, + "learning_rate": 5.981229763990559e-07, + "loss": 0.5656695365905762, + "step": 6122 + }, + { + "epoch": 1.7904664424623484, + "grad_norm": 1.8328920976142473, + "learning_rate": 5.964770992344737e-07, + "loss": 0.5000064373016357, + "step": 6123 + }, + { + "epoch": 1.7907588828776135, + "grad_norm": 1.691423776793607, + "learning_rate": 5.948334200861927e-07, + "loss": 
0.4823925495147705, + "step": 6124 + }, + { + "epoch": 1.791051323292879, + "grad_norm": 1.6081373509153076, + "learning_rate": 5.931919393384189e-07, + "loss": 0.45079779624938965, + "step": 6125 + }, + { + "epoch": 1.7913437637081446, + "grad_norm": 1.7368976771393152, + "learning_rate": 5.915526573748331e-07, + "loss": 0.5887237787246704, + "step": 6126 + }, + { + "epoch": 1.7916362041234097, + "grad_norm": 1.5326002891728705, + "learning_rate": 5.8991557457861e-07, + "loss": 0.5625102519989014, + "step": 6127 + }, + { + "epoch": 1.7919286445386753, + "grad_norm": 1.773152580661058, + "learning_rate": 5.882806913324079e-07, + "loss": 0.5290789604187012, + "step": 6128 + }, + { + "epoch": 1.7922210849539406, + "grad_norm": 1.8240731968563617, + "learning_rate": 5.86648008018369e-07, + "loss": 0.47694748640060425, + "step": 6129 + }, + { + "epoch": 1.792513525369206, + "grad_norm": 1.7480468996944738, + "learning_rate": 5.850175250181244e-07, + "loss": 0.6297628879547119, + "step": 6130 + }, + { + "epoch": 1.7928059657844715, + "grad_norm": 1.767468792446569, + "learning_rate": 5.833892427127908e-07, + "loss": 0.5748087167739868, + "step": 6131 + }, + { + "epoch": 1.7930984061997368, + "grad_norm": 2.0367130445902313, + "learning_rate": 5.817631614829666e-07, + "loss": 0.552059531211853, + "step": 6132 + }, + { + "epoch": 1.7933908466150021, + "grad_norm": 1.881082319886368, + "learning_rate": 5.801392817087392e-07, + "loss": 0.5980287790298462, + "step": 6133 + }, + { + "epoch": 1.7936832870302677, + "grad_norm": 1.7948740811393897, + "learning_rate": 5.785176037696815e-07, + "loss": 0.5682743191719055, + "step": 6134 + }, + { + "epoch": 1.793975727445533, + "grad_norm": 1.6227048981437364, + "learning_rate": 5.768981280448494e-07, + "loss": 0.6907520294189453, + "step": 6135 + }, + { + "epoch": 1.7942681678607983, + "grad_norm": 1.82613812962419, + "learning_rate": 5.752808549127875e-07, + "loss": 0.5939712524414062, + "step": 6136 + }, + { + "epoch": 
1.794560608276064, + "grad_norm": 1.961952469296216, + "learning_rate": 5.736657847515215e-07, + "loss": 0.5169910192489624, + "step": 6137 + }, + { + "epoch": 1.794853048691329, + "grad_norm": 1.7101466149490088, + "learning_rate": 5.720529179385659e-07, + "loss": 0.5795155167579651, + "step": 6138 + }, + { + "epoch": 1.7951454891065945, + "grad_norm": 1.6643593680063449, + "learning_rate": 5.704422548509181e-07, + "loss": 0.4296284317970276, + "step": 6139 + }, + { + "epoch": 1.7954379295218599, + "grad_norm": 1.780840768711558, + "learning_rate": 5.688337958650603e-07, + "loss": 0.5175303220748901, + "step": 6140 + }, + { + "epoch": 1.7957303699371252, + "grad_norm": 1.5534990300027502, + "learning_rate": 5.672275413569605e-07, + "loss": 0.49900466203689575, + "step": 6141 + }, + { + "epoch": 1.7960228103523908, + "grad_norm": 1.741229060320259, + "learning_rate": 5.65623491702072e-07, + "loss": 0.5047665238380432, + "step": 6142 + }, + { + "epoch": 1.796315250767656, + "grad_norm": 1.6004175896698871, + "learning_rate": 5.64021647275329e-07, + "loss": 0.5309686660766602, + "step": 6143 + }, + { + "epoch": 1.7966076911829214, + "grad_norm": 1.84753723892279, + "learning_rate": 5.624220084511544e-07, + "loss": 0.7270892858505249, + "step": 6144 + }, + { + "epoch": 1.796900131598187, + "grad_norm": 1.8607152469266723, + "learning_rate": 5.608245756034536e-07, + "loss": 0.515272319316864, + "step": 6145 + }, + { + "epoch": 1.7971925720134523, + "grad_norm": 1.5111910050436628, + "learning_rate": 5.592293491056167e-07, + "loss": 0.4919237196445465, + "step": 6146 + }, + { + "epoch": 1.7974850124287176, + "grad_norm": 1.8345189418412804, + "learning_rate": 5.576363293305187e-07, + "loss": 0.5812259316444397, + "step": 6147 + }, + { + "epoch": 1.7977774528439832, + "grad_norm": 1.7464814721572284, + "learning_rate": 5.560455166505185e-07, + "loss": 0.434345006942749, + "step": 6148 + }, + { + "epoch": 1.7980698932592483, + "grad_norm": 1.6287087584719833, + 
"learning_rate": 5.544569114374588e-07, + "loss": 0.4670771360397339, + "step": 6149 + }, + { + "epoch": 1.7983623336745138, + "grad_norm": 1.5038620849892772, + "learning_rate": 5.528705140626667e-07, + "loss": 0.5867526531219482, + "step": 6150 + }, + { + "epoch": 1.7986547740897794, + "grad_norm": 1.8981858755166237, + "learning_rate": 5.512863248969513e-07, + "loss": 0.5453605651855469, + "step": 6151 + }, + { + "epoch": 1.7989472145050445, + "grad_norm": 1.9030067654858334, + "learning_rate": 5.497043443106087e-07, + "loss": 0.5535463690757751, + "step": 6152 + }, + { + "epoch": 1.79923965492031, + "grad_norm": 1.72031713178446, + "learning_rate": 5.481245726734174e-07, + "loss": 0.6250847578048706, + "step": 6153 + }, + { + "epoch": 1.7995320953355753, + "grad_norm": 1.625961067284692, + "learning_rate": 5.465470103546399e-07, + "loss": 0.45504581928253174, + "step": 6154 + }, + { + "epoch": 1.7998245357508407, + "grad_norm": 2.039802523536217, + "learning_rate": 5.449716577230202e-07, + "loss": 0.6192604303359985, + "step": 6155 + }, + { + "epoch": 1.8001169761661062, + "grad_norm": 1.8695276161806251, + "learning_rate": 5.433985151467869e-07, + "loss": 0.5624358654022217, + "step": 6156 + }, + { + "epoch": 1.8004094165813715, + "grad_norm": 1.7494457460727728, + "learning_rate": 5.418275829936537e-07, + "loss": 0.5759576559066772, + "step": 6157 + }, + { + "epoch": 1.8007018569966369, + "grad_norm": 1.752894288026352, + "learning_rate": 5.402588616308169e-07, + "loss": 0.5710508227348328, + "step": 6158 + }, + { + "epoch": 1.8009942974119024, + "grad_norm": 1.6781697189669698, + "learning_rate": 5.386923514249542e-07, + "loss": 0.6146141290664673, + "step": 6159 + }, + { + "epoch": 1.8012867378271677, + "grad_norm": 1.618055518270054, + "learning_rate": 5.371280527422296e-07, + "loss": 0.425834983587265, + "step": 6160 + }, + { + "epoch": 1.801579178242433, + "grad_norm": 1.8062077594882358, + "learning_rate": 5.35565965948287e-07, + "loss": 
0.4353194236755371, + "step": 6161 + }, + { + "epoch": 1.8018716186576986, + "grad_norm": 2.0598668441022037, + "learning_rate": 5.340060914082546e-07, + "loss": 0.7202355861663818, + "step": 6162 + }, + { + "epoch": 1.8021640590729637, + "grad_norm": 1.552014134498689, + "learning_rate": 5.324484294867449e-07, + "loss": 0.5371845960617065, + "step": 6163 + }, + { + "epoch": 1.8024564994882293, + "grad_norm": 1.7812688374701713, + "learning_rate": 5.308929805478513e-07, + "loss": 0.4995431900024414, + "step": 6164 + }, + { + "epoch": 1.8027489399034948, + "grad_norm": 1.9376433940202618, + "learning_rate": 5.293397449551519e-07, + "loss": 0.6503393650054932, + "step": 6165 + }, + { + "epoch": 1.80304138031876, + "grad_norm": 1.608511841040304, + "learning_rate": 5.277887230717027e-07, + "loss": 0.5083032250404358, + "step": 6166 + }, + { + "epoch": 1.8033338207340255, + "grad_norm": 1.7910725457082355, + "learning_rate": 5.262399152600473e-07, + "loss": 0.6067851781845093, + "step": 6167 + }, + { + "epoch": 1.8036262611492908, + "grad_norm": 1.6601362559713981, + "learning_rate": 5.246933218822104e-07, + "loss": 0.6446479558944702, + "step": 6168 + }, + { + "epoch": 1.8039187015645561, + "grad_norm": 1.9668874595165033, + "learning_rate": 5.231489432996984e-07, + "loss": 0.6940749883651733, + "step": 6169 + }, + { + "epoch": 1.8042111419798217, + "grad_norm": 1.6254914024201104, + "learning_rate": 5.216067798735014e-07, + "loss": 0.558691143989563, + "step": 6170 + }, + { + "epoch": 1.804503582395087, + "grad_norm": 1.706821795047188, + "learning_rate": 5.2006683196409e-07, + "loss": 0.4561213254928589, + "step": 6171 + }, + { + "epoch": 1.8047960228103523, + "grad_norm": 1.5741713506995776, + "learning_rate": 5.185290999314174e-07, + "loss": 0.514278769493103, + "step": 6172 + }, + { + "epoch": 1.805088463225618, + "grad_norm": 1.7438493762338294, + "learning_rate": 5.169935841349194e-07, + "loss": 0.41933614015579224, + "step": 6173 + }, + { + "epoch": 
1.8053809036408832, + "grad_norm": 1.5639626592195386, + "learning_rate": 5.154602849335133e-07, + "loss": 0.5590407848358154, + "step": 6174 + }, + { + "epoch": 1.8056733440561485, + "grad_norm": 1.7923343761763981, + "learning_rate": 5.139292026855991e-07, + "loss": 0.49428898096084595, + "step": 6175 + }, + { + "epoch": 1.805965784471414, + "grad_norm": 1.6980318077322492, + "learning_rate": 5.124003377490582e-07, + "loss": 0.4737596809864044, + "step": 6176 + }, + { + "epoch": 1.8062582248866792, + "grad_norm": 1.6716862203734568, + "learning_rate": 5.108736904812517e-07, + "loss": 0.5017397403717041, + "step": 6177 + }, + { + "epoch": 1.8065506653019447, + "grad_norm": 1.733919571237643, + "learning_rate": 5.09349261239026e-07, + "loss": 0.4509057402610779, + "step": 6178 + }, + { + "epoch": 1.80684310571721, + "grad_norm": 1.9095997808768526, + "learning_rate": 5.078270503787053e-07, + "loss": 0.4440206289291382, + "step": 6179 + }, + { + "epoch": 1.8071355461324754, + "grad_norm": 1.6672235625660048, + "learning_rate": 5.063070582560991e-07, + "loss": 0.4981609582901001, + "step": 6180 + }, + { + "epoch": 1.807427986547741, + "grad_norm": 1.4041701397189061, + "learning_rate": 5.047892852264946e-07, + "loss": 0.4057808518409729, + "step": 6181 + }, + { + "epoch": 1.8077204269630063, + "grad_norm": 1.8238388895662465, + "learning_rate": 5.032737316446634e-07, + "loss": 0.5770435333251953, + "step": 6182 + }, + { + "epoch": 1.8080128673782716, + "grad_norm": 1.5817149529336438, + "learning_rate": 5.017603978648567e-07, + "loss": 0.5431563258171082, + "step": 6183 + }, + { + "epoch": 1.8083053077935372, + "grad_norm": 1.7959973431061746, + "learning_rate": 5.002492842408058e-07, + "loss": 0.469868928194046, + "step": 6184 + }, + { + "epoch": 1.8085977482088025, + "grad_norm": 1.6470575782998251, + "learning_rate": 4.98740391125726e-07, + "loss": 0.4581238925457001, + "step": 6185 + }, + { + "epoch": 1.8088901886240678, + "grad_norm": 1.5613704220145663, + 
"learning_rate": 4.972337188723108e-07, + "loss": 0.43255913257598877, + "step": 6186 + }, + { + "epoch": 1.8091826290393334, + "grad_norm": 1.6405804521880538, + "learning_rate": 4.957292678327374e-07, + "loss": 0.5817975997924805, + "step": 6187 + }, + { + "epoch": 1.8094750694545985, + "grad_norm": 1.701175567145501, + "learning_rate": 4.9422703835866e-07, + "loss": 0.506614089012146, + "step": 6188 + }, + { + "epoch": 1.809767509869864, + "grad_norm": 1.8093255501568073, + "learning_rate": 4.927270308012155e-07, + "loss": 0.5245084762573242, + "step": 6189 + }, + { + "epoch": 1.8100599502851296, + "grad_norm": 1.9638481802757681, + "learning_rate": 4.912292455110235e-07, + "loss": 0.48700785636901855, + "step": 6190 + }, + { + "epoch": 1.8103523907003947, + "grad_norm": 1.7084108143801102, + "learning_rate": 4.897336828381794e-07, + "loss": 0.5512829422950745, + "step": 6191 + }, + { + "epoch": 1.8106448311156602, + "grad_norm": 1.9425355962156208, + "learning_rate": 4.882403431322647e-07, + "loss": 0.444965660572052, + "step": 6192 + }, + { + "epoch": 1.8109372715309255, + "grad_norm": 1.6773870360526466, + "learning_rate": 4.86749226742338e-07, + "loss": 0.49120527505874634, + "step": 6193 + }, + { + "epoch": 1.8112297119461909, + "grad_norm": 1.5444026883137385, + "learning_rate": 4.852603340169371e-07, + "loss": 0.47114405035972595, + "step": 6194 + }, + { + "epoch": 1.8115221523614564, + "grad_norm": 1.3641759741105037, + "learning_rate": 4.837736653040825e-07, + "loss": 0.41404014825820923, + "step": 6195 + }, + { + "epoch": 1.8118145927767217, + "grad_norm": 1.5779692763243462, + "learning_rate": 4.822892209512742e-07, + "loss": 0.5773917436599731, + "step": 6196 + }, + { + "epoch": 1.812107033191987, + "grad_norm": 1.5867022738126413, + "learning_rate": 4.808070013054911e-07, + "loss": 0.5048927068710327, + "step": 6197 + }, + { + "epoch": 1.8123994736072526, + "grad_norm": 1.4880382186782968, + "learning_rate": 4.793270067131961e-07, + "loss": 
0.48112595081329346, + "step": 6198 + }, + { + "epoch": 1.812691914022518, + "grad_norm": 1.5982708355484612, + "learning_rate": 4.778492375203236e-07, + "loss": 0.465067982673645, + "step": 6199 + }, + { + "epoch": 1.8129843544377833, + "grad_norm": 2.10382956966043, + "learning_rate": 4.763736940722985e-07, + "loss": 0.5456488132476807, + "step": 6200 + }, + { + "epoch": 1.8132767948530488, + "grad_norm": 1.7197696401081977, + "learning_rate": 4.74900376714017e-07, + "loss": 0.5078476071357727, + "step": 6201 + }, + { + "epoch": 1.813569235268314, + "grad_norm": 1.8035895737751002, + "learning_rate": 4.7342928578985814e-07, + "loss": 0.5087896585464478, + "step": 6202 + }, + { + "epoch": 1.8138616756835795, + "grad_norm": 1.8289842367399733, + "learning_rate": 4.719604216436824e-07, + "loss": 0.5734537243843079, + "step": 6203 + }, + { + "epoch": 1.814154116098845, + "grad_norm": 1.8255387764821909, + "learning_rate": 4.704937846188262e-07, + "loss": 0.5163359045982361, + "step": 6204 + }, + { + "epoch": 1.8144465565141101, + "grad_norm": 1.7367361746759034, + "learning_rate": 4.6902937505810765e-07, + "loss": 0.5884007811546326, + "step": 6205 + }, + { + "epoch": 1.8147389969293757, + "grad_norm": 1.459881439563451, + "learning_rate": 4.675671933038228e-07, + "loss": 0.454215407371521, + "step": 6206 + }, + { + "epoch": 1.815031437344641, + "grad_norm": 1.4834270754413148, + "learning_rate": 4.661072396977506e-07, + "loss": 0.4380212426185608, + "step": 6207 + }, + { + "epoch": 1.8153238777599063, + "grad_norm": 1.5724796080178702, + "learning_rate": 4.646495145811425e-07, + "loss": 0.6138126850128174, + "step": 6208 + }, + { + "epoch": 1.8156163181751719, + "grad_norm": 1.7578891144089137, + "learning_rate": 4.6319401829473366e-07, + "loss": 0.560515284538269, + "step": 6209 + }, + { + "epoch": 1.8159087585904372, + "grad_norm": 1.6717823771103892, + "learning_rate": 4.6174075117873976e-07, + "loss": 0.4744090735912323, + "step": 6210 + }, + { + "epoch": 
1.8162011990057025, + "grad_norm": 1.566667953265204, + "learning_rate": 4.6028971357285126e-07, + "loss": 0.4508114457130432, + "step": 6211 + }, + { + "epoch": 1.816493639420968, + "grad_norm": 1.6686159118306128, + "learning_rate": 4.5884090581623906e-07, + "loss": 0.5437598824501038, + "step": 6212 + }, + { + "epoch": 1.8167860798362334, + "grad_norm": 1.871048661690424, + "learning_rate": 4.5739432824755456e-07, + "loss": 0.608635425567627, + "step": 6213 + }, + { + "epoch": 1.8170785202514987, + "grad_norm": 1.683927429440131, + "learning_rate": 4.5594998120492505e-07, + "loss": 0.45614784955978394, + "step": 6214 + }, + { + "epoch": 1.8173709606667643, + "grad_norm": 1.8175326303925177, + "learning_rate": 4.5450786502595933e-07, + "loss": 0.46722525358200073, + "step": 6215 + }, + { + "epoch": 1.8176634010820294, + "grad_norm": 1.6729337536988582, + "learning_rate": 4.5306798004774333e-07, + "loss": 0.5424127578735352, + "step": 6216 + }, + { + "epoch": 1.817955841497295, + "grad_norm": 1.8512870023540355, + "learning_rate": 4.5163032660684e-07, + "loss": 0.4360300302505493, + "step": 6217 + }, + { + "epoch": 1.8182482819125603, + "grad_norm": 1.4671759860658016, + "learning_rate": 4.5019490503929395e-07, + "loss": 0.43406206369400024, + "step": 6218 + }, + { + "epoch": 1.8185407223278256, + "grad_norm": 1.5669201854687904, + "learning_rate": 4.4876171568062346e-07, + "loss": 0.5435998439788818, + "step": 6219 + }, + { + "epoch": 1.8188331627430911, + "grad_norm": 1.7571994730111475, + "learning_rate": 4.4733075886583043e-07, + "loss": 0.4555914103984833, + "step": 6220 + }, + { + "epoch": 1.8191256031583565, + "grad_norm": 1.9267993644134682, + "learning_rate": 4.4590203492939076e-07, + "loss": 0.5246081352233887, + "step": 6221 + }, + { + "epoch": 1.8194180435736218, + "grad_norm": 1.4234567063452161, + "learning_rate": 4.4447554420525954e-07, + "loss": 0.5093664526939392, + "step": 6222 + }, + { + "epoch": 1.8197104839888874, + "grad_norm": 
1.9251138549109805, + "learning_rate": 4.430512870268733e-07, + "loss": 0.5759550333023071, + "step": 6223 + }, + { + "epoch": 1.8200029244041527, + "grad_norm": 2.2446814471076184, + "learning_rate": 4.416292637271402e-07, + "loss": 0.5477207899093628, + "step": 6224 + }, + { + "epoch": 1.820295364819418, + "grad_norm": 1.7579783947323675, + "learning_rate": 4.402094746384511e-07, + "loss": 0.5786882638931274, + "step": 6225 + }, + { + "epoch": 1.8205878052346836, + "grad_norm": 1.6652775403735034, + "learning_rate": 4.3879192009267266e-07, + "loss": 0.36909428238868713, + "step": 6226 + }, + { + "epoch": 1.8208802456499487, + "grad_norm": 1.6359565015929571, + "learning_rate": 4.3737660042114993e-07, + "loss": 0.5471982955932617, + "step": 6227 + }, + { + "epoch": 1.8211726860652142, + "grad_norm": 1.633893653092529, + "learning_rate": 4.3596351595470596e-07, + "loss": 0.49737733602523804, + "step": 6228 + }, + { + "epoch": 1.8214651264804798, + "grad_norm": 1.8445639233475513, + "learning_rate": 4.3455266702363997e-07, + "loss": 0.70830237865448, + "step": 6229 + }, + { + "epoch": 1.8217575668957449, + "grad_norm": 1.5312305470870462, + "learning_rate": 4.331440539577281e-07, + "loss": 0.5844424962997437, + "step": 6230 + }, + { + "epoch": 1.8220500073110104, + "grad_norm": 1.5427896071730656, + "learning_rate": 4.317376770862269e-07, + "loss": 0.42457354068756104, + "step": 6231 + }, + { + "epoch": 1.8223424477262757, + "grad_norm": 2.058390634719774, + "learning_rate": 4.3033353673786695e-07, + "loss": 0.5154321193695068, + "step": 6232 + }, + { + "epoch": 1.822634888141541, + "grad_norm": 1.7898699548834731, + "learning_rate": 4.2893163324085886e-07, + "loss": 0.5896856784820557, + "step": 6233 + }, + { + "epoch": 1.8229273285568066, + "grad_norm": 1.8303948048078211, + "learning_rate": 4.2753196692288835e-07, + "loss": 0.5032835006713867, + "step": 6234 + }, + { + "epoch": 1.823219768972072, + "grad_norm": 1.8584560183845538, + "learning_rate": 
4.2613453811111814e-07, + "loss": 0.4691713750362396, + "step": 6235 + }, + { + "epoch": 1.8235122093873373, + "grad_norm": 1.5627513261590378, + "learning_rate": 4.2473934713219033e-07, + "loss": 0.595095694065094, + "step": 6236 + }, + { + "epoch": 1.8238046498026028, + "grad_norm": 1.6531612719483142, + "learning_rate": 4.233463943122218e-07, + "loss": 0.5004895329475403, + "step": 6237 + }, + { + "epoch": 1.8240970902178681, + "grad_norm": 1.7047690953050751, + "learning_rate": 4.2195567997680654e-07, + "loss": 0.4924081563949585, + "step": 6238 + }, + { + "epoch": 1.8243895306331335, + "grad_norm": 1.7572886707576447, + "learning_rate": 4.2056720445101565e-07, + "loss": 0.5350006818771362, + "step": 6239 + }, + { + "epoch": 1.824681971048399, + "grad_norm": 1.9485734179206806, + "learning_rate": 4.191809680593961e-07, + "loss": 0.5404629707336426, + "step": 6240 + }, + { + "epoch": 1.8249744114636641, + "grad_norm": 1.6023324600099473, + "learning_rate": 4.177969711259744e-07, + "loss": 0.727859377861023, + "step": 6241 + }, + { + "epoch": 1.8252668518789297, + "grad_norm": 1.553973004264676, + "learning_rate": 4.164152139742494e-07, + "loss": 0.4805057644844055, + "step": 6242 + }, + { + "epoch": 1.8255592922941952, + "grad_norm": 1.7536116301732134, + "learning_rate": 4.1503569692719847e-07, + "loss": 0.5520761013031006, + "step": 6243 + }, + { + "epoch": 1.8258517327094603, + "grad_norm": 1.8327055737656117, + "learning_rate": 4.1365842030727576e-07, + "loss": 0.6130107641220093, + "step": 6244 + }, + { + "epoch": 1.8261441731247259, + "grad_norm": 1.7887203227793926, + "learning_rate": 4.122833844364116e-07, + "loss": 0.6048229932785034, + "step": 6245 + }, + { + "epoch": 1.8264366135399912, + "grad_norm": 1.717414490213998, + "learning_rate": 4.1091058963601214e-07, + "loss": 0.667324960231781, + "step": 6246 + }, + { + "epoch": 1.8267290539552565, + "grad_norm": 2.083699506724501, + "learning_rate": 4.095400362269597e-07, + "loss": 0.45595815777778625, + 
"step": 6247 + }, + { + "epoch": 1.827021494370522, + "grad_norm": 1.7162831332631867, + "learning_rate": 4.081717245296124e-07, + "loss": 0.49015533924102783, + "step": 6248 + }, + { + "epoch": 1.8273139347857874, + "grad_norm": 2.1906207360630763, + "learning_rate": 4.068056548638055e-07, + "loss": 0.5230038166046143, + "step": 6249 + }, + { + "epoch": 1.8276063752010527, + "grad_norm": 1.6860531929221865, + "learning_rate": 4.054418275488492e-07, + "loss": 0.5025942325592041, + "step": 6250 + }, + { + "epoch": 1.8278988156163183, + "grad_norm": 1.736980191753769, + "learning_rate": 4.0408024290352955e-07, + "loss": 0.5136677026748657, + "step": 6251 + }, + { + "epoch": 1.8281912560315836, + "grad_norm": 1.7988212644666006, + "learning_rate": 4.0272090124611086e-07, + "loss": 0.6209211945533752, + "step": 6252 + }, + { + "epoch": 1.828483696446849, + "grad_norm": 1.9742781188768104, + "learning_rate": 4.0136380289432784e-07, + "loss": 0.5913738012313843, + "step": 6253 + }, + { + "epoch": 1.8287761368621145, + "grad_norm": 1.9710058674803597, + "learning_rate": 4.000089481653946e-07, + "loss": 0.5745095610618591, + "step": 6254 + }, + { + "epoch": 1.8290685772773796, + "grad_norm": 1.4867167586867893, + "learning_rate": 3.9865633737600105e-07, + "loss": 0.4566704034805298, + "step": 6255 + }, + { + "epoch": 1.8293610176926451, + "grad_norm": 1.672257025513455, + "learning_rate": 3.9730597084231105e-07, + "loss": 0.49784860014915466, + "step": 6256 + }, + { + "epoch": 1.8296534581079105, + "grad_norm": 1.7381596787517106, + "learning_rate": 3.9595784887996647e-07, + "loss": 0.4489399790763855, + "step": 6257 + }, + { + "epoch": 1.8299458985231758, + "grad_norm": 1.9703484082158151, + "learning_rate": 3.946119718040797e-07, + "loss": 0.6335956454277039, + "step": 6258 + }, + { + "epoch": 1.8302383389384413, + "grad_norm": 1.4097270774574866, + "learning_rate": 3.932683399292436e-07, + "loss": 0.44865918159484863, + "step": 6259 + }, + { + "epoch": 
1.8305307793537067, + "grad_norm": 1.6485718017332285, + "learning_rate": 3.919269535695225e-07, + "loss": 0.4328421354293823, + "step": 6260 + }, + { + "epoch": 1.830823219768972, + "grad_norm": 1.6528043958881276, + "learning_rate": 3.9058781303845886e-07, + "loss": 0.463814377784729, + "step": 6261 + }, + { + "epoch": 1.8311156601842375, + "grad_norm": 1.9336577936651187, + "learning_rate": 3.892509186490667e-07, + "loss": 0.5857536196708679, + "step": 6262 + }, + { + "epoch": 1.8314081005995029, + "grad_norm": 1.4512027972560333, + "learning_rate": 3.879162707138395e-07, + "loss": 0.4873831272125244, + "step": 6263 + }, + { + "epoch": 1.8317005410147682, + "grad_norm": 1.89367526659171, + "learning_rate": 3.8658386954474104e-07, + "loss": 0.5428040027618408, + "step": 6264 + }, + { + "epoch": 1.8319929814300338, + "grad_norm": 1.759804366679343, + "learning_rate": 3.852537154532121e-07, + "loss": 0.49092623591423035, + "step": 6265 + }, + { + "epoch": 1.8322854218452989, + "grad_norm": 1.7919708064212196, + "learning_rate": 3.839258087501685e-07, + "loss": 0.5515817999839783, + "step": 6266 + }, + { + "epoch": 1.8325778622605644, + "grad_norm": 1.5550731443697672, + "learning_rate": 3.8260014974600077e-07, + "loss": 0.48080340027809143, + "step": 6267 + }, + { + "epoch": 1.83287030267583, + "grad_norm": 2.292962123842254, + "learning_rate": 3.812767387505734e-07, + "loss": 0.6129888296127319, + "step": 6268 + }, + { + "epoch": 1.833162743091095, + "grad_norm": 1.8203026764024284, + "learning_rate": 3.7995557607322543e-07, + "loss": 0.5843402147293091, + "step": 6269 + }, + { + "epoch": 1.8334551835063606, + "grad_norm": 1.9423893526281284, + "learning_rate": 3.7863666202276996e-07, + "loss": 0.5573143362998962, + "step": 6270 + }, + { + "epoch": 1.833747623921626, + "grad_norm": 1.9386384718546945, + "learning_rate": 3.773199969074959e-07, + "loss": 0.552756667137146, + "step": 6271 + }, + { + "epoch": 1.8340400643368913, + "grad_norm": 1.7629811878645265, + 
"learning_rate": 3.7600558103516706e-07, + "loss": 0.5559083223342896, + "step": 6272 + }, + { + "epoch": 1.8343325047521568, + "grad_norm": 1.9388416947858518, + "learning_rate": 3.746934147130177e-07, + "loss": 0.5388067364692688, + "step": 6273 + }, + { + "epoch": 1.8346249451674221, + "grad_norm": 1.694909278172827, + "learning_rate": 3.7338349824776133e-07, + "loss": 0.5816110968589783, + "step": 6274 + }, + { + "epoch": 1.8349173855826875, + "grad_norm": 1.9312358476553817, + "learning_rate": 3.720758319455786e-07, + "loss": 0.5720102787017822, + "step": 6275 + }, + { + "epoch": 1.835209825997953, + "grad_norm": 1.5440220572809102, + "learning_rate": 3.707704161121328e-07, + "loss": 0.46005699038505554, + "step": 6276 + }, + { + "epoch": 1.8355022664132183, + "grad_norm": 2.0613584980065776, + "learning_rate": 3.6946725105255656e-07, + "loss": 0.5602168440818787, + "step": 6277 + }, + { + "epoch": 1.8357947068284837, + "grad_norm": 1.6156922208810771, + "learning_rate": 3.68166337071455e-07, + "loss": 0.5390583276748657, + "step": 6278 + }, + { + "epoch": 1.8360871472437492, + "grad_norm": 1.558407958302267, + "learning_rate": 3.668676744729094e-07, + "loss": 0.48980700969696045, + "step": 6279 + }, + { + "epoch": 1.8363795876590143, + "grad_norm": 1.5853357453165142, + "learning_rate": 3.655712635604747e-07, + "loss": 0.6565061807632446, + "step": 6280 + }, + { + "epoch": 1.8366720280742799, + "grad_norm": 1.5692146512642422, + "learning_rate": 3.642771046371785e-07, + "loss": 0.465609610080719, + "step": 6281 + }, + { + "epoch": 1.8369644684895454, + "grad_norm": 1.7219983092976099, + "learning_rate": 3.6298519800552434e-07, + "loss": 0.5698891282081604, + "step": 6282 + }, + { + "epoch": 1.8372569089048105, + "grad_norm": 2.103680074754177, + "learning_rate": 3.616955439674863e-07, + "loss": 0.5885399580001831, + "step": 6283 + }, + { + "epoch": 1.837549349320076, + "grad_norm": 1.7028861151189467, + "learning_rate": 3.60408142824511e-07, + "loss": 
0.5158063173294067, + "step": 6284 + }, + { + "epoch": 1.8378417897353414, + "grad_norm": 1.6728867893623607, + "learning_rate": 3.5912299487752434e-07, + "loss": 0.49203822016716003, + "step": 6285 + }, + { + "epoch": 1.8381342301506067, + "grad_norm": 1.991753525300203, + "learning_rate": 3.578401004269183e-07, + "loss": 0.5756489038467407, + "step": 6286 + }, + { + "epoch": 1.8384266705658723, + "grad_norm": 1.9424738806131756, + "learning_rate": 3.565594597725652e-07, + "loss": 0.5970584154129028, + "step": 6287 + }, + { + "epoch": 1.8387191109811376, + "grad_norm": 1.4438564684738853, + "learning_rate": 3.552810732138046e-07, + "loss": 0.48702481389045715, + "step": 6288 + }, + { + "epoch": 1.839011551396403, + "grad_norm": 1.6632334435868308, + "learning_rate": 3.540049410494517e-07, + "loss": 0.4818963408470154, + "step": 6289 + }, + { + "epoch": 1.8393039918116685, + "grad_norm": 1.6617150886827665, + "learning_rate": 3.5273106357779585e-07, + "loss": 0.389699786901474, + "step": 6290 + }, + { + "epoch": 1.8395964322269338, + "grad_norm": 1.7654595369504777, + "learning_rate": 3.514594410965977e-07, + "loss": 0.6438174247741699, + "step": 6291 + }, + { + "epoch": 1.8398888726421991, + "grad_norm": 1.9409260673022277, + "learning_rate": 3.501900739030906e-07, + "loss": 0.654021143913269, + "step": 6292 + }, + { + "epoch": 1.8401813130574647, + "grad_norm": 1.921461492738401, + "learning_rate": 3.489229622939827e-07, + "loss": 0.748673677444458, + "step": 6293 + }, + { + "epoch": 1.8404737534727298, + "grad_norm": 1.850157344469969, + "learning_rate": 3.476581065654527e-07, + "loss": 0.47883105278015137, + "step": 6294 + }, + { + "epoch": 1.8407661938879953, + "grad_norm": 1.555147241743972, + "learning_rate": 3.4639550701315303e-07, + "loss": 0.5221554040908813, + "step": 6295 + }, + { + "epoch": 1.8410586343032607, + "grad_norm": 1.7256564846330384, + "learning_rate": 3.451351639322087e-07, + "loss": 0.482231080532074, + "step": 6296 + }, + { + "epoch": 
1.841351074718526, + "grad_norm": 1.797442509245834, + "learning_rate": 3.4387707761721625e-07, + "loss": 0.5407366752624512, + "step": 6297 + }, + { + "epoch": 1.8416435151337915, + "grad_norm": 1.9177358417772523, + "learning_rate": 3.426212483622482e-07, + "loss": 0.626631498336792, + "step": 6298 + }, + { + "epoch": 1.8419359555490569, + "grad_norm": 1.4729327167263073, + "learning_rate": 3.4136767646084424e-07, + "loss": 0.4401513338088989, + "step": 6299 + }, + { + "epoch": 1.8422283959643222, + "grad_norm": 1.756926078765411, + "learning_rate": 3.4011636220602106e-07, + "loss": 0.48130229115486145, + "step": 6300 + }, + { + "epoch": 1.8425208363795877, + "grad_norm": 1.9010914484665373, + "learning_rate": 3.3886730589026475e-07, + "loss": 0.7132935523986816, + "step": 6301 + }, + { + "epoch": 1.842813276794853, + "grad_norm": 1.692313625720156, + "learning_rate": 3.37620507805535e-07, + "loss": 0.6665343642234802, + "step": 6302 + }, + { + "epoch": 1.8431057172101184, + "grad_norm": 1.7909091838212496, + "learning_rate": 3.3637596824326435e-07, + "loss": 0.4313231408596039, + "step": 6303 + }, + { + "epoch": 1.843398157625384, + "grad_norm": 1.6745971926171657, + "learning_rate": 3.3513368749435447e-07, + "loss": 0.6263744235038757, + "step": 6304 + }, + { + "epoch": 1.843690598040649, + "grad_norm": 1.6133043168174617, + "learning_rate": 3.3389366584918313e-07, + "loss": 0.6215947866439819, + "step": 6305 + }, + { + "epoch": 1.8439830384559146, + "grad_norm": 1.6349014502820445, + "learning_rate": 3.3265590359759517e-07, + "loss": 0.45956021547317505, + "step": 6306 + }, + { + "epoch": 1.8442754788711802, + "grad_norm": 1.6194578088821072, + "learning_rate": 3.3142040102891126e-07, + "loss": 0.5363642573356628, + "step": 6307 + }, + { + "epoch": 1.8445679192864453, + "grad_norm": 1.7115305858843777, + "learning_rate": 3.3018715843192273e-07, + "loss": 0.4574592709541321, + "step": 6308 + }, + { + "epoch": 1.8448603597017108, + "grad_norm": 
1.6684239678735615, + "learning_rate": 3.2895617609489337e-07, + "loss": 0.43236005306243896, + "step": 6309 + }, + { + "epoch": 1.8451528001169761, + "grad_norm": 1.574172974777944, + "learning_rate": 3.277274543055564e-07, + "loss": 0.46349820494651794, + "step": 6310 + }, + { + "epoch": 1.8454452405322415, + "grad_norm": 1.9135327602518888, + "learning_rate": 3.265009933511176e-07, + "loss": 0.5233386754989624, + "step": 6311 + }, + { + "epoch": 1.845737680947507, + "grad_norm": 1.5165768096310508, + "learning_rate": 3.252767935182566e-07, + "loss": 0.44902727007865906, + "step": 6312 + }, + { + "epoch": 1.8460301213627723, + "grad_norm": 1.9281348385682333, + "learning_rate": 3.240548550931222e-07, + "loss": 0.709855854511261, + "step": 6313 + }, + { + "epoch": 1.8463225617780377, + "grad_norm": 1.8532989008830933, + "learning_rate": 3.228351783613348e-07, + "loss": 0.5194632411003113, + "step": 6314 + }, + { + "epoch": 1.8466150021933032, + "grad_norm": 1.750242735396334, + "learning_rate": 3.2161776360798535e-07, + "loss": 0.6027804017066956, + "step": 6315 + }, + { + "epoch": 1.8469074426085685, + "grad_norm": 1.591118544218686, + "learning_rate": 3.2040261111763946e-07, + "loss": 0.5047632455825806, + "step": 6316 + }, + { + "epoch": 1.8471998830238339, + "grad_norm": 2.082041129535105, + "learning_rate": 3.1918972117433e-07, + "loss": 0.5763708353042603, + "step": 6317 + }, + { + "epoch": 1.8474923234390994, + "grad_norm": 1.7701935148884373, + "learning_rate": 3.1797909406156234e-07, + "loss": 0.4725028872489929, + "step": 6318 + }, + { + "epoch": 1.8477847638543645, + "grad_norm": 1.5419878667068574, + "learning_rate": 3.167707300623135e-07, + "loss": 0.523047924041748, + "step": 6319 + }, + { + "epoch": 1.84807720426963, + "grad_norm": 1.6321175932285703, + "learning_rate": 3.15564629459032e-07, + "loss": 0.5100070238113403, + "step": 6320 + }, + { + "epoch": 1.8483696446848956, + "grad_norm": 1.7375024362733555, + "learning_rate": 
3.143607925336356e-07, + "loss": 0.6019359827041626, + "step": 6321 + }, + { + "epoch": 1.8486620851001607, + "grad_norm": 1.8195133886893664, + "learning_rate": 3.1315921956751483e-07, + "loss": 0.5514570474624634, + "step": 6322 + }, + { + "epoch": 1.8489545255154263, + "grad_norm": 1.6002643586013279, + "learning_rate": 3.1195991084152944e-07, + "loss": 0.49585646390914917, + "step": 6323 + }, + { + "epoch": 1.8492469659306916, + "grad_norm": 1.724322382501938, + "learning_rate": 3.1076286663601076e-07, + "loss": 0.5738509297370911, + "step": 6324 + }, + { + "epoch": 1.849539406345957, + "grad_norm": 1.8621720995112787, + "learning_rate": 3.095680872307605e-07, + "loss": 0.5149112939834595, + "step": 6325 + }, + { + "epoch": 1.8498318467612225, + "grad_norm": 1.6738148879498993, + "learning_rate": 3.0837557290505083e-07, + "loss": 0.45808184146881104, + "step": 6326 + }, + { + "epoch": 1.8501242871764878, + "grad_norm": 1.6155317269058609, + "learning_rate": 3.0718532393762435e-07, + "loss": 0.5173396468162537, + "step": 6327 + }, + { + "epoch": 1.8504167275917531, + "grad_norm": 1.6905273546590853, + "learning_rate": 3.059973406066963e-07, + "loss": 0.6229383945465088, + "step": 6328 + }, + { + "epoch": 1.8507091680070187, + "grad_norm": 1.6794531990129002, + "learning_rate": 3.0481162318994894e-07, + "loss": 0.45520371198654175, + "step": 6329 + }, + { + "epoch": 1.851001608422284, + "grad_norm": 1.5024073523898138, + "learning_rate": 3.036281719645373e-07, + "loss": 0.43216121196746826, + "step": 6330 + }, + { + "epoch": 1.8512940488375493, + "grad_norm": 1.9238309164883824, + "learning_rate": 3.0244698720708456e-07, + "loss": 0.5440583825111389, + "step": 6331 + }, + { + "epoch": 1.8515864892528149, + "grad_norm": 1.8189444343843324, + "learning_rate": 3.0126806919368756e-07, + "loss": 0.5474626421928406, + "step": 6332 + }, + { + "epoch": 1.85187892966808, + "grad_norm": 1.7800420936387606, + "learning_rate": 3.000914181999093e-07, + "loss": 
0.5122883915901184, + "step": 6333 + }, + { + "epoch": 1.8521713700833455, + "grad_norm": 1.776220435476035, + "learning_rate": 2.989170345007852e-07, + "loss": 0.48304370045661926, + "step": 6334 + }, + { + "epoch": 1.8524638104986109, + "grad_norm": 1.6949801188317577, + "learning_rate": 2.977449183708214e-07, + "loss": 0.566180408000946, + "step": 6335 + }, + { + "epoch": 1.8527562509138762, + "grad_norm": 1.7482351137010406, + "learning_rate": 2.96575070083992e-07, + "loss": 0.5218988656997681, + "step": 6336 + }, + { + "epoch": 1.8530486913291417, + "grad_norm": 1.8289145949576808, + "learning_rate": 2.954074899137427e-07, + "loss": 0.49669283628463745, + "step": 6337 + }, + { + "epoch": 1.853341131744407, + "grad_norm": 1.6012219042297557, + "learning_rate": 2.942421781329874e-07, + "loss": 0.5505487322807312, + "step": 6338 + }, + { + "epoch": 1.8536335721596724, + "grad_norm": 1.6156483149639533, + "learning_rate": 2.930791350141116e-07, + "loss": 0.5386735200881958, + "step": 6339 + }, + { + "epoch": 1.853926012574938, + "grad_norm": 2.0764057670166776, + "learning_rate": 2.919183608289689e-07, + "loss": 0.5266523957252502, + "step": 6340 + }, + { + "epoch": 1.8542184529902033, + "grad_norm": 1.573480922837112, + "learning_rate": 2.907598558488822e-07, + "loss": 0.5335103273391724, + "step": 6341 + }, + { + "epoch": 1.8545108934054686, + "grad_norm": 1.8447961626822076, + "learning_rate": 2.896036203446473e-07, + "loss": 0.6155405044555664, + "step": 6342 + }, + { + "epoch": 1.8548033338207341, + "grad_norm": 1.5602039082453873, + "learning_rate": 2.884496545865245e-07, + "loss": 0.5258159041404724, + "step": 6343 + }, + { + "epoch": 1.8550957742359993, + "grad_norm": 1.7894466773590292, + "learning_rate": 2.8729795884424927e-07, + "loss": 0.5428795218467712, + "step": 6344 + }, + { + "epoch": 1.8553882146512648, + "grad_norm": 1.4344098630811726, + "learning_rate": 2.8614853338702066e-07, + "loss": 0.4876418709754944, + "step": 6345 + }, + { + "epoch": 
1.8556806550665303, + "grad_norm": 1.606511441088432, + "learning_rate": 2.850013784835115e-07, + "loss": 0.49640393257141113, + "step": 6346 + }, + { + "epoch": 1.8559730954817955, + "grad_norm": 1.8316843043903746, + "learning_rate": 2.838564944018618e-07, + "loss": 0.5726122260093689, + "step": 6347 + }, + { + "epoch": 1.856265535897061, + "grad_norm": 1.653087716973347, + "learning_rate": 2.827138814096819e-07, + "loss": 0.5106557011604309, + "step": 6348 + }, + { + "epoch": 1.8565579763123263, + "grad_norm": 1.5025453294784719, + "learning_rate": 2.8157353977405044e-07, + "loss": 0.45941129326820374, + "step": 6349 + }, + { + "epoch": 1.8568504167275917, + "grad_norm": 1.781767756464568, + "learning_rate": 2.8043546976151414e-07, + "loss": 0.488609254360199, + "step": 6350 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 1.764244860072195, + "learning_rate": 2.7929967163809135e-07, + "loss": 0.639745831489563, + "step": 6351 + }, + { + "epoch": 1.8574352975581225, + "grad_norm": 1.498822179909691, + "learning_rate": 2.7816614566926747e-07, + "loss": 0.45327228307724, + "step": 6352 + }, + { + "epoch": 1.8577277379733879, + "grad_norm": 1.625074504661963, + "learning_rate": 2.7703489211999725e-07, + "loss": 0.5606091022491455, + "step": 6353 + }, + { + "epoch": 1.8580201783886534, + "grad_norm": 1.7312129624633084, + "learning_rate": 2.759059112547047e-07, + "loss": 0.5078528523445129, + "step": 6354 + }, + { + "epoch": 1.8583126188039187, + "grad_norm": 1.6687258508972733, + "learning_rate": 2.74779203337282e-07, + "loss": 0.5558253526687622, + "step": 6355 + }, + { + "epoch": 1.858605059219184, + "grad_norm": 1.7014892476807573, + "learning_rate": 2.7365476863108974e-07, + "loss": 0.3962102234363556, + "step": 6356 + }, + { + "epoch": 1.8588974996344496, + "grad_norm": 1.6957699860554467, + "learning_rate": 2.725326073989587e-07, + "loss": 0.4737718105316162, + "step": 6357 + }, + { + "epoch": 1.8591899400497147, + "grad_norm": 3.313281560384309, + 
"learning_rate": 2.7141271990318576e-07, + "loss": 0.5389090180397034, + "step": 6358 + }, + { + "epoch": 1.8594823804649803, + "grad_norm": 1.7840378938084138, + "learning_rate": 2.7029510640554033e-07, + "loss": 0.5311479568481445, + "step": 6359 + }, + { + "epoch": 1.8597748208802458, + "grad_norm": 1.631290291956445, + "learning_rate": 2.691797671672558e-07, + "loss": 0.4753482937812805, + "step": 6360 + }, + { + "epoch": 1.860067261295511, + "grad_norm": 1.581254208029566, + "learning_rate": 2.6806670244903577e-07, + "loss": 0.5192427635192871, + "step": 6361 + }, + { + "epoch": 1.8603597017107765, + "grad_norm": 1.9540580966263197, + "learning_rate": 2.6695591251105214e-07, + "loss": 0.5910875797271729, + "step": 6362 + }, + { + "epoch": 1.8606521421260418, + "grad_norm": 1.7486575397054567, + "learning_rate": 2.658473976129472e-07, + "loss": 0.5465212464332581, + "step": 6363 + }, + { + "epoch": 1.8609445825413071, + "grad_norm": 1.7446293681201037, + "learning_rate": 2.647411580138282e-07, + "loss": 0.43188267946243286, + "step": 6364 + }, + { + "epoch": 1.8612370229565727, + "grad_norm": 2.144472636918694, + "learning_rate": 2.636371939722715e-07, + "loss": 0.5723724365234375, + "step": 6365 + }, + { + "epoch": 1.861529463371838, + "grad_norm": 1.6310859619397844, + "learning_rate": 2.62535505746323e-07, + "loss": 0.47383856773376465, + "step": 6366 + }, + { + "epoch": 1.8618219037871033, + "grad_norm": 1.764378835172625, + "learning_rate": 2.6143609359349566e-07, + "loss": 0.502855658531189, + "step": 6367 + }, + { + "epoch": 1.8621143442023689, + "grad_norm": 2.265501418087609, + "learning_rate": 2.6033895777077043e-07, + "loss": 0.5934205055236816, + "step": 6368 + }, + { + "epoch": 1.8624067846176342, + "grad_norm": 1.469455820490925, + "learning_rate": 2.5924409853459455e-07, + "loss": 0.4157971143722534, + "step": 6369 + }, + { + "epoch": 1.8626992250328995, + "grad_norm": 1.8051847044948597, + "learning_rate": 2.5815151614088764e-07, + "loss": 
0.5944307446479797, + "step": 6370 + }, + { + "epoch": 1.862991665448165, + "grad_norm": 2.0081645135491812, + "learning_rate": 2.57061210845031e-07, + "loss": 0.5603153705596924, + "step": 6371 + }, + { + "epoch": 1.8632841058634302, + "grad_norm": 1.752999497142634, + "learning_rate": 2.559731829018786e-07, + "loss": 0.49231380224227905, + "step": 6372 + }, + { + "epoch": 1.8635765462786957, + "grad_norm": 1.666251917997058, + "learning_rate": 2.548874325657502e-07, + "loss": 0.46984565258026123, + "step": 6373 + }, + { + "epoch": 1.863868986693961, + "grad_norm": 1.7373025752546019, + "learning_rate": 2.5380396009043297e-07, + "loss": 0.5088338255882263, + "step": 6374 + }, + { + "epoch": 1.8641614271092264, + "grad_norm": 1.7554684094014161, + "learning_rate": 2.52722765729182e-07, + "loss": 0.4760589599609375, + "step": 6375 + }, + { + "epoch": 1.864453867524492, + "grad_norm": 1.6521606786384044, + "learning_rate": 2.5164384973471954e-07, + "loss": 0.44232040643692017, + "step": 6376 + }, + { + "epoch": 1.8647463079397573, + "grad_norm": 1.736903879415624, + "learning_rate": 2.505672123592373e-07, + "loss": 0.46714338660240173, + "step": 6377 + }, + { + "epoch": 1.8650387483550226, + "grad_norm": 1.9333860177281759, + "learning_rate": 2.494928538543917e-07, + "loss": 0.5527149438858032, + "step": 6378 + }, + { + "epoch": 1.8653311887702881, + "grad_norm": 1.690422887605866, + "learning_rate": 2.484207744713074e-07, + "loss": 0.5006313323974609, + "step": 6379 + }, + { + "epoch": 1.8656236291855535, + "grad_norm": 1.5247883016042734, + "learning_rate": 2.473509744605751e-07, + "loss": 0.5007860660552979, + "step": 6380 + }, + { + "epoch": 1.8659160696008188, + "grad_norm": 1.683063597354387, + "learning_rate": 2.4628345407225804e-07, + "loss": 0.4354132413864136, + "step": 6381 + }, + { + "epoch": 1.8662085100160843, + "grad_norm": 1.718309113338333, + "learning_rate": 2.452182135558789e-07, + "loss": 0.5199555158615112, + "step": 6382 + }, + { + "epoch": 
1.8665009504313494, + "grad_norm": 1.6260046663066803, + "learning_rate": 2.441552531604319e-07, + "loss": 0.5117326974868774, + "step": 6383 + }, + { + "epoch": 1.866793390846615, + "grad_norm": 1.803024051218915, + "learning_rate": 2.43094573134377e-07, + "loss": 0.5169814825057983, + "step": 6384 + }, + { + "epoch": 1.8670858312618805, + "grad_norm": 1.7012998015666523, + "learning_rate": 2.420361737256438e-07, + "loss": 0.563339352607727, + "step": 6385 + }, + { + "epoch": 1.8673782716771457, + "grad_norm": 2.1248949598274325, + "learning_rate": 2.409800551816255e-07, + "loss": 0.710465133190155, + "step": 6386 + }, + { + "epoch": 1.8676707120924112, + "grad_norm": 1.6580658731053397, + "learning_rate": 2.3992621774918343e-07, + "loss": 0.6894562244415283, + "step": 6387 + }, + { + "epoch": 1.8679631525076765, + "grad_norm": 1.7380197058585787, + "learning_rate": 2.388746616746462e-07, + "loss": 0.5105183124542236, + "step": 6388 + }, + { + "epoch": 1.8682555929229419, + "grad_norm": 2.0034985048956684, + "learning_rate": 2.3782538720380722e-07, + "loss": 0.4602908492088318, + "step": 6389 + }, + { + "epoch": 1.8685480333382074, + "grad_norm": 1.7787197864367217, + "learning_rate": 2.3677839458192908e-07, + "loss": 0.5395161509513855, + "step": 6390 + }, + { + "epoch": 1.8688404737534727, + "grad_norm": 1.6121023481071262, + "learning_rate": 2.3573368405374054e-07, + "loss": 0.5842725038528442, + "step": 6391 + }, + { + "epoch": 1.869132914168738, + "grad_norm": 1.6354709739233064, + "learning_rate": 2.346912558634362e-07, + "loss": 0.5837947130203247, + "step": 6392 + }, + { + "epoch": 1.8694253545840036, + "grad_norm": 1.8136211176417363, + "learning_rate": 2.3365111025467568e-07, + "loss": 0.5255596041679382, + "step": 6393 + }, + { + "epoch": 1.869717794999269, + "grad_norm": 1.5586602271443384, + "learning_rate": 2.326132474705889e-07, + "loss": 0.5614485144615173, + "step": 6394 + }, + { + "epoch": 1.8700102354145343, + "grad_norm": 1.5895893761997042, + 
"learning_rate": 2.3157766775376733e-07, + "loss": 0.5510128736495972, + "step": 6395 + }, + { + "epoch": 1.8703026758297998, + "grad_norm": 2.295988070565878, + "learning_rate": 2.3054437134627406e-07, + "loss": 0.690884530544281, + "step": 6396 + }, + { + "epoch": 1.870595116245065, + "grad_norm": 1.94960784120805, + "learning_rate": 2.2951335848963364e-07, + "loss": 0.637476921081543, + "step": 6397 + }, + { + "epoch": 1.8708875566603305, + "grad_norm": 1.6526446878259382, + "learning_rate": 2.2848462942484108e-07, + "loss": 0.5254319906234741, + "step": 6398 + }, + { + "epoch": 1.871179997075596, + "grad_norm": 1.7552717813182315, + "learning_rate": 2.27458184392354e-07, + "loss": 0.5038233995437622, + "step": 6399 + }, + { + "epoch": 1.8714724374908611, + "grad_norm": 1.4123258498894362, + "learning_rate": 2.2643402363209832e-07, + "loss": 0.43701431155204773, + "step": 6400 + }, + { + "epoch": 1.8717648779061267, + "grad_norm": 1.8138198755485717, + "learning_rate": 2.2541214738346583e-07, + "loss": 0.5490877628326416, + "step": 6401 + }, + { + "epoch": 1.872057318321392, + "grad_norm": 1.5452561215431913, + "learning_rate": 2.2439255588531327e-07, + "loss": 0.48393410444259644, + "step": 6402 + }, + { + "epoch": 1.8723497587366573, + "grad_norm": 1.6213926610567049, + "learning_rate": 2.2337524937596444e-07, + "loss": 0.5439243912696838, + "step": 6403 + }, + { + "epoch": 1.8726421991519229, + "grad_norm": 1.6026974016529494, + "learning_rate": 2.22360228093208e-07, + "loss": 0.5272157192230225, + "step": 6404 + }, + { + "epoch": 1.8729346395671882, + "grad_norm": 1.6750451870732375, + "learning_rate": 2.2134749227429864e-07, + "loss": 0.6323473453521729, + "step": 6405 + }, + { + "epoch": 1.8732270799824535, + "grad_norm": 1.6749139186520705, + "learning_rate": 2.2033704215595808e-07, + "loss": 0.4568995237350464, + "step": 6406 + }, + { + "epoch": 1.873519520397719, + "grad_norm": 1.8331627672377568, + "learning_rate": 2.1932887797437296e-07, + "loss": 
0.5817153453826904, + "step": 6407 + }, + { + "epoch": 1.8738119608129844, + "grad_norm": 1.4674902238035163, + "learning_rate": 2.183229999651948e-07, + "loss": 0.5104260444641113, + "step": 6408 + }, + { + "epoch": 1.8741044012282497, + "grad_norm": 1.7946613600749395, + "learning_rate": 2.1731940836354105e-07, + "loss": 0.44944921135902405, + "step": 6409 + }, + { + "epoch": 1.8743968416435153, + "grad_norm": 1.794977484250215, + "learning_rate": 2.163181034039974e-07, + "loss": 0.6935169696807861, + "step": 6410 + }, + { + "epoch": 1.8746892820587804, + "grad_norm": 1.7330999339843873, + "learning_rate": 2.1531908532060998e-07, + "loss": 0.55609130859375, + "step": 6411 + }, + { + "epoch": 1.874981722474046, + "grad_norm": 1.6428359107019144, + "learning_rate": 2.143223543468953e-07, + "loss": 0.5402215719223022, + "step": 6412 + }, + { + "epoch": 1.8752741628893113, + "grad_norm": 1.8163043216263146, + "learning_rate": 2.1332791071583258e-07, + "loss": 0.5669365525245667, + "step": 6413 + }, + { + "epoch": 1.8755666033045766, + "grad_norm": 2.2122008806914044, + "learning_rate": 2.123357546598659e-07, + "loss": 0.46257615089416504, + "step": 6414 + }, + { + "epoch": 1.8758590437198421, + "grad_norm": 1.6308794717153283, + "learning_rate": 2.1134588641090858e-07, + "loss": 0.4596136212348938, + "step": 6415 + }, + { + "epoch": 1.8761514841351075, + "grad_norm": 1.6758615624094995, + "learning_rate": 2.1035830620033227e-07, + "loss": 0.5086819529533386, + "step": 6416 + }, + { + "epoch": 1.8764439245503728, + "grad_norm": 1.8974547658257448, + "learning_rate": 2.0937301425898115e-07, + "loss": 0.6008501052856445, + "step": 6417 + }, + { + "epoch": 1.8767363649656383, + "grad_norm": 1.8448672190670345, + "learning_rate": 2.0839001081715882e-07, + "loss": 0.5943784713745117, + "step": 6418 + }, + { + "epoch": 1.8770288053809037, + "grad_norm": 1.3203141385144623, + "learning_rate": 2.0740929610463813e-07, + "loss": 0.5006660223007202, + "step": 6419 + }, + { + 
"epoch": 1.877321245796169, + "grad_norm": 1.7508035137785818, + "learning_rate": 2.0643087035065458e-07, + "loss": 0.5434073805809021, + "step": 6420 + }, + { + "epoch": 1.8776136862114345, + "grad_norm": 1.8446497118213794, + "learning_rate": 2.0545473378390858e-07, + "loss": 0.6426963210105896, + "step": 6421 + }, + { + "epoch": 1.8779061266266996, + "grad_norm": 1.7388169538440008, + "learning_rate": 2.044808866325676e-07, + "loss": 0.5190218687057495, + "step": 6422 + }, + { + "epoch": 1.8781985670419652, + "grad_norm": 1.5291942184143035, + "learning_rate": 2.035093291242607e-07, + "loss": 0.40918534994125366, + "step": 6423 + }, + { + "epoch": 1.8784910074572307, + "grad_norm": 1.719713887519883, + "learning_rate": 2.0254006148608507e-07, + "loss": 0.5403652191162109, + "step": 6424 + }, + { + "epoch": 1.8787834478724958, + "grad_norm": 1.3839892041506006, + "learning_rate": 2.0157308394460062e-07, + "loss": 0.49781516194343567, + "step": 6425 + }, + { + "epoch": 1.8790758882877614, + "grad_norm": 1.8332751958303748, + "learning_rate": 2.006083967258321e-07, + "loss": 0.5841303467750549, + "step": 6426 + }, + { + "epoch": 1.8793683287030267, + "grad_norm": 1.679945923485487, + "learning_rate": 1.9964600005527024e-07, + "loss": 0.5054808855056763, + "step": 6427 + }, + { + "epoch": 1.879660769118292, + "grad_norm": 1.7695393284467882, + "learning_rate": 1.9868589415786843e-07, + "loss": 0.4801362454891205, + "step": 6428 + }, + { + "epoch": 1.8799532095335576, + "grad_norm": 1.8547174560912147, + "learning_rate": 1.9772807925804494e-07, + "loss": 0.4709380269050598, + "step": 6429 + }, + { + "epoch": 1.880245649948823, + "grad_norm": 1.8447220446699908, + "learning_rate": 1.9677255557968511e-07, + "loss": 0.665968120098114, + "step": 6430 + }, + { + "epoch": 1.8805380903640883, + "grad_norm": 1.7494009698963573, + "learning_rate": 1.9581932334613585e-07, + "loss": 0.515839159488678, + "step": 6431 + }, + { + "epoch": 1.8808305307793538, + "grad_norm": 
1.6699738562759978, + "learning_rate": 1.948683827802089e-07, + "loss": 0.5399242043495178, + "step": 6432 + }, + { + "epoch": 1.8811229711946191, + "grad_norm": 1.7478095955612059, + "learning_rate": 1.9391973410418097e-07, + "loss": 0.6167087554931641, + "step": 6433 + }, + { + "epoch": 1.8814154116098845, + "grad_norm": 1.826500337038364, + "learning_rate": 1.9297337753979462e-07, + "loss": 0.6139745116233826, + "step": 6434 + }, + { + "epoch": 1.88170785202515, + "grad_norm": 2.0873679343118257, + "learning_rate": 1.9202931330825292e-07, + "loss": 0.7103149890899658, + "step": 6435 + }, + { + "epoch": 1.8820002924404151, + "grad_norm": 1.6777685812633742, + "learning_rate": 1.9108754163022602e-07, + "loss": 0.5958741903305054, + "step": 6436 + }, + { + "epoch": 1.8822927328556807, + "grad_norm": 1.2489160599157765, + "learning_rate": 1.9014806272584673e-07, + "loss": 0.32660478353500366, + "step": 6437 + }, + { + "epoch": 1.8825851732709462, + "grad_norm": 1.822465954469875, + "learning_rate": 1.8921087681471272e-07, + "loss": 0.49485981464385986, + "step": 6438 + }, + { + "epoch": 1.8828776136862113, + "grad_norm": 1.5404253681507418, + "learning_rate": 1.8827598411588544e-07, + "loss": 0.5106277465820312, + "step": 6439 + }, + { + "epoch": 1.8831700541014769, + "grad_norm": 1.5696470040532076, + "learning_rate": 1.8734338484789115e-07, + "loss": 0.50006502866745, + "step": 6440 + }, + { + "epoch": 1.8834624945167422, + "grad_norm": 1.5827360977472946, + "learning_rate": 1.8641307922871887e-07, + "loss": 0.47097745537757874, + "step": 6441 + }, + { + "epoch": 1.8837549349320075, + "grad_norm": 1.718260594389779, + "learning_rate": 1.854850674758213e-07, + "loss": 0.5874402523040771, + "step": 6442 + }, + { + "epoch": 1.884047375347273, + "grad_norm": 1.7055917291229012, + "learning_rate": 1.8455934980611602e-07, + "loss": 0.45705318450927734, + "step": 6443 + }, + { + "epoch": 1.8843398157625384, + "grad_norm": 1.8262667617041222, + "learning_rate": 
1.8363592643598328e-07, + "loss": 0.4949952960014343, + "step": 6444 + }, + { + "epoch": 1.8846322561778037, + "grad_norm": 2.0005095204142056, + "learning_rate": 1.827147975812693e-07, + "loss": 0.5311721563339233, + "step": 6445 + }, + { + "epoch": 1.8849246965930693, + "grad_norm": 1.8075375628836245, + "learning_rate": 1.817959634572819e-07, + "loss": 0.5652828216552734, + "step": 6446 + }, + { + "epoch": 1.8852171370083346, + "grad_norm": 1.7007026167846622, + "learning_rate": 1.8087942427879146e-07, + "loss": 0.4856044054031372, + "step": 6447 + }, + { + "epoch": 1.8855095774236, + "grad_norm": 1.6920105837383546, + "learning_rate": 1.799651802600344e-07, + "loss": 0.55420982837677, + "step": 6448 + }, + { + "epoch": 1.8858020178388655, + "grad_norm": 1.8804834035548856, + "learning_rate": 1.7905323161470867e-07, + "loss": 0.5869326591491699, + "step": 6449 + }, + { + "epoch": 1.8860944582541306, + "grad_norm": 1.761061751635786, + "learning_rate": 1.781435785559793e-07, + "loss": 0.4505504369735718, + "step": 6450 + }, + { + "epoch": 1.8863868986693961, + "grad_norm": 1.7194415376329713, + "learning_rate": 1.7723622129646955e-07, + "loss": 0.5460773706436157, + "step": 6451 + }, + { + "epoch": 1.8866793390846615, + "grad_norm": 1.7253684204963688, + "learning_rate": 1.7633116004826978e-07, + "loss": 0.6214778423309326, + "step": 6452 + }, + { + "epoch": 1.8869717794999268, + "grad_norm": 1.786722853658628, + "learning_rate": 1.7542839502293297e-07, + "loss": 0.4900703430175781, + "step": 6453 + }, + { + "epoch": 1.8872642199151923, + "grad_norm": 1.8351888114829378, + "learning_rate": 1.7452792643147364e-07, + "loss": 0.5177547931671143, + "step": 6454 + }, + { + "epoch": 1.8875566603304577, + "grad_norm": 1.6033594290974305, + "learning_rate": 1.7362975448437236e-07, + "loss": 0.3914458453655243, + "step": 6455 + }, + { + "epoch": 1.887849100745723, + "grad_norm": 1.7306995937297311, + "learning_rate": 1.7273387939157116e-07, + "loss": 0.5222523212432861, + 
"step": 6456 + }, + { + "epoch": 1.8881415411609885, + "grad_norm": 1.8351026582741266, + "learning_rate": 1.7184030136247477e-07, + "loss": 0.5097587704658508, + "step": 6457 + }, + { + "epoch": 1.8884339815762539, + "grad_norm": 1.711376264331189, + "learning_rate": 1.7094902060595053e-07, + "loss": 0.517410397529602, + "step": 6458 + }, + { + "epoch": 1.8887264219915192, + "grad_norm": 1.5054067124169248, + "learning_rate": 1.7006003733033182e-07, + "loss": 0.4951689839363098, + "step": 6459 + }, + { + "epoch": 1.8890188624067847, + "grad_norm": 1.8698243351971042, + "learning_rate": 1.6917335174341242e-07, + "loss": 0.5530004501342773, + "step": 6460 + }, + { + "epoch": 1.8893113028220498, + "grad_norm": 1.3793759581483827, + "learning_rate": 1.6828896405244988e-07, + "loss": 0.5231990814208984, + "step": 6461 + }, + { + "epoch": 1.8896037432373154, + "grad_norm": 1.7109665283076239, + "learning_rate": 1.6740687446416326e-07, + "loss": 0.5142268538475037, + "step": 6462 + }, + { + "epoch": 1.889896183652581, + "grad_norm": 1.5939124952252972, + "learning_rate": 1.6652708318473765e-07, + "loss": 0.4803999364376068, + "step": 6463 + }, + { + "epoch": 1.890188624067846, + "grad_norm": 1.8261203070041963, + "learning_rate": 1.6564959041981743e-07, + "loss": 0.38822099566459656, + "step": 6464 + }, + { + "epoch": 1.8904810644831116, + "grad_norm": 1.7158195687276572, + "learning_rate": 1.6477439637451186e-07, + "loss": 0.4778556823730469, + "step": 6465 + }, + { + "epoch": 1.890773504898377, + "grad_norm": 1.548976438279917, + "learning_rate": 1.6390150125339178e-07, + "loss": 0.5083664059638977, + "step": 6466 + }, + { + "epoch": 1.8910659453136422, + "grad_norm": 2.298817115631298, + "learning_rate": 1.6303090526049058e-07, + "loss": 0.6592142581939697, + "step": 6467 + }, + { + "epoch": 1.8913583857289078, + "grad_norm": 1.7188849828284447, + "learning_rate": 1.6216260859930776e-07, + "loss": 0.6350588798522949, + "step": 6468 + }, + { + "epoch": 
1.8916508261441731, + "grad_norm": 1.900981319900476, + "learning_rate": 1.6129661147279763e-07, + "loss": 0.5542852282524109, + "step": 6469 + }, + { + "epoch": 1.8919432665594385, + "grad_norm": 1.7094379727839777, + "learning_rate": 1.6043291408338602e-07, + "loss": 0.572988748550415, + "step": 6470 + }, + { + "epoch": 1.892235706974704, + "grad_norm": 1.578693569659532, + "learning_rate": 1.5957151663295367e-07, + "loss": 0.4801466763019562, + "step": 6471 + }, + { + "epoch": 1.8925281473899693, + "grad_norm": 2.0149025268161207, + "learning_rate": 1.5871241932284953e-07, + "loss": 0.6286160349845886, + "step": 6472 + }, + { + "epoch": 1.8928205878052347, + "grad_norm": 1.8739502258074872, + "learning_rate": 1.5785562235388074e-07, + "loss": 0.5731645822525024, + "step": 6473 + }, + { + "epoch": 1.8931130282205002, + "grad_norm": 2.02559646967304, + "learning_rate": 1.5700112592631933e-07, + "loss": 0.47890836000442505, + "step": 6474 + }, + { + "epoch": 1.8934054686357653, + "grad_norm": 1.8833158182705436, + "learning_rate": 1.5614893023989886e-07, + "loss": 0.4379703998565674, + "step": 6475 + }, + { + "epoch": 1.8936979090510309, + "grad_norm": 1.886508266764503, + "learning_rate": 1.5529903549381331e-07, + "loss": 0.5629044771194458, + "step": 6476 + }, + { + "epoch": 1.8939903494662964, + "grad_norm": 1.6388873220258502, + "learning_rate": 1.5445144188672268e-07, + "loss": 0.4995439052581787, + "step": 6477 + }, + { + "epoch": 1.8942827898815615, + "grad_norm": 1.54762620576383, + "learning_rate": 1.5360614961674403e-07, + "loss": 0.5350549221038818, + "step": 6478 + }, + { + "epoch": 1.894575230296827, + "grad_norm": 1.636976407400752, + "learning_rate": 1.5276315888146266e-07, + "loss": 0.5245925188064575, + "step": 6479 + }, + { + "epoch": 1.8948676707120924, + "grad_norm": 1.870112790684546, + "learning_rate": 1.519224698779198e-07, + "loss": 0.5159675478935242, + "step": 6480 + }, + { + "epoch": 1.8951601111273577, + "grad_norm": 1.6327790205426773, 
+ "learning_rate": 1.5108408280262276e-07, + "loss": 0.5046014189720154, + "step": 6481 + }, + { + "epoch": 1.8954525515426233, + "grad_norm": 1.5658787677393426, + "learning_rate": 1.502479978515381e-07, + "loss": 0.35977911949157715, + "step": 6482 + }, + { + "epoch": 1.8957449919578886, + "grad_norm": 1.6374646749200208, + "learning_rate": 1.4941421522009725e-07, + "loss": 0.4689600467681885, + "step": 6483 + }, + { + "epoch": 1.896037432373154, + "grad_norm": 1.713919299692529, + "learning_rate": 1.485827351031899e-07, + "loss": 0.5729683637619019, + "step": 6484 + }, + { + "epoch": 1.8963298727884195, + "grad_norm": 2.081397285004385, + "learning_rate": 1.4775355769517163e-07, + "loss": 0.5929673314094543, + "step": 6485 + }, + { + "epoch": 1.8966223132036848, + "grad_norm": 1.6003411415494537, + "learning_rate": 1.4692668318985636e-07, + "loss": 0.43075594305992126, + "step": 6486 + }, + { + "epoch": 1.8969147536189501, + "grad_norm": 1.7646064022155787, + "learning_rate": 1.461021117805217e-07, + "loss": 0.5247992277145386, + "step": 6487 + }, + { + "epoch": 1.8972071940342157, + "grad_norm": 1.7947400732319756, + "learning_rate": 1.4527984365990455e-07, + "loss": 0.4930630326271057, + "step": 6488 + }, + { + "epoch": 1.8974996344494808, + "grad_norm": 2.490399223660391, + "learning_rate": 1.4445987902020676e-07, + "loss": 0.7183758616447449, + "step": 6489 + }, + { + "epoch": 1.8977920748647463, + "grad_norm": 1.6603594705802933, + "learning_rate": 1.4364221805309052e-07, + "loss": 0.4766094982624054, + "step": 6490 + }, + { + "epoch": 1.8980845152800117, + "grad_norm": 1.7401365125544646, + "learning_rate": 1.4282686094967747e-07, + "loss": 0.43594151735305786, + "step": 6491 + }, + { + "epoch": 1.898376955695277, + "grad_norm": 1.4953976915814553, + "learning_rate": 1.4201380790055397e-07, + "loss": 0.49320366978645325, + "step": 6492 + }, + { + "epoch": 1.8986693961105425, + "grad_norm": 1.7835092237734465, + "learning_rate": 1.4120305909576359e-07, + 
"loss": 0.600296139717102, + "step": 6493 + }, + { + "epoch": 1.8989618365258079, + "grad_norm": 1.8325915671317163, + "learning_rate": 1.4039461472481696e-07, + "loss": 0.6692827939987183, + "step": 6494 + }, + { + "epoch": 1.8992542769410732, + "grad_norm": 1.5707947665490356, + "learning_rate": 1.395884749766807e-07, + "loss": 0.49206262826919556, + "step": 6495 + }, + { + "epoch": 1.8995467173563387, + "grad_norm": 1.5801197568349268, + "learning_rate": 1.3878464003978741e-07, + "loss": 0.4987361431121826, + "step": 6496 + }, + { + "epoch": 1.899839157771604, + "grad_norm": 1.5345056226134064, + "learning_rate": 1.3798311010202681e-07, + "loss": 0.5020350217819214, + "step": 6497 + }, + { + "epoch": 1.9001315981868694, + "grad_norm": 1.804856300616187, + "learning_rate": 1.3718388535075123e-07, + "loss": 0.5906451344490051, + "step": 6498 + }, + { + "epoch": 1.900424038602135, + "grad_norm": 1.7402170644717794, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.5089905858039856, + "step": 6499 + }, + { + "epoch": 1.9007164790174, + "grad_norm": 1.7322257732687294, + "learning_rate": 1.3559235215437672e-07, + "loss": 0.4633820056915283, + "step": 6500 + }, + { + "epoch": 1.9010089194326656, + "grad_norm": 1.760704522549711, + "learning_rate": 1.34800044081288e-07, + "loss": 0.4214053750038147, + "step": 6501 + }, + { + "epoch": 1.9013013598479311, + "grad_norm": 1.8502899980656935, + "learning_rate": 1.3401004193870694e-07, + "loss": 0.6652689576148987, + "step": 6502 + }, + { + "epoch": 1.9015938002631962, + "grad_norm": 1.9902189127655232, + "learning_rate": 1.3322234591129247e-07, + "loss": 0.610877275466919, + "step": 6503 + }, + { + "epoch": 1.9018862406784618, + "grad_norm": 1.8174576160077263, + "learning_rate": 1.324369561831651e-07, + "loss": 0.5051777958869934, + "step": 6504 + }, + { + "epoch": 1.9021786810937271, + "grad_norm": 1.679424427708786, + "learning_rate": 1.3165387293790133e-07, + "loss": 0.5004675984382629, + "step": 6505 + }, + { + 
"epoch": 1.9024711215089924, + "grad_norm": 1.7771913721647656, + "learning_rate": 1.3087309635854583e-07, + "loss": 0.5778615474700928, + "step": 6506 + }, + { + "epoch": 1.902763561924258, + "grad_norm": 2.0292378822767807, + "learning_rate": 1.300946266275982e-07, + "loss": 0.5282145738601685, + "step": 6507 + }, + { + "epoch": 1.9030560023395233, + "grad_norm": 1.7978860546574749, + "learning_rate": 1.2931846392702174e-07, + "loss": 0.5965359807014465, + "step": 6508 + }, + { + "epoch": 1.9033484427547886, + "grad_norm": 1.6652110616876246, + "learning_rate": 1.2854460843823912e-07, + "loss": 0.5891281366348267, + "step": 6509 + }, + { + "epoch": 1.9036408831700542, + "grad_norm": 1.6248785946895787, + "learning_rate": 1.2777306034213677e-07, + "loss": 0.516204297542572, + "step": 6510 + }, + { + "epoch": 1.9039333235853195, + "grad_norm": 1.6814946652270708, + "learning_rate": 1.2700381981905486e-07, + "loss": 0.5148355960845947, + "step": 6511 + }, + { + "epoch": 1.9042257640005849, + "grad_norm": 1.8013354973349966, + "learning_rate": 1.2623688704880287e-07, + "loss": 0.5599791407585144, + "step": 6512 + }, + { + "epoch": 1.9045182044158504, + "grad_norm": 1.4831223871376102, + "learning_rate": 1.2547226221064412e-07, + "loss": 0.44349417090415955, + "step": 6513 + }, + { + "epoch": 1.9048106448311155, + "grad_norm": 1.8442502212828862, + "learning_rate": 1.2470994548330672e-07, + "loss": 0.5919830799102783, + "step": 6514 + }, + { + "epoch": 1.905103085246381, + "grad_norm": 1.5907952124169482, + "learning_rate": 1.2394993704497592e-07, + "loss": 0.4615499675273895, + "step": 6515 + }, + { + "epoch": 1.9053955256616466, + "grad_norm": 2.080938429630683, + "learning_rate": 1.2319223707330074e-07, + "loss": 0.5217719674110413, + "step": 6516 + }, + { + "epoch": 1.9056879660769117, + "grad_norm": 1.7218384779241716, + "learning_rate": 1.2243684574538838e-07, + "loss": 0.510722279548645, + "step": 6517 + }, + { + "epoch": 1.9059804064921773, + "grad_norm": 
1.7577870608967676, + "learning_rate": 1.2168376323780652e-07, + "loss": 0.6744403839111328, + "step": 6518 + }, + { + "epoch": 1.9062728469074426, + "grad_norm": 1.852387461765699, + "learning_rate": 1.209329897265832e-07, + "loss": 0.4991394281387329, + "step": 6519 + }, + { + "epoch": 1.906565287322708, + "grad_norm": 1.5123359691224252, + "learning_rate": 1.2018452538720805e-07, + "loss": 0.43237754702568054, + "step": 6520 + }, + { + "epoch": 1.9068577277379735, + "grad_norm": 1.929873331270624, + "learning_rate": 1.1943837039463112e-07, + "loss": 0.6042662262916565, + "step": 6521 + }, + { + "epoch": 1.9071501681532388, + "grad_norm": 1.5924831654811167, + "learning_rate": 1.186945249232585e-07, + "loss": 0.4275910556316376, + "step": 6522 + }, + { + "epoch": 1.9074426085685041, + "grad_norm": 1.6404715584331906, + "learning_rate": 1.1795298914696219e-07, + "loss": 0.4368266463279724, + "step": 6523 + }, + { + "epoch": 1.9077350489837697, + "grad_norm": 1.7092717646033881, + "learning_rate": 1.172137632390713e-07, + "loss": 0.49492496252059937, + "step": 6524 + }, + { + "epoch": 1.908027489399035, + "grad_norm": 1.587478317321713, + "learning_rate": 1.164768473723743e-07, + "loss": 0.4296407103538513, + "step": 6525 + }, + { + "epoch": 1.9083199298143003, + "grad_norm": 1.746911434628144, + "learning_rate": 1.1574224171912118e-07, + "loss": 0.4609370231628418, + "step": 6526 + }, + { + "epoch": 1.9086123702295659, + "grad_norm": 1.687321204236502, + "learning_rate": 1.1500994645102237e-07, + "loss": 0.5201660394668579, + "step": 6527 + }, + { + "epoch": 1.908904810644831, + "grad_norm": 1.686028014701993, + "learning_rate": 1.1427996173924649e-07, + "loss": 0.49946731328964233, + "step": 6528 + }, + { + "epoch": 1.9091972510600965, + "grad_norm": 1.700984250030961, + "learning_rate": 1.1355228775442262e-07, + "loss": 0.5479187369346619, + "step": 6529 + }, + { + "epoch": 1.9094896914753618, + "grad_norm": 1.6485232123504545, + "learning_rate": 
1.1282692466664247e-07, + "loss": 0.5227243900299072, + "step": 6530 + }, + { + "epoch": 1.9097821318906272, + "grad_norm": 1.896983089459967, + "learning_rate": 1.1210387264545264e-07, + "loss": 0.42863208055496216, + "step": 6531 + }, + { + "epoch": 1.9100745723058927, + "grad_norm": 1.552171259240321, + "learning_rate": 1.113831318598635e-07, + "loss": 0.37858498096466064, + "step": 6532 + }, + { + "epoch": 1.910367012721158, + "grad_norm": 1.852509398879224, + "learning_rate": 1.1066470247834471e-07, + "loss": 0.6447315216064453, + "step": 6533 + }, + { + "epoch": 1.9106594531364234, + "grad_norm": 1.4833597844037574, + "learning_rate": 1.0994858466882197e-07, + "loss": 0.4159877300262451, + "step": 6534 + }, + { + "epoch": 1.910951893551689, + "grad_norm": 1.7056274655886765, + "learning_rate": 1.0923477859868581e-07, + "loss": 0.5042530298233032, + "step": 6535 + }, + { + "epoch": 1.9112443339669543, + "grad_norm": 1.6939120860687955, + "learning_rate": 1.0852328443478278e-07, + "loss": 0.35955798625946045, + "step": 6536 + }, + { + "epoch": 1.9115367743822196, + "grad_norm": 1.6272843503399623, + "learning_rate": 1.0781410234342093e-07, + "loss": 0.561823308467865, + "step": 6537 + }, + { + "epoch": 1.9118292147974851, + "grad_norm": 1.6724333597123697, + "learning_rate": 1.0710723249036659e-07, + "loss": 0.44518136978149414, + "step": 6538 + }, + { + "epoch": 1.9121216552127502, + "grad_norm": 2.0012454930429397, + "learning_rate": 1.0640267504084756e-07, + "loss": 0.5657057166099548, + "step": 6539 + }, + { + "epoch": 1.9124140956280158, + "grad_norm": 1.5762808769057957, + "learning_rate": 1.0570043015954989e-07, + "loss": 0.5659947395324707, + "step": 6540 + }, + { + "epoch": 1.9127065360432813, + "grad_norm": 1.5811137035723222, + "learning_rate": 1.0500049801061784e-07, + "loss": 0.45648419857025146, + "step": 6541 + }, + { + "epoch": 1.9129989764585464, + "grad_norm": 1.8646406465839787, + "learning_rate": 1.0430287875765611e-07, + "loss": 
0.4978141784667969, + "step": 6542 + }, + { + "epoch": 1.913291416873812, + "grad_norm": 1.54273033799953, + "learning_rate": 1.0360757256372977e-07, + "loss": 0.5397627949714661, + "step": 6543 + }, + { + "epoch": 1.9135838572890773, + "grad_norm": 1.8918413526412523, + "learning_rate": 1.029145795913633e-07, + "loss": 0.6359304189682007, + "step": 6544 + }, + { + "epoch": 1.9138762977043426, + "grad_norm": 1.70706044627556, + "learning_rate": 1.0222390000253824e-07, + "loss": 0.5023899078369141, + "step": 6545 + }, + { + "epoch": 1.9141687381196082, + "grad_norm": 1.8668808073409142, + "learning_rate": 1.0153553395869654e-07, + "loss": 0.5231877565383911, + "step": 6546 + }, + { + "epoch": 1.9144611785348735, + "grad_norm": 1.7146199886416342, + "learning_rate": 1.008494816207406e-07, + "loss": 0.5925711393356323, + "step": 6547 + }, + { + "epoch": 1.9147536189501388, + "grad_norm": 1.5881527564838034, + "learning_rate": 1.0016574314902993e-07, + "loss": 0.42732810974121094, + "step": 6548 + }, + { + "epoch": 1.9150460593654044, + "grad_norm": 1.8539790257850415, + "learning_rate": 9.948431870338559e-08, + "loss": 0.5011821985244751, + "step": 6549 + }, + { + "epoch": 1.9153384997806697, + "grad_norm": 1.7063021653673758, + "learning_rate": 9.88052084430846e-08, + "loss": 0.5112487077713013, + "step": 6550 + }, + { + "epoch": 1.915630940195935, + "grad_norm": 1.8003514575818433, + "learning_rate": 9.812841252686667e-08, + "loss": 0.4751431345939636, + "step": 6551 + }, + { + "epoch": 1.9159233806112006, + "grad_norm": 1.9933791417538373, + "learning_rate": 9.745393111292745e-08, + "loss": 0.5343109369277954, + "step": 6552 + }, + { + "epoch": 1.9162158210264657, + "grad_norm": 1.4980785147509508, + "learning_rate": 9.678176435892417e-08, + "loss": 0.4602724015712738, + "step": 6553 + }, + { + "epoch": 1.9165082614417313, + "grad_norm": 1.5436966250785777, + "learning_rate": 9.611191242197005e-08, + "loss": 0.4756245017051697, + "step": 6554 + }, + { + "epoch": 
1.9168007018569968, + "grad_norm": 1.6531719135209273, + "learning_rate": 9.544437545864093e-08, + "loss": 0.5291459560394287, + "step": 6555 + }, + { + "epoch": 1.917093142272262, + "grad_norm": 2.0976196168420946, + "learning_rate": 9.47791536249676e-08, + "loss": 0.5357412099838257, + "step": 6556 + }, + { + "epoch": 1.9173855826875275, + "grad_norm": 2.238353466121697, + "learning_rate": 9.411624707644229e-08, + "loss": 0.6298913955688477, + "step": 6557 + }, + { + "epoch": 1.9176780231027928, + "grad_norm": 1.4485326554294644, + "learning_rate": 9.345565596801553e-08, + "loss": 0.5150517225265503, + "step": 6558 + }, + { + "epoch": 1.917970463518058, + "grad_norm": 1.8563821954536717, + "learning_rate": 9.279738045409603e-08, + "loss": 0.6264858245849609, + "step": 6559 + }, + { + "epoch": 1.9182629039333237, + "grad_norm": 1.46383829182073, + "learning_rate": 9.214142068855292e-08, + "loss": 0.33123475313186646, + "step": 6560 + }, + { + "epoch": 1.918555344348589, + "grad_norm": 1.6384165039446617, + "learning_rate": 9.148777682471133e-08, + "loss": 0.5540212392807007, + "step": 6561 + }, + { + "epoch": 1.9188477847638543, + "grad_norm": 1.8427168178125763, + "learning_rate": 9.083644901535793e-08, + "loss": 0.5633922219276428, + "step": 6562 + }, + { + "epoch": 1.9191402251791199, + "grad_norm": 1.7743383669625796, + "learning_rate": 9.018743741273428e-08, + "loss": 0.58629310131073, + "step": 6563 + }, + { + "epoch": 1.9194326655943852, + "grad_norm": 1.8674136448530827, + "learning_rate": 8.95407421685457e-08, + "loss": 0.5985243320465088, + "step": 6564 + }, + { + "epoch": 1.9197251060096505, + "grad_norm": 1.6803719834498339, + "learning_rate": 8.889636343395235e-08, + "loss": 0.5344138741493225, + "step": 6565 + }, + { + "epoch": 1.920017546424916, + "grad_norm": 1.783895238536977, + "learning_rate": 8.825430135957381e-08, + "loss": 0.6139744520187378, + "step": 6566 + }, + { + "epoch": 1.9203099868401812, + "grad_norm": 1.4220884637268112, + 
"learning_rate": 8.761455609548663e-08, + "loss": 0.46376854181289673, + "step": 6567 + }, + { + "epoch": 1.9206024272554467, + "grad_norm": 1.7412635159811354, + "learning_rate": 8.697712779122902e-08, + "loss": 0.5053622722625732, + "step": 6568 + }, + { + "epoch": 1.920894867670712, + "grad_norm": 1.52795636278423, + "learning_rate": 8.634201659579622e-08, + "loss": 0.4363771080970764, + "step": 6569 + }, + { + "epoch": 1.9211873080859774, + "grad_norm": 1.6799265353987254, + "learning_rate": 8.570922265764059e-08, + "loss": 0.4167904853820801, + "step": 6570 + }, + { + "epoch": 1.921479748501243, + "grad_norm": 1.7506509667217935, + "learning_rate": 8.507874612467382e-08, + "loss": 0.525320291519165, + "step": 6571 + }, + { + "epoch": 1.9217721889165083, + "grad_norm": 1.5127507314447914, + "learning_rate": 8.445058714426691e-08, + "loss": 0.4087376594543457, + "step": 6572 + }, + { + "epoch": 1.9220646293317736, + "grad_norm": 1.975359435328043, + "learning_rate": 8.382474586324796e-08, + "loss": 0.471457839012146, + "step": 6573 + }, + { + "epoch": 1.9223570697470391, + "grad_norm": 1.5584377744842253, + "learning_rate": 8.32012224279033e-08, + "loss": 0.6125116348266602, + "step": 6574 + }, + { + "epoch": 1.9226495101623045, + "grad_norm": 1.8527915049964467, + "learning_rate": 8.258001698397744e-08, + "loss": 0.3800301253795624, + "step": 6575 + }, + { + "epoch": 1.9229419505775698, + "grad_norm": 1.7927235022665284, + "learning_rate": 8.196112967667313e-08, + "loss": 0.561034083366394, + "step": 6576 + }, + { + "epoch": 1.9232343909928353, + "grad_norm": 1.8012018638552385, + "learning_rate": 8.134456065065354e-08, + "loss": 0.5768460631370544, + "step": 6577 + }, + { + "epoch": 1.9235268314081004, + "grad_norm": 1.809882879975094, + "learning_rate": 8.073031005003562e-08, + "loss": 0.47440657019615173, + "step": 6578 + }, + { + "epoch": 1.923819271823366, + "grad_norm": 1.4902012429082565, + "learning_rate": 8.011837801839672e-08, + "loss": 
0.5315208435058594, + "step": 6579 + }, + { + "epoch": 1.9241117122386315, + "grad_norm": 1.7054296975282524, + "learning_rate": 7.950876469877467e-08, + "loss": 0.4587036371231079, + "step": 6580 + }, + { + "epoch": 1.9244041526538966, + "grad_norm": 1.6717861291166198, + "learning_rate": 7.890147023366101e-08, + "loss": 0.5356466770172119, + "step": 6581 + }, + { + "epoch": 1.9246965930691622, + "grad_norm": 1.8066170712430372, + "learning_rate": 7.829649476500667e-08, + "loss": 0.48034095764160156, + "step": 6582 + }, + { + "epoch": 1.9249890334844275, + "grad_norm": 1.9403707417182101, + "learning_rate": 7.769383843422185e-08, + "loss": 0.502929151058197, + "step": 6583 + }, + { + "epoch": 1.9252814738996928, + "grad_norm": 1.5994546211401888, + "learning_rate": 7.709350138217386e-08, + "loss": 0.44771361351013184, + "step": 6584 + }, + { + "epoch": 1.9255739143149584, + "grad_norm": 1.7058923530240673, + "learning_rate": 7.649548374918824e-08, + "loss": 0.462479829788208, + "step": 6585 + }, + { + "epoch": 1.9258663547302237, + "grad_norm": 1.7481939511400157, + "learning_rate": 7.589978567504763e-08, + "loss": 0.4758496880531311, + "step": 6586 + }, + { + "epoch": 1.926158795145489, + "grad_norm": 1.8447645858435646, + "learning_rate": 7.530640729899174e-08, + "loss": 0.521172285079956, + "step": 6587 + }, + { + "epoch": 1.9264512355607546, + "grad_norm": 1.685029384432281, + "learning_rate": 7.471534875971964e-08, + "loss": 0.5274392366409302, + "step": 6588 + }, + { + "epoch": 1.92674367597602, + "grad_norm": 1.5547682278755586, + "learning_rate": 7.412661019538858e-08, + "loss": 0.4350961446762085, + "step": 6589 + }, + { + "epoch": 1.9270361163912852, + "grad_norm": 1.5773569785123847, + "learning_rate": 7.354019174361183e-08, + "loss": 0.6298524737358093, + "step": 6590 + }, + { + "epoch": 1.9273285568065508, + "grad_norm": 1.7494178023153484, + "learning_rate": 7.295609354146194e-08, + "loss": 0.5451292395591736, + "step": 6591 + }, + { + "epoch": 
1.927620997221816, + "grad_norm": 1.8824055292173802, + "learning_rate": 7.23743157254675e-08, + "loss": 0.5371264219284058, + "step": 6592 + }, + { + "epoch": 1.9279134376370815, + "grad_norm": 1.714393478017535, + "learning_rate": 7.179485843161526e-08, + "loss": 0.5805129408836365, + "step": 6593 + }, + { + "epoch": 1.928205878052347, + "grad_norm": 1.9692321834579947, + "learning_rate": 7.121772179535135e-08, + "loss": 0.5542718172073364, + "step": 6594 + }, + { + "epoch": 1.928498318467612, + "grad_norm": 1.7503350699121312, + "learning_rate": 7.064290595157675e-08, + "loss": 0.5668192505836487, + "step": 6595 + }, + { + "epoch": 1.9287907588828777, + "grad_norm": 1.6293975396756264, + "learning_rate": 7.007041103465062e-08, + "loss": 0.5107895731925964, + "step": 6596 + }, + { + "epoch": 1.929083199298143, + "grad_norm": 1.847055531354174, + "learning_rate": 6.950023717839261e-08, + "loss": 0.47974276542663574, + "step": 6597 + }, + { + "epoch": 1.9293756397134083, + "grad_norm": 1.5624753949857668, + "learning_rate": 6.893238451607387e-08, + "loss": 0.5641148090362549, + "step": 6598 + }, + { + "epoch": 1.9296680801286739, + "grad_norm": 1.7181332365296518, + "learning_rate": 6.836685318042935e-08, + "loss": 0.5940253734588623, + "step": 6599 + }, + { + "epoch": 1.9299605205439392, + "grad_norm": 1.6880020580834156, + "learning_rate": 6.780364330364775e-08, + "loss": 0.46844422817230225, + "step": 6600 + }, + { + "epoch": 1.9302529609592045, + "grad_norm": 1.6235992853167036, + "learning_rate": 6.724275501737487e-08, + "loss": 0.3933336138725281, + "step": 6601 + }, + { + "epoch": 1.93054540137447, + "grad_norm": 1.4538666395679365, + "learning_rate": 6.668418845271695e-08, + "loss": 0.4786602258682251, + "step": 6602 + }, + { + "epoch": 1.9308378417897354, + "grad_norm": 1.798637107768398, + "learning_rate": 6.612794374023402e-08, + "loss": 0.49695518612861633, + "step": 6603 + }, + { + "epoch": 1.9311302822050007, + "grad_norm": 1.5049309556488495, + 
"learning_rate": 6.557402100994426e-08, + "loss": 0.4798729121685028, + "step": 6604 + }, + { + "epoch": 1.9314227226202663, + "grad_norm": 1.7300127457609986, + "learning_rate": 6.502242039132634e-08, + "loss": 0.4187319278717041, + "step": 6605 + }, + { + "epoch": 1.9317151630355314, + "grad_norm": 2.050722935709042, + "learning_rate": 6.447314201331156e-08, + "loss": 0.4945526719093323, + "step": 6606 + }, + { + "epoch": 1.932007603450797, + "grad_norm": 1.8976456851513979, + "learning_rate": 6.392618600429057e-08, + "loss": 0.5721586346626282, + "step": 6607 + }, + { + "epoch": 1.9323000438660622, + "grad_norm": 1.6286185694607815, + "learning_rate": 6.338155249211109e-08, + "loss": 0.45542022585868835, + "step": 6608 + }, + { + "epoch": 1.9325924842813276, + "grad_norm": 1.7597762099762242, + "learning_rate": 6.283924160407796e-08, + "loss": 0.5627170205116272, + "step": 6609 + }, + { + "epoch": 1.9328849246965931, + "grad_norm": 1.6951677907486626, + "learning_rate": 6.22992534669542e-08, + "loss": 0.5369620323181152, + "step": 6610 + }, + { + "epoch": 1.9331773651118584, + "grad_norm": 1.619968087818578, + "learning_rate": 6.176158820695665e-08, + "loss": 0.5268368124961853, + "step": 6611 + }, + { + "epoch": 1.9334698055271238, + "grad_norm": 1.6828649754520415, + "learning_rate": 6.122624594976257e-08, + "loss": 0.5734575986862183, + "step": 6612 + }, + { + "epoch": 1.9337622459423893, + "grad_norm": 1.86766787540182, + "learning_rate": 6.069322682050516e-08, + "loss": 0.5066978931427002, + "step": 6613 + }, + { + "epoch": 1.9340546863576547, + "grad_norm": 1.68962846891993, + "learning_rate": 6.016253094377366e-08, + "loss": 0.5462731719017029, + "step": 6614 + }, + { + "epoch": 1.93434712677292, + "grad_norm": 1.8689912619353801, + "learning_rate": 5.963415844361553e-08, + "loss": 0.5407041311264038, + "step": 6615 + }, + { + "epoch": 1.9346395671881855, + "grad_norm": 1.792133188360025, + "learning_rate": 5.910810944353418e-08, + "loss": 
0.48977869749069214, + "step": 6616 + }, + { + "epoch": 1.9349320076034506, + "grad_norm": 1.8900630995604775, + "learning_rate": 5.858438406649125e-08, + "loss": 0.5320937037467957, + "step": 6617 + }, + { + "epoch": 1.9352244480187162, + "grad_norm": 1.6602834270947344, + "learning_rate": 5.806298243490327e-08, + "loss": 0.5860059261322021, + "step": 6618 + }, + { + "epoch": 1.9355168884339817, + "grad_norm": 1.7299178033338176, + "learning_rate": 5.7543904670644965e-08, + "loss": 0.49517208337783813, + "step": 6619 + }, + { + "epoch": 1.9358093288492468, + "grad_norm": 1.4975030277698207, + "learning_rate": 5.7027150895049286e-08, + "loss": 0.5060882568359375, + "step": 6620 + }, + { + "epoch": 1.9361017692645124, + "grad_norm": 1.7387399518104565, + "learning_rate": 5.651272122890184e-08, + "loss": 0.5887798070907593, + "step": 6621 + }, + { + "epoch": 1.9363942096797777, + "grad_norm": 2.006477050241073, + "learning_rate": 5.600061579244753e-08, + "loss": 0.6567577123641968, + "step": 6622 + }, + { + "epoch": 1.936686650095043, + "grad_norm": 1.7419376875296542, + "learning_rate": 5.549083470538952e-08, + "loss": 0.5672584176063538, + "step": 6623 + }, + { + "epoch": 1.9369790905103086, + "grad_norm": 1.6312975104255192, + "learning_rate": 5.4983378086885806e-08, + "loss": 0.5166369676589966, + "step": 6624 + }, + { + "epoch": 1.937271530925574, + "grad_norm": 1.7351407182284893, + "learning_rate": 5.447824605555041e-08, + "loss": 0.5157661437988281, + "step": 6625 + }, + { + "epoch": 1.9375639713408392, + "grad_norm": 1.5452343867654343, + "learning_rate": 5.397543872945443e-08, + "loss": 0.5001711845397949, + "step": 6626 + }, + { + "epoch": 1.9378564117561048, + "grad_norm": 1.5666441918912, + "learning_rate": 5.34749562261272e-08, + "loss": 0.48944878578186035, + "step": 6627 + }, + { + "epoch": 1.9381488521713701, + "grad_norm": 1.8943450842549039, + "learning_rate": 5.297679866255401e-08, + "loss": 0.5400780439376831, + "step": 6628 + }, + { + "epoch": 
1.9384412925866354, + "grad_norm": 1.6944930575034618, + "learning_rate": 5.248096615517395e-08, + "loss": 0.544346809387207, + "step": 6629 + }, + { + "epoch": 1.938733733001901, + "grad_norm": 1.8360261063384646, + "learning_rate": 5.1987458819886535e-08, + "loss": 0.5283153653144836, + "step": 6630 + }, + { + "epoch": 1.939026173417166, + "grad_norm": 1.8162414803988312, + "learning_rate": 5.149627677204616e-08, + "loss": 0.555808424949646, + "step": 6631 + }, + { + "epoch": 1.9393186138324316, + "grad_norm": 1.7068645601820531, + "learning_rate": 5.10074201264632e-08, + "loss": 0.5230466723442078, + "step": 6632 + }, + { + "epoch": 1.9396110542476972, + "grad_norm": 1.592321180041504, + "learning_rate": 5.052088899740515e-08, + "loss": 0.4810416102409363, + "step": 6633 + }, + { + "epoch": 1.9399034946629623, + "grad_norm": 1.2489690563293379, + "learning_rate": 5.0036683498594365e-08, + "loss": 0.35233962535858154, + "step": 6634 + }, + { + "epoch": 1.9401959350782279, + "grad_norm": 1.5949248677680616, + "learning_rate": 4.955480374321253e-08, + "loss": 0.5250035524368286, + "step": 6635 + }, + { + "epoch": 1.9404883754934932, + "grad_norm": 1.5547636594172098, + "learning_rate": 4.907524984389622e-08, + "loss": 0.5896221399307251, + "step": 6636 + }, + { + "epoch": 1.9407808159087585, + "grad_norm": 1.5725705573586048, + "learning_rate": 4.859802191273688e-08, + "loss": 0.5410518050193787, + "step": 6637 + }, + { + "epoch": 1.941073256324024, + "grad_norm": 1.5273512663488045, + "learning_rate": 4.812312006128528e-08, + "loss": 0.5044152736663818, + "step": 6638 + }, + { + "epoch": 1.9413656967392894, + "grad_norm": 1.6537481992077037, + "learning_rate": 4.765054440054484e-08, + "loss": 0.5388177633285522, + "step": 6639 + }, + { + "epoch": 1.9416581371545547, + "grad_norm": 2.0702365693466485, + "learning_rate": 4.718029504097943e-08, + "loss": 0.5074491500854492, + "step": 6640 + }, + { + "epoch": 1.9419505775698203, + "grad_norm": 1.6224415285858116, + 
"learning_rate": 4.671237209250557e-08, + "loss": 0.47772669792175293, + "step": 6641 + }, + { + "epoch": 1.9422430179850856, + "grad_norm": 1.6570845374645817, + "learning_rate": 4.624677566449798e-08, + "loss": 0.4682825207710266, + "step": 6642 + }, + { + "epoch": 1.942535458400351, + "grad_norm": 1.5100328644654928, + "learning_rate": 4.578350586578628e-08, + "loss": 0.48880642652511597, + "step": 6643 + }, + { + "epoch": 1.9428278988156165, + "grad_norm": 1.6890744037677652, + "learning_rate": 4.532256280465719e-08, + "loss": 0.4590389132499695, + "step": 6644 + }, + { + "epoch": 1.9431203392308816, + "grad_norm": 1.903981857624826, + "learning_rate": 4.48639465888534e-08, + "loss": 0.5893105268478394, + "step": 6645 + }, + { + "epoch": 1.9434127796461471, + "grad_norm": 1.7274912065627603, + "learning_rate": 4.4407657325574725e-08, + "loss": 0.561900794506073, + "step": 6646 + }, + { + "epoch": 1.9437052200614124, + "grad_norm": 1.662019693277273, + "learning_rate": 4.395369512147474e-08, + "loss": 0.4140210747718811, + "step": 6647 + }, + { + "epoch": 1.9439976604766778, + "grad_norm": 1.7955978434650512, + "learning_rate": 4.350206008266522e-08, + "loss": 0.6220303773880005, + "step": 6648 + }, + { + "epoch": 1.9442901008919433, + "grad_norm": 1.771531678180808, + "learning_rate": 4.3052752314712844e-08, + "loss": 0.4903472065925598, + "step": 6649 + }, + { + "epoch": 1.9445825413072086, + "grad_norm": 1.889992657698585, + "learning_rate": 4.260577192263915e-08, + "loss": 0.4519340991973877, + "step": 6650 + }, + { + "epoch": 1.944874981722474, + "grad_norm": 1.7435292517018475, + "learning_rate": 4.216111901092501e-08, + "loss": 0.49067920446395874, + "step": 6651 + }, + { + "epoch": 1.9451674221377395, + "grad_norm": 1.8654652047797853, + "learning_rate": 4.1718793683505066e-08, + "loss": 0.5935854911804199, + "step": 6652 + }, + { + "epoch": 1.9454598625530048, + "grad_norm": 1.7744411864937968, + "learning_rate": 4.127879604376883e-08, + "loss": 
0.5209576487541199, + "step": 6653 + }, + { + "epoch": 1.9457523029682702, + "grad_norm": 1.50564473891113, + "learning_rate": 4.084112619456515e-08, + "loss": 0.4454221725463867, + "step": 6654 + }, + { + "epoch": 1.9460447433835357, + "grad_norm": 1.8157940398905494, + "learning_rate": 4.0405784238194415e-08, + "loss": 0.5129591226577759, + "step": 6655 + }, + { + "epoch": 1.9463371837988008, + "grad_norm": 1.63185696744402, + "learning_rate": 3.997277027641744e-08, + "loss": 0.48704665899276733, + "step": 6656 + }, + { + "epoch": 1.9466296242140664, + "grad_norm": 1.8037751571098388, + "learning_rate": 3.95420844104466e-08, + "loss": 0.4510651230812073, + "step": 6657 + }, + { + "epoch": 1.946922064629332, + "grad_norm": 1.7817975919339482, + "learning_rate": 3.911372674095249e-08, + "loss": 0.5116807222366333, + "step": 6658 + }, + { + "epoch": 1.947214505044597, + "grad_norm": 1.7985765763419883, + "learning_rate": 3.868769736806277e-08, + "loss": 0.592056393623352, + "step": 6659 + }, + { + "epoch": 1.9475069454598626, + "grad_norm": 1.7881377609654638, + "learning_rate": 3.8263996391357805e-08, + "loss": 0.579146146774292, + "step": 6660 + }, + { + "epoch": 1.947799385875128, + "grad_norm": 1.6202416659647267, + "learning_rate": 3.784262390987503e-08, + "loss": 0.5253209471702576, + "step": 6661 + }, + { + "epoch": 1.9480918262903932, + "grad_norm": 2.008309380522338, + "learning_rate": 3.742358002210789e-08, + "loss": 0.5614888072013855, + "step": 6662 + }, + { + "epoch": 1.9483842667056588, + "grad_norm": 1.6491223001780133, + "learning_rate": 3.7006864826005796e-08, + "loss": 0.5630952715873718, + "step": 6663 + }, + { + "epoch": 1.9486767071209241, + "grad_norm": 2.1390311477096944, + "learning_rate": 3.659247841897306e-08, + "loss": 0.5990846157073975, + "step": 6664 + }, + { + "epoch": 1.9489691475361894, + "grad_norm": 1.6162006621933969, + "learning_rate": 3.6180420897868886e-08, + "loss": 0.5290813446044922, + "step": 6665 + }, + { + "epoch": 
1.949261587951455, + "grad_norm": 2.6144126732722803, + "learning_rate": 3.577069235901176e-08, + "loss": 0.6710211038589478, + "step": 6666 + }, + { + "epoch": 1.9495540283667203, + "grad_norm": 1.71689411729531, + "learning_rate": 3.536329289817064e-08, + "loss": 0.4802299737930298, + "step": 6667 + }, + { + "epoch": 1.9498464687819856, + "grad_norm": 1.6268319596207468, + "learning_rate": 3.495822261057491e-08, + "loss": 0.5432649850845337, + "step": 6668 + }, + { + "epoch": 1.9501389091972512, + "grad_norm": 1.9426982793491434, + "learning_rate": 3.4555481590905495e-08, + "loss": 0.5824951529502869, + "step": 6669 + }, + { + "epoch": 1.9504313496125163, + "grad_norm": 1.5773733844612365, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.48428961634635925, + "step": 6670 + }, + { + "epoch": 1.9507237900277818, + "grad_norm": 1.7258198741312958, + "learning_rate": 3.375698773135705e-08, + "loss": 0.5684780478477478, + "step": 6671 + }, + { + "epoch": 1.9510162304430474, + "grad_norm": 1.7742355369350526, + "learning_rate": 3.336123507811983e-08, + "loss": 0.5658689737319946, + "step": 6672 + }, + { + "epoch": 1.9513086708583125, + "grad_norm": 1.7743474017748566, + "learning_rate": 3.2967812066097006e-08, + "loss": 0.6265745162963867, + "step": 6673 + }, + { + "epoch": 1.951601111273578, + "grad_norm": 1.768397532537575, + "learning_rate": 3.257671878724722e-08, + "loss": 0.5732975006103516, + "step": 6674 + }, + { + "epoch": 1.9518935516888434, + "grad_norm": 2.3801499199920273, + "learning_rate": 3.218795533298624e-08, + "loss": 0.46968942880630493, + "step": 6675 + }, + { + "epoch": 1.9521859921041087, + "grad_norm": 1.9250466851177817, + "learning_rate": 3.180152179418472e-08, + "loss": 0.5651586055755615, + "step": 6676 + }, + { + "epoch": 1.9524784325193743, + "grad_norm": 1.4699414350235678, + "learning_rate": 3.141741826117151e-08, + "loss": 0.46789437532424927, + "step": 6677 + }, + { + "epoch": 1.9527708729346396, + "grad_norm": 1.6701838665271502, 
+ "learning_rate": 3.1035644823725896e-08, + "loss": 0.5332610011100769, + "step": 6678 + }, + { + "epoch": 1.953063313349905, + "grad_norm": 1.825129394239336, + "learning_rate": 3.06562015710854e-08, + "loss": 0.49613600969314575, + "step": 6679 + }, + { + "epoch": 1.9533557537651705, + "grad_norm": 2.1340240197713265, + "learning_rate": 3.027908859194351e-08, + "loss": 0.5498408079147339, + "step": 6680 + }, + { + "epoch": 1.9536481941804358, + "grad_norm": 1.8887907896186948, + "learning_rate": 2.99043059744486e-08, + "loss": 0.6802657842636108, + "step": 6681 + }, + { + "epoch": 1.953940634595701, + "grad_norm": 1.8609256911752867, + "learning_rate": 2.9531853806201716e-08, + "loss": 0.5149989724159241, + "step": 6682 + }, + { + "epoch": 1.9542330750109667, + "grad_norm": 1.7262483706342455, + "learning_rate": 2.9161732174263212e-08, + "loss": 0.5249730944633484, + "step": 6683 + }, + { + "epoch": 1.9545255154262318, + "grad_norm": 1.7003943133697261, + "learning_rate": 2.8793941165147222e-08, + "loss": 0.5711483359336853, + "step": 6684 + }, + { + "epoch": 1.9548179558414973, + "grad_norm": 1.7303037823896377, + "learning_rate": 2.842848086482053e-08, + "loss": 0.4591020345687866, + "step": 6685 + }, + { + "epoch": 1.9551103962567626, + "grad_norm": 1.887004603599524, + "learning_rate": 2.8065351358708136e-08, + "loss": 0.575869083404541, + "step": 6686 + }, + { + "epoch": 1.955402836672028, + "grad_norm": 1.7563501117497715, + "learning_rate": 2.7704552731688816e-08, + "loss": 0.5664101839065552, + "step": 6687 + }, + { + "epoch": 1.9556952770872935, + "grad_norm": 1.5280681451949298, + "learning_rate": 2.7346085068098437e-08, + "loss": 0.5739811062812805, + "step": 6688 + }, + { + "epoch": 1.9559877175025588, + "grad_norm": 1.64304520297204, + "learning_rate": 2.6989948451726643e-08, + "loss": 0.4707348942756653, + "step": 6689 + }, + { + "epoch": 1.9562801579178242, + "grad_norm": 1.4347028954089904, + "learning_rate": 2.6636142965816848e-08, + "loss": 
0.38842523097991943, + "step": 6690 + }, + { + "epoch": 1.9565725983330897, + "grad_norm": 1.9429266961932796, + "learning_rate": 2.628466869306956e-08, + "loss": 0.4295673668384552, + "step": 6691 + }, + { + "epoch": 1.956865038748355, + "grad_norm": 1.9886421076178336, + "learning_rate": 2.5935525715640176e-08, + "loss": 0.5358999967575073, + "step": 6692 + }, + { + "epoch": 1.9571574791636204, + "grad_norm": 1.8207487442928234, + "learning_rate": 2.5588714115137857e-08, + "loss": 0.49730730056762695, + "step": 6693 + }, + { + "epoch": 1.957449919578886, + "grad_norm": 1.8975782350563493, + "learning_rate": 2.5244233972627762e-08, + "loss": 0.5368232131004333, + "step": 6694 + }, + { + "epoch": 1.957742359994151, + "grad_norm": 1.6616905607648789, + "learning_rate": 2.4902085368632144e-08, + "loss": 0.48084500432014465, + "step": 6695 + }, + { + "epoch": 1.9580348004094166, + "grad_norm": 1.6503756551181779, + "learning_rate": 2.45622683831237e-08, + "loss": 0.5197296142578125, + "step": 6696 + }, + { + "epoch": 1.9583272408246821, + "grad_norm": 1.7005704554604877, + "learning_rate": 2.4224783095532224e-08, + "loss": 0.4807678163051605, + "step": 6697 + }, + { + "epoch": 1.9586196812399472, + "grad_norm": 1.5200854711140026, + "learning_rate": 2.388962958474461e-08, + "loss": 0.5117641687393188, + "step": 6698 + }, + { + "epoch": 1.9589121216552128, + "grad_norm": 1.5153035364420055, + "learning_rate": 2.355680792910153e-08, + "loss": 0.5318149328231812, + "step": 6699 + }, + { + "epoch": 1.959204562070478, + "grad_norm": 1.642749755305391, + "learning_rate": 2.3226318206395206e-08, + "loss": 0.5590193271636963, + "step": 6700 + }, + { + "epoch": 1.9594970024857434, + "grad_norm": 1.9273854799208605, + "learning_rate": 2.2898160493878275e-08, + "loss": 0.7686688899993896, + "step": 6701 + }, + { + "epoch": 1.959789442901009, + "grad_norm": 1.7479554033366604, + "learning_rate": 2.257233486825383e-08, + "loss": 0.5085177421569824, + "step": 6702 + }, + { + 
"epoch": 1.9600818833162743, + "grad_norm": 1.4224817781801729, + "learning_rate": 2.2248841405683176e-08, + "loss": 0.44002413749694824, + "step": 6703 + }, + { + "epoch": 1.9603743237315396, + "grad_norm": 1.6541616903883845, + "learning_rate": 2.1927680181779154e-08, + "loss": 0.5369126796722412, + "step": 6704 + }, + { + "epoch": 1.9606667641468052, + "grad_norm": 1.5811100430561291, + "learning_rate": 2.1608851271612828e-08, + "loss": 0.516021728515625, + "step": 6705 + }, + { + "epoch": 1.9609592045620705, + "grad_norm": 1.577385822778267, + "learning_rate": 2.1292354749707922e-08, + "loss": 0.5215185284614563, + "step": 6706 + }, + { + "epoch": 1.9612516449773358, + "grad_norm": 1.7926842955012665, + "learning_rate": 2.0978190690043032e-08, + "loss": 0.6051908731460571, + "step": 6707 + }, + { + "epoch": 1.9615440853926014, + "grad_norm": 1.8529134419730404, + "learning_rate": 2.066635916605386e-08, + "loss": 0.5426267385482788, + "step": 6708 + }, + { + "epoch": 1.9618365258078665, + "grad_norm": 2.064852719580073, + "learning_rate": 2.0356860250626554e-08, + "loss": 0.5888626575469971, + "step": 6709 + }, + { + "epoch": 1.962128966223132, + "grad_norm": 1.4287637894797525, + "learning_rate": 2.004969401610657e-08, + "loss": 0.5225001573562622, + "step": 6710 + }, + { + "epoch": 1.9624214066383976, + "grad_norm": 1.616132198436982, + "learning_rate": 1.974486053429092e-08, + "loss": 0.5735136270523071, + "step": 6711 + }, + { + "epoch": 1.9627138470536627, + "grad_norm": 1.6327567238976746, + "learning_rate": 1.9442359876433724e-08, + "loss": 0.5302764177322388, + "step": 6712 + }, + { + "epoch": 1.9630062874689282, + "grad_norm": 1.625182085046959, + "learning_rate": 1.9142192113241752e-08, + "loss": 0.5078837871551514, + "step": 6713 + }, + { + "epoch": 1.9632987278841936, + "grad_norm": 1.7110845788062152, + "learning_rate": 1.884435731487888e-08, + "loss": 0.5772985219955444, + "step": 6714 + }, + { + "epoch": 1.963591168299459, + "grad_norm": 
2.2561904758082925, + "learning_rate": 1.8548855550959423e-08, + "loss": 0.5974931716918945, + "step": 6715 + }, + { + "epoch": 1.9638836087147244, + "grad_norm": 1.8105323667501525, + "learning_rate": 1.8255686890558123e-08, + "loss": 0.5065072774887085, + "step": 6716 + }, + { + "epoch": 1.9641760491299898, + "grad_norm": 1.4374806170365766, + "learning_rate": 1.7964851402199058e-08, + "loss": 0.4729428291320801, + "step": 6717 + }, + { + "epoch": 1.964468489545255, + "grad_norm": 1.7924892088352824, + "learning_rate": 1.7676349153864515e-08, + "loss": 0.46363723278045654, + "step": 6718 + }, + { + "epoch": 1.9647609299605207, + "grad_norm": 1.6096201158909726, + "learning_rate": 1.7390180212990547e-08, + "loss": 0.5436959266662598, + "step": 6719 + }, + { + "epoch": 1.965053370375786, + "grad_norm": 1.8570609869736334, + "learning_rate": 1.7106344646465877e-08, + "loss": 0.7571452856063843, + "step": 6720 + }, + { + "epoch": 1.9653458107910513, + "grad_norm": 1.7203125443062617, + "learning_rate": 1.682484252063632e-08, + "loss": 0.5724680423736572, + "step": 6721 + }, + { + "epoch": 1.9656382512063169, + "grad_norm": 1.5552868811193872, + "learning_rate": 1.654567390130146e-08, + "loss": 0.46937745809555054, + "step": 6722 + }, + { + "epoch": 1.965930691621582, + "grad_norm": 1.4639592826813614, + "learning_rate": 1.6268838853713552e-08, + "loss": 0.5764822363853455, + "step": 6723 + }, + { + "epoch": 1.9662231320368475, + "grad_norm": 1.8890557259087926, + "learning_rate": 1.5994337442584164e-08, + "loss": 0.6074192523956299, + "step": 6724 + }, + { + "epoch": 1.9665155724521128, + "grad_norm": 1.8156005720173343, + "learning_rate": 1.572216973207419e-08, + "loss": 0.6001715064048767, + "step": 6725 + }, + { + "epoch": 1.9668080128673782, + "grad_norm": 1.8635551001096793, + "learning_rate": 1.545233578580163e-08, + "loss": 0.5819540619850159, + "step": 6726 + }, + { + "epoch": 1.9671004532826437, + "grad_norm": 1.509757451229315, + "learning_rate": 
1.518483566683826e-08, + "loss": 0.4745405912399292, + "step": 6727 + }, + { + "epoch": 1.967392893697909, + "grad_norm": 1.5301158686504193, + "learning_rate": 1.4919669437710725e-08, + "loss": 0.4438042640686035, + "step": 6728 + }, + { + "epoch": 1.9676853341131744, + "grad_norm": 1.6058873643565785, + "learning_rate": 1.465683716040056e-08, + "loss": 0.45798003673553467, + "step": 6729 + }, + { + "epoch": 1.96797777452844, + "grad_norm": 1.5582798501168125, + "learning_rate": 1.4396338896341955e-08, + "loss": 0.3918766379356384, + "step": 6730 + }, + { + "epoch": 1.9682702149437052, + "grad_norm": 1.6253936447718431, + "learning_rate": 1.4138174706426199e-08, + "loss": 0.5266170501708984, + "step": 6731 + }, + { + "epoch": 1.9685626553589706, + "grad_norm": 1.733772185361853, + "learning_rate": 1.3882344650998359e-08, + "loss": 0.5166668891906738, + "step": 6732 + }, + { + "epoch": 1.9688550957742361, + "grad_norm": 1.7595735268115036, + "learning_rate": 1.3628848789853932e-08, + "loss": 0.39324697852134705, + "step": 6733 + }, + { + "epoch": 1.9691475361895012, + "grad_norm": 1.8212233848125128, + "learning_rate": 1.3377687182248855e-08, + "loss": 0.4915732443332672, + "step": 6734 + }, + { + "epoch": 1.9694399766047668, + "grad_norm": 1.7689973508355645, + "learning_rate": 1.31288598868895e-08, + "loss": 0.5416492819786072, + "step": 6735 + }, + { + "epoch": 1.9697324170200323, + "grad_norm": 1.6021351256215517, + "learning_rate": 1.288236696193823e-08, + "loss": 0.4713748097419739, + "step": 6736 + }, + { + "epoch": 1.9700248574352974, + "grad_norm": 1.7411270752119496, + "learning_rate": 1.263820846501118e-08, + "loss": 0.44074663519859314, + "step": 6737 + }, + { + "epoch": 1.970317297850563, + "grad_norm": 1.7164561827524085, + "learning_rate": 1.2396384453179366e-08, + "loss": 0.4694680869579315, + "step": 6738 + }, + { + "epoch": 1.9706097382658283, + "grad_norm": 1.8691907501418656, + "learning_rate": 1.215689498296535e-08, + "loss": 0.553142786026001, 
+ "step": 6739 + }, + { + "epoch": 1.9709021786810936, + "grad_norm": 1.7953149807008746, + "learning_rate": 1.1919740110351019e-08, + "loss": 0.533849835395813, + "step": 6740 + }, + { + "epoch": 1.9711946190963592, + "grad_norm": 1.9503927011602655, + "learning_rate": 1.1684919890768698e-08, + "loss": 0.5448808670043945, + "step": 6741 + }, + { + "epoch": 1.9714870595116245, + "grad_norm": 1.6447356703420446, + "learning_rate": 1.1452434379106703e-08, + "loss": 0.46860289573669434, + "step": 6742 + }, + { + "epoch": 1.9717794999268898, + "grad_norm": 2.0052944353876696, + "learning_rate": 1.122228362970712e-08, + "loss": 0.5552232265472412, + "step": 6743 + }, + { + "epoch": 1.9720719403421554, + "grad_norm": 1.7151457677082285, + "learning_rate": 1.0994467696364698e-08, + "loss": 0.4639692008495331, + "step": 6744 + }, + { + "epoch": 1.9723643807574207, + "grad_norm": 2.0905035821875746, + "learning_rate": 1.076898663233239e-08, + "loss": 0.7129387259483337, + "step": 6745 + }, + { + "epoch": 1.972656821172686, + "grad_norm": 1.6674482501618961, + "learning_rate": 1.0545840490313597e-08, + "loss": 0.6637833118438721, + "step": 6746 + }, + { + "epoch": 1.9729492615879516, + "grad_norm": 2.029336881837252, + "learning_rate": 1.0325029322467705e-08, + "loss": 0.6215991973876953, + "step": 6747 + }, + { + "epoch": 1.9732417020032167, + "grad_norm": 1.878624196936373, + "learning_rate": 1.0106553180407874e-08, + "loss": 0.48594456911087036, + "step": 6748 + }, + { + "epoch": 1.9735341424184822, + "grad_norm": 1.9063825585940108, + "learning_rate": 9.890412115202142e-09, + "loss": 0.5443629622459412, + "step": 6749 + }, + { + "epoch": 1.9738265828337478, + "grad_norm": 1.7053157420855176, + "learning_rate": 9.676606177371207e-09, + "loss": 0.643796443939209, + "step": 6750 + }, + { + "epoch": 1.974119023249013, + "grad_norm": 1.6282972872252912, + "learning_rate": 9.465135416891757e-09, + "loss": 0.6305385828018188, + "step": 6751 + }, + { + "epoch": 
1.9744114636642784, + "grad_norm": 1.5632532849336644, + "learning_rate": 9.255999883193146e-09, + "loss": 0.5120108723640442, + "step": 6752 + }, + { + "epoch": 1.9747039040795438, + "grad_norm": 1.6718955354026932, + "learning_rate": 9.0491996251596e-09, + "loss": 0.5552967190742493, + "step": 6753 + }, + { + "epoch": 1.974996344494809, + "grad_norm": 1.935016742711985, + "learning_rate": 8.84473469113023e-09, + "loss": 0.6341986656188965, + "step": 6754 + }, + { + "epoch": 1.9752887849100746, + "grad_norm": 1.9011990155600869, + "learning_rate": 8.642605128896808e-09, + "loss": 0.5204262137413025, + "step": 6755 + }, + { + "epoch": 1.97558122532534, + "grad_norm": 1.75594319264598, + "learning_rate": 8.442810985705984e-09, + "loss": 0.4980974793434143, + "step": 6756 + }, + { + "epoch": 1.9758736657406053, + "grad_norm": 1.9165104575442982, + "learning_rate": 8.245352308258181e-09, + "loss": 0.5432465076446533, + "step": 6757 + }, + { + "epoch": 1.9761661061558708, + "grad_norm": 1.7852742537308695, + "learning_rate": 8.0502291427087e-09, + "loss": 0.813039243221283, + "step": 6758 + }, + { + "epoch": 1.9764585465711362, + "grad_norm": 1.8018799007975157, + "learning_rate": 7.85744153466661e-09, + "loss": 0.5723720788955688, + "step": 6759 + }, + { + "epoch": 1.9767509869864015, + "grad_norm": 1.8628448153664545, + "learning_rate": 7.666989529193647e-09, + "loss": 0.5562596321105957, + "step": 6760 + }, + { + "epoch": 1.977043427401667, + "grad_norm": 1.796195928066652, + "learning_rate": 7.478873170807532e-09, + "loss": 0.5455175638198853, + "step": 6761 + }, + { + "epoch": 1.9773358678169322, + "grad_norm": 1.791853318736957, + "learning_rate": 7.2930925034797595e-09, + "loss": 0.5753832459449768, + "step": 6762 + }, + { + "epoch": 1.9776283082321977, + "grad_norm": 1.786340662775674, + "learning_rate": 7.109647570634482e-09, + "loss": 0.49962282180786133, + "step": 6763 + }, + { + "epoch": 1.977920748647463, + "grad_norm": 1.4222417158044076, + 
"learning_rate": 6.9285384151507316e-09, + "loss": 0.44443345069885254, + "step": 6764 + }, + { + "epoch": 1.9782131890627284, + "grad_norm": 1.5729694345436978, + "learning_rate": 6.749765079363535e-09, + "loss": 0.3236424922943115, + "step": 6765 + }, + { + "epoch": 1.978505629477994, + "grad_norm": 1.766865850057596, + "learning_rate": 6.573327605057245e-09, + "loss": 0.5246942639350891, + "step": 6766 + }, + { + "epoch": 1.9787980698932592, + "grad_norm": 1.6890664092399734, + "learning_rate": 6.399226033475536e-09, + "loss": 0.6525053381919861, + "step": 6767 + }, + { + "epoch": 1.9790905103085246, + "grad_norm": 1.5450928873923104, + "learning_rate": 6.227460405312524e-09, + "loss": 0.502121090888977, + "step": 6768 + }, + { + "epoch": 1.9793829507237901, + "grad_norm": 2.00727430176714, + "learning_rate": 6.058030760718314e-09, + "loss": 0.6137609481811523, + "step": 6769 + }, + { + "epoch": 1.9796753911390554, + "grad_norm": 2.274345342275455, + "learning_rate": 5.890937139294561e-09, + "loss": 0.6673166751861572, + "step": 6770 + }, + { + "epoch": 1.9799678315543208, + "grad_norm": 1.8444636633461322, + "learning_rate": 5.726179580098912e-09, + "loss": 0.5888657569885254, + "step": 6771 + }, + { + "epoch": 1.9802602719695863, + "grad_norm": 1.6693157475267608, + "learning_rate": 5.563758121642781e-09, + "loss": 0.5239546298980713, + "step": 6772 + }, + { + "epoch": 1.9805527123848514, + "grad_norm": 1.8912704609026834, + "learning_rate": 5.403672801890247e-09, + "loss": 0.5446778535842896, + "step": 6773 + }, + { + "epoch": 1.980845152800117, + "grad_norm": 1.9927071253973727, + "learning_rate": 5.245923658262486e-09, + "loss": 0.6198326349258423, + "step": 6774 + }, + { + "epoch": 1.9811375932153825, + "grad_norm": 2.15235475034657, + "learning_rate": 5.090510727630005e-09, + "loss": 0.586353063583374, + "step": 6775 + }, + { + "epoch": 1.9814300336306476, + "grad_norm": 1.7195990521736408, + "learning_rate": 4.93743404632041e-09, + "loss": 
0.6344239711761475, + "step": 6776 + }, + { + "epoch": 1.9817224740459132, + "grad_norm": 1.7280364585810115, + "learning_rate": 4.7866936501150816e-09, + "loss": 0.529091477394104, + "step": 6777 + }, + { + "epoch": 1.9820149144611785, + "grad_norm": 1.7357230298596742, + "learning_rate": 4.6382895742491665e-09, + "loss": 0.50063157081604, + "step": 6778 + }, + { + "epoch": 1.9823073548764438, + "grad_norm": 1.526019679238999, + "learning_rate": 4.492221853409362e-09, + "loss": 0.48398512601852417, + "step": 6779 + }, + { + "epoch": 1.9825997952917094, + "grad_norm": 1.5319705226915326, + "learning_rate": 4.348490521738358e-09, + "loss": 0.5330454707145691, + "step": 6780 + }, + { + "epoch": 1.9828922357069747, + "grad_norm": 1.829830860451363, + "learning_rate": 4.207095612833723e-09, + "loss": 0.4562032222747803, + "step": 6781 + }, + { + "epoch": 1.98318467612224, + "grad_norm": 1.7011927258883048, + "learning_rate": 4.0680371597456855e-09, + "loss": 0.47456252574920654, + "step": 6782 + }, + { + "epoch": 1.9834771165375056, + "grad_norm": 1.8486724201847988, + "learning_rate": 3.931315194977137e-09, + "loss": 0.6283844709396362, + "step": 6783 + }, + { + "epoch": 1.983769556952771, + "grad_norm": 1.7243813126388492, + "learning_rate": 3.7969297504858445e-09, + "loss": 0.5886485576629639, + "step": 6784 + }, + { + "epoch": 1.9840619973680362, + "grad_norm": 1.679651544361786, + "learning_rate": 3.664880857685571e-09, + "loss": 0.4711921811103821, + "step": 6785 + }, + { + "epoch": 1.9843544377833018, + "grad_norm": 1.8051937774075772, + "learning_rate": 3.5351685474394048e-09, + "loss": 0.5372034311294556, + "step": 6786 + }, + { + "epoch": 1.9846468781985669, + "grad_norm": 1.7143010926050217, + "learning_rate": 3.4077928500686473e-09, + "loss": 0.5314334034919739, + "step": 6787 + }, + { + "epoch": 1.9849393186138324, + "grad_norm": 1.7988305575744603, + "learning_rate": 3.2827537953461496e-09, + "loss": 0.6022863984107971, + "step": 6788 + }, + { + "epoch": 
1.985231759029098, + "grad_norm": 1.844296066004364, + "learning_rate": 3.160051412499643e-09, + "loss": 0.6739746928215027, + "step": 6789 + }, + { + "epoch": 1.985524199444363, + "grad_norm": 1.795022844462659, + "learning_rate": 3.0396857302084082e-09, + "loss": 0.6454254388809204, + "step": 6790 + }, + { + "epoch": 1.9858166398596286, + "grad_norm": 1.7777744811692944, + "learning_rate": 2.9216567766088276e-09, + "loss": 0.567995011806488, + "step": 6791 + }, + { + "epoch": 1.986109080274894, + "grad_norm": 1.7916482396337698, + "learning_rate": 2.8059645792877233e-09, + "loss": 0.568576455116272, + "step": 6792 + }, + { + "epoch": 1.9864015206901593, + "grad_norm": 1.5789903561856604, + "learning_rate": 2.6926091652890175e-09, + "loss": 0.5053816437721252, + "step": 6793 + }, + { + "epoch": 1.9866939611054248, + "grad_norm": 1.4966825154239165, + "learning_rate": 2.5815905611081825e-09, + "loss": 0.47705504298210144, + "step": 6794 + }, + { + "epoch": 1.9869864015206902, + "grad_norm": 1.7555838648022946, + "learning_rate": 2.472908792695572e-09, + "loss": 0.48271438479423523, + "step": 6795 + }, + { + "epoch": 1.9872788419359555, + "grad_norm": 1.675207035758499, + "learning_rate": 2.3665638854541982e-09, + "loss": 0.5694486498832703, + "step": 6796 + }, + { + "epoch": 1.987571282351221, + "grad_norm": 1.6539598401922624, + "learning_rate": 2.2625558642419553e-09, + "loss": 0.4940011501312256, + "step": 6797 + }, + { + "epoch": 1.9878637227664864, + "grad_norm": 1.8961348890729253, + "learning_rate": 2.160884753370507e-09, + "loss": 0.5536549091339111, + "step": 6798 + }, + { + "epoch": 1.9881561631817517, + "grad_norm": 1.923836316704977, + "learning_rate": 2.0615505766041765e-09, + "loss": 0.5354948043823242, + "step": 6799 + }, + { + "epoch": 1.9884486035970173, + "grad_norm": 1.9901895658271425, + "learning_rate": 1.9645533571610585e-09, + "loss": 0.6246936321258545, + "step": 6800 + }, + { + "epoch": 1.9887410440122824, + "grad_norm": 1.9564588316886224, 
+ "learning_rate": 1.869893117715238e-09, + "loss": 0.6690058708190918, + "step": 6801 + }, + { + "epoch": 1.989033484427548, + "grad_norm": 1.5017853956289122, + "learning_rate": 1.7775698803923491e-09, + "loss": 0.4022945761680603, + "step": 6802 + }, + { + "epoch": 1.9893259248428132, + "grad_norm": 1.863253138688696, + "learning_rate": 1.6875836667729073e-09, + "loss": 0.7192882299423218, + "step": 6803 + }, + { + "epoch": 1.9896183652580786, + "grad_norm": 1.7599554073021901, + "learning_rate": 1.5999344978889774e-09, + "loss": 0.4818531274795532, + "step": 6804 + }, + { + "epoch": 1.989910805673344, + "grad_norm": 1.6555564933889482, + "learning_rate": 1.5146223942297256e-09, + "loss": 0.5877143144607544, + "step": 6805 + }, + { + "epoch": 1.9902032460886094, + "grad_norm": 1.699617544549682, + "learning_rate": 1.4316473757347571e-09, + "loss": 0.5317925810813904, + "step": 6806 + }, + { + "epoch": 1.9904956865038748, + "grad_norm": 1.8556459190322732, + "learning_rate": 1.3510094618007785e-09, + "loss": 0.5203319787979126, + "step": 6807 + }, + { + "epoch": 1.9907881269191403, + "grad_norm": 1.8877803824180381, + "learning_rate": 1.2727086712760462e-09, + "loss": 0.5171575546264648, + "step": 6808 + }, + { + "epoch": 1.9910805673344056, + "grad_norm": 1.6004462821959236, + "learning_rate": 1.1967450224614763e-09, + "loss": 0.4570615291595459, + "step": 6809 + }, + { + "epoch": 1.991373007749671, + "grad_norm": 2.16253456274772, + "learning_rate": 1.123118533113976e-09, + "loss": 0.5689741969108582, + "step": 6810 + }, + { + "epoch": 1.9916654481649365, + "grad_norm": 1.796551268093938, + "learning_rate": 1.0518292204442226e-09, + "loss": 0.5029700994491577, + "step": 6811 + }, + { + "epoch": 1.9919578885802016, + "grad_norm": 1.774689424925791, + "learning_rate": 9.828771011144434e-10, + "loss": 0.5461232662200928, + "step": 6812 + }, + { + "epoch": 1.9922503289954672, + "grad_norm": 1.6779660974331405, + "learning_rate": 9.162621912417458e-10, + "loss": 
0.4681328535079956, + "step": 6813 + }, + { + "epoch": 1.9925427694107327, + "grad_norm": 1.6414551197415561, + "learning_rate": 8.519845063970078e-10, + "loss": 0.6356761455535889, + "step": 6814 + }, + { + "epoch": 1.9928352098259978, + "grad_norm": 1.5821864651194355, + "learning_rate": 7.900440616059879e-10, + "loss": 0.48491230607032776, + "step": 6815 + }, + { + "epoch": 1.9931276502412634, + "grad_norm": 1.5771006740515017, + "learning_rate": 7.304408713448841e-10, + "loss": 0.45563238859176636, + "step": 6816 + }, + { + "epoch": 1.9934200906565287, + "grad_norm": 1.773565509502716, + "learning_rate": 6.731749495481054e-10, + "loss": 0.6067036986351013, + "step": 6817 + }, + { + "epoch": 1.993712531071794, + "grad_norm": 1.8573415580854213, + "learning_rate": 6.182463095982805e-10, + "loss": 0.6162583827972412, + "step": 6818 + }, + { + "epoch": 1.9940049714870596, + "grad_norm": 2.0477517064592456, + "learning_rate": 5.656549643373587e-10, + "loss": 0.6621623039245605, + "step": 6819 + }, + { + "epoch": 1.994297411902325, + "grad_norm": 1.4386624972833835, + "learning_rate": 5.154009260566195e-10, + "loss": 0.5374715328216553, + "step": 6820 + }, + { + "epoch": 1.9945898523175902, + "grad_norm": 1.690807663421353, + "learning_rate": 4.674842065033325e-10, + "loss": 0.5164921283721924, + "step": 6821 + }, + { + "epoch": 1.9948822927328558, + "grad_norm": 1.6217517652016564, + "learning_rate": 4.2190481687631736e-10, + "loss": 0.4816705584526062, + "step": 6822 + }, + { + "epoch": 1.995174733148121, + "grad_norm": 1.963964870727347, + "learning_rate": 3.786627678314947e-10, + "loss": 0.5393646955490112, + "step": 6823 + }, + { + "epoch": 1.9954671735633864, + "grad_norm": 1.965068141803477, + "learning_rate": 3.377580694763349e-10, + "loss": 0.6161901950836182, + "step": 6824 + }, + { + "epoch": 1.995759613978652, + "grad_norm": 1.7151080887799663, + "learning_rate": 2.991907313698583e-10, + "loss": 0.45819348096847534, + "step": 6825 + }, + { + "epoch": 
1.996052054393917, + "grad_norm": 1.6568040031723943, + "learning_rate": 2.6296076252929623e-10, + "loss": 0.4111405611038208, + "step": 6826 + }, + { + "epoch": 1.9963444948091826, + "grad_norm": 1.579319709420574, + "learning_rate": 2.2906817142120952e-10, + "loss": 0.5351378917694092, + "step": 6827 + }, + { + "epoch": 1.9966369352244482, + "grad_norm": 1.9986759890358465, + "learning_rate": 1.9751296597037007e-10, + "loss": 0.5349807739257812, + "step": 6828 + }, + { + "epoch": 1.9969293756397133, + "grad_norm": 1.3400965720769549, + "learning_rate": 1.68295153549769e-10, + "loss": 0.3669770061969757, + "step": 6829 + }, + { + "epoch": 1.9972218160549788, + "grad_norm": 1.6049954159966944, + "learning_rate": 1.414147409906086e-10, + "loss": 0.51691073179245, + "step": 6830 + }, + { + "epoch": 1.9975142564702442, + "grad_norm": 1.6369812108335593, + "learning_rate": 1.1687173457564095e-10, + "loss": 0.530505895614624, + "step": 6831 + }, + { + "epoch": 1.9978066968855095, + "grad_norm": 1.7168823925537207, + "learning_rate": 9.466614004138841e-11, + "loss": 0.6562793850898743, + "step": 6832 + }, + { + "epoch": 1.998099137300775, + "grad_norm": 1.7130665778689727, + "learning_rate": 7.479796257925387e-11, + "loss": 0.5174558758735657, + "step": 6833 + }, + { + "epoch": 1.9983915777160404, + "grad_norm": 1.825880314789344, + "learning_rate": 5.726720683219e-11, + "loss": 0.5514833331108093, + "step": 6834 + }, + { + "epoch": 1.9986840181313057, + "grad_norm": 1.4872034884229834, + "learning_rate": 4.207387689803e-11, + "loss": 0.4652816653251648, + "step": 6835 + }, + { + "epoch": 1.9989764585465712, + "grad_norm": 1.497818750132978, + "learning_rate": 2.9217976328377305e-11, + "loss": 0.420850932598114, + "step": 6836 + }, + { + "epoch": 1.9992688989618366, + "grad_norm": 1.7752439390839505, + "learning_rate": 1.8699508128605658e-11, + "loss": 0.5394539833068848, + "step": 6837 + }, + { + "epoch": 1.999561339377102, + "grad_norm": 1.5556477853097161, + 
"learning_rate": 1.051847475674883e-11, + "loss": 0.458107590675354, + "step": 6838 + }, + { + "epoch": 1.9998537797923674, + "grad_norm": 1.6149334678852978, + "learning_rate": 4.6748781246108706e-12, + "loss": 0.552463173866272, + "step": 6839 + }, + { + "epoch": 2.0, + "grad_norm": 3.285366195149545, + "learning_rate": 1.1687195999865453e-12, + "loss": 0.4656301736831665, + "step": 6840 + } + ], + "logging_steps": 1, + "max_steps": 6840, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2089529852362752.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-6840/training_args.bin b/checkpoint-6840/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..81f2336f2b4301fde755bb2ff1a553c0af833dc6 --- /dev/null +++ b/checkpoint-6840/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18a5144102f6d607705c76873c9b6b52fea03ff40dc71ea0f2bf5e2547fe44f +size 6968 diff --git a/checkpoint-6840/zero_to_fp32.py b/checkpoint-6840/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-6840/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . 
output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + 
ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def 
parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for the checkpoint. If not provided, will attempt to load the tag from the 'latest' file, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensors instead of torch tensors, which is more memory efficient. + Convert a pseudo tensor to a torch tensor with ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application, i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint.
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/processor_config.json b/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": "channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": 
"Qwen2VLVideoProcessor" + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e045f36c49d9be60827f700c2e7d8175dcd5d1df --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 2089529852362752.0, + "train_loss": 0.6033765813455596, + "train_runtime": 36066.319, + "train_samples_per_second": 0.758, + "train_steps_per_second": 0.19 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5d76a7c12292aea66d52d72b4b1a29e18b880456 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,6841 @@ +{"current_steps": 1, 
"total_steps": 6840, "loss": 1.2599382400512695, "lr": 0.0, "epoch": 0.00029244041526538966, "percentage": 0.01, "elapsed_time": "0:00:09", "remaining_time": "18:01:29"} +{"current_steps": 2, "total_steps": 6840, "loss": 0.9314937591552734, "lr": 5.847953216374269e-08, "epoch": 0.0005848808305307793, "percentage": 0.03, "elapsed_time": "0:00:15", "remaining_time": "14:30:31"} +{"current_steps": 3, "total_steps": 6840, "loss": 1.1433629989624023, "lr": 1.1695906432748539e-07, "epoch": 0.000877321245796169, "percentage": 0.04, "elapsed_time": "0:00:19", "remaining_time": "12:35:11"} +{"current_steps": 4, "total_steps": 6840, "loss": 1.2224640846252441, "lr": 1.7543859649122808e-07, "epoch": 0.0011697616610615586, "percentage": 0.06, "elapsed_time": "0:00:24", "remaining_time": "11:33:49"} +{"current_steps": 5, "total_steps": 6840, "loss": 1.0468370914459229, "lr": 2.3391812865497077e-07, "epoch": 0.0014622020763269484, "percentage": 0.07, "elapsed_time": "0:00:28", "remaining_time": "10:59:48"} +{"current_steps": 6, "total_steps": 6840, "loss": 1.1314436197280884, "lr": 2.9239766081871344e-07, "epoch": 0.001754642491592338, "percentage": 0.09, "elapsed_time": "0:00:33", "remaining_time": "10:35:21"} +{"current_steps": 7, "total_steps": 6840, "loss": 0.9903597831726074, "lr": 3.5087719298245616e-07, "epoch": 0.0020470829068577278, "percentage": 0.1, "elapsed_time": "0:00:38", "remaining_time": "10:24:59"} +{"current_steps": 8, "total_steps": 6840, "loss": 1.1988611221313477, "lr": 4.093567251461988e-07, "epoch": 0.0023395233221231173, "percentage": 0.12, "elapsed_time": "0:00:43", "remaining_time": "10:16:48"} +{"current_steps": 9, "total_steps": 6840, "loss": 1.1209533214569092, "lr": 4.6783625730994155e-07, "epoch": 0.002631963737388507, "percentage": 0.13, "elapsed_time": "0:00:47", "remaining_time": "10:04:32"} +{"current_steps": 10, "total_steps": 6840, "loss": 1.1582586765289307, "lr": 5.263157894736843e-07, "epoch": 0.0029244041526538967, "percentage": 0.15, 
"elapsed_time": "0:00:52", "remaining_time": "9:58:12"} +{"current_steps": 11, "total_steps": 6840, "loss": 1.2469007968902588, "lr": 5.847953216374269e-07, "epoch": 0.0032168445679192866, "percentage": 0.16, "elapsed_time": "0:00:57", "remaining_time": "9:50:23"} +{"current_steps": 12, "total_steps": 6840, "loss": 1.115494728088379, "lr": 6.432748538011696e-07, "epoch": 0.003509284983184676, "percentage": 0.18, "elapsed_time": "0:01:02", "remaining_time": "9:54:06"} +{"current_steps": 13, "total_steps": 6840, "loss": 1.1927871704101562, "lr": 7.017543859649123e-07, "epoch": 0.0038017253984500656, "percentage": 0.19, "elapsed_time": "0:01:08", "remaining_time": "9:55:36"} +{"current_steps": 14, "total_steps": 6840, "loss": 1.1014869213104248, "lr": 7.60233918128655e-07, "epoch": 0.0040941658137154556, "percentage": 0.2, "elapsed_time": "0:01:13", "remaining_time": "9:55:05"} +{"current_steps": 15, "total_steps": 6840, "loss": 1.1055865287780762, "lr": 8.187134502923977e-07, "epoch": 0.004386606228980845, "percentage": 0.22, "elapsed_time": "0:01:18", "remaining_time": "9:53:39"} +{"current_steps": 16, "total_steps": 6840, "loss": 1.1071349382400513, "lr": 8.771929824561404e-07, "epoch": 0.0046790466442462346, "percentage": 0.23, "elapsed_time": "0:01:24", "remaining_time": "9:59:10"} +{"current_steps": 17, "total_steps": 6840, "loss": 1.1737473011016846, "lr": 9.356725146198831e-07, "epoch": 0.004971487059511625, "percentage": 0.25, "elapsed_time": "0:01:28", "remaining_time": "9:52:05"} +{"current_steps": 18, "total_steps": 6840, "loss": 1.283717155456543, "lr": 9.941520467836258e-07, "epoch": 0.005263927474777014, "percentage": 0.26, "elapsed_time": "0:01:34", "remaining_time": "9:54:29"} +{"current_steps": 19, "total_steps": 6840, "loss": 1.2509160041809082, "lr": 1.0526315789473685e-06, "epoch": 0.005556367890042404, "percentage": 0.28, "elapsed_time": "0:01:38", "remaining_time": "9:48:42"} +{"current_steps": 20, "total_steps": 6840, "loss": 
0.9722317457199097, "lr": 1.111111111111111e-06, "epoch": 0.005848808305307793, "percentage": 0.29, "elapsed_time": "0:01:44", "remaining_time": "9:55:47"} +{"current_steps": 21, "total_steps": 6840, "loss": 1.1927049160003662, "lr": 1.1695906432748538e-06, "epoch": 0.006141248720573183, "percentage": 0.31, "elapsed_time": "0:01:50", "remaining_time": "9:58:32"} +{"current_steps": 22, "total_steps": 6840, "loss": 1.2133033275604248, "lr": 1.2280701754385965e-06, "epoch": 0.006433689135838573, "percentage": 0.32, "elapsed_time": "0:01:56", "remaining_time": "10:01:42"} +{"current_steps": 23, "total_steps": 6840, "loss": 1.2373273372650146, "lr": 1.2865497076023392e-06, "epoch": 0.006726129551103963, "percentage": 0.34, "elapsed_time": "0:02:02", "remaining_time": "10:03:16"} +{"current_steps": 24, "total_steps": 6840, "loss": 0.9476668834686279, "lr": 1.345029239766082e-06, "epoch": 0.007018569966369352, "percentage": 0.35, "elapsed_time": "0:02:08", "remaining_time": "10:08:02"} +{"current_steps": 25, "total_steps": 6840, "loss": 1.1171324253082275, "lr": 1.4035087719298246e-06, "epoch": 0.007311010381634742, "percentage": 0.37, "elapsed_time": "0:02:14", "remaining_time": "10:11:25"} +{"current_steps": 26, "total_steps": 6840, "loss": 1.1276075839996338, "lr": 1.4619883040935674e-06, "epoch": 0.007603450796900131, "percentage": 0.38, "elapsed_time": "0:02:20", "remaining_time": "10:13:27"} +{"current_steps": 27, "total_steps": 6840, "loss": 1.190751314163208, "lr": 1.52046783625731e-06, "epoch": 0.007895891212165522, "percentage": 0.39, "elapsed_time": "0:02:25", "remaining_time": "10:11:41"} +{"current_steps": 28, "total_steps": 6840, "loss": 1.2171813249588013, "lr": 1.5789473684210526e-06, "epoch": 0.008188331627430911, "percentage": 0.41, "elapsed_time": "0:02:29", "remaining_time": "10:07:04"} +{"current_steps": 29, "total_steps": 6840, "loss": 0.8595987558364868, "lr": 1.6374269005847953e-06, "epoch": 0.0084807720426963, "percentage": 0.42, "elapsed_time": 
"0:02:35", "remaining_time": "10:10:12"} +{"current_steps": 30, "total_steps": 6840, "loss": 1.0270106792449951, "lr": 1.695906432748538e-06, "epoch": 0.00877321245796169, "percentage": 0.44, "elapsed_time": "0:02:41", "remaining_time": "10:12:32"} +{"current_steps": 31, "total_steps": 6840, "loss": 1.068537712097168, "lr": 1.7543859649122807e-06, "epoch": 0.00906565287322708, "percentage": 0.45, "elapsed_time": "0:02:47", "remaining_time": "10:12:02"} +{"current_steps": 32, "total_steps": 6840, "loss": 1.1307867765426636, "lr": 1.8128654970760235e-06, "epoch": 0.009358093288492469, "percentage": 0.47, "elapsed_time": "0:02:52", "remaining_time": "10:10:22"} +{"current_steps": 33, "total_steps": 6840, "loss": 1.0837950706481934, "lr": 1.8713450292397662e-06, "epoch": 0.009650533703757859, "percentage": 0.48, "elapsed_time": "0:02:57", "remaining_time": "10:09:16"} +{"current_steps": 34, "total_steps": 6840, "loss": 1.1506178379058838, "lr": 1.929824561403509e-06, "epoch": 0.00994297411902325, "percentage": 0.5, "elapsed_time": "0:03:02", "remaining_time": "10:10:07"} +{"current_steps": 35, "total_steps": 6840, "loss": 0.9450151324272156, "lr": 1.9883040935672516e-06, "epoch": 0.01023541453428864, "percentage": 0.51, "elapsed_time": "0:03:08", "remaining_time": "10:10:21"} +{"current_steps": 36, "total_steps": 6840, "loss": 1.2040901184082031, "lr": 2.0467836257309943e-06, "epoch": 0.010527854949554029, "percentage": 0.53, "elapsed_time": "0:03:14", "remaining_time": "10:11:14"} +{"current_steps": 37, "total_steps": 6840, "loss": 1.2316429615020752, "lr": 2.105263157894737e-06, "epoch": 0.010820295364819418, "percentage": 0.54, "elapsed_time": "0:03:18", "remaining_time": "10:08:53"} +{"current_steps": 38, "total_steps": 6840, "loss": 1.2119100093841553, "lr": 2.1637426900584798e-06, "epoch": 0.011112735780084808, "percentage": 0.56, "elapsed_time": "0:03:23", "remaining_time": "10:06:51"} +{"current_steps": 39, "total_steps": 6840, "loss": 1.276926875114441, "lr": 
2.222222222222222e-06, "epoch": 0.011405176195350197, "percentage": 0.57, "elapsed_time": "0:03:28", "remaining_time": "10:07:00"} +{"current_steps": 40, "total_steps": 6840, "loss": 0.9357824921607971, "lr": 2.280701754385965e-06, "epoch": 0.011697616610615587, "percentage": 0.58, "elapsed_time": "0:03:33", "remaining_time": "10:05:16"} +{"current_steps": 41, "total_steps": 6840, "loss": 1.1861131191253662, "lr": 2.3391812865497075e-06, "epoch": 0.011990057025880976, "percentage": 0.6, "elapsed_time": "0:03:38", "remaining_time": "10:03:49"} +{"current_steps": 42, "total_steps": 6840, "loss": 1.1745539903640747, "lr": 2.3976608187134502e-06, "epoch": 0.012282497441146366, "percentage": 0.61, "elapsed_time": "0:03:44", "remaining_time": "10:05:29"} +{"current_steps": 43, "total_steps": 6840, "loss": 1.0291770696640015, "lr": 2.456140350877193e-06, "epoch": 0.012574937856411755, "percentage": 0.63, "elapsed_time": "0:03:49", "remaining_time": "10:05:22"} +{"current_steps": 44, "total_steps": 6840, "loss": 1.2361294031143188, "lr": 2.5146198830409357e-06, "epoch": 0.012867378271677147, "percentage": 0.64, "elapsed_time": "0:03:55", "remaining_time": "10:05:21"} +{"current_steps": 45, "total_steps": 6840, "loss": 1.1909143924713135, "lr": 2.5730994152046784e-06, "epoch": 0.013159818686942536, "percentage": 0.66, "elapsed_time": "0:04:01", "remaining_time": "10:07:41"} +{"current_steps": 46, "total_steps": 6840, "loss": 1.1998133659362793, "lr": 2.631578947368421e-06, "epoch": 0.013452259102207926, "percentage": 0.67, "elapsed_time": "0:04:06", "remaining_time": "10:06:38"} +{"current_steps": 47, "total_steps": 6840, "loss": 1.0011268854141235, "lr": 2.690058479532164e-06, "epoch": 0.013744699517473315, "percentage": 0.69, "elapsed_time": "0:04:11", "remaining_time": "10:06:28"} +{"current_steps": 48, "total_steps": 6840, "loss": 1.0552136898040771, "lr": 2.7485380116959066e-06, "epoch": 0.014037139932738705, "percentage": 0.7, "elapsed_time": "0:04:17", 
"remaining_time": "10:08:14"} +{"current_steps": 49, "total_steps": 6840, "loss": 0.9727921485900879, "lr": 2.8070175438596493e-06, "epoch": 0.014329580348004094, "percentage": 0.72, "elapsed_time": "0:04:23", "remaining_time": "10:07:52"} +{"current_steps": 50, "total_steps": 6840, "loss": 0.9361351728439331, "lr": 2.865497076023392e-06, "epoch": 0.014622020763269484, "percentage": 0.73, "elapsed_time": "0:04:28", "remaining_time": "10:07:32"} +{"current_steps": 51, "total_steps": 6840, "loss": 1.140329360961914, "lr": 2.9239766081871347e-06, "epoch": 0.014914461178534873, "percentage": 0.75, "elapsed_time": "0:04:33", "remaining_time": "10:06:01"} +{"current_steps": 52, "total_steps": 6840, "loss": 0.991325855255127, "lr": 2.9824561403508774e-06, "epoch": 0.015206901593800263, "percentage": 0.76, "elapsed_time": "0:04:38", "remaining_time": "10:06:39"} +{"current_steps": 53, "total_steps": 6840, "loss": 1.3082914352416992, "lr": 3.04093567251462e-06, "epoch": 0.015499342009065652, "percentage": 0.77, "elapsed_time": "0:04:44", "remaining_time": "10:06:23"} +{"current_steps": 54, "total_steps": 6840, "loss": 0.9767723083496094, "lr": 3.0994152046783624e-06, "epoch": 0.015791782424331043, "percentage": 0.79, "elapsed_time": "0:04:49", "remaining_time": "10:06:09"} +{"current_steps": 55, "total_steps": 6840, "loss": 1.193568229675293, "lr": 3.157894736842105e-06, "epoch": 0.016084222839596433, "percentage": 0.8, "elapsed_time": "0:04:55", "remaining_time": "10:08:12"} +{"current_steps": 56, "total_steps": 6840, "loss": 0.9767440557479858, "lr": 3.216374269005848e-06, "epoch": 0.016376663254861822, "percentage": 0.82, "elapsed_time": "0:05:00", "remaining_time": "10:07:42"} +{"current_steps": 57, "total_steps": 6840, "loss": 0.8888605833053589, "lr": 3.2748538011695906e-06, "epoch": 0.016669103670127212, "percentage": 0.83, "elapsed_time": "0:05:06", "remaining_time": "10:08:03"} +{"current_steps": 58, "total_steps": 6840, "loss": 1.087357997894287, "lr": 
3.3333333333333333e-06, "epoch": 0.0169615440853926, "percentage": 0.85, "elapsed_time": "0:05:12", "remaining_time": "10:09:32"} +{"current_steps": 59, "total_steps": 6840, "loss": 1.0217459201812744, "lr": 3.391812865497076e-06, "epoch": 0.01725398450065799, "percentage": 0.86, "elapsed_time": "0:05:19", "remaining_time": "10:11:59"} +{"current_steps": 60, "total_steps": 6840, "loss": 1.0783777236938477, "lr": 3.4502923976608188e-06, "epoch": 0.01754642491592338, "percentage": 0.88, "elapsed_time": "0:05:23", "remaining_time": "10:09:43"} +{"current_steps": 61, "total_steps": 6840, "loss": 0.85099196434021, "lr": 3.5087719298245615e-06, "epoch": 0.01783886533118877, "percentage": 0.89, "elapsed_time": "0:05:29", "remaining_time": "10:10:36"} +{"current_steps": 62, "total_steps": 6840, "loss": 0.9322569966316223, "lr": 3.567251461988304e-06, "epoch": 0.01813130574645416, "percentage": 0.91, "elapsed_time": "0:05:35", "remaining_time": "10:10:35"} +{"current_steps": 63, "total_steps": 6840, "loss": 0.8749685287475586, "lr": 3.625730994152047e-06, "epoch": 0.01842374616171955, "percentage": 0.92, "elapsed_time": "0:05:42", "remaining_time": "10:13:32"} +{"current_steps": 64, "total_steps": 6840, "loss": 0.857900857925415, "lr": 3.6842105263157896e-06, "epoch": 0.018716186576984938, "percentage": 0.94, "elapsed_time": "0:05:48", "remaining_time": "10:15:29"} +{"current_steps": 65, "total_steps": 6840, "loss": 0.8891770243644714, "lr": 3.7426900584795324e-06, "epoch": 0.019008626992250328, "percentage": 0.95, "elapsed_time": "0:05:54", "remaining_time": "10:16:13"} +{"current_steps": 66, "total_steps": 6840, "loss": 1.0730159282684326, "lr": 3.801169590643275e-06, "epoch": 0.019301067407515717, "percentage": 0.96, "elapsed_time": "0:06:01", "remaining_time": "10:18:08"} +{"current_steps": 67, "total_steps": 6840, "loss": 1.108138084411621, "lr": 3.859649122807018e-06, "epoch": 0.019593507822781107, "percentage": 0.98, "elapsed_time": "0:06:06", "remaining_time": 
"10:16:40"} +{"current_steps": 68, "total_steps": 6840, "loss": 1.2126305103302002, "lr": 3.9181286549707605e-06, "epoch": 0.0198859482380465, "percentage": 0.99, "elapsed_time": "0:06:09", "remaining_time": "10:13:28"} +{"current_steps": 69, "total_steps": 6840, "loss": 0.9527193307876587, "lr": 3.976608187134503e-06, "epoch": 0.02017838865331189, "percentage": 1.01, "elapsed_time": "0:06:15", "remaining_time": "10:14:55"} +{"current_steps": 70, "total_steps": 6840, "loss": 1.0454832315444946, "lr": 4.035087719298246e-06, "epoch": 0.02047082906857728, "percentage": 1.02, "elapsed_time": "0:06:21", "remaining_time": "10:14:51"} +{"current_steps": 71, "total_steps": 6840, "loss": 0.7075237035751343, "lr": 4.093567251461989e-06, "epoch": 0.020763269483842668, "percentage": 1.04, "elapsed_time": "0:06:27", "remaining_time": "10:15:16"} +{"current_steps": 72, "total_steps": 6840, "loss": 1.0972111225128174, "lr": 4.152046783625731e-06, "epoch": 0.021055709899108058, "percentage": 1.05, "elapsed_time": "0:06:33", "remaining_time": "10:15:54"} +{"current_steps": 73, "total_steps": 6840, "loss": 1.0400984287261963, "lr": 4.210526315789474e-06, "epoch": 0.021348150314373447, "percentage": 1.07, "elapsed_time": "0:06:39", "remaining_time": "10:17:58"} +{"current_steps": 74, "total_steps": 6840, "loss": 0.7712557315826416, "lr": 4.269005847953217e-06, "epoch": 0.021640590729638837, "percentage": 1.08, "elapsed_time": "0:06:45", "remaining_time": "10:17:36"} +{"current_steps": 75, "total_steps": 6840, "loss": 1.1733636856079102, "lr": 4.3274853801169596e-06, "epoch": 0.021933031144904226, "percentage": 1.1, "elapsed_time": "0:06:51", "remaining_time": "10:17:56"} +{"current_steps": 76, "total_steps": 6840, "loss": 0.8653621673583984, "lr": 4.385964912280702e-06, "epoch": 0.022225471560169616, "percentage": 1.11, "elapsed_time": "0:06:57", "remaining_time": "10:18:51"} +{"current_steps": 77, "total_steps": 6840, "loss": 0.8797299861907959, "lr": 4.444444444444444e-06, "epoch": 
0.022517911975435005, "percentage": 1.13, "elapsed_time": "0:07:03", "remaining_time": "10:19:23"} +{"current_steps": 78, "total_steps": 6840, "loss": 0.8357750177383423, "lr": 4.502923976608187e-06, "epoch": 0.022810352390700395, "percentage": 1.14, "elapsed_time": "0:07:08", "remaining_time": "10:18:40"} +{"current_steps": 79, "total_steps": 6840, "loss": 1.1249456405639648, "lr": 4.56140350877193e-06, "epoch": 0.023102792805965784, "percentage": 1.15, "elapsed_time": "0:07:11", "remaining_time": "10:15:53"} +{"current_steps": 80, "total_steps": 6840, "loss": 0.9920758008956909, "lr": 4.619883040935672e-06, "epoch": 0.023395233221231174, "percentage": 1.17, "elapsed_time": "0:07:16", "remaining_time": "10:15:10"} +{"current_steps": 81, "total_steps": 6840, "loss": 0.8115094900131226, "lr": 4.678362573099415e-06, "epoch": 0.023687673636496563, "percentage": 1.18, "elapsed_time": "0:07:22", "remaining_time": "10:15:17"} +{"current_steps": 82, "total_steps": 6840, "loss": 1.060575246810913, "lr": 4.736842105263158e-06, "epoch": 0.023980114051761953, "percentage": 1.2, "elapsed_time": "0:07:28", "remaining_time": "10:16:25"} +{"current_steps": 83, "total_steps": 6840, "loss": 1.028218150138855, "lr": 4.7953216374269005e-06, "epoch": 0.024272554467027342, "percentage": 1.21, "elapsed_time": "0:07:34", "remaining_time": "10:17:01"} +{"current_steps": 84, "total_steps": 6840, "loss": 1.090872049331665, "lr": 4.853801169590643e-06, "epoch": 0.02456499488229273, "percentage": 1.23, "elapsed_time": "0:07:40", "remaining_time": "10:16:43"} +{"current_steps": 85, "total_steps": 6840, "loss": 1.0069574117660522, "lr": 4.912280701754386e-06, "epoch": 0.02485743529755812, "percentage": 1.24, "elapsed_time": "0:07:45", "remaining_time": "10:16:36"} +{"current_steps": 86, "total_steps": 6840, "loss": 0.9391698837280273, "lr": 4.970760233918129e-06, "epoch": 0.02514987571282351, "percentage": 1.26, "elapsed_time": "0:07:50", "remaining_time": "10:15:59"} +{"current_steps": 87, 
"total_steps": 6840, "loss": 0.881995677947998, "lr": 5.029239766081871e-06, "epoch": 0.025442316128088904, "percentage": 1.27, "elapsed_time": "0:07:55", "remaining_time": "10:15:10"} +{"current_steps": 88, "total_steps": 6840, "loss": 0.87871253490448, "lr": 5.087719298245615e-06, "epoch": 0.025734756543354293, "percentage": 1.29, "elapsed_time": "0:08:00", "remaining_time": "10:15:00"} +{"current_steps": 89, "total_steps": 6840, "loss": 1.005904197692871, "lr": 5.146198830409357e-06, "epoch": 0.026027196958619683, "percentage": 1.3, "elapsed_time": "0:08:05", "remaining_time": "10:13:44"} +{"current_steps": 90, "total_steps": 6840, "loss": 0.8624223470687866, "lr": 5.2046783625731e-06, "epoch": 0.026319637373885072, "percentage": 1.32, "elapsed_time": "0:08:09", "remaining_time": "10:11:46"} +{"current_steps": 91, "total_steps": 6840, "loss": 0.9976427555084229, "lr": 5.263157894736842e-06, "epoch": 0.02661207778915046, "percentage": 1.33, "elapsed_time": "0:08:15", "remaining_time": "10:11:55"} +{"current_steps": 92, "total_steps": 6840, "loss": 0.7743148803710938, "lr": 5.321637426900586e-06, "epoch": 0.02690451820441585, "percentage": 1.35, "elapsed_time": "0:08:21", "remaining_time": "10:13:04"} +{"current_steps": 93, "total_steps": 6840, "loss": 0.8541792631149292, "lr": 5.380116959064328e-06, "epoch": 0.02719695861968124, "percentage": 1.36, "elapsed_time": "0:08:26", "remaining_time": "10:11:59"} +{"current_steps": 94, "total_steps": 6840, "loss": 0.9141846895217896, "lr": 5.438596491228071e-06, "epoch": 0.02748939903494663, "percentage": 1.37, "elapsed_time": "0:08:30", "remaining_time": "10:11:07"} +{"current_steps": 95, "total_steps": 6840, "loss": 0.9762974977493286, "lr": 5.497076023391813e-06, "epoch": 0.02778183945021202, "percentage": 1.39, "elapsed_time": "0:08:37", "remaining_time": "10:12:14"} +{"current_steps": 96, "total_steps": 6840, "loss": 0.8039775490760803, "lr": 5.555555555555557e-06, "epoch": 0.02807427986547741, "percentage": 1.4, 
"elapsed_time": "0:08:43", "remaining_time": "10:13:11"} +{"current_steps": 97, "total_steps": 6840, "loss": 0.9464477300643921, "lr": 5.6140350877192985e-06, "epoch": 0.0283667202807428, "percentage": 1.42, "elapsed_time": "0:08:47", "remaining_time": "10:11:37"} +{"current_steps": 98, "total_steps": 6840, "loss": 0.8264896869659424, "lr": 5.672514619883041e-06, "epoch": 0.028659160696008188, "percentage": 1.43, "elapsed_time": "0:08:53", "remaining_time": "10:11:16"} +{"current_steps": 99, "total_steps": 6840, "loss": 0.8347363471984863, "lr": 5.730994152046784e-06, "epoch": 0.028951601111273578, "percentage": 1.45, "elapsed_time": "0:08:58", "remaining_time": "10:11:31"} +{"current_steps": 100, "total_steps": 6840, "loss": 0.7974327802658081, "lr": 5.789473684210527e-06, "epoch": 0.029244041526538967, "percentage": 1.46, "elapsed_time": "0:09:04", "remaining_time": "10:11:13"} +{"current_steps": 101, "total_steps": 6840, "loss": 0.7953752875328064, "lr": 5.847953216374269e-06, "epoch": 0.029536481941804357, "percentage": 1.48, "elapsed_time": "0:09:14", "remaining_time": "10:16:15"} +{"current_steps": 102, "total_steps": 6840, "loss": 0.8652607798576355, "lr": 5.906432748538012e-06, "epoch": 0.029828922357069746, "percentage": 1.49, "elapsed_time": "0:09:19", "remaining_time": "10:15:50"} +{"current_steps": 103, "total_steps": 6840, "loss": 0.973792552947998, "lr": 5.964912280701755e-06, "epoch": 0.030121362772335136, "percentage": 1.51, "elapsed_time": "0:09:23", "remaining_time": "10:14:48"} +{"current_steps": 104, "total_steps": 6840, "loss": 0.8093612194061279, "lr": 6.023391812865498e-06, "epoch": 0.030413803187600525, "percentage": 1.52, "elapsed_time": "0:09:29", "remaining_time": "10:15:05"} +{"current_steps": 105, "total_steps": 6840, "loss": 0.8463394045829773, "lr": 6.08187134502924e-06, "epoch": 0.030706243602865915, "percentage": 1.54, "elapsed_time": "0:09:35", "remaining_time": "10:14:46"} +{"current_steps": 106, "total_steps": 6840, "loss": 
0.7898350358009338, "lr": 6.140350877192983e-06, "epoch": 0.030998684018131304, "percentage": 1.55, "elapsed_time": "0:09:38", "remaining_time": "10:12:37"} +{"current_steps": 107, "total_steps": 6840, "loss": 0.9750698804855347, "lr": 6.198830409356725e-06, "epoch": 0.0312911244333967, "percentage": 1.56, "elapsed_time": "0:09:43", "remaining_time": "10:12:26"} +{"current_steps": 108, "total_steps": 6840, "loss": 0.8137387633323669, "lr": 6.2573099415204685e-06, "epoch": 0.031583564848662087, "percentage": 1.58, "elapsed_time": "0:09:50", "remaining_time": "10:13:28"} +{"current_steps": 109, "total_steps": 6840, "loss": 1.0641593933105469, "lr": 6.31578947368421e-06, "epoch": 0.031876005263927476, "percentage": 1.59, "elapsed_time": "0:09:54", "remaining_time": "10:11:50"} +{"current_steps": 110, "total_steps": 6840, "loss": 0.9647193551063538, "lr": 6.374269005847954e-06, "epoch": 0.032168445679192866, "percentage": 1.61, "elapsed_time": "0:10:00", "remaining_time": "10:11:49"} +{"current_steps": 111, "total_steps": 6840, "loss": 0.9693200588226318, "lr": 6.432748538011696e-06, "epoch": 0.032460886094458255, "percentage": 1.62, "elapsed_time": "0:10:04", "remaining_time": "10:10:34"} +{"current_steps": 112, "total_steps": 6840, "loss": 0.9600590467453003, "lr": 6.491228070175439e-06, "epoch": 0.032753326509723645, "percentage": 1.64, "elapsed_time": "0:10:09", "remaining_time": "10:10:17"} +{"current_steps": 113, "total_steps": 6840, "loss": 0.8908880949020386, "lr": 6.549707602339181e-06, "epoch": 0.033045766924989034, "percentage": 1.65, "elapsed_time": "0:10:14", "remaining_time": "10:09:55"} +{"current_steps": 114, "total_steps": 6840, "loss": 0.9803124666213989, "lr": 6.608187134502925e-06, "epoch": 0.033338207340254424, "percentage": 1.67, "elapsed_time": "0:10:18", "remaining_time": "10:08:30"} +{"current_steps": 115, "total_steps": 6840, "loss": 0.8288271427154541, "lr": 6.666666666666667e-06, "epoch": 0.03363064775551981, "percentage": 1.68, 
"elapsed_time": "0:10:24", "remaining_time": "10:09:07"} +{"current_steps": 116, "total_steps": 6840, "loss": 0.7203798890113831, "lr": 6.72514619883041e-06, "epoch": 0.0339230881707852, "percentage": 1.7, "elapsed_time": "0:10:29", "remaining_time": "10:07:59"} +{"current_steps": 117, "total_steps": 6840, "loss": 0.7670629024505615, "lr": 6.783625730994152e-06, "epoch": 0.03421552858605059, "percentage": 1.71, "elapsed_time": "0:10:34", "remaining_time": "10:07:43"} +{"current_steps": 118, "total_steps": 6840, "loss": 0.8487929105758667, "lr": 6.842105263157896e-06, "epoch": 0.03450796900131598, "percentage": 1.73, "elapsed_time": "0:10:39", "remaining_time": "10:06:56"} +{"current_steps": 119, "total_steps": 6840, "loss": 0.8332704305648804, "lr": 6.9005847953216375e-06, "epoch": 0.03480040941658137, "percentage": 1.74, "elapsed_time": "0:10:45", "remaining_time": "10:07:21"} +{"current_steps": 120, "total_steps": 6840, "loss": 0.9984017610549927, "lr": 6.959064327485381e-06, "epoch": 0.03509284983184676, "percentage": 1.75, "elapsed_time": "0:10:50", "remaining_time": "10:06:52"} +{"current_steps": 121, "total_steps": 6840, "loss": 0.788459062576294, "lr": 7.017543859649123e-06, "epoch": 0.03538529024711215, "percentage": 1.77, "elapsed_time": "0:10:55", "remaining_time": "10:07:03"} +{"current_steps": 122, "total_steps": 6840, "loss": 1.0288443565368652, "lr": 7.0760233918128665e-06, "epoch": 0.03567773066237754, "percentage": 1.78, "elapsed_time": "0:10:59", "remaining_time": "10:05:29"} +{"current_steps": 123, "total_steps": 6840, "loss": 0.7939552664756775, "lr": 7.134502923976608e-06, "epoch": 0.03597017107764293, "percentage": 1.8, "elapsed_time": "0:11:05", "remaining_time": "10:05:30"} +{"current_steps": 124, "total_steps": 6840, "loss": 0.8816506862640381, "lr": 7.192982456140352e-06, "epoch": 0.03626261149290832, "percentage": 1.81, "elapsed_time": "0:11:11", "remaining_time": "10:06:04"} +{"current_steps": 125, "total_steps": 6840, "loss": 
0.8864353895187378, "lr": 7.251461988304094e-06, "epoch": 0.03655505190817371, "percentage": 1.83, "elapsed_time": "0:11:16", "remaining_time": "10:06:00"} +{"current_steps": 126, "total_steps": 6840, "loss": 0.9817954897880554, "lr": 7.309941520467837e-06, "epoch": 0.0368474923234391, "percentage": 1.84, "elapsed_time": "0:11:22", "remaining_time": "10:05:53"} +{"current_steps": 127, "total_steps": 6840, "loss": 0.8423842787742615, "lr": 7.368421052631579e-06, "epoch": 0.03713993273870449, "percentage": 1.86, "elapsed_time": "0:11:27", "remaining_time": "10:05:52"} +{"current_steps": 128, "total_steps": 6840, "loss": 0.8375135660171509, "lr": 7.426900584795322e-06, "epoch": 0.037432373153969876, "percentage": 1.87, "elapsed_time": "0:11:31", "remaining_time": "10:04:34"} +{"current_steps": 129, "total_steps": 6840, "loss": 0.9105685949325562, "lr": 7.485380116959065e-06, "epoch": 0.037724813569235266, "percentage": 1.89, "elapsed_time": "0:11:36", "remaining_time": "10:03:47"} +{"current_steps": 130, "total_steps": 6840, "loss": 0.8784557580947876, "lr": 7.5438596491228074e-06, "epoch": 0.038017253984500655, "percentage": 1.9, "elapsed_time": "0:11:41", "remaining_time": "10:03:52"} +{"current_steps": 131, "total_steps": 6840, "loss": 0.7557879686355591, "lr": 7.60233918128655e-06, "epoch": 0.038309694399766045, "percentage": 1.92, "elapsed_time": "0:11:47", "remaining_time": "10:04:13"} +{"current_steps": 132, "total_steps": 6840, "loss": 0.8966819047927856, "lr": 7.660818713450294e-06, "epoch": 0.038602134815031434, "percentage": 1.93, "elapsed_time": "0:11:52", "remaining_time": "10:03:22"} +{"current_steps": 133, "total_steps": 6840, "loss": 0.7642185091972351, "lr": 7.719298245614036e-06, "epoch": 0.038894575230296824, "percentage": 1.94, "elapsed_time": "0:11:57", "remaining_time": "10:03:20"} +{"current_steps": 134, "total_steps": 6840, "loss": 0.8313230276107788, "lr": 7.77777777777778e-06, "epoch": 0.03918701564556221, "percentage": 1.96, "elapsed_time": 
"0:12:03", "remaining_time": "10:03:02"} +{"current_steps": 135, "total_steps": 6840, "loss": 0.8388677835464478, "lr": 7.836257309941521e-06, "epoch": 0.0394794560608276, "percentage": 1.97, "elapsed_time": "0:12:08", "remaining_time": "10:03:03"} +{"current_steps": 136, "total_steps": 6840, "loss": 0.9065952301025391, "lr": 7.894736842105265e-06, "epoch": 0.039771896476093, "percentage": 1.99, "elapsed_time": "0:12:13", "remaining_time": "10:02:31"} +{"current_steps": 137, "total_steps": 6840, "loss": 0.8153767585754395, "lr": 7.953216374269006e-06, "epoch": 0.04006433689135839, "percentage": 2.0, "elapsed_time": "0:12:20", "remaining_time": "10:03:51"} +{"current_steps": 138, "total_steps": 6840, "loss": 0.8976421356201172, "lr": 8.01169590643275e-06, "epoch": 0.04035677730662378, "percentage": 2.02, "elapsed_time": "0:12:24", "remaining_time": "10:02:35"} +{"current_steps": 139, "total_steps": 6840, "loss": 0.7360264658927917, "lr": 8.070175438596492e-06, "epoch": 0.04064921772188917, "percentage": 2.03, "elapsed_time": "0:12:29", "remaining_time": "10:02:15"} +{"current_steps": 140, "total_steps": 6840, "loss": 0.8442148566246033, "lr": 8.128654970760235e-06, "epoch": 0.04094165813715456, "percentage": 2.05, "elapsed_time": "0:12:34", "remaining_time": "10:01:53"} +{"current_steps": 141, "total_steps": 6840, "loss": 0.6541435718536377, "lr": 8.187134502923977e-06, "epoch": 0.04123409855241995, "percentage": 2.06, "elapsed_time": "0:12:40", "remaining_time": "10:02:17"} +{"current_steps": 142, "total_steps": 6840, "loss": 0.7492353916168213, "lr": 8.24561403508772e-06, "epoch": 0.041526538967685336, "percentage": 2.08, "elapsed_time": "0:12:45", "remaining_time": "10:01:57"} +{"current_steps": 143, "total_steps": 6840, "loss": 0.6681893467903137, "lr": 8.304093567251463e-06, "epoch": 0.041818979382950726, "percentage": 2.09, "elapsed_time": "0:12:50", "remaining_time": "10:01:08"} +{"current_steps": 144, "total_steps": 6840, "loss": 0.8384866714477539, "lr": 
8.362573099415205e-06, "epoch": 0.042111419798216115, "percentage": 2.11, "elapsed_time": "0:12:54", "remaining_time": "10:00:19"} +{"current_steps": 145, "total_steps": 6840, "loss": 0.8338214159011841, "lr": 8.421052631578948e-06, "epoch": 0.042403860213481505, "percentage": 2.12, "elapsed_time": "0:12:59", "remaining_time": "10:00:10"} +{"current_steps": 146, "total_steps": 6840, "loss": 0.8549021482467651, "lr": 8.47953216374269e-06, "epoch": 0.042696300628746894, "percentage": 2.13, "elapsed_time": "0:13:05", "remaining_time": "9:59:58"} +{"current_steps": 147, "total_steps": 6840, "loss": 0.8324464559555054, "lr": 8.538011695906434e-06, "epoch": 0.042988741044012284, "percentage": 2.15, "elapsed_time": "0:13:09", "remaining_time": "9:58:50"} +{"current_steps": 148, "total_steps": 6840, "loss": 0.9247474670410156, "lr": 8.596491228070176e-06, "epoch": 0.04328118145927767, "percentage": 2.16, "elapsed_time": "0:13:12", "remaining_time": "9:57:28"} +{"current_steps": 149, "total_steps": 6840, "loss": 0.8488880395889282, "lr": 8.654970760233919e-06, "epoch": 0.04357362187454306, "percentage": 2.18, "elapsed_time": "0:13:18", "remaining_time": "9:57:47"} +{"current_steps": 150, "total_steps": 6840, "loss": 0.7844473123550415, "lr": 8.713450292397661e-06, "epoch": 0.04386606228980845, "percentage": 2.19, "elapsed_time": "0:13:24", "remaining_time": "9:58:12"} +{"current_steps": 151, "total_steps": 6840, "loss": 1.0540976524353027, "lr": 8.771929824561405e-06, "epoch": 0.04415850270507384, "percentage": 2.21, "elapsed_time": "0:13:29", "remaining_time": "9:57:54"} +{"current_steps": 152, "total_steps": 6840, "loss": 0.7919446229934692, "lr": 8.830409356725146e-06, "epoch": 0.04445094312033923, "percentage": 2.22, "elapsed_time": "0:13:34", "remaining_time": "9:56:57"} +{"current_steps": 153, "total_steps": 6840, "loss": 0.818670928478241, "lr": 8.888888888888888e-06, "epoch": 0.04474338353560462, "percentage": 2.24, "elapsed_time": "0:13:38", "remaining_time": 
"9:56:25"} +{"current_steps": 154, "total_steps": 6840, "loss": 0.8491114377975464, "lr": 8.947368421052632e-06, "epoch": 0.04503582395087001, "percentage": 2.25, "elapsed_time": "0:13:43", "remaining_time": "9:55:31"} +{"current_steps": 155, "total_steps": 6840, "loss": 0.660563588142395, "lr": 9.005847953216374e-06, "epoch": 0.0453282643661354, "percentage": 2.27, "elapsed_time": "0:13:47", "remaining_time": "9:54:55"} +{"current_steps": 156, "total_steps": 6840, "loss": 0.8559159636497498, "lr": 9.064327485380117e-06, "epoch": 0.04562070478140079, "percentage": 2.28, "elapsed_time": "0:13:52", "remaining_time": "9:54:18"} +{"current_steps": 157, "total_steps": 6840, "loss": 0.8478386402130127, "lr": 9.12280701754386e-06, "epoch": 0.04591314519666618, "percentage": 2.3, "elapsed_time": "0:13:58", "remaining_time": "9:54:47"} +{"current_steps": 158, "total_steps": 6840, "loss": 0.758915901184082, "lr": 9.181286549707603e-06, "epoch": 0.04620558561193157, "percentage": 2.31, "elapsed_time": "0:14:03", "remaining_time": "9:54:53"} +{"current_steps": 159, "total_steps": 6840, "loss": 0.773307204246521, "lr": 9.239766081871345e-06, "epoch": 0.04649802602719696, "percentage": 2.32, "elapsed_time": "0:14:10", "remaining_time": "9:55:20"} +{"current_steps": 160, "total_steps": 6840, "loss": 0.8948490023612976, "lr": 9.298245614035088e-06, "epoch": 0.04679046644246235, "percentage": 2.34, "elapsed_time": "0:14:15", "remaining_time": "9:55:02"} +{"current_steps": 161, "total_steps": 6840, "loss": 0.83086097240448, "lr": 9.35672514619883e-06, "epoch": 0.04708290685772774, "percentage": 2.35, "elapsed_time": "0:14:20", "remaining_time": "9:55:11"} +{"current_steps": 162, "total_steps": 6840, "loss": 0.7683168649673462, "lr": 9.415204678362574e-06, "epoch": 0.047375347272993126, "percentage": 2.37, "elapsed_time": "0:14:26", "remaining_time": "9:54:59"} +{"current_steps": 163, "total_steps": 6840, "loss": 0.9267748594284058, "lr": 9.473684210526315e-06, "epoch": 
0.047667787688258516, "percentage": 2.38, "elapsed_time": "0:14:31", "remaining_time": "9:54:48"} +{"current_steps": 164, "total_steps": 6840, "loss": 0.9243365526199341, "lr": 9.532163742690059e-06, "epoch": 0.047960228103523905, "percentage": 2.4, "elapsed_time": "0:14:36", "remaining_time": "9:54:57"} +{"current_steps": 165, "total_steps": 6840, "loss": 0.7841176986694336, "lr": 9.590643274853801e-06, "epoch": 0.048252668518789295, "percentage": 2.41, "elapsed_time": "0:14:41", "remaining_time": "9:54:20"} +{"current_steps": 166, "total_steps": 6840, "loss": 0.8318643569946289, "lr": 9.649122807017545e-06, "epoch": 0.048545108934054684, "percentage": 2.43, "elapsed_time": "0:14:46", "remaining_time": "9:54:08"} +{"current_steps": 167, "total_steps": 6840, "loss": 0.866286039352417, "lr": 9.707602339181286e-06, "epoch": 0.048837549349320074, "percentage": 2.44, "elapsed_time": "0:14:51", "remaining_time": "9:53:49"} +{"current_steps": 168, "total_steps": 6840, "loss": 0.8232241868972778, "lr": 9.76608187134503e-06, "epoch": 0.04912998976458546, "percentage": 2.46, "elapsed_time": "0:14:57", "remaining_time": "9:53:43"} +{"current_steps": 169, "total_steps": 6840, "loss": 0.874968945980072, "lr": 9.824561403508772e-06, "epoch": 0.04942243017985085, "percentage": 2.47, "elapsed_time": "0:15:02", "remaining_time": "9:53:29"} +{"current_steps": 170, "total_steps": 6840, "loss": 0.9048999547958374, "lr": 9.883040935672515e-06, "epoch": 0.04971487059511624, "percentage": 2.49, "elapsed_time": "0:15:06", "remaining_time": "9:52:55"} +{"current_steps": 171, "total_steps": 6840, "loss": 0.9220215082168579, "lr": 9.941520467836257e-06, "epoch": 0.05000731101038163, "percentage": 2.5, "elapsed_time": "0:15:11", "remaining_time": "9:52:17"} +{"current_steps": 172, "total_steps": 6840, "loss": 0.8326996564865112, "lr": 1e-05, "epoch": 0.05029975142564702, "percentage": 2.51, "elapsed_time": "0:15:16", "remaining_time": "9:52:26"} +{"current_steps": 173, "total_steps": 6840, 
"loss": 0.8023662567138672, "lr": 1.0058479532163743e-05, "epoch": 0.05059219184091241, "percentage": 2.53, "elapsed_time": "0:15:23", "remaining_time": "9:53:07"} +{"current_steps": 174, "total_steps": 6840, "loss": 0.9172271490097046, "lr": 1.0116959064327488e-05, "epoch": 0.05088463225617781, "percentage": 2.54, "elapsed_time": "0:15:28", "remaining_time": "9:52:51"} +{"current_steps": 175, "total_steps": 6840, "loss": 0.8016377687454224, "lr": 1.017543859649123e-05, "epoch": 0.0511770726714432, "percentage": 2.56, "elapsed_time": "0:15:35", "remaining_time": "9:53:30"} +{"current_steps": 176, "total_steps": 6840, "loss": 0.7656369805335999, "lr": 1.0233918128654972e-05, "epoch": 0.051469513086708586, "percentage": 2.57, "elapsed_time": "0:15:39", "remaining_time": "9:53:10"} +{"current_steps": 177, "total_steps": 6840, "loss": 0.7769640684127808, "lr": 1.0292397660818714e-05, "epoch": 0.051761953501973976, "percentage": 2.59, "elapsed_time": "0:15:44", "remaining_time": "9:52:41"} +{"current_steps": 178, "total_steps": 6840, "loss": 0.9830589294433594, "lr": 1.0350877192982459e-05, "epoch": 0.052054393917239365, "percentage": 2.6, "elapsed_time": "0:15:49", "remaining_time": "9:52:33"} +{"current_steps": 179, "total_steps": 6840, "loss": 0.8002523183822632, "lr": 1.04093567251462e-05, "epoch": 0.052346834332504755, "percentage": 2.62, "elapsed_time": "0:15:53", "remaining_time": "9:51:40"} +{"current_steps": 180, "total_steps": 6840, "loss": 0.879243016242981, "lr": 1.0467836257309943e-05, "epoch": 0.052639274747770144, "percentage": 2.63, "elapsed_time": "0:15:58", "remaining_time": "9:51:13"} +{"current_steps": 181, "total_steps": 6840, "loss": 0.7266525030136108, "lr": 1.0526315789473684e-05, "epoch": 0.052931715163035534, "percentage": 2.65, "elapsed_time": "0:16:03", "remaining_time": "9:51:02"} +{"current_steps": 182, "total_steps": 6840, "loss": 0.784702479839325, "lr": 1.0584795321637428e-05, "epoch": 0.05322415557830092, "percentage": 2.66, 
"elapsed_time": "0:16:08", "remaining_time": "9:50:31"} +{"current_steps": 183, "total_steps": 6840, "loss": 0.8419734239578247, "lr": 1.0643274853801172e-05, "epoch": 0.05351659599356631, "percentage": 2.68, "elapsed_time": "0:16:15", "remaining_time": "9:51:25"} +{"current_steps": 184, "total_steps": 6840, "loss": 0.8462855815887451, "lr": 1.0701754385964913e-05, "epoch": 0.0538090364088317, "percentage": 2.69, "elapsed_time": "0:16:21", "remaining_time": "9:51:33"} +{"current_steps": 185, "total_steps": 6840, "loss": 0.8888737559318542, "lr": 1.0760233918128655e-05, "epoch": 0.05410147682409709, "percentage": 2.7, "elapsed_time": "0:16:26", "remaining_time": "9:51:39"} +{"current_steps": 186, "total_steps": 6840, "loss": 0.8063781261444092, "lr": 1.0818713450292399e-05, "epoch": 0.05439391723936248, "percentage": 2.72, "elapsed_time": "0:16:32", "remaining_time": "9:51:55"} +{"current_steps": 187, "total_steps": 6840, "loss": 0.7981499433517456, "lr": 1.0877192982456142e-05, "epoch": 0.05468635765462787, "percentage": 2.73, "elapsed_time": "0:16:38", "remaining_time": "9:52:06"} +{"current_steps": 188, "total_steps": 6840, "loss": 0.8474490642547607, "lr": 1.0935672514619884e-05, "epoch": 0.05497879806989326, "percentage": 2.75, "elapsed_time": "0:16:42", "remaining_time": "9:51:08"} +{"current_steps": 189, "total_steps": 6840, "loss": 0.818732500076294, "lr": 1.0994152046783626e-05, "epoch": 0.05527123848515865, "percentage": 2.76, "elapsed_time": "0:16:47", "remaining_time": "9:50:44"} +{"current_steps": 190, "total_steps": 6840, "loss": 0.7660291194915771, "lr": 1.105263157894737e-05, "epoch": 0.05556367890042404, "percentage": 2.78, "elapsed_time": "0:16:52", "remaining_time": "9:50:24"} +{"current_steps": 191, "total_steps": 6840, "loss": 0.8240147233009338, "lr": 1.1111111111111113e-05, "epoch": 0.05585611931568943, "percentage": 2.79, "elapsed_time": "0:16:56", "remaining_time": "9:49:31"} +{"current_steps": 192, "total_steps": 6840, "loss": 
0.9377203583717346, "lr": 1.1169590643274855e-05, "epoch": 0.05614855973095482, "percentage": 2.81, "elapsed_time": "0:17:02", "remaining_time": "9:49:51"} +{"current_steps": 193, "total_steps": 6840, "loss": 0.8662704229354858, "lr": 1.1228070175438597e-05, "epoch": 0.05644100014622021, "percentage": 2.82, "elapsed_time": "0:17:06", "remaining_time": "9:49:08"} +{"current_steps": 194, "total_steps": 6840, "loss": 0.717308759689331, "lr": 1.128654970760234e-05, "epoch": 0.0567334405614856, "percentage": 2.84, "elapsed_time": "0:17:11", "remaining_time": "9:49:00"} +{"current_steps": 195, "total_steps": 6840, "loss": 0.8538037538528442, "lr": 1.1345029239766083e-05, "epoch": 0.05702588097675099, "percentage": 2.85, "elapsed_time": "0:17:15", "remaining_time": "9:48:23"} +{"current_steps": 196, "total_steps": 6840, "loss": 0.9016960859298706, "lr": 1.1403508771929826e-05, "epoch": 0.057318321392016376, "percentage": 2.87, "elapsed_time": "0:17:21", "remaining_time": "9:48:20"} +{"current_steps": 197, "total_steps": 6840, "loss": 0.9313502311706543, "lr": 1.1461988304093568e-05, "epoch": 0.057610761807281766, "percentage": 2.88, "elapsed_time": "0:17:25", "remaining_time": "9:47:37"} +{"current_steps": 198, "total_steps": 6840, "loss": 0.7330124974250793, "lr": 1.1520467836257312e-05, "epoch": 0.057903202222547155, "percentage": 2.89, "elapsed_time": "0:17:31", "remaining_time": "9:48:00"} +{"current_steps": 199, "total_steps": 6840, "loss": 0.8904056549072266, "lr": 1.1578947368421053e-05, "epoch": 0.058195642637812545, "percentage": 2.91, "elapsed_time": "0:17:36", "remaining_time": "9:47:41"} +{"current_steps": 200, "total_steps": 6840, "loss": 0.7816377878189087, "lr": 1.1637426900584797e-05, "epoch": 0.058488083053077934, "percentage": 2.92, "elapsed_time": "0:17:40", "remaining_time": "9:46:58"} +{"current_steps": 201, "total_steps": 6840, "loss": 0.7109910249710083, "lr": 1.1695906432748539e-05, "epoch": 0.058780523468343324, "percentage": 2.94, "elapsed_time": 
"0:17:50", "remaining_time": "9:49:33"} +{"current_steps": 202, "total_steps": 6840, "loss": 0.7657924890518188, "lr": 1.1754385964912282e-05, "epoch": 0.05907296388360871, "percentage": 2.95, "elapsed_time": "0:17:55", "remaining_time": "9:49:07"} +{"current_steps": 203, "total_steps": 6840, "loss": 0.8521978259086609, "lr": 1.1812865497076024e-05, "epoch": 0.0593654042988741, "percentage": 2.97, "elapsed_time": "0:18:01", "remaining_time": "9:49:03"} +{"current_steps": 204, "total_steps": 6840, "loss": 0.7558364868164062, "lr": 1.1871345029239766e-05, "epoch": 0.05965784471413949, "percentage": 2.98, "elapsed_time": "0:18:06", "remaining_time": "9:49:17"} +{"current_steps": 205, "total_steps": 6840, "loss": 0.8488497734069824, "lr": 1.192982456140351e-05, "epoch": 0.05995028512940488, "percentage": 3.0, "elapsed_time": "0:18:12", "remaining_time": "9:49:29"} +{"current_steps": 206, "total_steps": 6840, "loss": 0.7905591726303101, "lr": 1.1988304093567253e-05, "epoch": 0.06024272554467027, "percentage": 3.01, "elapsed_time": "0:18:17", "remaining_time": "9:49:11"} +{"current_steps": 207, "total_steps": 6840, "loss": 0.747936487197876, "lr": 1.2046783625730995e-05, "epoch": 0.06053516595993566, "percentage": 3.03, "elapsed_time": "0:18:23", "remaining_time": "9:49:13"} +{"current_steps": 208, "total_steps": 6840, "loss": 0.8653486967086792, "lr": 1.2105263157894737e-05, "epoch": 0.06082760637520105, "percentage": 3.04, "elapsed_time": "0:18:28", "remaining_time": "9:49:03"} +{"current_steps": 209, "total_steps": 6840, "loss": 0.8662437200546265, "lr": 1.216374269005848e-05, "epoch": 0.06112004679046644, "percentage": 3.06, "elapsed_time": "0:18:33", "remaining_time": "9:48:59"} +{"current_steps": 210, "total_steps": 6840, "loss": 0.9567133188247681, "lr": 1.2222222222222224e-05, "epoch": 0.06141248720573183, "percentage": 3.07, "elapsed_time": "0:18:39", "remaining_time": "9:49:16"} +{"current_steps": 211, "total_steps": 6840, "loss": 0.8994660377502441, "lr": 
1.2280701754385966e-05, "epoch": 0.06170492762099722, "percentage": 3.08, "elapsed_time": "0:18:46", "remaining_time": "9:50:05"} +{"current_steps": 212, "total_steps": 6840, "loss": 0.7889316082000732, "lr": 1.2339181286549708e-05, "epoch": 0.06199736803626261, "percentage": 3.1, "elapsed_time": "0:18:52", "remaining_time": "9:49:58"} +{"current_steps": 213, "total_steps": 6840, "loss": 0.883985161781311, "lr": 1.239766081871345e-05, "epoch": 0.062289808451528005, "percentage": 3.11, "elapsed_time": "0:18:58", "remaining_time": "9:50:08"} +{"current_steps": 214, "total_steps": 6840, "loss": 0.7780495882034302, "lr": 1.2456140350877195e-05, "epoch": 0.0625822488667934, "percentage": 3.13, "elapsed_time": "0:19:03", "remaining_time": "9:50:16"} +{"current_steps": 215, "total_steps": 6840, "loss": 0.6514906883239746, "lr": 1.2514619883040937e-05, "epoch": 0.06287468928205878, "percentage": 3.14, "elapsed_time": "0:19:09", "remaining_time": "9:50:12"} +{"current_steps": 216, "total_steps": 6840, "loss": 0.750559389591217, "lr": 1.2573099415204679e-05, "epoch": 0.06316712969732417, "percentage": 3.16, "elapsed_time": "0:19:15", "remaining_time": "9:50:21"} +{"current_steps": 217, "total_steps": 6840, "loss": 0.8330573439598083, "lr": 1.263157894736842e-05, "epoch": 0.06345957011258956, "percentage": 3.17, "elapsed_time": "0:19:18", "remaining_time": "9:49:29"} +{"current_steps": 218, "total_steps": 6840, "loss": 0.8075361847877502, "lr": 1.2690058479532166e-05, "epoch": 0.06375201052785495, "percentage": 3.19, "elapsed_time": "0:19:23", "remaining_time": "9:48:55"} +{"current_steps": 219, "total_steps": 6840, "loss": 0.7636772394180298, "lr": 1.2748538011695908e-05, "epoch": 0.06404445094312033, "percentage": 3.2, "elapsed_time": "0:19:29", "remaining_time": "9:49:23"} +{"current_steps": 220, "total_steps": 6840, "loss": 0.8241903185844421, "lr": 1.280701754385965e-05, "epoch": 0.06433689135838573, "percentage": 3.22, "elapsed_time": "0:19:35", "remaining_time": 
"9:49:35"} +{"current_steps": 221, "total_steps": 6840, "loss": 0.6582514047622681, "lr": 1.2865497076023392e-05, "epoch": 0.06462933177365111, "percentage": 3.23, "elapsed_time": "0:19:40", "remaining_time": "9:49:24"} +{"current_steps": 222, "total_steps": 6840, "loss": 0.6363992691040039, "lr": 1.2923976608187137e-05, "epoch": 0.06492177218891651, "percentage": 3.25, "elapsed_time": "0:19:46", "remaining_time": "9:49:30"} +{"current_steps": 223, "total_steps": 6840, "loss": 0.8093860149383545, "lr": 1.2982456140350879e-05, "epoch": 0.06521421260418189, "percentage": 3.26, "elapsed_time": "0:19:50", "remaining_time": "9:48:37"} +{"current_steps": 224, "total_steps": 6840, "loss": 0.7719511985778809, "lr": 1.304093567251462e-05, "epoch": 0.06550665301944729, "percentage": 3.27, "elapsed_time": "0:19:54", "remaining_time": "9:47:51"} +{"current_steps": 225, "total_steps": 6840, "loss": 0.8314809799194336, "lr": 1.3099415204678362e-05, "epoch": 0.06579909343471267, "percentage": 3.29, "elapsed_time": "0:19:58", "remaining_time": "9:47:22"} +{"current_steps": 226, "total_steps": 6840, "loss": 0.8752902746200562, "lr": 1.3157894736842108e-05, "epoch": 0.06609153384997807, "percentage": 3.3, "elapsed_time": "0:20:04", "remaining_time": "9:47:22"} +{"current_steps": 227, "total_steps": 6840, "loss": 0.7564839124679565, "lr": 1.321637426900585e-05, "epoch": 0.06638397426524345, "percentage": 3.32, "elapsed_time": "0:20:08", "remaining_time": "9:46:56"} +{"current_steps": 228, "total_steps": 6840, "loss": 0.7377971410751343, "lr": 1.3274853801169591e-05, "epoch": 0.06667641468050885, "percentage": 3.33, "elapsed_time": "0:20:14", "remaining_time": "9:47:01"} +{"current_steps": 229, "total_steps": 6840, "loss": 0.7298087477684021, "lr": 1.3333333333333333e-05, "epoch": 0.06696885509577423, "percentage": 3.35, "elapsed_time": "0:20:19", "remaining_time": "9:46:54"} +{"current_steps": 230, "total_steps": 6840, "loss": 0.7291176915168762, "lr": 1.3391812865497079e-05, 
"epoch": 0.06726129551103963, "percentage": 3.36, "elapsed_time": "0:20:24", "remaining_time": "9:46:44"} +{"current_steps": 231, "total_steps": 6840, "loss": 0.8226944208145142, "lr": 1.345029239766082e-05, "epoch": 0.06755373592630501, "percentage": 3.38, "elapsed_time": "0:20:30", "remaining_time": "9:46:47"} +{"current_steps": 232, "total_steps": 6840, "loss": 0.7185185551643372, "lr": 1.3508771929824562e-05, "epoch": 0.0678461763415704, "percentage": 3.39, "elapsed_time": "0:20:35", "remaining_time": "9:46:18"} +{"current_steps": 233, "total_steps": 6840, "loss": 0.7028212547302246, "lr": 1.3567251461988304e-05, "epoch": 0.0681386167568358, "percentage": 3.41, "elapsed_time": "0:20:40", "remaining_time": "9:46:27"} +{"current_steps": 234, "total_steps": 6840, "loss": 0.8809897899627686, "lr": 1.362573099415205e-05, "epoch": 0.06843105717210118, "percentage": 3.42, "elapsed_time": "0:20:45", "remaining_time": "9:45:54"} +{"current_steps": 235, "total_steps": 6840, "loss": 0.7779085040092468, "lr": 1.3684210526315791e-05, "epoch": 0.06872349758736658, "percentage": 3.44, "elapsed_time": "0:20:50", "remaining_time": "9:45:56"} +{"current_steps": 236, "total_steps": 6840, "loss": 0.731019139289856, "lr": 1.3742690058479533e-05, "epoch": 0.06901593800263196, "percentage": 3.45, "elapsed_time": "0:20:57", "remaining_time": "9:46:18"} +{"current_steps": 237, "total_steps": 6840, "loss": 0.7495850920677185, "lr": 1.3801169590643275e-05, "epoch": 0.06930837841789736, "percentage": 3.46, "elapsed_time": "0:21:01", "remaining_time": "9:45:34"} +{"current_steps": 238, "total_steps": 6840, "loss": 0.7018189430236816, "lr": 1.385964912280702e-05, "epoch": 0.06960081883316274, "percentage": 3.48, "elapsed_time": "0:21:06", "remaining_time": "9:45:20"} +{"current_steps": 239, "total_steps": 6840, "loss": 0.7072417736053467, "lr": 1.3918128654970762e-05, "epoch": 0.06989325924842814, "percentage": 3.49, "elapsed_time": "0:21:11", "remaining_time": "9:45:15"} +{"current_steps": 
240, "total_steps": 6840, "loss": 0.8125720620155334, "lr": 1.3976608187134504e-05, "epoch": 0.07018569966369352, "percentage": 3.51, "elapsed_time": "0:21:15", "remaining_time": "9:44:48"} +{"current_steps": 241, "total_steps": 6840, "loss": 0.6101655960083008, "lr": 1.4035087719298246e-05, "epoch": 0.07047814007895892, "percentage": 3.52, "elapsed_time": "0:21:21", "remaining_time": "9:44:47"} +{"current_steps": 242, "total_steps": 6840, "loss": 0.9005568623542786, "lr": 1.409356725146199e-05, "epoch": 0.0707705804942243, "percentage": 3.54, "elapsed_time": "0:21:26", "remaining_time": "9:44:24"} +{"current_steps": 243, "total_steps": 6840, "loss": 0.7678338289260864, "lr": 1.4152046783625733e-05, "epoch": 0.0710630209094897, "percentage": 3.55, "elapsed_time": "0:21:31", "remaining_time": "9:44:16"} +{"current_steps": 244, "total_steps": 6840, "loss": 0.7563410997390747, "lr": 1.4210526315789475e-05, "epoch": 0.07135546132475508, "percentage": 3.57, "elapsed_time": "0:21:36", "remaining_time": "9:44:04"} +{"current_steps": 245, "total_steps": 6840, "loss": 0.7497583627700806, "lr": 1.4269005847953217e-05, "epoch": 0.07164790174002048, "percentage": 3.58, "elapsed_time": "0:21:41", "remaining_time": "9:43:57"} +{"current_steps": 246, "total_steps": 6840, "loss": 0.8913442492485046, "lr": 1.432748538011696e-05, "epoch": 0.07194034215528586, "percentage": 3.6, "elapsed_time": "0:21:47", "remaining_time": "9:44:04"} +{"current_steps": 247, "total_steps": 6840, "loss": 0.7714704871177673, "lr": 1.4385964912280704e-05, "epoch": 0.07223278257055125, "percentage": 3.61, "elapsed_time": "0:21:51", "remaining_time": "9:43:37"} +{"current_steps": 248, "total_steps": 6840, "loss": 0.6752789616584778, "lr": 1.4444444444444446e-05, "epoch": 0.07252522298581664, "percentage": 3.63, "elapsed_time": "0:21:56", "remaining_time": "9:43:21"} +{"current_steps": 249, "total_steps": 6840, "loss": 0.6092795133590698, "lr": 1.4502923976608188e-05, "epoch": 0.07281766340108203, 
"percentage": 3.64, "elapsed_time": "0:22:02", "remaining_time": "9:43:14"} +{"current_steps": 250, "total_steps": 6840, "loss": 0.9300343990325928, "lr": 1.4561403508771931e-05, "epoch": 0.07311010381634742, "percentage": 3.65, "elapsed_time": "0:22:07", "remaining_time": "9:43:16"} +{"current_steps": 251, "total_steps": 6840, "loss": 0.8005613088607788, "lr": 1.4619883040935675e-05, "epoch": 0.07340254423161281, "percentage": 3.67, "elapsed_time": "0:22:13", "remaining_time": "9:43:21"} +{"current_steps": 252, "total_steps": 6840, "loss": 0.7188931703567505, "lr": 1.4678362573099417e-05, "epoch": 0.0736949846468782, "percentage": 3.68, "elapsed_time": "0:22:19", "remaining_time": "9:43:27"} +{"current_steps": 253, "total_steps": 6840, "loss": 0.6967242956161499, "lr": 1.4736842105263159e-05, "epoch": 0.07398742506214359, "percentage": 3.7, "elapsed_time": "0:22:25", "remaining_time": "9:43:46"} +{"current_steps": 254, "total_steps": 6840, "loss": 0.6921653747558594, "lr": 1.4795321637426902e-05, "epoch": 0.07427986547740897, "percentage": 3.71, "elapsed_time": "0:22:30", "remaining_time": "9:43:38"} +{"current_steps": 255, "total_steps": 6840, "loss": 0.8498743772506714, "lr": 1.4853801169590644e-05, "epoch": 0.07457230589267437, "percentage": 3.73, "elapsed_time": "0:22:35", "remaining_time": "9:43:35"} +{"current_steps": 256, "total_steps": 6840, "loss": 0.6420027017593384, "lr": 1.4912280701754388e-05, "epoch": 0.07486474630793975, "percentage": 3.74, "elapsed_time": "0:22:41", "remaining_time": "9:43:28"} +{"current_steps": 257, "total_steps": 6840, "loss": 0.7101434469223022, "lr": 1.497076023391813e-05, "epoch": 0.07515718672320515, "percentage": 3.76, "elapsed_time": "0:22:47", "remaining_time": "9:43:45"} +{"current_steps": 258, "total_steps": 6840, "loss": 0.740740180015564, "lr": 1.5029239766081873e-05, "epoch": 0.07544962713847053, "percentage": 3.77, "elapsed_time": "0:22:52", "remaining_time": "9:43:27"} +{"current_steps": 259, "total_steps": 6840, 
"loss": 0.891905665397644, "lr": 1.5087719298245615e-05, "epoch": 0.07574206755373593, "percentage": 3.79, "elapsed_time": "0:22:57", "remaining_time": "9:43:14"} +{"current_steps": 260, "total_steps": 6840, "loss": 0.867740273475647, "lr": 1.5146198830409358e-05, "epoch": 0.07603450796900131, "percentage": 3.8, "elapsed_time": "0:23:01", "remaining_time": "9:42:38"} +{"current_steps": 261, "total_steps": 6840, "loss": 0.7895220518112183, "lr": 1.52046783625731e-05, "epoch": 0.07632694838426671, "percentage": 3.82, "elapsed_time": "0:23:07", "remaining_time": "9:42:56"} +{"current_steps": 262, "total_steps": 6840, "loss": 0.7987008094787598, "lr": 1.5263157894736846e-05, "epoch": 0.07661938879953209, "percentage": 3.83, "elapsed_time": "0:23:12", "remaining_time": "9:42:41"} +{"current_steps": 263, "total_steps": 6840, "loss": 0.7780282497406006, "lr": 1.5321637426900587e-05, "epoch": 0.07691182921479749, "percentage": 3.85, "elapsed_time": "0:23:16", "remaining_time": "9:42:15"} +{"current_steps": 264, "total_steps": 6840, "loss": 0.6265891194343567, "lr": 1.538011695906433e-05, "epoch": 0.07720426963006287, "percentage": 3.86, "elapsed_time": "0:23:22", "remaining_time": "9:42:26"} +{"current_steps": 265, "total_steps": 6840, "loss": 0.6559646129608154, "lr": 1.543859649122807e-05, "epoch": 0.07749671004532827, "percentage": 3.87, "elapsed_time": "0:23:28", "remaining_time": "9:42:15"} +{"current_steps": 266, "total_steps": 6840, "loss": 0.8362047672271729, "lr": 1.5497076023391816e-05, "epoch": 0.07778915046059365, "percentage": 3.89, "elapsed_time": "0:23:33", "remaining_time": "9:42:10"} +{"current_steps": 267, "total_steps": 6840, "loss": 0.707663357257843, "lr": 1.555555555555556e-05, "epoch": 0.07808159087585904, "percentage": 3.9, "elapsed_time": "0:23:38", "remaining_time": "9:41:53"} +{"current_steps": 268, "total_steps": 6840, "loss": 0.67903071641922, "lr": 1.56140350877193e-05, "epoch": 0.07837403129112443, "percentage": 3.92, "elapsed_time": 
"0:23:43", "remaining_time": "9:41:36"} +{"current_steps": 269, "total_steps": 6840, "loss": 0.7634894251823425, "lr": 1.5672514619883042e-05, "epoch": 0.07866647170638982, "percentage": 3.93, "elapsed_time": "0:23:47", "remaining_time": "9:41:18"} +{"current_steps": 270, "total_steps": 6840, "loss": 0.6395117044448853, "lr": 1.5730994152046787e-05, "epoch": 0.0789589121216552, "percentage": 3.95, "elapsed_time": "0:23:52", "remaining_time": "9:40:57"} +{"current_steps": 271, "total_steps": 6840, "loss": 0.6948165893554688, "lr": 1.578947368421053e-05, "epoch": 0.0792513525369206, "percentage": 3.96, "elapsed_time": "0:23:58", "remaining_time": "9:41:03"} +{"current_steps": 272, "total_steps": 6840, "loss": 0.9288383722305298, "lr": 1.584795321637427e-05, "epoch": 0.079543792952186, "percentage": 3.98, "elapsed_time": "0:24:03", "remaining_time": "9:40:46"} +{"current_steps": 273, "total_steps": 6840, "loss": 0.9291346073150635, "lr": 1.5906432748538013e-05, "epoch": 0.07983623336745138, "percentage": 3.99, "elapsed_time": "0:24:07", "remaining_time": "9:40:07"} +{"current_steps": 274, "total_steps": 6840, "loss": 0.7399512529373169, "lr": 1.5964912280701755e-05, "epoch": 0.08012867378271678, "percentage": 4.01, "elapsed_time": "0:24:11", "remaining_time": "9:39:54"} +{"current_steps": 275, "total_steps": 6840, "loss": 0.6890764236450195, "lr": 1.60233918128655e-05, "epoch": 0.08042111419798216, "percentage": 4.02, "elapsed_time": "0:24:16", "remaining_time": "9:39:39"} +{"current_steps": 276, "total_steps": 6840, "loss": 0.6520324349403381, "lr": 1.6081871345029242e-05, "epoch": 0.08071355461324756, "percentage": 4.04, "elapsed_time": "0:24:23", "remaining_time": "9:39:56"} +{"current_steps": 277, "total_steps": 6840, "loss": 0.6726658344268799, "lr": 1.6140350877192984e-05, "epoch": 0.08100599502851294, "percentage": 4.05, "elapsed_time": "0:24:29", "remaining_time": "9:40:13"} +{"current_steps": 278, "total_steps": 6840, "loss": 0.7453294992446899, "lr": 
1.6198830409356726e-05, "epoch": 0.08129843544377834, "percentage": 4.06, "elapsed_time": "0:24:34", "remaining_time": "9:40:04"} +{"current_steps": 279, "total_steps": 6840, "loss": 0.755578875541687, "lr": 1.625730994152047e-05, "epoch": 0.08159087585904372, "percentage": 4.08, "elapsed_time": "0:24:39", "remaining_time": "9:39:53"} +{"current_steps": 280, "total_steps": 6840, "loss": 0.713086724281311, "lr": 1.6315789473684213e-05, "epoch": 0.08188331627430911, "percentage": 4.09, "elapsed_time": "0:24:45", "remaining_time": "9:40:02"} +{"current_steps": 281, "total_steps": 6840, "loss": 0.8714310526847839, "lr": 1.6374269005847955e-05, "epoch": 0.0821757566895745, "percentage": 4.11, "elapsed_time": "0:24:50", "remaining_time": "9:39:50"} +{"current_steps": 282, "total_steps": 6840, "loss": 0.6827348470687866, "lr": 1.6432748538011697e-05, "epoch": 0.0824681971048399, "percentage": 4.12, "elapsed_time": "0:24:54", "remaining_time": "9:39:25"} +{"current_steps": 283, "total_steps": 6840, "loss": 0.8613482713699341, "lr": 1.649122807017544e-05, "epoch": 0.08276063752010528, "percentage": 4.14, "elapsed_time": "0:25:00", "remaining_time": "9:39:14"} +{"current_steps": 284, "total_steps": 6840, "loss": 0.7442763447761536, "lr": 1.6549707602339184e-05, "epoch": 0.08305307793537067, "percentage": 4.15, "elapsed_time": "0:25:06", "remaining_time": "9:39:39"} +{"current_steps": 285, "total_steps": 6840, "loss": 0.7505494356155396, "lr": 1.6608187134502926e-05, "epoch": 0.08334551835063606, "percentage": 4.17, "elapsed_time": "0:25:11", "remaining_time": "9:39:23"} +{"current_steps": 286, "total_steps": 6840, "loss": 0.7720779776573181, "lr": 1.6666666666666667e-05, "epoch": 0.08363795876590145, "percentage": 4.18, "elapsed_time": "0:25:18", "remaining_time": "9:39:47"} +{"current_steps": 287, "total_steps": 6840, "loss": 0.7746216654777527, "lr": 1.672514619883041e-05, "epoch": 0.08393039918116683, "percentage": 4.2, "elapsed_time": "0:25:24", "remaining_time": 
"9:39:59"} +{"current_steps": 288, "total_steps": 6840, "loss": 0.8471436500549316, "lr": 1.6783625730994155e-05, "epoch": 0.08422283959643223, "percentage": 4.21, "elapsed_time": "0:25:28", "remaining_time": "9:39:31"} +{"current_steps": 289, "total_steps": 6840, "loss": 0.7117248773574829, "lr": 1.6842105263157896e-05, "epoch": 0.08451528001169761, "percentage": 4.23, "elapsed_time": "0:25:33", "remaining_time": "9:39:28"} +{"current_steps": 290, "total_steps": 6840, "loss": 0.758680522441864, "lr": 1.690058479532164e-05, "epoch": 0.08480772042696301, "percentage": 4.24, "elapsed_time": "0:25:38", "remaining_time": "9:39:01"} +{"current_steps": 291, "total_steps": 6840, "loss": 0.9083560705184937, "lr": 1.695906432748538e-05, "epoch": 0.08510016084222839, "percentage": 4.25, "elapsed_time": "0:25:44", "remaining_time": "9:39:29"} +{"current_steps": 292, "total_steps": 6840, "loss": 0.7457551956176758, "lr": 1.7017543859649125e-05, "epoch": 0.08539260125749379, "percentage": 4.27, "elapsed_time": "0:25:50", "remaining_time": "9:39:22"} +{"current_steps": 293, "total_steps": 6840, "loss": 0.7463638782501221, "lr": 1.7076023391812867e-05, "epoch": 0.08568504167275917, "percentage": 4.28, "elapsed_time": "0:25:55", "remaining_time": "9:39:19"} +{"current_steps": 294, "total_steps": 6840, "loss": 0.6983559131622314, "lr": 1.713450292397661e-05, "epoch": 0.08597748208802457, "percentage": 4.3, "elapsed_time": "0:26:00", "remaining_time": "9:39:13"} +{"current_steps": 295, "total_steps": 6840, "loss": 0.8043842911720276, "lr": 1.719298245614035e-05, "epoch": 0.08626992250328995, "percentage": 4.31, "elapsed_time": "0:26:06", "remaining_time": "9:39:19"} +{"current_steps": 296, "total_steps": 6840, "loss": 0.7150747776031494, "lr": 1.7251461988304093e-05, "epoch": 0.08656236291855535, "percentage": 4.33, "elapsed_time": "0:26:10", "remaining_time": "9:38:42"} +{"current_steps": 297, "total_steps": 6840, "loss": 0.7805558443069458, "lr": 1.7309941520467838e-05, "epoch": 
0.08685480333382073, "percentage": 4.34, "elapsed_time": "0:26:16", "remaining_time": "9:38:43"} +{"current_steps": 298, "total_steps": 6840, "loss": 0.7158486843109131, "lr": 1.736842105263158e-05, "epoch": 0.08714724374908613, "percentage": 4.36, "elapsed_time": "0:26:20", "remaining_time": "9:38:26"} +{"current_steps": 299, "total_steps": 6840, "loss": 0.6496458053588867, "lr": 1.7426900584795322e-05, "epoch": 0.08743968416435151, "percentage": 4.37, "elapsed_time": "0:26:26", "remaining_time": "9:38:16"} +{"current_steps": 300, "total_steps": 6840, "loss": 0.7488506436347961, "lr": 1.7485380116959064e-05, "epoch": 0.0877321245796169, "percentage": 4.39, "elapsed_time": "0:26:32", "remaining_time": "9:38:26"} +{"current_steps": 301, "total_steps": 6840, "loss": 0.8370999097824097, "lr": 1.754385964912281e-05, "epoch": 0.08802456499488229, "percentage": 4.4, "elapsed_time": "0:26:41", "remaining_time": "9:39:45"} +{"current_steps": 302, "total_steps": 6840, "loss": 0.6624353528022766, "lr": 1.760233918128655e-05, "epoch": 0.08831700541014768, "percentage": 4.42, "elapsed_time": "0:26:45", "remaining_time": "9:39:26"} +{"current_steps": 303, "total_steps": 6840, "loss": 0.6861047148704529, "lr": 1.7660818713450293e-05, "epoch": 0.08860944582541307, "percentage": 4.43, "elapsed_time": "0:26:50", "remaining_time": "9:39:05"} +{"current_steps": 304, "total_steps": 6840, "loss": 0.746711015701294, "lr": 1.7719298245614035e-05, "epoch": 0.08890188624067846, "percentage": 4.44, "elapsed_time": "0:26:55", "remaining_time": "9:38:52"} +{"current_steps": 305, "total_steps": 6840, "loss": 0.7794955968856812, "lr": 1.7777777777777777e-05, "epoch": 0.08919432665594385, "percentage": 4.46, "elapsed_time": "0:27:01", "remaining_time": "9:38:55"} +{"current_steps": 306, "total_steps": 6840, "loss": 0.7202489972114563, "lr": 1.7836257309941522e-05, "epoch": 0.08948676707120924, "percentage": 4.47, "elapsed_time": "0:27:06", "remaining_time": "9:38:51"} +{"current_steps": 307, 
"total_steps": 6840, "loss": 0.7252119183540344, "lr": 1.7894736842105264e-05, "epoch": 0.08977920748647462, "percentage": 4.49, "elapsed_time": "0:27:11", "remaining_time": "9:38:37"} +{"current_steps": 308, "total_steps": 6840, "loss": 0.9168737530708313, "lr": 1.7953216374269006e-05, "epoch": 0.09007164790174002, "percentage": 4.5, "elapsed_time": "0:27:17", "remaining_time": "9:38:38"} +{"current_steps": 309, "total_steps": 6840, "loss": 0.7647944688796997, "lr": 1.8011695906432747e-05, "epoch": 0.0903640883170054, "percentage": 4.52, "elapsed_time": "0:27:23", "remaining_time": "9:38:50"} +{"current_steps": 310, "total_steps": 6840, "loss": 0.7836136817932129, "lr": 1.8070175438596493e-05, "epoch": 0.0906565287322708, "percentage": 4.53, "elapsed_time": "0:27:29", "remaining_time": "9:38:58"} +{"current_steps": 311, "total_steps": 6840, "loss": 0.6495587825775146, "lr": 1.8128654970760235e-05, "epoch": 0.0909489691475362, "percentage": 4.55, "elapsed_time": "0:27:34", "remaining_time": "9:39:02"} +{"current_steps": 312, "total_steps": 6840, "loss": 0.7266290187835693, "lr": 1.8187134502923976e-05, "epoch": 0.09124140956280158, "percentage": 4.56, "elapsed_time": "0:27:39", "remaining_time": "9:38:33"} +{"current_steps": 313, "total_steps": 6840, "loss": 0.8417587876319885, "lr": 1.824561403508772e-05, "epoch": 0.09153384997806698, "percentage": 4.58, "elapsed_time": "0:27:43", "remaining_time": "9:38:04"} +{"current_steps": 314, "total_steps": 6840, "loss": 0.8431564569473267, "lr": 1.8304093567251464e-05, "epoch": 0.09182629039333236, "percentage": 4.59, "elapsed_time": "0:27:48", "remaining_time": "9:38:00"} +{"current_steps": 315, "total_steps": 6840, "loss": 0.7724050283432007, "lr": 1.8362573099415205e-05, "epoch": 0.09211873080859775, "percentage": 4.61, "elapsed_time": "0:27:53", "remaining_time": "9:37:52"} +{"current_steps": 316, "total_steps": 6840, "loss": 0.6687352657318115, "lr": 1.8421052631578947e-05, "epoch": 0.09241117122386314, "percentage": 
4.62, "elapsed_time": "0:27:59", "remaining_time": "9:38:01"} +{"current_steps": 317, "total_steps": 6840, "loss": 0.7465454339981079, "lr": 1.847953216374269e-05, "epoch": 0.09270361163912853, "percentage": 4.63, "elapsed_time": "0:28:04", "remaining_time": "9:37:51"} +{"current_steps": 318, "total_steps": 6840, "loss": 0.6944088935852051, "lr": 1.8538011695906434e-05, "epoch": 0.09299605205439392, "percentage": 4.65, "elapsed_time": "0:28:09", "remaining_time": "9:37:29"} +{"current_steps": 319, "total_steps": 6840, "loss": 0.6692598462104797, "lr": 1.8596491228070176e-05, "epoch": 0.09328849246965931, "percentage": 4.66, "elapsed_time": "0:28:14", "remaining_time": "9:37:24"} +{"current_steps": 320, "total_steps": 6840, "loss": 0.7287981510162354, "lr": 1.8654970760233918e-05, "epoch": 0.0935809328849247, "percentage": 4.68, "elapsed_time": "0:28:19", "remaining_time": "9:37:12"} +{"current_steps": 321, "total_steps": 6840, "loss": 0.704437255859375, "lr": 1.871345029239766e-05, "epoch": 0.09387337330019009, "percentage": 4.69, "elapsed_time": "0:28:25", "remaining_time": "9:37:09"} +{"current_steps": 322, "total_steps": 6840, "loss": 0.6425009965896606, "lr": 1.8771929824561405e-05, "epoch": 0.09416581371545547, "percentage": 4.71, "elapsed_time": "0:28:30", "remaining_time": "9:36:55"} +{"current_steps": 323, "total_steps": 6840, "loss": 0.765799880027771, "lr": 1.8830409356725147e-05, "epoch": 0.09445825413072087, "percentage": 4.72, "elapsed_time": "0:28:35", "remaining_time": "9:36:52"} +{"current_steps": 324, "total_steps": 6840, "loss": 0.9151520133018494, "lr": 1.888888888888889e-05, "epoch": 0.09475069454598625, "percentage": 4.74, "elapsed_time": "0:28:41", "remaining_time": "9:36:55"} +{"current_steps": 325, "total_steps": 6840, "loss": 0.8753486275672913, "lr": 1.894736842105263e-05, "epoch": 0.09504313496125165, "percentage": 4.75, "elapsed_time": "0:28:46", "remaining_time": "9:36:50"} +{"current_steps": 326, "total_steps": 6840, "loss": 
0.7652826309204102, "lr": 1.9005847953216376e-05, "epoch": 0.09533557537651703, "percentage": 4.77, "elapsed_time": "0:28:50", "remaining_time": "9:36:20"} +{"current_steps": 327, "total_steps": 6840, "loss": 0.7309015393257141, "lr": 1.9064327485380118e-05, "epoch": 0.09562801579178243, "percentage": 4.78, "elapsed_time": "0:28:55", "remaining_time": "9:36:11"} +{"current_steps": 328, "total_steps": 6840, "loss": 0.7656553983688354, "lr": 1.912280701754386e-05, "epoch": 0.09592045620704781, "percentage": 4.8, "elapsed_time": "0:28:59", "remaining_time": "9:35:43"} +{"current_steps": 329, "total_steps": 6840, "loss": 0.7400631904602051, "lr": 1.9181286549707602e-05, "epoch": 0.09621289662231321, "percentage": 4.81, "elapsed_time": "0:29:04", "remaining_time": "9:35:23"} +{"current_steps": 330, "total_steps": 6840, "loss": 0.6812465190887451, "lr": 1.9239766081871347e-05, "epoch": 0.09650533703757859, "percentage": 4.82, "elapsed_time": "0:29:08", "remaining_time": "9:34:54"} +{"current_steps": 331, "total_steps": 6840, "loss": 0.6820628046989441, "lr": 1.929824561403509e-05, "epoch": 0.09679777745284399, "percentage": 4.84, "elapsed_time": "0:29:12", "remaining_time": "9:34:31"} +{"current_steps": 332, "total_steps": 6840, "loss": 0.7437758445739746, "lr": 1.935672514619883e-05, "epoch": 0.09709021786810937, "percentage": 4.85, "elapsed_time": "0:29:17", "remaining_time": "9:34:14"} +{"current_steps": 333, "total_steps": 6840, "loss": 0.8011504411697388, "lr": 1.9415204678362573e-05, "epoch": 0.09738265828337477, "percentage": 4.87, "elapsed_time": "0:29:22", "remaining_time": "9:33:53"} +{"current_steps": 334, "total_steps": 6840, "loss": 0.7437810301780701, "lr": 1.9473684210526318e-05, "epoch": 0.09767509869864015, "percentage": 4.88, "elapsed_time": "0:29:27", "remaining_time": "9:33:41"} +{"current_steps": 335, "total_steps": 6840, "loss": 0.7419568300247192, "lr": 1.953216374269006e-05, "epoch": 0.09796753911390554, "percentage": 4.9, "elapsed_time": 
"0:29:33", "remaining_time": "9:33:59"} +{"current_steps": 336, "total_steps": 6840, "loss": 0.7805042266845703, "lr": 1.9590643274853802e-05, "epoch": 0.09825997952917093, "percentage": 4.91, "elapsed_time": "0:29:39", "remaining_time": "9:33:59"} +{"current_steps": 337, "total_steps": 6840, "loss": 0.6952530145645142, "lr": 1.9649122807017544e-05, "epoch": 0.09855241994443632, "percentage": 4.93, "elapsed_time": "0:29:45", "remaining_time": "9:34:05"} +{"current_steps": 338, "total_steps": 6840, "loss": 0.7669289112091064, "lr": 1.970760233918129e-05, "epoch": 0.0988448603597017, "percentage": 4.94, "elapsed_time": "0:29:49", "remaining_time": "9:33:47"} +{"current_steps": 339, "total_steps": 6840, "loss": 0.8033919930458069, "lr": 1.976608187134503e-05, "epoch": 0.0991373007749671, "percentage": 4.96, "elapsed_time": "0:29:53", "remaining_time": "9:33:20"} +{"current_steps": 340, "total_steps": 6840, "loss": 0.6523177623748779, "lr": 1.9824561403508773e-05, "epoch": 0.09942974119023248, "percentage": 4.97, "elapsed_time": "0:29:59", "remaining_time": "9:33:13"} +{"current_steps": 341, "total_steps": 6840, "loss": 0.7221896648406982, "lr": 1.9883040935672515e-05, "epoch": 0.09972218160549788, "percentage": 4.99, "elapsed_time": "0:30:03", "remaining_time": "9:33:00"} +{"current_steps": 342, "total_steps": 6840, "loss": 0.6054700016975403, "lr": 1.994152046783626e-05, "epoch": 0.10001462202076326, "percentage": 5.0, "elapsed_time": "0:30:09", "remaining_time": "9:33:07"} +{"current_steps": 343, "total_steps": 6840, "loss": 0.8368290662765503, "lr": 2e-05, "epoch": 0.10030706243602866, "percentage": 5.01, "elapsed_time": "0:30:15", "remaining_time": "9:33:01"} +{"current_steps": 344, "total_steps": 6840, "loss": 0.9075677990913391, "lr": 1.99999988312804e-05, "epoch": 0.10059950285129404, "percentage": 5.03, "elapsed_time": "0:30:19", "remaining_time": "9:32:44"} +{"current_steps": 345, "total_steps": 6840, "loss": 0.7202495336532593, "lr": 1.999999532512188e-05, 
"epoch": 0.10089194326655944, "percentage": 5.04, "elapsed_time": "0:30:25", "remaining_time": "9:32:54"} +{"current_steps": 346, "total_steps": 6840, "loss": 0.7373536229133606, "lr": 1.9999989481525245e-05, "epoch": 0.10118438368182482, "percentage": 5.06, "elapsed_time": "0:30:31", "remaining_time": "9:32:54"} +{"current_steps": 347, "total_steps": 6840, "loss": 0.7292035222053528, "lr": 1.9999981300491873e-05, "epoch": 0.10147682409709022, "percentage": 5.07, "elapsed_time": "0:30:36", "remaining_time": "9:32:48"} +{"current_steps": 348, "total_steps": 6840, "loss": 0.8970675468444824, "lr": 1.9999970782023673e-05, "epoch": 0.10176926451235561, "percentage": 5.09, "elapsed_time": "0:30:43", "remaining_time": "9:33:04"} +{"current_steps": 349, "total_steps": 6840, "loss": 0.7909846305847168, "lr": 1.9999957926123104e-05, "epoch": 0.102061704927621, "percentage": 5.1, "elapsed_time": "0:30:48", "remaining_time": "9:33:01"} +{"current_steps": 350, "total_steps": 6840, "loss": 0.7784097790718079, "lr": 1.999994273279317e-05, "epoch": 0.1023541453428864, "percentage": 5.12, "elapsed_time": "0:30:54", "remaining_time": "9:33:06"} +{"current_steps": 351, "total_steps": 6840, "loss": 0.7129874229431152, "lr": 1.9999925202037422e-05, "epoch": 0.10264658575815178, "percentage": 5.13, "elapsed_time": "0:30:59", "remaining_time": "9:33:00"} +{"current_steps": 352, "total_steps": 6840, "loss": 0.7185519337654114, "lr": 1.999990533385996e-05, "epoch": 0.10293902617341717, "percentage": 5.15, "elapsed_time": "0:31:05", "remaining_time": "9:33:06"} +{"current_steps": 353, "total_steps": 6840, "loss": 0.812228798866272, "lr": 1.9999883128265428e-05, "epoch": 0.10323146658868255, "percentage": 5.16, "elapsed_time": "0:31:10", "remaining_time": "9:32:59"} +{"current_steps": 354, "total_steps": 6840, "loss": 0.7187886238098145, "lr": 1.999985858525901e-05, "epoch": 0.10352390700394795, "percentage": 5.18, "elapsed_time": "0:31:16", "remaining_time": "9:32:55"} +{"current_steps": 
355, "total_steps": 6840, "loss": 0.6618789434432983, "lr": 1.9999831704846452e-05, "epoch": 0.10381634741921333, "percentage": 5.19, "elapsed_time": "0:31:21", "remaining_time": "9:32:57"} +{"current_steps": 356, "total_steps": 6840, "loss": 0.9226458072662354, "lr": 1.999980248703403e-05, "epoch": 0.10410878783447873, "percentage": 5.2, "elapsed_time": "0:31:26", "remaining_time": "9:32:33"} +{"current_steps": 357, "total_steps": 6840, "loss": 0.7326352596282959, "lr": 1.9999770931828578e-05, "epoch": 0.10440122824974411, "percentage": 5.22, "elapsed_time": "0:31:31", "remaining_time": "9:32:23"} +{"current_steps": 358, "total_steps": 6840, "loss": 0.719240128993988, "lr": 1.9999737039237472e-05, "epoch": 0.10469366866500951, "percentage": 5.23, "elapsed_time": "0:31:35", "remaining_time": "9:32:05"} +{"current_steps": 359, "total_steps": 6840, "loss": 0.7380290031433105, "lr": 1.999970080926863e-05, "epoch": 0.10498610908027489, "percentage": 5.25, "elapsed_time": "0:31:41", "remaining_time": "9:32:10"} +{"current_steps": 360, "total_steps": 6840, "loss": 0.736219048500061, "lr": 1.9999662241930523e-05, "epoch": 0.10527854949554029, "percentage": 5.26, "elapsed_time": "0:31:46", "remaining_time": "9:31:57"} +{"current_steps": 361, "total_steps": 6840, "loss": 0.8160735368728638, "lr": 1.999962133723217e-05, "epoch": 0.10557098991080567, "percentage": 5.28, "elapsed_time": "0:31:51", "remaining_time": "9:31:49"} +{"current_steps": 362, "total_steps": 6840, "loss": 0.6679781675338745, "lr": 1.9999578095183126e-05, "epoch": 0.10586343032607107, "percentage": 5.29, "elapsed_time": "0:31:56", "remaining_time": "9:31:31"} +{"current_steps": 363, "total_steps": 6840, "loss": 0.7670542001724243, "lr": 1.9999532515793498e-05, "epoch": 0.10615587074133645, "percentage": 5.31, "elapsed_time": "0:32:00", "remaining_time": "9:31:15"} +{"current_steps": 364, "total_steps": 6840, "loss": 0.6395057439804077, "lr": 1.9999484599073945e-05, "epoch": 0.10644831115660185, 
"percentage": 5.32, "elapsed_time": "0:32:05", "remaining_time": "9:30:57"} +{"current_steps": 365, "total_steps": 6840, "loss": 0.7226368188858032, "lr": 1.9999434345035666e-05, "epoch": 0.10674075157186723, "percentage": 5.34, "elapsed_time": "0:32:10", "remaining_time": "9:30:43"} +{"current_steps": 366, "total_steps": 6840, "loss": 0.6236128211021423, "lr": 1.9999381753690403e-05, "epoch": 0.10703319198713263, "percentage": 5.35, "elapsed_time": "0:32:15", "remaining_time": "9:30:32"} +{"current_steps": 367, "total_steps": 6840, "loss": 0.5937299132347107, "lr": 1.9999326825050455e-05, "epoch": 0.10732563240239801, "percentage": 5.37, "elapsed_time": "0:32:21", "remaining_time": "9:30:40"} +{"current_steps": 368, "total_steps": 6840, "loss": 0.6014857292175293, "lr": 1.999926955912866e-05, "epoch": 0.1076180728176634, "percentage": 5.38, "elapsed_time": "0:32:26", "remaining_time": "9:30:28"} +{"current_steps": 369, "total_steps": 6840, "loss": 0.5898704528808594, "lr": 1.9999209955938394e-05, "epoch": 0.10791051323292879, "percentage": 5.39, "elapsed_time": "0:32:32", "remaining_time": "9:30:34"} +{"current_steps": 370, "total_steps": 6840, "loss": 0.6879048943519592, "lr": 1.9999148015493602e-05, "epoch": 0.10820295364819418, "percentage": 5.41, "elapsed_time": "0:32:37", "remaining_time": "9:30:25"} +{"current_steps": 371, "total_steps": 6840, "loss": 0.781298041343689, "lr": 1.999908373780876e-05, "epoch": 0.10849539406345957, "percentage": 5.42, "elapsed_time": "0:32:42", "remaining_time": "9:30:19"} +{"current_steps": 372, "total_steps": 6840, "loss": 0.6997531652450562, "lr": 1.9999017122898886e-05, "epoch": 0.10878783447872496, "percentage": 5.44, "elapsed_time": "0:32:47", "remaining_time": "9:30:00"} +{"current_steps": 373, "total_steps": 6840, "loss": 0.6979694366455078, "lr": 1.9998948170779556e-05, "epoch": 0.10908027489399034, "percentage": 5.45, "elapsed_time": "0:32:52", "remaining_time": "9:30:02"} +{"current_steps": 374, "total_steps": 6840, 
"loss": 0.8069214820861816, "lr": 1.999887688146689e-05, "epoch": 0.10937271530925574, "percentage": 5.47, "elapsed_time": "0:32:57", "remaining_time": "9:29:41"} +{"current_steps": 375, "total_steps": 6840, "loss": 0.875137448310852, "lr": 1.9998803254977538e-05, "epoch": 0.10966515572452112, "percentage": 5.48, "elapsed_time": "0:33:02", "remaining_time": "9:29:45"} +{"current_steps": 376, "total_steps": 6840, "loss": 0.8267173767089844, "lr": 1.9998727291328725e-05, "epoch": 0.10995759613978652, "percentage": 5.5, "elapsed_time": "0:33:08", "remaining_time": "9:29:47"} +{"current_steps": 377, "total_steps": 6840, "loss": 0.7589337825775146, "lr": 1.99986489905382e-05, "epoch": 0.1102500365550519, "percentage": 5.51, "elapsed_time": "0:33:14", "remaining_time": "9:29:47"} +{"current_steps": 378, "total_steps": 6840, "loss": 0.7479992508888245, "lr": 1.999856835262427e-05, "epoch": 0.1105424769703173, "percentage": 5.53, "elapsed_time": "0:33:18", "remaining_time": "9:29:31"} +{"current_steps": 379, "total_steps": 6840, "loss": 0.7315084934234619, "lr": 1.999848537760577e-05, "epoch": 0.11083491738558268, "percentage": 5.54, "elapsed_time": "0:33:24", "remaining_time": "9:29:26"} +{"current_steps": 380, "total_steps": 6840, "loss": 0.6256793737411499, "lr": 1.9998400065502113e-05, "epoch": 0.11112735780084808, "percentage": 5.56, "elapsed_time": "0:33:28", "remaining_time": "9:29:05"} +{"current_steps": 381, "total_steps": 6840, "loss": 0.7521710395812988, "lr": 1.999831241633323e-05, "epoch": 0.11141979821611346, "percentage": 5.57, "elapsed_time": "0:33:32", "remaining_time": "9:28:39"} +{"current_steps": 382, "total_steps": 6840, "loss": 0.6824651956558228, "lr": 1.999822243011961e-05, "epoch": 0.11171223863137886, "percentage": 5.58, "elapsed_time": "0:33:38", "remaining_time": "9:28:51"} +{"current_steps": 383, "total_steps": 6840, "loss": 0.7254977226257324, "lr": 1.9998130106882286e-05, "epoch": 0.11200467904664424, "percentage": 5.6, "elapsed_time": 
"0:33:42", "remaining_time": "9:28:25"} +{"current_steps": 384, "total_steps": 6840, "loss": 0.8263741731643677, "lr": 1.999803544664284e-05, "epoch": 0.11229711946190964, "percentage": 5.61, "elapsed_time": "0:33:48", "remaining_time": "9:28:21"} +{"current_steps": 385, "total_steps": 6840, "loss": 0.6829507350921631, "lr": 1.9997938449423397e-05, "epoch": 0.11258955987717502, "percentage": 5.63, "elapsed_time": "0:33:52", "remaining_time": "9:28:04"} +{"current_steps": 386, "total_steps": 6840, "loss": 0.7452428340911865, "lr": 1.9997839115246632e-05, "epoch": 0.11288200029244042, "percentage": 5.64, "elapsed_time": "0:33:58", "remaining_time": "9:28:02"} +{"current_steps": 387, "total_steps": 6840, "loss": 0.7900702953338623, "lr": 1.999773744413576e-05, "epoch": 0.11317444070770581, "percentage": 5.66, "elapsed_time": "0:34:03", "remaining_time": "9:27:54"} +{"current_steps": 388, "total_steps": 6840, "loss": 0.6215303540229797, "lr": 1.9997633436114547e-05, "epoch": 0.1134668811229712, "percentage": 5.67, "elapsed_time": "0:34:10", "remaining_time": "9:28:12"} +{"current_steps": 389, "total_steps": 6840, "loss": 0.798041820526123, "lr": 1.999752709120731e-05, "epoch": 0.11375932153823659, "percentage": 5.69, "elapsed_time": "0:34:15", "remaining_time": "9:28:10"} +{"current_steps": 390, "total_steps": 6840, "loss": 0.6033064126968384, "lr": 1.9997418409438893e-05, "epoch": 0.11405176195350197, "percentage": 5.7, "elapsed_time": "0:34:21", "remaining_time": "9:28:17"} +{"current_steps": 391, "total_steps": 6840, "loss": 0.6358453631401062, "lr": 1.9997307390834712e-05, "epoch": 0.11434420236876737, "percentage": 5.72, "elapsed_time": "0:34:28", "remaining_time": "9:28:30"} +{"current_steps": 392, "total_steps": 6840, "loss": 0.6544308662414551, "lr": 1.999719403542071e-05, "epoch": 0.11463664278403275, "percentage": 5.73, "elapsed_time": "0:34:33", "remaining_time": "9:28:20"} +{"current_steps": 393, "total_steps": 6840, "loss": 0.73077392578125, "lr": 
1.9997078343223393e-05, "epoch": 0.11492908319929815, "percentage": 5.75, "elapsed_time": "0:34:38", "remaining_time": "9:28:19"} +{"current_steps": 394, "total_steps": 6840, "loss": 0.5874192118644714, "lr": 1.9996960314269792e-05, "epoch": 0.11522152361456353, "percentage": 5.76, "elapsed_time": "0:34:43", "remaining_time": "9:28:05"} +{"current_steps": 395, "total_steps": 6840, "loss": 0.8242438435554504, "lr": 1.9996839948587503e-05, "epoch": 0.11551396402982893, "percentage": 5.77, "elapsed_time": "0:34:49", "remaining_time": "9:28:17"} +{"current_steps": 396, "total_steps": 6840, "loss": 0.9496668577194214, "lr": 1.9996717246204655e-05, "epoch": 0.11580640444509431, "percentage": 5.79, "elapsed_time": "0:34:55", "remaining_time": "9:28:13"} +{"current_steps": 397, "total_steps": 6840, "loss": 0.6940287351608276, "lr": 1.9996592207149933e-05, "epoch": 0.1160988448603597, "percentage": 5.8, "elapsed_time": "0:35:00", "remaining_time": "9:28:07"} +{"current_steps": 398, "total_steps": 6840, "loss": 0.7403827905654907, "lr": 1.999646483145256e-05, "epoch": 0.11639128527562509, "percentage": 5.82, "elapsed_time": "0:35:04", "remaining_time": "9:27:47"} +{"current_steps": 399, "total_steps": 6840, "loss": 0.7493172287940979, "lr": 1.9996335119142315e-05, "epoch": 0.11668372569089049, "percentage": 5.83, "elapsed_time": "0:35:09", "remaining_time": "9:27:39"} +{"current_steps": 400, "total_steps": 6840, "loss": 0.6048015356063843, "lr": 1.9996203070249516e-05, "epoch": 0.11697616610615587, "percentage": 5.85, "elapsed_time": "0:35:14", "remaining_time": "9:27:29"} +{"current_steps": 401, "total_steps": 6840, "loss": 0.7220426797866821, "lr": 1.9996068684805025e-05, "epoch": 0.11726860652142126, "percentage": 5.86, "elapsed_time": "0:35:24", "remaining_time": "9:28:40"} +{"current_steps": 402, "total_steps": 6840, "loss": 0.7294620275497437, "lr": 1.9995931962840255e-05, "epoch": 0.11756104693668665, "percentage": 5.88, "elapsed_time": "0:35:31", "remaining_time": 
"9:28:58"} +{"current_steps": 403, "total_steps": 6840, "loss": 0.7075647115707397, "lr": 1.999579290438717e-05, "epoch": 0.11785348735195204, "percentage": 5.89, "elapsed_time": "0:35:37", "remaining_time": "9:28:56"} +{"current_steps": 404, "total_steps": 6840, "loss": 0.7396657466888428, "lr": 1.9995651509478264e-05, "epoch": 0.11814592776721743, "percentage": 5.91, "elapsed_time": "0:35:42", "remaining_time": "9:28:52"} +{"current_steps": 405, "total_steps": 6840, "loss": 0.8240506649017334, "lr": 1.999550777814659e-05, "epoch": 0.11843836818248282, "percentage": 5.92, "elapsed_time": "0:35:47", "remaining_time": "9:28:43"} +{"current_steps": 406, "total_steps": 6840, "loss": 0.7518147826194763, "lr": 1.9995361710425752e-05, "epoch": 0.1187308085977482, "percentage": 5.94, "elapsed_time": "0:35:53", "remaining_time": "9:28:43"} +{"current_steps": 407, "total_steps": 6840, "loss": 0.6998933553695679, "lr": 1.9995213306349886e-05, "epoch": 0.1190232490130136, "percentage": 5.95, "elapsed_time": "0:35:59", "remaining_time": "9:28:47"} +{"current_steps": 408, "total_steps": 6840, "loss": 0.659205973148346, "lr": 1.999506256595368e-05, "epoch": 0.11931568942827898, "percentage": 5.96, "elapsed_time": "0:36:04", "remaining_time": "9:28:41"} +{"current_steps": 409, "total_steps": 6840, "loss": 0.7826964259147644, "lr": 1.9994909489272372e-05, "epoch": 0.11960812984354438, "percentage": 5.98, "elapsed_time": "0:36:09", "remaining_time": "9:28:33"} +{"current_steps": 410, "total_steps": 6840, "loss": 0.770768404006958, "lr": 1.999475407634174e-05, "epoch": 0.11990057025880976, "percentage": 5.99, "elapsed_time": "0:36:13", "remaining_time": "9:28:12"} +{"current_steps": 411, "total_steps": 6840, "loss": 0.7390692234039307, "lr": 1.9994596327198113e-05, "epoch": 0.12019301067407516, "percentage": 6.01, "elapsed_time": "0:36:19", "remaining_time": "9:28:11"} +{"current_steps": 412, "total_steps": 6840, "loss": 0.7092628479003906, "lr": 1.999443624187836e-05, "epoch": 
0.12048545108934054, "percentage": 6.02, "elapsed_time": "0:36:24", "remaining_time": "9:28:09"} +{"current_steps": 413, "total_steps": 6840, "loss": 0.5252765417098999, "lr": 1.9994273820419903e-05, "epoch": 0.12077789150460594, "percentage": 6.04, "elapsed_time": "0:36:30", "remaining_time": "9:28:05"} +{"current_steps": 414, "total_steps": 6840, "loss": 0.8131704330444336, "lr": 1.9994109062860707e-05, "epoch": 0.12107033191987132, "percentage": 6.05, "elapsed_time": "0:36:34", "remaining_time": "9:27:43"} +{"current_steps": 415, "total_steps": 6840, "loss": 0.8257562518119812, "lr": 1.9993941969239284e-05, "epoch": 0.12136277233513672, "percentage": 6.07, "elapsed_time": "0:36:39", "remaining_time": "9:27:32"} +{"current_steps": 416, "total_steps": 6840, "loss": 0.7163048982620239, "lr": 1.999377253959469e-05, "epoch": 0.1216552127504021, "percentage": 6.08, "elapsed_time": "0:36:45", "remaining_time": "9:27:40"} +{"current_steps": 417, "total_steps": 6840, "loss": 0.7216504812240601, "lr": 1.9993600773966528e-05, "epoch": 0.1219476531656675, "percentage": 6.1, "elapsed_time": "0:36:51", "remaining_time": "9:27:43"} +{"current_steps": 418, "total_steps": 6840, "loss": 0.7831340432167053, "lr": 1.9993426672394945e-05, "epoch": 0.12224009358093288, "percentage": 6.11, "elapsed_time": "0:36:56", "remaining_time": "9:27:41"} +{"current_steps": 419, "total_steps": 6840, "loss": 0.7675709128379822, "lr": 1.9993250234920638e-05, "epoch": 0.12253253399619828, "percentage": 6.13, "elapsed_time": "0:37:01", "remaining_time": "9:27:20"} +{"current_steps": 420, "total_steps": 6840, "loss": 0.8085238337516785, "lr": 1.999307146158485e-05, "epoch": 0.12282497441146366, "percentage": 6.14, "elapsed_time": "0:37:07", "remaining_time": "9:27:29"} +{"current_steps": 421, "total_steps": 6840, "loss": 0.735150933265686, "lr": 1.9992890352429368e-05, "epoch": 0.12311741482672905, "percentage": 6.15, "elapsed_time": "0:37:12", "remaining_time": "9:27:15"} +{"current_steps": 422, 
"total_steps": 6840, "loss": 0.612186074256897, "lr": 1.9992706907496523e-05, "epoch": 0.12340985524199444, "percentage": 6.17, "elapsed_time": "0:37:18", "remaining_time": "9:27:24"} +{"current_steps": 423, "total_steps": 6840, "loss": 0.6636590957641602, "lr": 1.9992521126829194e-05, "epoch": 0.12370229565725983, "percentage": 6.18, "elapsed_time": "0:37:24", "remaining_time": "9:27:23"} +{"current_steps": 424, "total_steps": 6840, "loss": 0.6814526319503784, "lr": 1.9992333010470806e-05, "epoch": 0.12399473607252522, "percentage": 6.2, "elapsed_time": "0:37:28", "remaining_time": "9:27:03"} +{"current_steps": 425, "total_steps": 6840, "loss": 0.6940894722938538, "lr": 1.9992142558465335e-05, "epoch": 0.12428717648779061, "percentage": 6.21, "elapsed_time": "0:37:34", "remaining_time": "9:27:07"} +{"current_steps": 426, "total_steps": 6840, "loss": 0.7485121488571167, "lr": 1.9991949770857294e-05, "epoch": 0.12457961690305601, "percentage": 6.23, "elapsed_time": "0:37:39", "remaining_time": "9:26:53"} +{"current_steps": 427, "total_steps": 6840, "loss": 0.5315885543823242, "lr": 1.9991754647691744e-05, "epoch": 0.12487205731832139, "percentage": 6.24, "elapsed_time": "0:37:44", "remaining_time": "9:26:55"} +{"current_steps": 428, "total_steps": 6840, "loss": 0.7416529655456543, "lr": 1.9991557189014297e-05, "epoch": 0.1251644977335868, "percentage": 6.26, "elapsed_time": "0:37:50", "remaining_time": "9:26:57"} +{"current_steps": 429, "total_steps": 6840, "loss": 0.7937026023864746, "lr": 1.9991357394871106e-05, "epoch": 0.12545693814885217, "percentage": 6.27, "elapsed_time": "0:37:55", "remaining_time": "9:26:45"} +{"current_steps": 430, "total_steps": 6840, "loss": 0.7009662389755249, "lr": 1.9991155265308872e-05, "epoch": 0.12574937856411755, "percentage": 6.29, "elapsed_time": "0:38:00", "remaining_time": "9:26:36"} +{"current_steps": 431, "total_steps": 6840, "loss": 0.6577681303024292, "lr": 1.999095080037484e-05, "epoch": 0.12604181897938296, "percentage": 
6.3, "elapsed_time": "0:38:04", "remaining_time": "9:26:13"} +{"current_steps": 432, "total_steps": 6840, "loss": 0.7372399568557739, "lr": 1.9990744000116808e-05, "epoch": 0.12633425939464835, "percentage": 6.32, "elapsed_time": "0:38:09", "remaining_time": "9:26:04"} +{"current_steps": 433, "total_steps": 6840, "loss": 0.5959814190864563, "lr": 1.999053486458311e-05, "epoch": 0.12662669980991373, "percentage": 6.33, "elapsed_time": "0:38:14", "remaining_time": "9:25:50"} +{"current_steps": 434, "total_steps": 6840, "loss": 0.6684107780456543, "lr": 1.999032339382263e-05, "epoch": 0.1269191402251791, "percentage": 6.35, "elapsed_time": "0:38:20", "remaining_time": "9:26:00"} +{"current_steps": 435, "total_steps": 6840, "loss": 0.8837687373161316, "lr": 1.99901095878848e-05, "epoch": 0.12721158064044452, "percentage": 6.36, "elapsed_time": "0:38:24", "remaining_time": "9:25:28"} +{"current_steps": 436, "total_steps": 6840, "loss": 0.7128579616546631, "lr": 1.9989893446819594e-05, "epoch": 0.1275040210557099, "percentage": 6.37, "elapsed_time": "0:38:28", "remaining_time": "9:25:10"} +{"current_steps": 437, "total_steps": 6840, "loss": 0.6634687185287476, "lr": 1.9989674970677533e-05, "epoch": 0.1277964614709753, "percentage": 6.39, "elapsed_time": "0:38:34", "remaining_time": "9:25:17"} +{"current_steps": 438, "total_steps": 6840, "loss": 0.7866299152374268, "lr": 1.998945415950969e-05, "epoch": 0.12808890188624067, "percentage": 6.4, "elapsed_time": "0:38:38", "remaining_time": "9:24:54"} +{"current_steps": 439, "total_steps": 6840, "loss": 0.8104820251464844, "lr": 1.998923101336767e-05, "epoch": 0.12838134230150608, "percentage": 6.42, "elapsed_time": "0:38:44", "remaining_time": "9:24:49"} +{"current_steps": 440, "total_steps": 6840, "loss": 0.6643097400665283, "lr": 1.9989005532303637e-05, "epoch": 0.12867378271677146, "percentage": 6.43, "elapsed_time": "0:38:49", "remaining_time": "9:24:47"} +{"current_steps": 441, "total_steps": 6840, "loss": 
0.7663843631744385, "lr": 1.9988777716370293e-05, "epoch": 0.12896622313203684, "percentage": 6.45, "elapsed_time": "0:38:55", "remaining_time": "9:24:55"} +{"current_steps": 442, "total_steps": 6840, "loss": 0.8831629753112793, "lr": 1.9988547565620896e-05, "epoch": 0.12925866354730223, "percentage": 6.46, "elapsed_time": "0:39:02", "remaining_time": "9:25:13"} +{"current_steps": 443, "total_steps": 6840, "loss": 0.6889798045158386, "lr": 1.9988315080109233e-05, "epoch": 0.12955110396256764, "percentage": 6.48, "elapsed_time": "0:39:09", "remaining_time": "9:25:20"} +{"current_steps": 444, "total_steps": 6840, "loss": 0.8173589706420898, "lr": 1.9988080259889652e-05, "epoch": 0.12984354437783302, "percentage": 6.49, "elapsed_time": "0:39:14", "remaining_time": "9:25:19"} +{"current_steps": 445, "total_steps": 6840, "loss": 0.7444369196891785, "lr": 1.998784310501704e-05, "epoch": 0.1301359847930984, "percentage": 6.51, "elapsed_time": "0:39:19", "remaining_time": "9:25:11"} +{"current_steps": 446, "total_steps": 6840, "loss": 0.6728573441505432, "lr": 1.998760361554682e-05, "epoch": 0.13042842520836379, "percentage": 6.52, "elapsed_time": "0:39:26", "remaining_time": "9:25:21"} +{"current_steps": 447, "total_steps": 6840, "loss": 0.6398168802261353, "lr": 1.998736179153499e-05, "epoch": 0.1307208656236292, "percentage": 6.54, "elapsed_time": "0:39:30", "remaining_time": "9:25:03"} +{"current_steps": 448, "total_steps": 6840, "loss": 0.7367146015167236, "lr": 1.9987117633038063e-05, "epoch": 0.13101330603889458, "percentage": 6.55, "elapsed_time": "0:39:36", "remaining_time": "9:25:00"} +{"current_steps": 449, "total_steps": 6840, "loss": 0.7072159051895142, "lr": 1.998687114011311e-05, "epoch": 0.13130574645415996, "percentage": 6.56, "elapsed_time": "0:39:40", "remaining_time": "9:24:45"} +{"current_steps": 450, "total_steps": 6840, "loss": 0.7899993062019348, "lr": 1.998662231281775e-05, "epoch": 0.13159818686942534, "percentage": 6.58, "elapsed_time": 
"0:39:46", "remaining_time": "9:24:41"} +{"current_steps": 451, "total_steps": 6840, "loss": 0.7668592929840088, "lr": 1.9986371151210146e-05, "epoch": 0.13189062728469075, "percentage": 6.59, "elapsed_time": "0:39:51", "remaining_time": "9:24:36"} +{"current_steps": 452, "total_steps": 6840, "loss": 0.7222825288772583, "lr": 1.9986117655349003e-05, "epoch": 0.13218306769995614, "percentage": 6.61, "elapsed_time": "0:39:55", "remaining_time": "9:24:10"} +{"current_steps": 453, "total_steps": 6840, "loss": 0.7301540374755859, "lr": 1.9985861825293577e-05, "epoch": 0.13247550811522152, "percentage": 6.62, "elapsed_time": "0:40:00", "remaining_time": "9:24:07"} +{"current_steps": 454, "total_steps": 6840, "loss": 0.6517907381057739, "lr": 1.998560366110366e-05, "epoch": 0.1327679485304869, "percentage": 6.64, "elapsed_time": "0:40:07", "remaining_time": "9:24:28"} +{"current_steps": 455, "total_steps": 6840, "loss": 0.6889342069625854, "lr": 1.99853431628396e-05, "epoch": 0.1330603889457523, "percentage": 6.65, "elapsed_time": "0:40:11", "remaining_time": "9:24:02"} +{"current_steps": 456, "total_steps": 6840, "loss": 0.6804303526878357, "lr": 1.9985080330562293e-05, "epoch": 0.1333528293610177, "percentage": 6.67, "elapsed_time": "0:40:15", "remaining_time": "9:23:37"} +{"current_steps": 457, "total_steps": 6840, "loss": 0.7699184417724609, "lr": 1.9984815164333163e-05, "epoch": 0.13364526977628308, "percentage": 6.68, "elapsed_time": "0:40:21", "remaining_time": "9:23:42"} +{"current_steps": 458, "total_steps": 6840, "loss": 0.7470533847808838, "lr": 1.99845476642142e-05, "epoch": 0.13393771019154846, "percentage": 6.7, "elapsed_time": "0:40:26", "remaining_time": "9:23:30"} +{"current_steps": 459, "total_steps": 6840, "loss": 0.6689419746398926, "lr": 1.9984277830267927e-05, "epoch": 0.13423015060681387, "percentage": 6.71, "elapsed_time": "0:40:32", "remaining_time": "9:23:33"} +{"current_steps": 460, "total_steps": 6840, "loss": 0.6395387649536133, "lr": 
1.998400566255742e-05, "epoch": 0.13452259102207925, "percentage": 6.73, "elapsed_time": "0:40:39", "remaining_time": "9:23:50"} +{"current_steps": 461, "total_steps": 6840, "loss": 0.7785208225250244, "lr": 1.9983731161146288e-05, "epoch": 0.13481503143734463, "percentage": 6.74, "elapsed_time": "0:40:43", "remaining_time": "9:23:36"} +{"current_steps": 462, "total_steps": 6840, "loss": 0.6864018440246582, "lr": 1.9983454326098703e-05, "epoch": 0.13510747185261002, "percentage": 6.75, "elapsed_time": "0:40:49", "remaining_time": "9:23:39"} +{"current_steps": 463, "total_steps": 6840, "loss": 0.7201317548751831, "lr": 1.9983175157479366e-05, "epoch": 0.13539991226787543, "percentage": 6.77, "elapsed_time": "0:40:56", "remaining_time": "9:23:56"} +{"current_steps": 464, "total_steps": 6840, "loss": 0.7128555774688721, "lr": 1.9982893655353534e-05, "epoch": 0.1356923526831408, "percentage": 6.78, "elapsed_time": "0:41:02", "remaining_time": "9:23:55"} +{"current_steps": 465, "total_steps": 6840, "loss": 0.7252457141876221, "lr": 1.998260981978701e-05, "epoch": 0.1359847930984062, "percentage": 6.8, "elapsed_time": "0:41:09", "remaining_time": "9:24:12"} +{"current_steps": 466, "total_steps": 6840, "loss": 0.7453348636627197, "lr": 1.9982323650846137e-05, "epoch": 0.1362772335136716, "percentage": 6.81, "elapsed_time": "0:41:14", "remaining_time": "9:24:07"} +{"current_steps": 467, "total_steps": 6840, "loss": 0.6643078923225403, "lr": 1.9982035148597804e-05, "epoch": 0.13656967392893699, "percentage": 6.83, "elapsed_time": "0:41:19", "remaining_time": "9:23:56"} +{"current_steps": 468, "total_steps": 6840, "loss": 0.7249360084533691, "lr": 1.9981744313109445e-05, "epoch": 0.13686211434420237, "percentage": 6.84, "elapsed_time": "0:41:25", "remaining_time": "9:23:57"} +{"current_steps": 469, "total_steps": 6840, "loss": 0.8179303407669067, "lr": 1.9981451144449042e-05, "epoch": 0.13715455475946775, "percentage": 6.86, "elapsed_time": "0:41:30", "remaining_time": 
"9:23:56"} +{"current_steps": 470, "total_steps": 6840, "loss": 0.6763637661933899, "lr": 1.9981155642685125e-05, "epoch": 0.13744699517473316, "percentage": 6.87, "elapsed_time": "0:41:37", "remaining_time": "9:24:06"} +{"current_steps": 471, "total_steps": 6840, "loss": 0.6684300303459167, "lr": 1.998085780788676e-05, "epoch": 0.13773943558999854, "percentage": 6.89, "elapsed_time": "0:41:42", "remaining_time": "9:23:54"} +{"current_steps": 472, "total_steps": 6840, "loss": 0.7251675128936768, "lr": 1.9980557640123566e-05, "epoch": 0.13803187600526393, "percentage": 6.9, "elapsed_time": "0:41:46", "remaining_time": "9:23:36"} +{"current_steps": 473, "total_steps": 6840, "loss": 0.7146456241607666, "lr": 1.998025513946571e-05, "epoch": 0.1383243164205293, "percentage": 6.92, "elapsed_time": "0:41:51", "remaining_time": "9:23:32"} +{"current_steps": 474, "total_steps": 6840, "loss": 0.7067978382110596, "lr": 1.9979950305983895e-05, "epoch": 0.13861675683579472, "percentage": 6.93, "elapsed_time": "0:41:56", "remaining_time": "9:23:22"} +{"current_steps": 475, "total_steps": 6840, "loss": 0.7017637491226196, "lr": 1.9979643139749373e-05, "epoch": 0.1389091972510601, "percentage": 6.94, "elapsed_time": "0:42:02", "remaining_time": "9:23:22"} +{"current_steps": 476, "total_steps": 6840, "loss": 0.7511367201805115, "lr": 1.9979333640833947e-05, "epoch": 0.13920163766632548, "percentage": 6.96, "elapsed_time": "0:42:07", "remaining_time": "9:23:12"} +{"current_steps": 477, "total_steps": 6840, "loss": 0.8129127025604248, "lr": 1.997902180930996e-05, "epoch": 0.13949407808159087, "percentage": 6.97, "elapsed_time": "0:42:12", "remaining_time": "9:23:03"} +{"current_steps": 478, "total_steps": 6840, "loss": 0.7760868072509766, "lr": 1.9978707645250293e-05, "epoch": 0.13978651849685628, "percentage": 6.99, "elapsed_time": "0:42:16", "remaining_time": "9:22:45"} +{"current_steps": 479, "total_steps": 6840, "loss": 0.5190733671188354, "lr": 1.9978391148728388e-05, "epoch": 
0.14007895891212166, "percentage": 7.0, "elapsed_time": "0:42:22", "remaining_time": "9:22:50"} +{"current_steps": 480, "total_steps": 6840, "loss": 0.759798526763916, "lr": 1.9978072319818222e-05, "epoch": 0.14037139932738704, "percentage": 7.02, "elapsed_time": "0:42:28", "remaining_time": "9:22:51"} +{"current_steps": 481, "total_steps": 6840, "loss": 0.5750235319137573, "lr": 1.997775115859432e-05, "epoch": 0.14066383974265242, "percentage": 7.03, "elapsed_time": "0:42:34", "remaining_time": "9:22:49"} +{"current_steps": 482, "total_steps": 6840, "loss": 0.6837687492370605, "lr": 1.9977427665131748e-05, "epoch": 0.14095628015791783, "percentage": 7.05, "elapsed_time": "0:42:38", "remaining_time": "9:22:32"} +{"current_steps": 483, "total_steps": 6840, "loss": 0.8774302005767822, "lr": 1.9977101839506123e-05, "epoch": 0.14124872057318322, "percentage": 7.06, "elapsed_time": "0:42:43", "remaining_time": "9:22:23"} +{"current_steps": 484, "total_steps": 6840, "loss": 0.6447024345397949, "lr": 1.9976773681793605e-05, "epoch": 0.1415411609884486, "percentage": 7.08, "elapsed_time": "0:42:48", "remaining_time": "9:22:14"} +{"current_steps": 485, "total_steps": 6840, "loss": 0.6212965250015259, "lr": 1.99764431920709e-05, "epoch": 0.14183360140371398, "percentage": 7.09, "elapsed_time": "0:42:53", "remaining_time": "9:22:07"} +{"current_steps": 486, "total_steps": 6840, "loss": 0.7606823444366455, "lr": 1.9976110370415257e-05, "epoch": 0.1421260418189794, "percentage": 7.11, "elapsed_time": "0:42:58", "remaining_time": "9:21:47"} +{"current_steps": 487, "total_steps": 6840, "loss": 0.792106032371521, "lr": 1.9975775216904468e-05, "epoch": 0.14241848223424478, "percentage": 7.12, "elapsed_time": "0:43:03", "remaining_time": "9:21:40"} +{"current_steps": 488, "total_steps": 6840, "loss": 0.828373372554779, "lr": 1.997543773161688e-05, "epoch": 0.14271092264951016, "percentage": 7.13, "elapsed_time": "0:43:08", "remaining_time": "9:21:38"} +{"current_steps": 489, 
"total_steps": 6840, "loss": 0.7148743867874146, "lr": 1.997509791463137e-05, "epoch": 0.14300336306477554, "percentage": 7.15, "elapsed_time": "0:43:14", "remaining_time": "9:21:35"} +{"current_steps": 490, "total_steps": 6840, "loss": 0.6566554307937622, "lr": 1.9974755766027372e-05, "epoch": 0.14329580348004095, "percentage": 7.16, "elapsed_time": "0:43:18", "remaining_time": "9:21:15"} +{"current_steps": 491, "total_steps": 6840, "loss": 0.7833706140518188, "lr": 1.9974411285884865e-05, "epoch": 0.14358824389530633, "percentage": 7.18, "elapsed_time": "0:43:24", "remaining_time": "9:21:24"} +{"current_steps": 492, "total_steps": 6840, "loss": 0.7661226987838745, "lr": 1.997406447428436e-05, "epoch": 0.14388068431057172, "percentage": 7.19, "elapsed_time": "0:43:29", "remaining_time": "9:21:02"} +{"current_steps": 493, "total_steps": 6840, "loss": 0.5403884649276733, "lr": 1.9973715331306935e-05, "epoch": 0.1441731247258371, "percentage": 7.21, "elapsed_time": "0:43:33", "remaining_time": "9:20:44"} +{"current_steps": 494, "total_steps": 6840, "loss": 0.7744722366333008, "lr": 1.9973363857034183e-05, "epoch": 0.1444655651411025, "percentage": 7.22, "elapsed_time": "0:43:37", "remaining_time": "9:20:23"} +{"current_steps": 495, "total_steps": 6840, "loss": 0.9036808013916016, "lr": 1.9973010051548274e-05, "epoch": 0.1447580055563679, "percentage": 7.24, "elapsed_time": "0:43:41", "remaining_time": "9:20:04"} +{"current_steps": 496, "total_steps": 6840, "loss": 0.6952388286590576, "lr": 1.9972653914931902e-05, "epoch": 0.14505044597163327, "percentage": 7.25, "elapsed_time": "0:43:47", "remaining_time": "9:20:01"} +{"current_steps": 497, "total_steps": 6840, "loss": 0.7818677425384521, "lr": 1.9972295447268312e-05, "epoch": 0.14534288638689866, "percentage": 7.27, "elapsed_time": "0:43:52", "remaining_time": "9:19:58"} +{"current_steps": 498, "total_steps": 6840, "loss": 0.8197327256202698, "lr": 1.9971934648641294e-05, "epoch": 0.14563532680216407, "percentage": 
7.28, "elapsed_time": "0:43:57", "remaining_time": "9:19:53"} +{"current_steps": 499, "total_steps": 6840, "loss": 0.5898807644844055, "lr": 1.997157151913518e-05, "epoch": 0.14592776721742945, "percentage": 7.3, "elapsed_time": "0:44:03", "remaining_time": "9:19:53"} +{"current_steps": 500, "total_steps": 6840, "loss": 0.7980005741119385, "lr": 1.9971206058834857e-05, "epoch": 0.14622020763269483, "percentage": 7.31, "elapsed_time": "0:44:10", "remaining_time": "9:20:02"} +{"current_steps": 501, "total_steps": 6840, "loss": 0.7161837816238403, "lr": 1.997083826782574e-05, "epoch": 0.14651264804796021, "percentage": 7.32, "elapsed_time": "0:44:19", "remaining_time": "9:20:51"} +{"current_steps": 502, "total_steps": 6840, "loss": 0.7657293081283569, "lr": 1.99704681461938e-05, "epoch": 0.14680508846322562, "percentage": 7.34, "elapsed_time": "0:44:23", "remaining_time": "9:20:26"} +{"current_steps": 503, "total_steps": 6840, "loss": 0.6638028621673584, "lr": 1.9970095694025553e-05, "epoch": 0.147097528878491, "percentage": 7.35, "elapsed_time": "0:44:27", "remaining_time": "9:20:07"} +{"current_steps": 504, "total_steps": 6840, "loss": 0.8759262561798096, "lr": 1.996972091140806e-05, "epoch": 0.1473899692937564, "percentage": 7.37, "elapsed_time": "0:44:32", "remaining_time": "9:19:56"} +{"current_steps": 505, "total_steps": 6840, "loss": 0.6686065196990967, "lr": 1.9969343798428916e-05, "epoch": 0.1476824097090218, "percentage": 7.38, "elapsed_time": "0:44:38", "remaining_time": "9:19:55"} +{"current_steps": 506, "total_steps": 6840, "loss": 0.7900313138961792, "lr": 1.9968964355176276e-05, "epoch": 0.14797485012428718, "percentage": 7.4, "elapsed_time": "0:44:43", "remaining_time": "9:19:55"} +{"current_steps": 507, "total_steps": 6840, "loss": 0.699286937713623, "lr": 1.996858258173883e-05, "epoch": 0.14826729053955257, "percentage": 7.41, "elapsed_time": "0:44:49", "remaining_time": "9:19:50"} +{"current_steps": 508, "total_steps": 6840, "loss": 
0.6613560914993286, "lr": 1.9968198478205817e-05, "epoch": 0.14855973095481795, "percentage": 7.43, "elapsed_time": "0:44:54", "remaining_time": "9:19:44"} +{"current_steps": 509, "total_steps": 6840, "loss": 0.8586459755897522, "lr": 1.9967812044667014e-05, "epoch": 0.14885217137008336, "percentage": 7.44, "elapsed_time": "0:44:59", "remaining_time": "9:19:36"} +{"current_steps": 510, "total_steps": 6840, "loss": 0.6620850563049316, "lr": 1.9967423281212754e-05, "epoch": 0.14914461178534874, "percentage": 7.46, "elapsed_time": "0:45:04", "remaining_time": "9:19:30"} +{"current_steps": 511, "total_steps": 6840, "loss": 0.7991048097610474, "lr": 1.9967032187933905e-05, "epoch": 0.14943705220061412, "percentage": 7.47, "elapsed_time": "0:45:09", "remaining_time": "9:19:14"} +{"current_steps": 512, "total_steps": 6840, "loss": 0.7301167845726013, "lr": 1.9966638764921882e-05, "epoch": 0.1497294926158795, "percentage": 7.49, "elapsed_time": "0:45:14", "remaining_time": "9:19:11"} +{"current_steps": 513, "total_steps": 6840, "loss": 0.6470698118209839, "lr": 1.9966243012268645e-05, "epoch": 0.15002193303114492, "percentage": 7.5, "elapsed_time": "0:45:20", "remaining_time": "9:19:10"} +{"current_steps": 514, "total_steps": 6840, "loss": 0.5766996145248413, "lr": 1.99658449300667e-05, "epoch": 0.1503143734464103, "percentage": 7.51, "elapsed_time": "0:45:25", "remaining_time": "9:19:03"} +{"current_steps": 515, "total_steps": 6840, "loss": 0.6365845203399658, "lr": 1.9965444518409098e-05, "epoch": 0.15060681386167568, "percentage": 7.53, "elapsed_time": "0:45:30", "remaining_time": "9:18:58"} +{"current_steps": 516, "total_steps": 6840, "loss": 0.6945745944976807, "lr": 1.9965041777389426e-05, "epoch": 0.15089925427694106, "percentage": 7.54, "elapsed_time": "0:45:35", "remaining_time": "9:18:45"} +{"current_steps": 517, "total_steps": 6840, "loss": 0.802032470703125, "lr": 1.996463670710183e-05, "epoch": 0.15119169469220647, "percentage": 7.56, "elapsed_time": 
"0:45:40", "remaining_time": "9:18:33"} +{"current_steps": 518, "total_steps": 6840, "loss": 0.7429964542388916, "lr": 1.996422930764099e-05, "epoch": 0.15148413510747186, "percentage": 7.57, "elapsed_time": "0:45:45", "remaining_time": "9:18:32"} +{"current_steps": 519, "total_steps": 6840, "loss": 0.6462180614471436, "lr": 1.9963819579102134e-05, "epoch": 0.15177657552273724, "percentage": 7.59, "elapsed_time": "0:45:49", "remaining_time": "9:18:10"} +{"current_steps": 520, "total_steps": 6840, "loss": 0.888412594795227, "lr": 1.996340752158103e-05, "epoch": 0.15206901593800262, "percentage": 7.6, "elapsed_time": "0:45:55", "remaining_time": "9:18:09"} +{"current_steps": 521, "total_steps": 6840, "loss": 0.6734700798988342, "lr": 1.9962993135173996e-05, "epoch": 0.15236145635326803, "percentage": 7.62, "elapsed_time": "0:45:59", "remaining_time": "9:17:48"} +{"current_steps": 522, "total_steps": 6840, "loss": 0.6951336860656738, "lr": 1.9962576419977894e-05, "epoch": 0.15265389676853341, "percentage": 7.63, "elapsed_time": "0:46:04", "remaining_time": "9:17:42"} +{"current_steps": 523, "total_steps": 6840, "loss": 0.7130852341651917, "lr": 1.9962157376090126e-05, "epoch": 0.1529463371837988, "percentage": 7.65, "elapsed_time": "0:46:10", "remaining_time": "9:17:38"} +{"current_steps": 524, "total_steps": 6840, "loss": 0.8322055339813232, "lr": 1.9961736003608646e-05, "epoch": 0.15323877759906418, "percentage": 7.66, "elapsed_time": "0:46:14", "remaining_time": "9:17:23"} +{"current_steps": 525, "total_steps": 6840, "loss": 0.7031791806221008, "lr": 1.996131230263194e-05, "epoch": 0.1535312180143296, "percentage": 7.68, "elapsed_time": "0:46:20", "remaining_time": "9:17:20"} +{"current_steps": 526, "total_steps": 6840, "loss": 0.8268769979476929, "lr": 1.9960886273259052e-05, "epoch": 0.15382365842959497, "percentage": 7.69, "elapsed_time": "0:46:24", "remaining_time": "9:17:10"} +{"current_steps": 527, "total_steps": 6840, "loss": 0.6843237280845642, "lr": 
1.9960457915589557e-05, "epoch": 0.15411609884486036, "percentage": 7.7, "elapsed_time": "0:46:31", "remaining_time": "9:17:22"} +{"current_steps": 528, "total_steps": 6840, "loss": 0.8267906904220581, "lr": 1.9960027229723585e-05, "epoch": 0.15440853926012574, "percentage": 7.72, "elapsed_time": "0:46:36", "remaining_time": "9:17:07"} +{"current_steps": 529, "total_steps": 6840, "loss": 0.8259629011154175, "lr": 1.9959594215761807e-05, "epoch": 0.15470097967539115, "percentage": 7.73, "elapsed_time": "0:46:40", "remaining_time": "9:16:46"} +{"current_steps": 530, "total_steps": 6840, "loss": 0.654765248298645, "lr": 1.9959158873805435e-05, "epoch": 0.15499342009065653, "percentage": 7.75, "elapsed_time": "0:46:45", "remaining_time": "9:16:41"} +{"current_steps": 531, "total_steps": 6840, "loss": 0.7841149568557739, "lr": 1.9958721203956233e-05, "epoch": 0.1552858605059219, "percentage": 7.76, "elapsed_time": "0:46:50", "remaining_time": "9:16:31"} +{"current_steps": 532, "total_steps": 6840, "loss": 0.7364583015441895, "lr": 1.9958281206316497e-05, "epoch": 0.1555783009211873, "percentage": 7.78, "elapsed_time": "0:46:55", "remaining_time": "9:16:22"} +{"current_steps": 533, "total_steps": 6840, "loss": 0.7985796928405762, "lr": 1.9957838880989076e-05, "epoch": 0.1558707413364527, "percentage": 7.79, "elapsed_time": "0:47:01", "remaining_time": "9:16:22"} +{"current_steps": 534, "total_steps": 6840, "loss": 0.8432350754737854, "lr": 1.9957394228077363e-05, "epoch": 0.1561631817517181, "percentage": 7.81, "elapsed_time": "0:47:07", "remaining_time": "9:16:24"} +{"current_steps": 535, "total_steps": 6840, "loss": 0.713615894317627, "lr": 1.995694724768529e-05, "epoch": 0.15645562216698347, "percentage": 7.82, "elapsed_time": "0:47:12", "remaining_time": "9:16:23"} +{"current_steps": 536, "total_steps": 6840, "loss": 0.6472936868667603, "lr": 1.9956497939917336e-05, "epoch": 0.15674806258224885, "percentage": 7.84, "elapsed_time": "0:47:18", "remaining_time": 
"9:16:22"} +{"current_steps": 537, "total_steps": 6840, "loss": 0.7963594198226929, "lr": 1.9956046304878528e-05, "epoch": 0.15704050299751426, "percentage": 7.85, "elapsed_time": "0:47:23", "remaining_time": "9:16:18"} +{"current_steps": 538, "total_steps": 6840, "loss": 0.8043302893638611, "lr": 1.9955592342674427e-05, "epoch": 0.15733294341277965, "percentage": 7.87, "elapsed_time": "0:47:29", "remaining_time": "9:16:21"} +{"current_steps": 539, "total_steps": 6840, "loss": 0.6277294754981995, "lr": 1.995513605341115e-05, "epoch": 0.15762538382804503, "percentage": 7.88, "elapsed_time": "0:47:34", "remaining_time": "9:16:10"} +{"current_steps": 540, "total_steps": 6840, "loss": 0.569086492061615, "lr": 1.9954677437195345e-05, "epoch": 0.1579178242433104, "percentage": 7.89, "elapsed_time": "0:47:39", "remaining_time": "9:16:03"} +{"current_steps": 541, "total_steps": 6840, "loss": 0.7694308757781982, "lr": 1.9954216494134217e-05, "epoch": 0.15821026465857582, "percentage": 7.91, "elapsed_time": "0:47:45", "remaining_time": "9:16:05"} +{"current_steps": 542, "total_steps": 6840, "loss": 0.7782721519470215, "lr": 1.9953753224335504e-05, "epoch": 0.1585027050738412, "percentage": 7.92, "elapsed_time": "0:47:52", "remaining_time": "9:16:15"} +{"current_steps": 543, "total_steps": 6840, "loss": 0.6231539249420166, "lr": 1.9953287627907498e-05, "epoch": 0.1587951454891066, "percentage": 7.94, "elapsed_time": "0:47:58", "remaining_time": "9:16:22"} +{"current_steps": 544, "total_steps": 6840, "loss": 0.6431725025177002, "lr": 1.9952819704959022e-05, "epoch": 0.159087585904372, "percentage": 7.95, "elapsed_time": "0:48:03", "remaining_time": "9:16:14"} +{"current_steps": 545, "total_steps": 6840, "loss": 0.7062366008758545, "lr": 1.9952349455599455e-05, "epoch": 0.15938002631963738, "percentage": 7.97, "elapsed_time": "0:48:10", "remaining_time": "9:16:21"} +{"current_steps": 546, "total_steps": 6840, "loss": 0.5376520156860352, "lr": 1.9951876879938716e-05, "epoch": 
0.15967246673490276, "percentage": 7.98, "elapsed_time": "0:48:15", "remaining_time": "9:16:19"} +{"current_steps": 547, "total_steps": 6840, "loss": 0.7693386077880859, "lr": 1.9951401978087267e-05, "epoch": 0.15996490715016815, "percentage": 8.0, "elapsed_time": "0:48:20", "remaining_time": "9:16:08"} +{"current_steps": 548, "total_steps": 6840, "loss": 0.6735765337944031, "lr": 1.9950924750156107e-05, "epoch": 0.16025734756543356, "percentage": 8.01, "elapsed_time": "0:48:25", "remaining_time": "9:15:59"} +{"current_steps": 549, "total_steps": 6840, "loss": 0.5333552360534668, "lr": 1.995044519625679e-05, "epoch": 0.16054978798069894, "percentage": 8.03, "elapsed_time": "0:48:31", "remaining_time": "9:16:05"} +{"current_steps": 550, "total_steps": 6840, "loss": 0.6694493293762207, "lr": 1.994996331650141e-05, "epoch": 0.16084222839596432, "percentage": 8.04, "elapsed_time": "0:48:37", "remaining_time": "9:16:06"} +{"current_steps": 551, "total_steps": 6840, "loss": 0.6056857109069824, "lr": 1.9949479111002596e-05, "epoch": 0.1611346688112297, "percentage": 8.06, "elapsed_time": "0:48:42", "remaining_time": "9:15:55"} +{"current_steps": 552, "total_steps": 6840, "loss": 0.7174896001815796, "lr": 1.9948992579873538e-05, "epoch": 0.1614271092264951, "percentage": 8.07, "elapsed_time": "0:48:46", "remaining_time": "9:15:36"} +{"current_steps": 553, "total_steps": 6840, "loss": 0.9150595664978027, "lr": 1.9948503723227954e-05, "epoch": 0.1617195496417605, "percentage": 8.08, "elapsed_time": "0:48:51", "remaining_time": "9:15:29"} +{"current_steps": 554, "total_steps": 6840, "loss": 0.7418098449707031, "lr": 1.9948012541180116e-05, "epoch": 0.16201199005702588, "percentage": 8.1, "elapsed_time": "0:48:55", "remaining_time": "9:15:10"} +{"current_steps": 555, "total_steps": 6840, "loss": 0.6937648057937622, "lr": 1.9947519033844828e-05, "epoch": 0.16230443047229126, "percentage": 8.11, "elapsed_time": "0:49:00", "remaining_time": "9:14:55"} +{"current_steps": 556, 
"total_steps": 6840, "loss": 0.628747820854187, "lr": 1.9947023201337448e-05, "epoch": 0.16259687088755667, "percentage": 8.13, "elapsed_time": "0:49:05", "remaining_time": "9:14:45"} +{"current_steps": 557, "total_steps": 6840, "loss": 0.6252326965332031, "lr": 1.9946525043773875e-05, "epoch": 0.16288931130282205, "percentage": 8.14, "elapsed_time": "0:49:10", "remaining_time": "9:14:37"} +{"current_steps": 558, "total_steps": 6840, "loss": 0.6243278980255127, "lr": 1.9946024561270547e-05, "epoch": 0.16318175171808744, "percentage": 8.16, "elapsed_time": "0:49:16", "remaining_time": "9:14:40"} +{"current_steps": 559, "total_steps": 6840, "loss": 0.7613602876663208, "lr": 1.994552175394445e-05, "epoch": 0.16347419213335282, "percentage": 8.17, "elapsed_time": "0:49:21", "remaining_time": "9:14:38"} +{"current_steps": 560, "total_steps": 6840, "loss": 0.7680152654647827, "lr": 1.9945016621913115e-05, "epoch": 0.16376663254861823, "percentage": 8.19, "elapsed_time": "0:49:27", "remaining_time": "9:14:32"} +{"current_steps": 561, "total_steps": 6840, "loss": 0.6926383972167969, "lr": 1.9944509165294614e-05, "epoch": 0.1640590729638836, "percentage": 8.2, "elapsed_time": "0:49:31", "remaining_time": "9:14:19"} +{"current_steps": 562, "total_steps": 6840, "loss": 0.6822172403335571, "lr": 1.9943999384207556e-05, "epoch": 0.164351513379149, "percentage": 8.22, "elapsed_time": "0:49:36", "remaining_time": "9:14:12"} +{"current_steps": 563, "total_steps": 6840, "loss": 0.6533722281455994, "lr": 1.99434872787711e-05, "epoch": 0.16464395379441438, "percentage": 8.23, "elapsed_time": "0:49:43", "remaining_time": "9:14:21"} +{"current_steps": 564, "total_steps": 6840, "loss": 0.6754113435745239, "lr": 1.9942972849104955e-05, "epoch": 0.1649363942096798, "percentage": 8.25, "elapsed_time": "0:49:48", "remaining_time": "9:14:15"} +{"current_steps": 565, "total_steps": 6840, "loss": 0.5585163235664368, "lr": 1.9942456095329357e-05, "epoch": 0.16522883462494517, "percentage": 
8.26, "elapsed_time": "0:49:53", "remaining_time": "9:14:01"} +{"current_steps": 566, "total_steps": 6840, "loss": 0.6268453598022461, "lr": 1.99419370175651e-05, "epoch": 0.16552127504021055, "percentage": 8.27, "elapsed_time": "0:49:58", "remaining_time": "9:13:59"} +{"current_steps": 567, "total_steps": 6840, "loss": 0.6508245468139648, "lr": 1.994141561593351e-05, "epoch": 0.16581371545547594, "percentage": 8.29, "elapsed_time": "0:50:04", "remaining_time": "9:13:56"} +{"current_steps": 568, "total_steps": 6840, "loss": 0.7337379455566406, "lr": 1.9940891890556468e-05, "epoch": 0.16610615587074135, "percentage": 8.3, "elapsed_time": "0:50:07", "remaining_time": "9:13:32"} +{"current_steps": 569, "total_steps": 6840, "loss": 0.7888853549957275, "lr": 1.9940365841556385e-05, "epoch": 0.16639859628600673, "percentage": 8.32, "elapsed_time": "0:50:12", "remaining_time": "9:13:25"} +{"current_steps": 570, "total_steps": 6840, "loss": 0.777199923992157, "lr": 1.993983746905623e-05, "epoch": 0.1666910367012721, "percentage": 8.33, "elapsed_time": "0:50:17", "remaining_time": "9:13:12"} +{"current_steps": 571, "total_steps": 6840, "loss": 0.761531412601471, "lr": 1.9939306773179498e-05, "epoch": 0.1669834771165375, "percentage": 8.35, "elapsed_time": "0:50:23", "remaining_time": "9:13:10"} +{"current_steps": 572, "total_steps": 6840, "loss": 0.7060664296150208, "lr": 1.993877375405024e-05, "epoch": 0.1672759175318029, "percentage": 8.36, "elapsed_time": "0:50:26", "remaining_time": "9:12:49"} +{"current_steps": 573, "total_steps": 6840, "loss": 0.6797431707382202, "lr": 1.9938238411793045e-05, "epoch": 0.16756835794706829, "percentage": 8.38, "elapsed_time": "0:50:32", "remaining_time": "9:12:42"} +{"current_steps": 574, "total_steps": 6840, "loss": 0.7202910780906677, "lr": 1.9937700746533048e-05, "epoch": 0.16786079836233367, "percentage": 8.39, "elapsed_time": "0:50:37", "remaining_time": "9:12:34"} +{"current_steps": 575, "total_steps": 6840, "loss": 
0.7241546511650085, "lr": 1.9937160758395923e-05, "epoch": 0.16815323877759905, "percentage": 8.41, "elapsed_time": "0:50:42", "remaining_time": "9:12:27"} +{"current_steps": 576, "total_steps": 6840, "loss": 0.7055338621139526, "lr": 1.993661844750789e-05, "epoch": 0.16844567919286446, "percentage": 8.42, "elapsed_time": "0:50:47", "remaining_time": "9:12:19"} +{"current_steps": 577, "total_steps": 6840, "loss": 0.6973986625671387, "lr": 1.993607381399571e-05, "epoch": 0.16873811960812984, "percentage": 8.44, "elapsed_time": "0:50:51", "remaining_time": "9:12:06"} +{"current_steps": 578, "total_steps": 6840, "loss": 0.693436861038208, "lr": 1.993552685798669e-05, "epoch": 0.16903056002339523, "percentage": 8.45, "elapsed_time": "0:50:57", "remaining_time": "9:11:59"} +{"current_steps": 579, "total_steps": 6840, "loss": 0.6687765121459961, "lr": 1.9934977579608676e-05, "epoch": 0.1693230004386606, "percentage": 8.46, "elapsed_time": "0:51:01", "remaining_time": "9:11:46"} +{"current_steps": 580, "total_steps": 6840, "loss": 0.7776578068733215, "lr": 1.9934425978990057e-05, "epoch": 0.16961544085392602, "percentage": 8.48, "elapsed_time": "0:51:06", "remaining_time": "9:11:35"} +{"current_steps": 581, "total_steps": 6840, "loss": 0.6914045810699463, "lr": 1.9933872056259768e-05, "epoch": 0.1699078812691914, "percentage": 8.49, "elapsed_time": "0:51:11", "remaining_time": "9:11:28"} +{"current_steps": 582, "total_steps": 6840, "loss": 0.8005306720733643, "lr": 1.9933315811547283e-05, "epoch": 0.17020032168445678, "percentage": 8.51, "elapsed_time": "0:51:17", "remaining_time": "9:11:28"} +{"current_steps": 583, "total_steps": 6840, "loss": 0.6936507225036621, "lr": 1.9932757244982625e-05, "epoch": 0.1704927620997222, "percentage": 8.52, "elapsed_time": "0:51:22", "remaining_time": "9:11:23"} +{"current_steps": 584, "total_steps": 6840, "loss": 0.6915504932403564, "lr": 1.9932196356696353e-05, "epoch": 0.17078520251498758, "percentage": 8.54, "elapsed_time": 
"0:51:28", "remaining_time": "9:11:25"} +{"current_steps": 585, "total_steps": 6840, "loss": 0.7583723664283752, "lr": 1.9931633146819573e-05, "epoch": 0.17107764293025296, "percentage": 8.55, "elapsed_time": "0:51:34", "remaining_time": "9:11:30"} +{"current_steps": 586, "total_steps": 6840, "loss": 0.7097266912460327, "lr": 1.9931067615483927e-05, "epoch": 0.17137008334551834, "percentage": 8.57, "elapsed_time": "0:51:40", "remaining_time": "9:11:25"} +{"current_steps": 587, "total_steps": 6840, "loss": 0.7586667537689209, "lr": 1.9930499762821608e-05, "epoch": 0.17166252376078375, "percentage": 8.58, "elapsed_time": "0:51:46", "remaining_time": "9:11:36"} +{"current_steps": 588, "total_steps": 6840, "loss": 0.7043411731719971, "lr": 1.9929929588965352e-05, "epoch": 0.17195496417604914, "percentage": 8.6, "elapsed_time": "0:51:51", "remaining_time": "9:11:28"} +{"current_steps": 589, "total_steps": 6840, "loss": 0.8502261638641357, "lr": 1.9929357094048425e-05, "epoch": 0.17224740459131452, "percentage": 8.61, "elapsed_time": "0:51:57", "remaining_time": "9:11:29"} +{"current_steps": 590, "total_steps": 6840, "loss": 0.7196993827819824, "lr": 1.992878227820465e-05, "epoch": 0.1725398450065799, "percentage": 8.63, "elapsed_time": "0:52:03", "remaining_time": "9:11:27"} +{"current_steps": 591, "total_steps": 6840, "loss": 0.6783720850944519, "lr": 1.9928205141568388e-05, "epoch": 0.1728322854218453, "percentage": 8.64, "elapsed_time": "0:52:08", "remaining_time": "9:11:22"} +{"current_steps": 592, "total_steps": 6840, "loss": 0.7128307819366455, "lr": 1.9927625684274534e-05, "epoch": 0.1731247258371107, "percentage": 8.65, "elapsed_time": "0:52:14", "remaining_time": "9:11:17"} +{"current_steps": 593, "total_steps": 6840, "loss": 0.7289423942565918, "lr": 1.9927043906458538e-05, "epoch": 0.17341716625237608, "percentage": 8.67, "elapsed_time": "0:52:18", "remaining_time": "9:11:03"} +{"current_steps": 594, "total_steps": 6840, "loss": 0.6306120157241821, "lr": 
1.992645980825639e-05, "epoch": 0.17370960666764146, "percentage": 8.68, "elapsed_time": "0:52:22", "remaining_time": "9:10:42"} +{"current_steps": 595, "total_steps": 6840, "loss": 0.7910655736923218, "lr": 1.9925873389804614e-05, "epoch": 0.17400204708290687, "percentage": 8.7, "elapsed_time": "0:52:28", "remaining_time": "9:10:42"} +{"current_steps": 596, "total_steps": 6840, "loss": 0.6075282096862793, "lr": 1.9925284651240282e-05, "epoch": 0.17429448749817225, "percentage": 8.71, "elapsed_time": "0:52:34", "remaining_time": "9:10:50"} +{"current_steps": 597, "total_steps": 6840, "loss": 0.6270443201065063, "lr": 1.992469359270101e-05, "epoch": 0.17458692791343763, "percentage": 8.73, "elapsed_time": "0:52:40", "remaining_time": "9:10:55"} +{"current_steps": 598, "total_steps": 6840, "loss": 0.6487830877304077, "lr": 1.9924100214324955e-05, "epoch": 0.17487936832870302, "percentage": 8.74, "elapsed_time": "0:52:46", "remaining_time": "9:10:48"} +{"current_steps": 599, "total_steps": 6840, "loss": 0.5986843705177307, "lr": 1.9923504516250814e-05, "epoch": 0.17517180874396843, "percentage": 8.76, "elapsed_time": "0:52:49", "remaining_time": "9:10:27"} +{"current_steps": 600, "total_steps": 6840, "loss": 0.7734183073043823, "lr": 1.992290649861783e-05, "epoch": 0.1754642491592338, "percentage": 8.77, "elapsed_time": "0:52:55", "remaining_time": "9:10:23"} +{"current_steps": 601, "total_steps": 6840, "loss": 0.5784964561462402, "lr": 1.9922306161565782e-05, "epoch": 0.1757566895744992, "percentage": 8.79, "elapsed_time": "0:53:04", "remaining_time": "9:11:00"} +{"current_steps": 602, "total_steps": 6840, "loss": 0.8034321069717407, "lr": 1.9921703505234995e-05, "epoch": 0.17604912998976457, "percentage": 8.8, "elapsed_time": "0:53:08", "remaining_time": "9:10:42"} +{"current_steps": 603, "total_steps": 6840, "loss": 0.8153722882270813, "lr": 1.992109852976634e-05, "epoch": 0.17634157040502998, "percentage": 8.82, "elapsed_time": "0:53:13", "remaining_time": 
"9:10:33"} +{"current_steps": 604, "total_steps": 6840, "loss": 0.7293002605438232, "lr": 1.992049123530123e-05, "epoch": 0.17663401082029537, "percentage": 8.83, "elapsed_time": "0:53:18", "remaining_time": "9:10:18"} +{"current_steps": 605, "total_steps": 6840, "loss": 0.7108439207077026, "lr": 1.9919881621981606e-05, "epoch": 0.17692645123556075, "percentage": 8.85, "elapsed_time": "0:53:24", "remaining_time": "9:10:19"} +{"current_steps": 606, "total_steps": 6840, "loss": 0.7581946849822998, "lr": 1.9919269689949968e-05, "epoch": 0.17721889165082613, "percentage": 8.86, "elapsed_time": "0:53:27", "remaining_time": "9:09:59"} +{"current_steps": 607, "total_steps": 6840, "loss": 0.6821258068084717, "lr": 1.991865543934935e-05, "epoch": 0.17751133206609154, "percentage": 8.87, "elapsed_time": "0:53:32", "remaining_time": "9:09:47"} +{"current_steps": 608, "total_steps": 6840, "loss": 0.7116109728813171, "lr": 1.991803887032333e-05, "epoch": 0.17780377248135693, "percentage": 8.89, "elapsed_time": "0:53:37", "remaining_time": "9:09:41"} +{"current_steps": 609, "total_steps": 6840, "loss": 0.6680186986923218, "lr": 1.9917419983016025e-05, "epoch": 0.1780962128966223, "percentage": 8.9, "elapsed_time": "0:53:42", "remaining_time": "9:09:33"} +{"current_steps": 610, "total_steps": 6840, "loss": 0.6763704419136047, "lr": 1.99167987775721e-05, "epoch": 0.1783886533118877, "percentage": 8.92, "elapsed_time": "0:53:48", "remaining_time": "9:09:33"} +{"current_steps": 611, "total_steps": 6840, "loss": 0.756158709526062, "lr": 1.9916175254136755e-05, "epoch": 0.1786810937271531, "percentage": 8.93, "elapsed_time": "0:53:53", "remaining_time": "9:09:28"} +{"current_steps": 612, "total_steps": 6840, "loss": 0.600861132144928, "lr": 1.9915549412855734e-05, "epoch": 0.17897353414241848, "percentage": 8.95, "elapsed_time": "0:53:59", "remaining_time": "9:09:26"} +{"current_steps": 613, "total_steps": 6840, "loss": 0.6927047967910767, "lr": 1.991492125387533e-05, "epoch": 
0.17926597455768387, "percentage": 8.96, "elapsed_time": "0:54:03", "remaining_time": "9:09:11"} +{"current_steps": 614, "total_steps": 6840, "loss": 0.6908516883850098, "lr": 1.9914290777342362e-05, "epoch": 0.17955841497294925, "percentage": 8.98, "elapsed_time": "0:54:07", "remaining_time": "9:08:51"} +{"current_steps": 615, "total_steps": 6840, "loss": 0.7968926429748535, "lr": 1.9913657983404206e-05, "epoch": 0.17985085538821466, "percentage": 8.99, "elapsed_time": "0:54:11", "remaining_time": "9:08:30"} +{"current_steps": 616, "total_steps": 6840, "loss": 0.6035164594650269, "lr": 1.9913022872208773e-05, "epoch": 0.18014329580348004, "percentage": 9.01, "elapsed_time": "0:54:16", "remaining_time": "9:08:23"} +{"current_steps": 617, "total_steps": 6840, "loss": 0.6733090877532959, "lr": 1.9912385443904518e-05, "epoch": 0.18043573621874542, "percentage": 9.02, "elapsed_time": "0:54:21", "remaining_time": "9:08:13"} +{"current_steps": 618, "total_steps": 6840, "loss": 0.6968391537666321, "lr": 1.9911745698640426e-05, "epoch": 0.1807281766340108, "percentage": 9.04, "elapsed_time": "0:54:26", "remaining_time": "9:08:08"} +{"current_steps": 619, "total_steps": 6840, "loss": 0.7126309871673584, "lr": 1.991110363656605e-05, "epoch": 0.18102061704927622, "percentage": 9.05, "elapsed_time": "0:54:32", "remaining_time": "9:08:08"} +{"current_steps": 620, "total_steps": 6840, "loss": 0.8604997396469116, "lr": 1.9910459257831455e-05, "epoch": 0.1813130574645416, "percentage": 9.06, "elapsed_time": "0:54:36", "remaining_time": "9:07:53"} +{"current_steps": 621, "total_steps": 6840, "loss": 0.674797534942627, "lr": 1.9909812562587266e-05, "epoch": 0.18160549787980698, "percentage": 9.08, "elapsed_time": "0:54:41", "remaining_time": "9:07:46"} +{"current_steps": 622, "total_steps": 6840, "loss": 0.7439107894897461, "lr": 1.9909163550984644e-05, "epoch": 0.1818979382950724, "percentage": 9.09, "elapsed_time": "0:54:47", "remaining_time": "9:07:40"} +{"current_steps": 623, 
"total_steps": 6840, "loss": 0.7137601971626282, "lr": 1.9908512223175293e-05, "epoch": 0.18219037871033777, "percentage": 9.11, "elapsed_time": "0:54:53", "remaining_time": "9:07:43"} +{"current_steps": 624, "total_steps": 6840, "loss": 0.6395502090454102, "lr": 1.9907858579311448e-05, "epoch": 0.18248281912560316, "percentage": 9.12, "elapsed_time": "0:54:59", "remaining_time": "9:07:45"} +{"current_steps": 625, "total_steps": 6840, "loss": 0.6747852563858032, "lr": 1.9907202619545905e-05, "epoch": 0.18277525954086854, "percentage": 9.14, "elapsed_time": "0:55:03", "remaining_time": "9:07:33"} +{"current_steps": 626, "total_steps": 6840, "loss": 0.6995632648468018, "lr": 1.9906544344031986e-05, "epoch": 0.18306769995613395, "percentage": 9.15, "elapsed_time": "0:55:08", "remaining_time": "9:07:26"} +{"current_steps": 627, "total_steps": 6840, "loss": 0.7006711363792419, "lr": 1.9905883752923557e-05, "epoch": 0.18336014037139933, "percentage": 9.17, "elapsed_time": "0:55:14", "remaining_time": "9:07:27"} +{"current_steps": 628, "total_steps": 6840, "loss": 0.660778820514679, "lr": 1.990522084637503e-05, "epoch": 0.18365258078666472, "percentage": 9.18, "elapsed_time": "0:55:18", "remaining_time": "9:07:07"} +{"current_steps": 629, "total_steps": 6840, "loss": 0.5826665163040161, "lr": 1.9904555624541362e-05, "epoch": 0.1839450212019301, "percentage": 9.2, "elapsed_time": "0:55:24", "remaining_time": "9:07:11"} +{"current_steps": 630, "total_steps": 6840, "loss": 0.8064266443252563, "lr": 1.990388808757803e-05, "epoch": 0.1842374616171955, "percentage": 9.21, "elapsed_time": "0:55:29", "remaining_time": "9:07:02"} +{"current_steps": 631, "total_steps": 6840, "loss": 0.6856451034545898, "lr": 1.9903218235641078e-05, "epoch": 0.1845299020324609, "percentage": 9.23, "elapsed_time": "0:55:34", "remaining_time": "9:06:52"} +{"current_steps": 632, "total_steps": 6840, "loss": 0.6423801183700562, "lr": 1.9902546068887076e-05, "epoch": 0.18482234244772627, "percentage": 
9.24, "elapsed_time": "0:55:38", "remaining_time": "9:06:31"} +{"current_steps": 633, "total_steps": 6840, "loss": 0.6903005242347717, "lr": 1.9901871587473135e-05, "epoch": 0.18511478286299166, "percentage": 9.25, "elapsed_time": "0:55:43", "remaining_time": "9:06:26"} +{"current_steps": 634, "total_steps": 6840, "loss": 0.636742115020752, "lr": 1.9901194791556916e-05, "epoch": 0.18540722327825707, "percentage": 9.27, "elapsed_time": "0:55:49", "remaining_time": "9:06:22"} +{"current_steps": 635, "total_steps": 6840, "loss": 0.6541105508804321, "lr": 1.9900515681296614e-05, "epoch": 0.18569966369352245, "percentage": 9.28, "elapsed_time": "0:55:55", "remaining_time": "9:06:27"} +{"current_steps": 636, "total_steps": 6840, "loss": 0.7026485204696655, "lr": 1.9899834256850973e-05, "epoch": 0.18599210410878783, "percentage": 9.3, "elapsed_time": "0:55:59", "remaining_time": "9:06:14"} +{"current_steps": 637, "total_steps": 6840, "loss": 0.6232702732086182, "lr": 1.989915051837926e-05, "epoch": 0.1862845445240532, "percentage": 9.31, "elapsed_time": "0:56:06", "remaining_time": "9:06:19"} +{"current_steps": 638, "total_steps": 6840, "loss": 0.5971217155456543, "lr": 1.9898464466041306e-05, "epoch": 0.18657698493931862, "percentage": 9.33, "elapsed_time": "0:56:11", "remaining_time": "9:06:12"} +{"current_steps": 639, "total_steps": 6840, "loss": 0.7942230701446533, "lr": 1.9897776099997463e-05, "epoch": 0.186869425354584, "percentage": 9.34, "elapsed_time": "0:56:17", "remaining_time": "9:06:16"} +{"current_steps": 640, "total_steps": 6840, "loss": 0.6578072309494019, "lr": 1.9897085420408637e-05, "epoch": 0.1871618657698494, "percentage": 9.36, "elapsed_time": "0:56:21", "remaining_time": "9:05:54"} +{"current_steps": 641, "total_steps": 6840, "loss": 0.6928422451019287, "lr": 1.989639242743627e-05, "epoch": 0.18745430618511477, "percentage": 9.37, "elapsed_time": "0:56:26", "remaining_time": "9:05:54"} +{"current_steps": 642, "total_steps": 6840, "loss": 
0.7656213641166687, "lr": 1.9895697121242346e-05, "epoch": 0.18774674660038018, "percentage": 9.39, "elapsed_time": "0:56:32", "remaining_time": "9:05:54"} +{"current_steps": 643, "total_steps": 6840, "loss": 0.6540038585662842, "lr": 1.9894999501989383e-05, "epoch": 0.18803918701564556, "percentage": 9.4, "elapsed_time": "0:56:37", "remaining_time": "9:05:45"} +{"current_steps": 644, "total_steps": 6840, "loss": 0.707741379737854, "lr": 1.989429956984045e-05, "epoch": 0.18833162743091095, "percentage": 9.42, "elapsed_time": "0:56:43", "remaining_time": "9:05:45"} +{"current_steps": 645, "total_steps": 6840, "loss": 0.6191326379776001, "lr": 1.9893597324959156e-05, "epoch": 0.18862406784617633, "percentage": 9.43, "elapsed_time": "0:56:49", "remaining_time": "9:05:47"} +{"current_steps": 646, "total_steps": 6840, "loss": 0.616736114025116, "lr": 1.9892892767509634e-05, "epoch": 0.18891650826144174, "percentage": 9.44, "elapsed_time": "0:56:56", "remaining_time": "9:05:55"} +{"current_steps": 647, "total_steps": 6840, "loss": 0.803301215171814, "lr": 1.989218589765658e-05, "epoch": 0.18920894867670712, "percentage": 9.46, "elapsed_time": "0:57:01", "remaining_time": "9:05:53"} +{"current_steps": 648, "total_steps": 6840, "loss": 0.6528021097183228, "lr": 1.989147671556522e-05, "epoch": 0.1895013890919725, "percentage": 9.47, "elapsed_time": "0:57:08", "remaining_time": "9:06:00"} +{"current_steps": 649, "total_steps": 6840, "loss": 0.6966919898986816, "lr": 1.9890765221401314e-05, "epoch": 0.1897938295072379, "percentage": 9.49, "elapsed_time": "0:57:13", "remaining_time": "9:05:53"} +{"current_steps": 650, "total_steps": 6840, "loss": 0.7223595380783081, "lr": 1.9890051415331178e-05, "epoch": 0.1900862699225033, "percentage": 9.5, "elapsed_time": "0:57:17", "remaining_time": "9:05:36"} +{"current_steps": 651, "total_steps": 6840, "loss": 0.6727452278137207, "lr": 1.9889335297521656e-05, "epoch": 0.19037871033776868, "percentage": 9.52, "elapsed_time": "0:57:22", 
"remaining_time": "9:05:25"} +{"current_steps": 652, "total_steps": 6840, "loss": 0.7008258104324341, "lr": 1.988861686814014e-05, "epoch": 0.19067115075303406, "percentage": 9.53, "elapsed_time": "0:57:27", "remaining_time": "9:05:20"} +{"current_steps": 653, "total_steps": 6840, "loss": 0.7624703049659729, "lr": 1.988789612735455e-05, "epoch": 0.19096359116829945, "percentage": 9.55, "elapsed_time": "0:57:32", "remaining_time": "9:05:13"} +{"current_steps": 654, "total_steps": 6840, "loss": 0.6813088655471802, "lr": 1.988717307533336e-05, "epoch": 0.19125603158356486, "percentage": 9.56, "elapsed_time": "0:57:38", "remaining_time": "9:05:12"} +{"current_steps": 655, "total_steps": 6840, "loss": 0.5401284694671631, "lr": 1.988644771224558e-05, "epoch": 0.19154847199883024, "percentage": 9.58, "elapsed_time": "0:57:43", "remaining_time": "9:05:01"} +{"current_steps": 656, "total_steps": 6840, "loss": 0.6805379986763, "lr": 1.9885720038260756e-05, "epoch": 0.19184091241409562, "percentage": 9.59, "elapsed_time": "0:57:48", "remaining_time": "9:04:58"} +{"current_steps": 657, "total_steps": 6840, "loss": 0.6449974775314331, "lr": 1.9884990053548982e-05, "epoch": 0.19213335282936103, "percentage": 9.61, "elapsed_time": "0:57:52", "remaining_time": "9:04:43"} +{"current_steps": 658, "total_steps": 6840, "loss": 0.6940032839775085, "lr": 1.988425775828088e-05, "epoch": 0.19242579324462641, "percentage": 9.62, "elapsed_time": "0:57:57", "remaining_time": "9:04:33"} +{"current_steps": 659, "total_steps": 6840, "loss": 0.7089565396308899, "lr": 1.9883523152627626e-05, "epoch": 0.1927182336598918, "percentage": 9.63, "elapsed_time": "0:58:02", "remaining_time": "9:04:20"} +{"current_steps": 660, "total_steps": 6840, "loss": 0.7508438229560852, "lr": 1.9882786236760932e-05, "epoch": 0.19301067407515718, "percentage": 9.65, "elapsed_time": "0:58:09", "remaining_time": "9:04:32"} +{"current_steps": 661, "total_steps": 6840, "loss": 0.6828616261482239, "lr": 
1.988204701085304e-05, "epoch": 0.1933031144904226, "percentage": 9.66, "elapsed_time": "0:58:15", "remaining_time": "9:04:32"} +{"current_steps": 662, "total_steps": 6840, "loss": 0.6652963161468506, "lr": 1.9881305475076744e-05, "epoch": 0.19359555490568797, "percentage": 9.68, "elapsed_time": "0:58:19", "remaining_time": "9:04:22"} +{"current_steps": 663, "total_steps": 6840, "loss": 0.6859447360038757, "lr": 1.988056162960537e-05, "epoch": 0.19388799532095335, "percentage": 9.69, "elapsed_time": "0:58:25", "remaining_time": "9:04:19"} +{"current_steps": 664, "total_steps": 6840, "loss": 0.693805992603302, "lr": 1.9879815474612794e-05, "epoch": 0.19418043573621874, "percentage": 9.71, "elapsed_time": "0:58:31", "remaining_time": "9:04:21"} +{"current_steps": 665, "total_steps": 6840, "loss": 0.7028747200965881, "lr": 1.987906701027342e-05, "epoch": 0.19447287615148415, "percentage": 9.72, "elapsed_time": "0:58:37", "remaining_time": "9:04:18"} +{"current_steps": 666, "total_steps": 6840, "loss": 0.7492112517356873, "lr": 1.9878316236762195e-05, "epoch": 0.19476531656674953, "percentage": 9.74, "elapsed_time": "0:58:42", "remaining_time": "9:04:18"} +{"current_steps": 667, "total_steps": 6840, "loss": 0.5394963026046753, "lr": 1.9877563154254613e-05, "epoch": 0.1950577569820149, "percentage": 9.75, "elapsed_time": "0:58:49", "remaining_time": "9:04:20"} +{"current_steps": 668, "total_steps": 6840, "loss": 0.5185493230819702, "lr": 1.98768077629267e-05, "epoch": 0.1953501973972803, "percentage": 9.77, "elapsed_time": "0:58:54", "remaining_time": "9:04:20"} +{"current_steps": 669, "total_steps": 6840, "loss": 0.7279829382896423, "lr": 1.9876050062955027e-05, "epoch": 0.1956426378125457, "percentage": 9.78, "elapsed_time": "0:59:00", "remaining_time": "9:04:17"} +{"current_steps": 670, "total_steps": 6840, "loss": 0.7437206506729126, "lr": 1.9875290054516692e-05, "epoch": 0.1959350782278111, "percentage": 9.8, "elapsed_time": "0:59:05", "remaining_time": "9:04:10"} 
+{"current_steps": 671, "total_steps": 6840, "loss": 0.7294617891311646, "lr": 1.9874527737789358e-05, "epoch": 0.19622751864307647, "percentage": 9.81, "elapsed_time": "0:59:10", "remaining_time": "9:04:02"} +{"current_steps": 672, "total_steps": 6840, "loss": 0.7710307240486145, "lr": 1.9873763112951198e-05, "epoch": 0.19651995905834185, "percentage": 9.82, "elapsed_time": "0:59:15", "remaining_time": "9:03:56"} +{"current_steps": 673, "total_steps": 6840, "loss": 0.690025806427002, "lr": 1.9872996180180947e-05, "epoch": 0.19681239947360726, "percentage": 9.84, "elapsed_time": "0:59:20", "remaining_time": "9:03:47"} +{"current_steps": 674, "total_steps": 6840, "loss": 0.6690589189529419, "lr": 1.9872226939657867e-05, "epoch": 0.19710483988887265, "percentage": 9.85, "elapsed_time": "0:59:24", "remaining_time": "9:03:32"} +{"current_steps": 675, "total_steps": 6840, "loss": 0.7587239742279053, "lr": 1.9871455391561764e-05, "epoch": 0.19739728030413803, "percentage": 9.87, "elapsed_time": "0:59:30", "remaining_time": "9:03:32"} +{"current_steps": 676, "total_steps": 6840, "loss": 0.8048006296157837, "lr": 1.987068153607298e-05, "epoch": 0.1976897207194034, "percentage": 9.88, "elapsed_time": "0:59:35", "remaining_time": "9:03:26"} +{"current_steps": 677, "total_steps": 6840, "loss": 0.721023678779602, "lr": 1.9869905373372402e-05, "epoch": 0.19798216113466882, "percentage": 9.9, "elapsed_time": "0:59:39", "remaining_time": "9:03:10"} +{"current_steps": 678, "total_steps": 6840, "loss": 0.646798849105835, "lr": 1.9869126903641457e-05, "epoch": 0.1982746015499342, "percentage": 9.91, "elapsed_time": "0:59:45", "remaining_time": "9:03:05"} +{"current_steps": 679, "total_steps": 6840, "loss": 0.597393274307251, "lr": 1.9868346127062098e-05, "epoch": 0.1985670419651996, "percentage": 9.93, "elapsed_time": "0:59:50", "remaining_time": "9:02:56"} +{"current_steps": 680, "total_steps": 6840, "loss": 0.8619129657745361, "lr": 1.9867563043816836e-05, "epoch": 
0.19885948238046497, "percentage": 9.94, "elapsed_time": "0:59:55", "remaining_time": "9:02:48"} +{"current_steps": 681, "total_steps": 6840, "loss": 0.5391764640808105, "lr": 1.986677765408871e-05, "epoch": 0.19915192279573038, "percentage": 9.96, "elapsed_time": "0:59:59", "remaining_time": "9:02:31"} +{"current_steps": 682, "total_steps": 6840, "loss": 0.8185729384422302, "lr": 1.9865989958061297e-05, "epoch": 0.19944436321099576, "percentage": 9.97, "elapsed_time": "1:00:05", "remaining_time": "9:02:30"} +{"current_steps": 683, "total_steps": 6840, "loss": 0.6629397869110107, "lr": 1.9865199955918712e-05, "epoch": 0.19973680362626114, "percentage": 9.99, "elapsed_time": "1:00:08", "remaining_time": "9:02:13"} +{"current_steps": 684, "total_steps": 6840, "loss": 0.6752325296401978, "lr": 1.9864407647845626e-05, "epoch": 0.20002924404152653, "percentage": 10.0, "elapsed_time": "1:00:13", "remaining_time": "9:02:05"} +{"current_steps": 685, "total_steps": 6840, "loss": 0.8509782552719116, "lr": 1.9863613034027224e-05, "epoch": 0.20032168445679194, "percentage": 10.01, "elapsed_time": "1:00:19", "remaining_time": "9:02:05"} +{"current_steps": 686, "total_steps": 6840, "loss": 0.5573478937149048, "lr": 1.986281611464925e-05, "epoch": 0.20061412487205732, "percentage": 10.03, "elapsed_time": "1:00:27", "remaining_time": "9:02:19"} +{"current_steps": 687, "total_steps": 6840, "loss": 0.8152032494544983, "lr": 1.9862016889897976e-05, "epoch": 0.2009065652873227, "percentage": 10.04, "elapsed_time": "1:00:31", "remaining_time": "9:02:09"} +{"current_steps": 688, "total_steps": 6840, "loss": 0.6346902847290039, "lr": 1.9861215359960217e-05, "epoch": 0.20119900570258809, "percentage": 10.06, "elapsed_time": "1:00:37", "remaining_time": "9:02:02"} +{"current_steps": 689, "total_steps": 6840, "loss": 0.6608721017837524, "lr": 1.986041152502332e-05, "epoch": 0.2014914461178535, "percentage": 10.07, "elapsed_time": "1:00:41", "remaining_time": "9:01:49"} +{"current_steps": 
690, "total_steps": 6840, "loss": 0.7753713130950928, "lr": 1.9859605385275188e-05, "epoch": 0.20178388653311888, "percentage": 10.09, "elapsed_time": "1:00:47", "remaining_time": "9:01:47"} +{"current_steps": 691, "total_steps": 6840, "loss": 0.6747434139251709, "lr": 1.9858796940904238e-05, "epoch": 0.20207632694838426, "percentage": 10.1, "elapsed_time": "1:00:51", "remaining_time": "9:01:31"} +{"current_steps": 692, "total_steps": 6840, "loss": 0.7263737320899963, "lr": 1.9857986192099446e-05, "epoch": 0.20236876736364964, "percentage": 10.12, "elapsed_time": "1:00:55", "remaining_time": "9:01:16"} +{"current_steps": 693, "total_steps": 6840, "loss": 0.7910827994346619, "lr": 1.9857173139050324e-05, "epoch": 0.20266120777891505, "percentage": 10.13, "elapsed_time": "1:01:00", "remaining_time": "9:01:08"} +{"current_steps": 694, "total_steps": 6840, "loss": 0.7245683670043945, "lr": 1.9856357781946913e-05, "epoch": 0.20295364819418044, "percentage": 10.15, "elapsed_time": "1:01:05", "remaining_time": "9:01:04"} +{"current_steps": 695, "total_steps": 6840, "loss": 0.7440140247344971, "lr": 1.9855540120979794e-05, "epoch": 0.20324608860944582, "percentage": 10.16, "elapsed_time": "1:01:12", "remaining_time": "9:01:07"} +{"current_steps": 696, "total_steps": 6840, "loss": 0.7485358715057373, "lr": 1.9854720156340096e-05, "epoch": 0.20353852902471123, "percentage": 10.18, "elapsed_time": "1:01:16", "remaining_time": "9:00:57"} +{"current_steps": 697, "total_steps": 6840, "loss": 0.7658560872077942, "lr": 1.985389788821948e-05, "epoch": 0.2038309694399766, "percentage": 10.19, "elapsed_time": "1:01:20", "remaining_time": "9:00:41"} +{"current_steps": 698, "total_steps": 6840, "loss": 0.7366135120391846, "lr": 1.9853073316810144e-05, "epoch": 0.204123409855242, "percentage": 10.2, "elapsed_time": "1:01:25", "remaining_time": "9:00:33"} +{"current_steps": 699, "total_steps": 6840, "loss": 0.622355580329895, "lr": 1.985224644230483e-05, "epoch": 0.20441585027050738, 
"percentage": 10.22, "elapsed_time": "1:01:31", "remaining_time": "9:00:33"} +{"current_steps": 700, "total_steps": 6840, "loss": 0.6123125553131104, "lr": 1.985141726489681e-05, "epoch": 0.2047082906857728, "percentage": 10.23, "elapsed_time": "1:01:37", "remaining_time": "9:00:35"} +{"current_steps": 701, "total_steps": 6840, "loss": 0.6768301725387573, "lr": 1.9850585784779907e-05, "epoch": 0.20500073110103817, "percentage": 10.25, "elapsed_time": "1:01:47", "remaining_time": "9:01:12"} +{"current_steps": 702, "total_steps": 6840, "loss": 0.6562466621398926, "lr": 1.9849752002148465e-05, "epoch": 0.20529317151630355, "percentage": 10.26, "elapsed_time": "1:01:53", "remaining_time": "9:01:08"} +{"current_steps": 703, "total_steps": 6840, "loss": 0.7818280458450317, "lr": 1.984891591719738e-05, "epoch": 0.20558561193156893, "percentage": 10.28, "elapsed_time": "1:01:57", "remaining_time": "9:00:55"} +{"current_steps": 704, "total_steps": 6840, "loss": 0.7144001722335815, "lr": 1.9848077530122083e-05, "epoch": 0.20587805234683434, "percentage": 10.29, "elapsed_time": "1:02:02", "remaining_time": "9:00:47"} +{"current_steps": 705, "total_steps": 6840, "loss": 0.700564980506897, "lr": 1.9847236841118537e-05, "epoch": 0.20617049276209973, "percentage": 10.31, "elapsed_time": "1:02:07", "remaining_time": "9:00:38"} +{"current_steps": 706, "total_steps": 6840, "loss": 0.5933517217636108, "lr": 1.984639385038326e-05, "epoch": 0.2064629331773651, "percentage": 10.32, "elapsed_time": "1:02:13", "remaining_time": "9:00:37"} +{"current_steps": 707, "total_steps": 6840, "loss": 0.6174886226654053, "lr": 1.9845548558113278e-05, "epoch": 0.2067553735926305, "percentage": 10.34, "elapsed_time": "1:02:18", "remaining_time": "9:00:27"} +{"current_steps": 708, "total_steps": 6840, "loss": 0.7241572141647339, "lr": 1.9844700964506188e-05, "epoch": 0.2070478140078959, "percentage": 10.35, "elapsed_time": "1:02:23", "remaining_time": "9:00:21"} +{"current_steps": 709, "total_steps": 
6840, "loss": 0.6620675325393677, "lr": 1.9843851069760103e-05, "epoch": 0.20734025442316129, "percentage": 10.37, "elapsed_time": "1:02:29", "remaining_time": "9:00:27"} +{"current_steps": 710, "total_steps": 6840, "loss": 0.6115273237228394, "lr": 1.9842998874073682e-05, "epoch": 0.20763269483842667, "percentage": 10.38, "elapsed_time": "1:02:35", "remaining_time": "9:00:25"} +{"current_steps": 711, "total_steps": 6840, "loss": 0.6871848106384277, "lr": 1.984214437764612e-05, "epoch": 0.20792513525369205, "percentage": 10.39, "elapsed_time": "1:02:40", "remaining_time": "9:00:15"} +{"current_steps": 712, "total_steps": 6840, "loss": 0.6887271404266357, "lr": 1.9841287580677152e-05, "epoch": 0.20821757566895746, "percentage": 10.41, "elapsed_time": "1:02:44", "remaining_time": "9:00:01"} +{"current_steps": 713, "total_steps": 6840, "loss": 0.8519056439399719, "lr": 1.9840428483367046e-05, "epoch": 0.20851001608422284, "percentage": 10.42, "elapsed_time": "1:02:50", "remaining_time": "9:00:04"} +{"current_steps": 714, "total_steps": 6840, "loss": 0.8168978691101074, "lr": 1.9839567085916617e-05, "epoch": 0.20880245649948823, "percentage": 10.44, "elapsed_time": "1:02:56", "remaining_time": "8:59:58"} +{"current_steps": 715, "total_steps": 6840, "loss": 0.6565415859222412, "lr": 1.98387033885272e-05, "epoch": 0.2090948969147536, "percentage": 10.45, "elapsed_time": "1:03:01", "remaining_time": "8:59:55"} +{"current_steps": 716, "total_steps": 6840, "loss": 0.7305471897125244, "lr": 1.9837837391400697e-05, "epoch": 0.20938733733001902, "percentage": 10.47, "elapsed_time": "1:03:05", "remaining_time": "8:59:41"} +{"current_steps": 717, "total_steps": 6840, "loss": 0.7676819562911987, "lr": 1.9836969094739512e-05, "epoch": 0.2096797777452844, "percentage": 10.48, "elapsed_time": "1:03:09", "remaining_time": "8:59:24"} +{"current_steps": 718, "total_steps": 6840, "loss": 0.6519052982330322, "lr": 1.983609849874661e-05, "epoch": 0.20997221816054978, "percentage": 10.5, 
"elapsed_time": "1:03:14", "remaining_time": "8:59:13"} +{"current_steps": 719, "total_steps": 6840, "loss": 0.6298089623451233, "lr": 1.9835225603625488e-05, "epoch": 0.21026465857581517, "percentage": 10.51, "elapsed_time": "1:03:19", "remaining_time": "8:59:02"} +{"current_steps": 720, "total_steps": 6840, "loss": 0.6384454369544983, "lr": 1.9834350409580184e-05, "epoch": 0.21055709899108058, "percentage": 10.53, "elapsed_time": "1:03:24", "remaining_time": "8:58:57"} +{"current_steps": 721, "total_steps": 6840, "loss": 0.6335986852645874, "lr": 1.9833472916815264e-05, "epoch": 0.21084953940634596, "percentage": 10.54, "elapsed_time": "1:03:29", "remaining_time": "8:58:47"} +{"current_steps": 722, "total_steps": 6840, "loss": 0.6587867736816406, "lr": 1.983259312553584e-05, "epoch": 0.21114197982161134, "percentage": 10.56, "elapsed_time": "1:03:35", "remaining_time": "8:58:50"} +{"current_steps": 723, "total_steps": 6840, "loss": 0.6884294748306274, "lr": 1.9831711035947552e-05, "epoch": 0.21143442023687672, "percentage": 10.57, "elapsed_time": "1:03:39", "remaining_time": "8:58:35"} +{"current_steps": 724, "total_steps": 6840, "loss": 0.7094298601150513, "lr": 1.983082664825659e-05, "epoch": 0.21172686065214213, "percentage": 10.58, "elapsed_time": "1:03:44", "remaining_time": "8:58:28"} +{"current_steps": 725, "total_steps": 6840, "loss": 0.736876368522644, "lr": 1.982993996266967e-05, "epoch": 0.21201930106740752, "percentage": 10.6, "elapsed_time": "1:03:49", "remaining_time": "8:58:17"} +{"current_steps": 726, "total_steps": 6840, "loss": 0.7802199125289917, "lr": 1.9829050979394052e-05, "epoch": 0.2123117414826729, "percentage": 10.61, "elapsed_time": "1:03:54", "remaining_time": "8:58:10"} +{"current_steps": 727, "total_steps": 6840, "loss": 0.602590799331665, "lr": 1.9828159698637527e-05, "epoch": 0.21260418189793828, "percentage": 10.63, "elapsed_time": "1:03:59", "remaining_time": "8:58:04"} +{"current_steps": 728, "total_steps": 6840, "loss": 
0.6855295896530151, "lr": 1.982726612060843e-05, "epoch": 0.2128966223132037, "percentage": 10.64, "elapsed_time": "1:04:03", "remaining_time": "8:57:51"} +{"current_steps": 729, "total_steps": 6840, "loss": 0.7174949645996094, "lr": 1.982637024551563e-05, "epoch": 0.21318906272846908, "percentage": 10.66, "elapsed_time": "1:04:09", "remaining_time": "8:57:51"} +{"current_steps": 730, "total_steps": 6840, "loss": 0.7002695798873901, "lr": 1.9825472073568527e-05, "epoch": 0.21348150314373446, "percentage": 10.67, "elapsed_time": "1:04:14", "remaining_time": "8:57:38"} +{"current_steps": 731, "total_steps": 6840, "loss": 0.7256268262863159, "lr": 1.982457160497707e-05, "epoch": 0.21377394355899984, "percentage": 10.69, "elapsed_time": "1:04:19", "remaining_time": "8:57:33"} +{"current_steps": 732, "total_steps": 6840, "loss": 0.8223557472229004, "lr": 1.9823668839951732e-05, "epoch": 0.21406638397426525, "percentage": 10.7, "elapsed_time": "1:04:24", "remaining_time": "8:57:27"} +{"current_steps": 733, "total_steps": 6840, "loss": 0.760543942451477, "lr": 1.982276377870353e-05, "epoch": 0.21435882438953063, "percentage": 10.72, "elapsed_time": "1:04:30", "remaining_time": "8:57:23"} +{"current_steps": 734, "total_steps": 6840, "loss": 0.5587141513824463, "lr": 1.982185642144402e-05, "epoch": 0.21465126480479602, "percentage": 10.73, "elapsed_time": "1:04:35", "remaining_time": "8:57:19"} +{"current_steps": 735, "total_steps": 6840, "loss": 0.5775829553604126, "lr": 1.9820946768385295e-05, "epoch": 0.21494370522006143, "percentage": 10.75, "elapsed_time": "1:04:39", "remaining_time": "8:57:02"} +{"current_steps": 736, "total_steps": 6840, "loss": 0.6654443144798279, "lr": 1.982003481973997e-05, "epoch": 0.2152361456353268, "percentage": 10.76, "elapsed_time": "1:04:45", "remaining_time": "8:57:05"} +{"current_steps": 737, "total_steps": 6840, "loss": 0.7963466048240662, "lr": 1.9819120575721212e-05, "epoch": 0.2155285860505922, "percentage": 10.77, "elapsed_time": 
"1:04:50", "remaining_time": "8:56:57"} +{"current_steps": 738, "total_steps": 6840, "loss": 0.6748678684234619, "lr": 1.981820403654272e-05, "epoch": 0.21582102646585757, "percentage": 10.79, "elapsed_time": "1:04:56", "remaining_time": "8:56:55"} +{"current_steps": 739, "total_steps": 6840, "loss": 0.7041783928871155, "lr": 1.9817285202418733e-05, "epoch": 0.21611346688112298, "percentage": 10.8, "elapsed_time": "1:05:02", "remaining_time": "8:56:54"} +{"current_steps": 740, "total_steps": 6840, "loss": 0.8008041381835938, "lr": 1.981636407356402e-05, "epoch": 0.21640590729638837, "percentage": 10.82, "elapsed_time": "1:05:06", "remaining_time": "8:56:38"} +{"current_steps": 741, "total_steps": 6840, "loss": 0.6873682141304016, "lr": 1.9815440650193887e-05, "epoch": 0.21669834771165375, "percentage": 10.83, "elapsed_time": "1:05:11", "remaining_time": "8:56:37"} +{"current_steps": 742, "total_steps": 6840, "loss": 0.6316831111907959, "lr": 1.981451493252418e-05, "epoch": 0.21699078812691913, "percentage": 10.85, "elapsed_time": "1:05:16", "remaining_time": "8:56:22"} +{"current_steps": 743, "total_steps": 6840, "loss": 0.6481543779373169, "lr": 1.9813586920771283e-05, "epoch": 0.21728322854218454, "percentage": 10.86, "elapsed_time": "1:05:20", "remaining_time": "8:56:10"} +{"current_steps": 744, "total_steps": 6840, "loss": 0.6642731428146362, "lr": 1.9812656615152112e-05, "epoch": 0.21757566895744992, "percentage": 10.88, "elapsed_time": "1:05:25", "remaining_time": "8:56:02"} +{"current_steps": 745, "total_steps": 6840, "loss": 0.6769483089447021, "lr": 1.9811724015884115e-05, "epoch": 0.2178681093727153, "percentage": 10.89, "elapsed_time": "1:05:31", "remaining_time": "8:56:01"} +{"current_steps": 746, "total_steps": 6840, "loss": 0.6397525072097778, "lr": 1.981078912318529e-05, "epoch": 0.2181605497879807, "percentage": 10.91, "elapsed_time": "1:05:35", "remaining_time": "8:55:50"} +{"current_steps": 747, "total_steps": 6840, "loss": 0.4963756203651428, 
"lr": 1.9809851937274154e-05, "epoch": 0.2184529902032461, "percentage": 10.92, "elapsed_time": "1:05:40", "remaining_time": "8:55:40"} +{"current_steps": 748, "total_steps": 6840, "loss": 0.7352936267852783, "lr": 1.9808912458369774e-05, "epoch": 0.21874543061851148, "percentage": 10.94, "elapsed_time": "1:05:45", "remaining_time": "8:55:33"} +{"current_steps": 749, "total_steps": 6840, "loss": 0.7177609205245972, "lr": 1.980797068669175e-05, "epoch": 0.21903787103377687, "percentage": 10.95, "elapsed_time": "1:05:50", "remaining_time": "8:55:24"} +{"current_steps": 750, "total_steps": 6840, "loss": 0.76703941822052, "lr": 1.980702662246021e-05, "epoch": 0.21933031144904225, "percentage": 10.96, "elapsed_time": "1:05:56", "remaining_time": "8:55:23"} +{"current_steps": 751, "total_steps": 6840, "loss": 0.8591324090957642, "lr": 1.980608026589582e-05, "epoch": 0.21962275186430766, "percentage": 10.98, "elapsed_time": "1:06:00", "remaining_time": "8:55:13"} +{"current_steps": 752, "total_steps": 6840, "loss": 0.6216185092926025, "lr": 1.9805131617219792e-05, "epoch": 0.21991519227957304, "percentage": 10.99, "elapsed_time": "1:06:06", "remaining_time": "8:55:10"} +{"current_steps": 753, "total_steps": 6840, "loss": 0.6067323684692383, "lr": 1.9804180676653867e-05, "epoch": 0.22020763269483842, "percentage": 11.01, "elapsed_time": "1:06:12", "remaining_time": "8:55:08"} +{"current_steps": 754, "total_steps": 6840, "loss": 0.5832521319389343, "lr": 1.9803227444420316e-05, "epoch": 0.2205000731101038, "percentage": 11.02, "elapsed_time": "1:06:16", "remaining_time": "8:54:57"} +{"current_steps": 755, "total_steps": 6840, "loss": 0.6181083917617798, "lr": 1.9802271920741957e-05, "epoch": 0.22079251352536922, "percentage": 11.04, "elapsed_time": "1:06:20", "remaining_time": "8:54:42"} +{"current_steps": 756, "total_steps": 6840, "loss": 0.614393949508667, "lr": 1.9801314105842135e-05, "epoch": 0.2210849539406346, "percentage": 11.05, "elapsed_time": "1:06:26", 
"remaining_time": "8:54:39"} +{"current_steps": 757, "total_steps": 6840, "loss": 0.7598476409912109, "lr": 1.980035399994473e-05, "epoch": 0.22137739435589998, "percentage": 11.07, "elapsed_time": "1:06:31", "remaining_time": "8:54:36"} +{"current_steps": 758, "total_steps": 6840, "loss": 0.7185830473899841, "lr": 1.979939160327417e-05, "epoch": 0.22166983477116536, "percentage": 11.08, "elapsed_time": "1:06:36", "remaining_time": "8:54:26"} +{"current_steps": 759, "total_steps": 6840, "loss": 0.6672362089157104, "lr": 1.9798426916055403e-05, "epoch": 0.22196227518643077, "percentage": 11.1, "elapsed_time": "1:06:42", "remaining_time": "8:54:23"} +{"current_steps": 760, "total_steps": 6840, "loss": 0.60948646068573, "lr": 1.9797459938513918e-05, "epoch": 0.22225471560169616, "percentage": 11.11, "elapsed_time": "1:06:46", "remaining_time": "8:54:14"} +{"current_steps": 761, "total_steps": 6840, "loss": 0.6073893308639526, "lr": 1.979649067087574e-05, "epoch": 0.22254715601696154, "percentage": 11.13, "elapsed_time": "1:06:51", "remaining_time": "8:54:04"} +{"current_steps": 762, "total_steps": 6840, "loss": 0.7521525025367737, "lr": 1.9795519113367434e-05, "epoch": 0.22283959643222692, "percentage": 11.14, "elapsed_time": "1:06:56", "remaining_time": "8:53:59"} +{"current_steps": 763, "total_steps": 6840, "loss": 0.7281486988067627, "lr": 1.979454526621609e-05, "epoch": 0.22313203684749233, "percentage": 11.15, "elapsed_time": "1:07:02", "remaining_time": "8:53:55"} +{"current_steps": 764, "total_steps": 6840, "loss": 0.5628652572631836, "lr": 1.9793569129649345e-05, "epoch": 0.22342447726275771, "percentage": 11.17, "elapsed_time": "1:07:06", "remaining_time": "8:53:45"} +{"current_steps": 765, "total_steps": 6840, "loss": 0.9115084409713745, "lr": 1.9792590703895364e-05, "epoch": 0.2237169176780231, "percentage": 11.18, "elapsed_time": "1:07:11", "remaining_time": "8:53:36"} +{"current_steps": 766, "total_steps": 6840, "loss": 0.5793902277946472, "lr": 
1.9791609989182843e-05, "epoch": 0.22400935809328848, "percentage": 11.2, "elapsed_time": "1:07:16", "remaining_time": "8:53:30"} +{"current_steps": 767, "total_steps": 6840, "loss": 0.5811150074005127, "lr": 1.979062698574102e-05, "epoch": 0.2243017985085539, "percentage": 11.21, "elapsed_time": "1:07:21", "remaining_time": "8:53:23"} +{"current_steps": 768, "total_steps": 6840, "loss": 0.7450643181800842, "lr": 1.978964169379967e-05, "epoch": 0.22459423892381927, "percentage": 11.23, "elapsed_time": "1:07:28", "remaining_time": "8:53:25"} +{"current_steps": 769, "total_steps": 6840, "loss": 0.6617515087127686, "lr": 1.9788654113589093e-05, "epoch": 0.22488667933908466, "percentage": 11.24, "elapsed_time": "1:07:34", "remaining_time": "8:53:25"} +{"current_steps": 770, "total_steps": 6840, "loss": 0.6240406036376953, "lr": 1.9787664245340137e-05, "epoch": 0.22517911975435004, "percentage": 11.26, "elapsed_time": "1:07:38", "remaining_time": "8:53:14"} +{"current_steps": 771, "total_steps": 6840, "loss": 0.694688081741333, "lr": 1.978667208928417e-05, "epoch": 0.22547156016961545, "percentage": 11.27, "elapsed_time": "1:07:43", "remaining_time": "8:53:09"} +{"current_steps": 772, "total_steps": 6840, "loss": 0.6855190396308899, "lr": 1.9785677645653107e-05, "epoch": 0.22576400058488083, "percentage": 11.29, "elapsed_time": "1:07:48", "remaining_time": "8:52:58"} +{"current_steps": 773, "total_steps": 6840, "loss": 0.8132567405700684, "lr": 1.978468091467939e-05, "epoch": 0.2260564410001462, "percentage": 11.3, "elapsed_time": "1:07:52", "remaining_time": "8:52:46"} +{"current_steps": 774, "total_steps": 6840, "loss": 0.7011039853096008, "lr": 1.9783681896596006e-05, "epoch": 0.22634888141541162, "percentage": 11.32, "elapsed_time": "1:07:58", "remaining_time": "8:52:42"} +{"current_steps": 775, "total_steps": 6840, "loss": 0.5754199028015137, "lr": 1.9782680591636462e-05, "epoch": 0.226641321830677, "percentage": 11.33, "elapsed_time": "1:08:03", "remaining_time": 
"8:52:38"} +{"current_steps": 776, "total_steps": 6840, "loss": 0.7518784403800964, "lr": 1.9781677000034807e-05, "epoch": 0.2269337622459424, "percentage": 11.35, "elapsed_time": "1:08:09", "remaining_time": "8:52:36"} +{"current_steps": 777, "total_steps": 6840, "loss": 0.6802738904953003, "lr": 1.978067112202563e-05, "epoch": 0.22722620266120777, "percentage": 11.36, "elapsed_time": "1:08:14", "remaining_time": "8:52:28"} +{"current_steps": 778, "total_steps": 6840, "loss": 0.7667055726051331, "lr": 1.9779662957844046e-05, "epoch": 0.22751864307647318, "percentage": 11.37, "elapsed_time": "1:08:20", "remaining_time": "8:52:31"} +{"current_steps": 779, "total_steps": 6840, "loss": 0.7590975165367126, "lr": 1.9778652507725704e-05, "epoch": 0.22781108349173856, "percentage": 11.39, "elapsed_time": "1:08:26", "remaining_time": "8:52:27"} +{"current_steps": 780, "total_steps": 6840, "loss": 0.8009685277938843, "lr": 1.9777639771906795e-05, "epoch": 0.22810352390700395, "percentage": 11.4, "elapsed_time": "1:08:32", "remaining_time": "8:52:27"} +{"current_steps": 781, "total_steps": 6840, "loss": 0.6094385981559753, "lr": 1.977662475062404e-05, "epoch": 0.22839596432226933, "percentage": 11.42, "elapsed_time": "1:08:37", "remaining_time": "8:52:26"} +{"current_steps": 782, "total_steps": 6840, "loss": 0.5919946432113647, "lr": 1.977560744411469e-05, "epoch": 0.22868840473753474, "percentage": 11.43, "elapsed_time": "1:08:42", "remaining_time": "8:52:12"} +{"current_steps": 783, "total_steps": 6840, "loss": 0.7616838216781616, "lr": 1.9774587852616537e-05, "epoch": 0.22898084515280012, "percentage": 11.45, "elapsed_time": "1:08:46", "remaining_time": "8:51:59"} +{"current_steps": 784, "total_steps": 6840, "loss": 0.5107603073120117, "lr": 1.9773565976367903e-05, "epoch": 0.2292732855680655, "percentage": 11.46, "elapsed_time": "1:08:52", "remaining_time": "8:52:00"} +{"current_steps": 785, "total_steps": 6840, "loss": 0.6819792985916138, "lr": 1.9772541815607645e-05, 
"epoch": 0.2295657259833309, "percentage": 11.48, "elapsed_time": "1:08:57", "remaining_time": "8:51:54"} +{"current_steps": 786, "total_steps": 6840, "loss": 0.748264729976654, "lr": 1.977151537057516e-05, "epoch": 0.2298581663985963, "percentage": 11.49, "elapsed_time": "1:09:02", "remaining_time": "8:51:50"} +{"current_steps": 787, "total_steps": 6840, "loss": 0.7341534495353699, "lr": 1.977048664151037e-05, "epoch": 0.23015060681386168, "percentage": 11.51, "elapsed_time": "1:09:08", "remaining_time": "8:51:44"} +{"current_steps": 788, "total_steps": 6840, "loss": 0.569247841835022, "lr": 1.976945562865373e-05, "epoch": 0.23044304722912706, "percentage": 11.52, "elapsed_time": "1:09:14", "remaining_time": "8:51:49"} +{"current_steps": 789, "total_steps": 6840, "loss": 0.7003188133239746, "lr": 1.9768422332246233e-05, "epoch": 0.23073548764439245, "percentage": 11.54, "elapsed_time": "1:09:19", "remaining_time": "8:51:43"} +{"current_steps": 790, "total_steps": 6840, "loss": 0.6484041810035706, "lr": 1.9767386752529415e-05, "epoch": 0.23102792805965786, "percentage": 11.55, "elapsed_time": "1:09:24", "remaining_time": "8:51:34"} +{"current_steps": 791, "total_steps": 6840, "loss": 0.6635721921920776, "lr": 1.9766348889745324e-05, "epoch": 0.23132036847492324, "percentage": 11.56, "elapsed_time": "1:09:30", "remaining_time": "8:51:36"} +{"current_steps": 792, "total_steps": 6840, "loss": 0.5855914354324341, "lr": 1.9765308744136568e-05, "epoch": 0.23161280889018862, "percentage": 11.58, "elapsed_time": "1:09:36", "remaining_time": "8:51:32"} +{"current_steps": 793, "total_steps": 6840, "loss": 0.7606059312820435, "lr": 1.976426631594626e-05, "epoch": 0.231905249305454, "percentage": 11.59, "elapsed_time": "1:09:41", "remaining_time": "8:51:27"} +{"current_steps": 794, "total_steps": 6840, "loss": 0.7080718278884888, "lr": 1.976322160541807e-05, "epoch": 0.2321976897207194, "percentage": 11.61, "elapsed_time": "1:09:45", "remaining_time": "8:51:11"} 
+{"current_steps": 795, "total_steps": 6840, "loss": 0.8838162422180176, "lr": 1.9762174612796195e-05, "epoch": 0.2324901301359848, "percentage": 11.62, "elapsed_time": "1:09:51", "remaining_time": "8:51:13"} +{"current_steps": 796, "total_steps": 6840, "loss": 0.5776950120925903, "lr": 1.9761125338325357e-05, "epoch": 0.23278257055125018, "percentage": 11.64, "elapsed_time": "1:09:56", "remaining_time": "8:51:06"} +{"current_steps": 797, "total_steps": 6840, "loss": 0.7455854415893555, "lr": 1.9760073782250817e-05, "epoch": 0.23307501096651556, "percentage": 11.65, "elapsed_time": "1:10:01", "remaining_time": "8:50:57"} +{"current_steps": 798, "total_steps": 6840, "loss": 0.7160001993179321, "lr": 1.9759019944818375e-05, "epoch": 0.23336745138178097, "percentage": 11.67, "elapsed_time": "1:10:07", "remaining_time": "8:50:54"} +{"current_steps": 799, "total_steps": 6840, "loss": 0.6282311081886292, "lr": 1.9757963826274357e-05, "epoch": 0.23365989179704635, "percentage": 11.68, "elapsed_time": "1:10:11", "remaining_time": "8:50:45"} +{"current_steps": 800, "total_steps": 6840, "loss": 0.6479916572570801, "lr": 1.9756905426865626e-05, "epoch": 0.23395233221231174, "percentage": 11.7, "elapsed_time": "1:10:16", "remaining_time": "8:50:32"} +{"current_steps": 801, "total_steps": 6840, "loss": 0.6519639492034912, "lr": 1.9755844746839573e-05, "epoch": 0.23424477262757712, "percentage": 11.71, "elapsed_time": "1:10:25", "remaining_time": "8:50:58"} +{"current_steps": 802, "total_steps": 6840, "loss": 0.5591464638710022, "lr": 1.9754781786444122e-05, "epoch": 0.23453721304284253, "percentage": 11.73, "elapsed_time": "1:10:31", "remaining_time": "8:50:57"} +{"current_steps": 803, "total_steps": 6840, "loss": 0.6378511190414429, "lr": 1.9753716545927745e-05, "epoch": 0.2348296534581079, "percentage": 11.74, "elapsed_time": "1:10:36", "remaining_time": "8:50:48"} +{"current_steps": 804, "total_steps": 6840, "loss": 0.7932485342025757, "lr": 1.9752649025539424e-05, "epoch": 
0.2351220938733733, "percentage": 11.75, "elapsed_time": "1:10:40", "remaining_time": "8:50:34"} +{"current_steps": 805, "total_steps": 6840, "loss": 0.7344592809677124, "lr": 1.9751579225528694e-05, "epoch": 0.23541453428863868, "percentage": 11.77, "elapsed_time": "1:10:46", "remaining_time": "8:50:35"} +{"current_steps": 806, "total_steps": 6840, "loss": 0.7879096269607544, "lr": 1.975050714614561e-05, "epoch": 0.2357069747039041, "percentage": 11.78, "elapsed_time": "1:10:51", "remaining_time": "8:50:29"} +{"current_steps": 807, "total_steps": 6840, "loss": 0.6428436040878296, "lr": 1.9749432787640764e-05, "epoch": 0.23599941511916947, "percentage": 11.8, "elapsed_time": "1:10:57", "remaining_time": "8:50:27"} +{"current_steps": 808, "total_steps": 6840, "loss": 0.7018194198608398, "lr": 1.9748356150265283e-05, "epoch": 0.23629185553443485, "percentage": 11.81, "elapsed_time": "1:11:02", "remaining_time": "8:50:19"} +{"current_steps": 809, "total_steps": 6840, "loss": 0.7696131467819214, "lr": 1.974727723427082e-05, "epoch": 0.23658429594970024, "percentage": 11.83, "elapsed_time": "1:11:07", "remaining_time": "8:50:10"} +{"current_steps": 810, "total_steps": 6840, "loss": 0.6429424285888672, "lr": 1.974619603990957e-05, "epoch": 0.23687673636496565, "percentage": 11.84, "elapsed_time": "1:11:12", "remaining_time": "8:50:03"} +{"current_steps": 811, "total_steps": 6840, "loss": 0.7205626964569092, "lr": 1.9745112567434254e-05, "epoch": 0.23716917678023103, "percentage": 11.86, "elapsed_time": "1:11:17", "remaining_time": "8:49:56"} +{"current_steps": 812, "total_steps": 6840, "loss": 0.7018989324569702, "lr": 1.9744026817098122e-05, "epoch": 0.2374616171954964, "percentage": 11.87, "elapsed_time": "1:11:22", "remaining_time": "8:49:52"} +{"current_steps": 813, "total_steps": 6840, "loss": 0.6861958503723145, "lr": 1.974293878915497e-05, "epoch": 0.23775405761076182, "percentage": 11.89, "elapsed_time": "1:11:27", "remaining_time": "8:49:45"} +{"current_steps": 
814, "total_steps": 6840, "loss": 0.687503457069397, "lr": 1.9741848483859117e-05, "epoch": 0.2380464980260272, "percentage": 11.9, "elapsed_time": "1:11:32", "remaining_time": "8:49:38"} +{"current_steps": 815, "total_steps": 6840, "loss": 0.7808526754379272, "lr": 1.9740755901465408e-05, "epoch": 0.23833893844129259, "percentage": 11.92, "elapsed_time": "1:11:38", "remaining_time": "8:49:34"} +{"current_steps": 816, "total_steps": 6840, "loss": 0.7387286424636841, "lr": 1.973966104222923e-05, "epoch": 0.23863137885655797, "percentage": 11.93, "elapsed_time": "1:11:42", "remaining_time": "8:49:23"} +{"current_steps": 817, "total_steps": 6840, "loss": 0.6262110471725464, "lr": 1.9738563906406508e-05, "epoch": 0.23892381927182338, "percentage": 11.94, "elapsed_time": "1:11:47", "remaining_time": "8:49:14"} +{"current_steps": 818, "total_steps": 6840, "loss": 0.6618830561637878, "lr": 1.973746449425368e-05, "epoch": 0.23921625968708876, "percentage": 11.96, "elapsed_time": "1:11:52", "remaining_time": "8:49:09"} +{"current_steps": 819, "total_steps": 6840, "loss": 0.5866184234619141, "lr": 1.9736362806027732e-05, "epoch": 0.23950870010235414, "percentage": 11.97, "elapsed_time": "1:11:58", "remaining_time": "8:49:06"} +{"current_steps": 820, "total_steps": 6840, "loss": 0.6413314342498779, "lr": 1.9735258841986175e-05, "epoch": 0.23980114051761953, "percentage": 11.99, "elapsed_time": "1:12:04", "remaining_time": "8:49:05"} +{"current_steps": 821, "total_steps": 6840, "loss": 0.6125906109809875, "lr": 1.9734152602387054e-05, "epoch": 0.24009358093288494, "percentage": 12.0, "elapsed_time": "1:12:09", "remaining_time": "8:48:58"} +{"current_steps": 822, "total_steps": 6840, "loss": 0.6128122806549072, "lr": 1.973304408748895e-05, "epoch": 0.24038602134815032, "percentage": 12.02, "elapsed_time": "1:12:15", "remaining_time": "8:48:57"} +{"current_steps": 823, "total_steps": 6840, "loss": 0.7763051986694336, "lr": 1.973193329755097e-05, "epoch": 0.2406784617634157, 
"percentage": 12.03, "elapsed_time": "1:12:19", "remaining_time": "8:48:47"} +{"current_steps": 824, "total_steps": 6840, "loss": 0.7187550067901611, "lr": 1.9730820232832747e-05, "epoch": 0.24097090217868108, "percentage": 12.05, "elapsed_time": "1:12:23", "remaining_time": "8:48:34"} +{"current_steps": 825, "total_steps": 6840, "loss": 0.6564748287200928, "lr": 1.972970489359446e-05, "epoch": 0.2412633425939465, "percentage": 12.06, "elapsed_time": "1:12:28", "remaining_time": "8:48:23"} +{"current_steps": 826, "total_steps": 6840, "loss": 0.6573271751403809, "lr": 1.9728587280096815e-05, "epoch": 0.24155578300921188, "percentage": 12.08, "elapsed_time": "1:12:33", "remaining_time": "8:48:19"} +{"current_steps": 827, "total_steps": 6840, "loss": 0.8032153844833374, "lr": 1.9727467392601042e-05, "epoch": 0.24184822342447726, "percentage": 12.09, "elapsed_time": "1:12:37", "remaining_time": "8:48:05"} +{"current_steps": 828, "total_steps": 6840, "loss": 0.6781449913978577, "lr": 1.972634523136891e-05, "epoch": 0.24214066383974264, "percentage": 12.11, "elapsed_time": "1:12:44", "remaining_time": "8:48:08"} +{"current_steps": 829, "total_steps": 6840, "loss": 0.580757737159729, "lr": 1.972522079666272e-05, "epoch": 0.24243310425500805, "percentage": 12.12, "elapsed_time": "1:12:48", "remaining_time": "8:47:57"} +{"current_steps": 830, "total_steps": 6840, "loss": 0.626894474029541, "lr": 1.97240940887453e-05, "epoch": 0.24272554467027344, "percentage": 12.13, "elapsed_time": "1:12:52", "remaining_time": "8:47:42"} +{"current_steps": 831, "total_steps": 6840, "loss": 0.8188163042068481, "lr": 1.9722965107880005e-05, "epoch": 0.24301798508553882, "percentage": 12.15, "elapsed_time": "1:12:56", "remaining_time": "8:47:24"} +{"current_steps": 832, "total_steps": 6840, "loss": 0.6943579912185669, "lr": 1.9721833854330734e-05, "epoch": 0.2433104255008042, "percentage": 12.16, "elapsed_time": "1:13:01", "remaining_time": "8:47:22"} +{"current_steps": 833, "total_steps": 
6840, "loss": 0.6177504658699036, "lr": 1.972070032836191e-05, "epoch": 0.2436028659160696, "percentage": 12.18, "elapsed_time": "1:13:07", "remaining_time": "8:47:16"} +{"current_steps": 834, "total_steps": 6840, "loss": 0.683998703956604, "lr": 1.971956453023849e-05, "epoch": 0.243895306331335, "percentage": 12.19, "elapsed_time": "1:13:12", "remaining_time": "8:47:14"} +{"current_steps": 835, "total_steps": 6840, "loss": 0.77602219581604, "lr": 1.9718426460225952e-05, "epoch": 0.24418774674660038, "percentage": 12.21, "elapsed_time": "1:13:18", "remaining_time": "8:47:09"} +{"current_steps": 836, "total_steps": 6840, "loss": 0.4930742383003235, "lr": 1.971728611859032e-05, "epoch": 0.24448018716186576, "percentage": 12.22, "elapsed_time": "1:13:23", "remaining_time": "8:47:07"} +{"current_steps": 837, "total_steps": 6840, "loss": 0.634628415107727, "lr": 1.971614350559814e-05, "epoch": 0.24477262757713117, "percentage": 12.24, "elapsed_time": "1:13:30", "remaining_time": "8:47:14"} +{"current_steps": 838, "total_steps": 6840, "loss": 0.6439167857170105, "lr": 1.971499862151649e-05, "epoch": 0.24506506799239655, "percentage": 12.25, "elapsed_time": "1:13:37", "remaining_time": "8:47:16"} +{"current_steps": 839, "total_steps": 6840, "loss": 0.701258659362793, "lr": 1.9713851466612982e-05, "epoch": 0.24535750840766193, "percentage": 12.27, "elapsed_time": "1:13:43", "remaining_time": "8:47:19"} +{"current_steps": 840, "total_steps": 6840, "loss": 0.6488544344902039, "lr": 1.9712702041155753e-05, "epoch": 0.24564994882292732, "percentage": 12.28, "elapsed_time": "1:13:48", "remaining_time": "8:47:15"} +{"current_steps": 841, "total_steps": 6840, "loss": 0.6962910890579224, "lr": 1.9711550345413476e-05, "epoch": 0.24594238923819273, "percentage": 12.3, "elapsed_time": "1:13:55", "remaining_time": "8:47:19"} +{"current_steps": 842, "total_steps": 6840, "loss": 0.6617723703384399, "lr": 1.9710396379655355e-05, "epoch": 0.2462348296534581, "percentage": 12.31, 
"elapsed_time": "1:14:00", "remaining_time": "8:47:09"} +{"current_steps": 843, "total_steps": 6840, "loss": 0.7152801752090454, "lr": 1.970924014415112e-05, "epoch": 0.2465272700687235, "percentage": 12.32, "elapsed_time": "1:14:05", "remaining_time": "8:47:05"} +{"current_steps": 844, "total_steps": 6840, "loss": 0.6712393760681152, "lr": 1.9708081639171035e-05, "epoch": 0.24681971048398887, "percentage": 12.34, "elapsed_time": "1:14:11", "remaining_time": "8:47:03"} +{"current_steps": 845, "total_steps": 6840, "loss": 0.8413758277893066, "lr": 1.970692086498589e-05, "epoch": 0.24711215089925428, "percentage": 12.35, "elapsed_time": "1:14:16", "remaining_time": "8:46:54"} +{"current_steps": 846, "total_steps": 6840, "loss": 0.6460679769515991, "lr": 1.9705757821867015e-05, "epoch": 0.24740459131451967, "percentage": 12.37, "elapsed_time": "1:14:19", "remaining_time": "8:46:38"} +{"current_steps": 847, "total_steps": 6840, "loss": 0.759244441986084, "lr": 1.970459251008626e-05, "epoch": 0.24769703172978505, "percentage": 12.38, "elapsed_time": "1:14:24", "remaining_time": "8:46:28"} +{"current_steps": 848, "total_steps": 6840, "loss": 0.8148110508918762, "lr": 1.970342492991601e-05, "epoch": 0.24798947214505043, "percentage": 12.4, "elapsed_time": "1:14:29", "remaining_time": "8:46:20"} +{"current_steps": 849, "total_steps": 6840, "loss": 0.6620084047317505, "lr": 1.970225508162918e-05, "epoch": 0.24828191256031584, "percentage": 12.41, "elapsed_time": "1:14:35", "remaining_time": "8:46:22"} +{"current_steps": 850, "total_steps": 6840, "loss": 0.7090305089950562, "lr": 1.9701082965499217e-05, "epoch": 0.24857435297558123, "percentage": 12.43, "elapsed_time": "1:14:40", "remaining_time": "8:46:11"} +{"current_steps": 851, "total_steps": 6840, "loss": 0.6846730709075928, "lr": 1.9699908581800094e-05, "epoch": 0.2488667933908466, "percentage": 12.44, "elapsed_time": "1:14:46", "remaining_time": "8:46:11"} +{"current_steps": 852, "total_steps": 6840, "loss": 
0.5183212757110596, "lr": 1.9698731930806315e-05, "epoch": 0.24915923380611202, "percentage": 12.46, "elapsed_time": "1:14:51", "remaining_time": "8:46:03"} +{"current_steps": 853, "total_steps": 6840, "loss": 0.6913097500801086, "lr": 1.9697553012792915e-05, "epoch": 0.2494516742213774, "percentage": 12.47, "elapsed_time": "1:14:55", "remaining_time": "8:45:50"} +{"current_steps": 854, "total_steps": 6840, "loss": 0.7896280884742737, "lr": 1.9696371828035466e-05, "epoch": 0.24974411463664278, "percentage": 12.49, "elapsed_time": "1:14:59", "remaining_time": "8:45:38"} +{"current_steps": 855, "total_steps": 6840, "loss": 0.947577714920044, "lr": 1.9695188376810055e-05, "epoch": 0.2500365550519082, "percentage": 12.5, "elapsed_time": "1:15:05", "remaining_time": "8:45:35"} +{"current_steps": 856, "total_steps": 6840, "loss": 0.7772419452667236, "lr": 1.9694002659393306e-05, "epoch": 0.2503289954671736, "percentage": 12.51, "elapsed_time": "1:15:10", "remaining_time": "8:45:30"} +{"current_steps": 857, "total_steps": 6840, "loss": 0.6255912780761719, "lr": 1.9692814676062376e-05, "epoch": 0.25062143588243896, "percentage": 12.53, "elapsed_time": "1:15:16", "remaining_time": "8:45:28"} +{"current_steps": 858, "total_steps": 6840, "loss": 0.6572105884552002, "lr": 1.969162442709495e-05, "epoch": 0.25091387629770434, "percentage": 12.54, "elapsed_time": "1:15:21", "remaining_time": "8:45:20"} +{"current_steps": 859, "total_steps": 6840, "loss": 0.6387436389923096, "lr": 1.969043191276924e-05, "epoch": 0.2512063167129697, "percentage": 12.56, "elapsed_time": "1:15:26", "remaining_time": "8:45:13"} +{"current_steps": 860, "total_steps": 6840, "loss": 0.9180483222007751, "lr": 1.968923713336399e-05, "epoch": 0.2514987571282351, "percentage": 12.57, "elapsed_time": "1:15:32", "remaining_time": "8:45:13"} +{"current_steps": 861, "total_steps": 6840, "loss": 0.6830536127090454, "lr": 1.9688040089158473e-05, "epoch": 0.2517911975435005, "percentage": 12.59, "elapsed_time": 
"1:15:36", "remaining_time": "8:45:02"} +{"current_steps": 862, "total_steps": 6840, "loss": 0.9061588644981384, "lr": 1.9686840780432487e-05, "epoch": 0.2520836379587659, "percentage": 12.6, "elapsed_time": "1:15:40", "remaining_time": "8:44:47"} +{"current_steps": 863, "total_steps": 6840, "loss": 0.558010458946228, "lr": 1.9685639207466365e-05, "epoch": 0.2523760783740313, "percentage": 12.62, "elapsed_time": "1:15:43", "remaining_time": "8:44:30"} +{"current_steps": 864, "total_steps": 6840, "loss": 0.6788249611854553, "lr": 1.968443537054097e-05, "epoch": 0.2526685187892967, "percentage": 12.63, "elapsed_time": "1:15:49", "remaining_time": "8:44:28"} +{"current_steps": 865, "total_steps": 6840, "loss": 0.576469898223877, "lr": 1.968322926993769e-05, "epoch": 0.2529609592045621, "percentage": 12.65, "elapsed_time": "1:15:54", "remaining_time": "8:44:21"} +{"current_steps": 866, "total_steps": 6840, "loss": 0.6994123458862305, "lr": 1.9682020905938438e-05, "epoch": 0.25325339961982746, "percentage": 12.66, "elapsed_time": "1:15:59", "remaining_time": "8:44:15"} +{"current_steps": 867, "total_steps": 6840, "loss": 0.6929521560668945, "lr": 1.9680810278825672e-05, "epoch": 0.25354584003509284, "percentage": 12.68, "elapsed_time": "1:16:06", "remaining_time": "8:44:17"} +{"current_steps": 868, "total_steps": 6840, "loss": 0.7596743106842041, "lr": 1.9679597388882363e-05, "epoch": 0.2538382804503582, "percentage": 12.69, "elapsed_time": "1:16:11", "remaining_time": "8:44:10"} +{"current_steps": 869, "total_steps": 6840, "loss": 0.7925904989242554, "lr": 1.9678382236392013e-05, "epoch": 0.2541307208656236, "percentage": 12.7, "elapsed_time": "1:16:15", "remaining_time": "8:44:00"} +{"current_steps": 870, "total_steps": 6840, "loss": 0.722467839717865, "lr": 1.9677164821638666e-05, "epoch": 0.25442316128088904, "percentage": 12.72, "elapsed_time": "1:16:21", "remaining_time": "8:43:55"} +{"current_steps": 871, "total_steps": 6840, "loss": 0.7165451049804688, "lr": 
1.9675945144906882e-05, "epoch": 0.2547156016961544, "percentage": 12.73, "elapsed_time": "1:16:25", "remaining_time": "8:43:45"} +{"current_steps": 872, "total_steps": 6840, "loss": 0.5897061824798584, "lr": 1.9674723206481746e-05, "epoch": 0.2550080421114198, "percentage": 12.75, "elapsed_time": "1:16:30", "remaining_time": "8:43:38"} +{"current_steps": 873, "total_steps": 6840, "loss": 0.6634531021118164, "lr": 1.9673499006648885e-05, "epoch": 0.2553004825266852, "percentage": 12.76, "elapsed_time": "1:16:36", "remaining_time": "8:43:39"} +{"current_steps": 874, "total_steps": 6840, "loss": 0.7237584590911865, "lr": 1.9672272545694445e-05, "epoch": 0.2555929229419506, "percentage": 12.78, "elapsed_time": "1:16:41", "remaining_time": "8:43:27"} +{"current_steps": 875, "total_steps": 6840, "loss": 0.6382388472557068, "lr": 1.967104382390511e-05, "epoch": 0.25588536335721596, "percentage": 12.79, "elapsed_time": "1:16:46", "remaining_time": "8:43:20"} +{"current_steps": 876, "total_steps": 6840, "loss": 0.6788768768310547, "lr": 1.966981284156808e-05, "epoch": 0.25617780377248134, "percentage": 12.81, "elapsed_time": "1:16:52", "remaining_time": "8:43:22"} +{"current_steps": 877, "total_steps": 6840, "loss": 0.6347095966339111, "lr": 1.966857959897109e-05, "epoch": 0.2564702441877467, "percentage": 12.82, "elapsed_time": "1:16:56", "remaining_time": "8:43:05"} +{"current_steps": 878, "total_steps": 6840, "loss": 0.8896903991699219, "lr": 1.9667344096402406e-05, "epoch": 0.25676268460301216, "percentage": 12.84, "elapsed_time": "1:17:00", "remaining_time": "8:42:58"} +{"current_steps": 879, "total_steps": 6840, "loss": 0.71473228931427, "lr": 1.966610633415082e-05, "epoch": 0.25705512501827754, "percentage": 12.85, "elapsed_time": "1:17:04", "remaining_time": "8:42:44"} +{"current_steps": 880, "total_steps": 6840, "loss": 0.7311601638793945, "lr": 1.9664866312505646e-05, "epoch": 0.2573475654335429, "percentage": 12.87, "elapsed_time": "1:17:09", "remaining_time": 
"8:42:32"} +{"current_steps": 881, "total_steps": 6840, "loss": 0.6186199188232422, "lr": 1.9663624031756737e-05, "epoch": 0.2576400058488083, "percentage": 12.88, "elapsed_time": "1:17:14", "remaining_time": "8:42:25"} +{"current_steps": 882, "total_steps": 6840, "loss": 0.8059204816818237, "lr": 1.9662379492194467e-05, "epoch": 0.2579324462640737, "percentage": 12.89, "elapsed_time": "1:17:18", "remaining_time": "8:42:15"} +{"current_steps": 883, "total_steps": 6840, "loss": 0.6065236330032349, "lr": 1.9661132694109736e-05, "epoch": 0.25822488667933907, "percentage": 12.91, "elapsed_time": "1:17:22", "remaining_time": "8:42:02"} +{"current_steps": 884, "total_steps": 6840, "loss": 0.6491106152534485, "lr": 1.965988363779398e-05, "epoch": 0.25851732709460445, "percentage": 12.92, "elapsed_time": "1:17:28", "remaining_time": "8:41:59"} +{"current_steps": 885, "total_steps": 6840, "loss": 0.526267945766449, "lr": 1.9658632323539158e-05, "epoch": 0.25880976750986984, "percentage": 12.94, "elapsed_time": "1:17:33", "remaining_time": "8:41:55"} +{"current_steps": 886, "total_steps": 6840, "loss": 0.812760591506958, "lr": 1.9657378751637755e-05, "epoch": 0.2591022079251353, "percentage": 12.95, "elapsed_time": "1:17:38", "remaining_time": "8:41:45"} +{"current_steps": 887, "total_steps": 6840, "loss": 0.7957908511161804, "lr": 1.9656122922382786e-05, "epoch": 0.25939464834040066, "percentage": 12.97, "elapsed_time": "1:17:42", "remaining_time": "8:41:33"} +{"current_steps": 888, "total_steps": 6840, "loss": 0.7426323890686035, "lr": 1.9654864836067796e-05, "epoch": 0.25968708875566604, "percentage": 12.98, "elapsed_time": "1:17:46", "remaining_time": "8:41:19"} +{"current_steps": 889, "total_steps": 6840, "loss": 0.602961540222168, "lr": 1.9653604492986852e-05, "epoch": 0.2599795291709314, "percentage": 13.0, "elapsed_time": "1:17:51", "remaining_time": "8:41:13"} +{"current_steps": 890, "total_steps": 6840, "loss": 0.8706510066986084, "lr": 1.965234189343455e-05, 
"epoch": 0.2602719695861968, "percentage": 13.01, "elapsed_time": "1:17:57", "remaining_time": "8:41:08"} +{"current_steps": 891, "total_steps": 6840, "loss": 0.6245810985565186, "lr": 1.965107703770602e-05, "epoch": 0.2605644100014622, "percentage": 13.03, "elapsed_time": "1:18:02", "remaining_time": "8:41:04"} +{"current_steps": 892, "total_steps": 6840, "loss": 0.7455421686172485, "lr": 1.964980992609691e-05, "epoch": 0.26085685041672757, "percentage": 13.04, "elapsed_time": "1:18:09", "remaining_time": "8:41:08"} +{"current_steps": 893, "total_steps": 6840, "loss": 0.6917043328285217, "lr": 1.9648540558903404e-05, "epoch": 0.261149290831993, "percentage": 13.06, "elapsed_time": "1:18:14", "remaining_time": "8:41:05"} +{"current_steps": 894, "total_steps": 6840, "loss": 0.6488040685653687, "lr": 1.9647268936422204e-05, "epoch": 0.2614417312472584, "percentage": 13.07, "elapsed_time": "1:18:21", "remaining_time": "8:41:07"} +{"current_steps": 895, "total_steps": 6840, "loss": 0.7416148781776428, "lr": 1.964599505895055e-05, "epoch": 0.2617341716625238, "percentage": 13.08, "elapsed_time": "1:18:26", "remaining_time": "8:41:03"} +{"current_steps": 896, "total_steps": 6840, "loss": 0.7012773156166077, "lr": 1.9644718926786196e-05, "epoch": 0.26202661207778916, "percentage": 13.1, "elapsed_time": "1:18:32", "remaining_time": "8:41:02"} +{"current_steps": 897, "total_steps": 6840, "loss": 0.8644432425498962, "lr": 1.9643440540227438e-05, "epoch": 0.26231905249305454, "percentage": 13.11, "elapsed_time": "1:18:36", "remaining_time": "8:40:51"} +{"current_steps": 898, "total_steps": 6840, "loss": 0.614842414855957, "lr": 1.9642159899573084e-05, "epoch": 0.2626114929083199, "percentage": 13.13, "elapsed_time": "1:18:42", "remaining_time": "8:40:50"} +{"current_steps": 899, "total_steps": 6840, "loss": 0.7794508337974548, "lr": 1.964087700512248e-05, "epoch": 0.2629039333235853, "percentage": 13.14, "elapsed_time": "1:18:46", "remaining_time": "8:40:36"} 
+{"current_steps": 900, "total_steps": 6840, "loss": 0.49217259883880615, "lr": 1.9639591857175492e-05, "epoch": 0.2631963737388507, "percentage": 13.16, "elapsed_time": "1:18:52", "remaining_time": "8:40:32"} +{"current_steps": 901, "total_steps": 6840, "loss": 0.6319605708122253, "lr": 1.9638304456032516e-05, "epoch": 0.2634888141541161, "percentage": 13.17, "elapsed_time": "1:19:01", "remaining_time": "8:40:53"} +{"current_steps": 902, "total_steps": 6840, "loss": 0.6066744327545166, "lr": 1.9637014801994478e-05, "epoch": 0.2637812545693815, "percentage": 13.19, "elapsed_time": "1:19:06", "remaining_time": "8:40:47"} +{"current_steps": 903, "total_steps": 6840, "loss": 0.7529127597808838, "lr": 1.9635722895362824e-05, "epoch": 0.2640736949846469, "percentage": 13.2, "elapsed_time": "1:19:11", "remaining_time": "8:40:38"} +{"current_steps": 904, "total_steps": 6840, "loss": 0.6026389598846436, "lr": 1.9634428736439524e-05, "epoch": 0.26436613539991227, "percentage": 13.22, "elapsed_time": "1:19:17", "remaining_time": "8:40:38"} +{"current_steps": 905, "total_steps": 6840, "loss": 0.6227229237556458, "lr": 1.9633132325527092e-05, "epoch": 0.26465857581517765, "percentage": 13.23, "elapsed_time": "1:19:23", "remaining_time": "8:40:39"} +{"current_steps": 906, "total_steps": 6840, "loss": 0.5959285497665405, "lr": 1.9631833662928548e-05, "epoch": 0.26495101623044304, "percentage": 13.25, "elapsed_time": "1:19:29", "remaining_time": "8:40:36"} +{"current_steps": 907, "total_steps": 6840, "loss": 0.8104684352874756, "lr": 1.9630532748947445e-05, "epoch": 0.2652434566457084, "percentage": 13.26, "elapsed_time": "1:19:32", "remaining_time": "8:40:20"} +{"current_steps": 908, "total_steps": 6840, "loss": 0.6722325682640076, "lr": 1.962922958388787e-05, "epoch": 0.2655358970609738, "percentage": 13.27, "elapsed_time": "1:19:37", "remaining_time": "8:40:09"} +{"current_steps": 909, "total_steps": 6840, "loss": 0.5996029376983643, "lr": 1.962792416805442e-05, "epoch": 
0.26582833747623924, "percentage": 13.29, "elapsed_time": "1:19:41", "remaining_time": "8:39:59"} +{"current_steps": 910, "total_steps": 6840, "loss": 0.7214776873588562, "lr": 1.962661650175224e-05, "epoch": 0.2661207778915046, "percentage": 13.3, "elapsed_time": "1:19:46", "remaining_time": "8:39:54"} +{"current_steps": 911, "total_steps": 6840, "loss": 0.6833420991897583, "lr": 1.9625306585286986e-05, "epoch": 0.26641321830677, "percentage": 13.32, "elapsed_time": "1:19:52", "remaining_time": "8:39:48"} +{"current_steps": 912, "total_steps": 6840, "loss": 0.5571368336677551, "lr": 1.9623994418964834e-05, "epoch": 0.2667056587220354, "percentage": 13.33, "elapsed_time": "1:19:58", "remaining_time": "8:39:47"} +{"current_steps": 913, "total_steps": 6840, "loss": 0.6748533248901367, "lr": 1.9622680003092503e-05, "epoch": 0.26699809913730077, "percentage": 13.35, "elapsed_time": "1:20:01", "remaining_time": "8:39:33"} +{"current_steps": 914, "total_steps": 6840, "loss": 0.6681679487228394, "lr": 1.9621363337977232e-05, "epoch": 0.26729053955256615, "percentage": 13.36, "elapsed_time": "1:20:07", "remaining_time": "8:39:27"} +{"current_steps": 915, "total_steps": 6840, "loss": 0.6839786767959595, "lr": 1.9620044423926775e-05, "epoch": 0.26758297996783154, "percentage": 13.38, "elapsed_time": "1:20:11", "remaining_time": "8:39:14"} +{"current_steps": 916, "total_steps": 6840, "loss": 0.7481753826141357, "lr": 1.961872326124943e-05, "epoch": 0.2678754203830969, "percentage": 13.39, "elapsed_time": "1:20:15", "remaining_time": "8:39:05"} +{"current_steps": 917, "total_steps": 6840, "loss": 0.6044093370437622, "lr": 1.9617399850254e-05, "epoch": 0.26816786079836236, "percentage": 13.41, "elapsed_time": "1:20:20", "remaining_time": "8:38:55"} +{"current_steps": 918, "total_steps": 6840, "loss": 0.6399786472320557, "lr": 1.9616074191249833e-05, "epoch": 0.26846030121362774, "percentage": 13.42, "elapsed_time": "1:20:26", "remaining_time": "8:38:55"} +{"current_steps": 919, 
"total_steps": 6840, "loss": 0.6769053339958191, "lr": 1.961474628454679e-05, "epoch": 0.2687527416288931, "percentage": 13.44, "elapsed_time": "1:20:32", "remaining_time": "8:38:55"} +{"current_steps": 920, "total_steps": 6840, "loss": 0.7508189678192139, "lr": 1.961341613045526e-05, "epoch": 0.2690451820441585, "percentage": 13.45, "elapsed_time": "1:20:37", "remaining_time": "8:38:48"} +{"current_steps": 921, "total_steps": 6840, "loss": 0.728675365447998, "lr": 1.9612083729286164e-05, "epoch": 0.2693376224594239, "percentage": 13.46, "elapsed_time": "1:20:43", "remaining_time": "8:38:45"} +{"current_steps": 922, "total_steps": 6840, "loss": 0.6886277794837952, "lr": 1.9610749081350934e-05, "epoch": 0.26963006287468927, "percentage": 13.48, "elapsed_time": "1:20:48", "remaining_time": "8:38:39"} +{"current_steps": 923, "total_steps": 6840, "loss": 0.6756877899169922, "lr": 1.9609412186961542e-05, "epoch": 0.26992250328995465, "percentage": 13.49, "elapsed_time": "1:20:54", "remaining_time": "8:38:41"} +{"current_steps": 924, "total_steps": 6840, "loss": 0.6761744022369385, "lr": 1.960807304643048e-05, "epoch": 0.27021494370522003, "percentage": 13.51, "elapsed_time": "1:20:59", "remaining_time": "8:38:31"} +{"current_steps": 925, "total_steps": 6840, "loss": 0.6475736498832703, "lr": 1.9606731660070758e-05, "epoch": 0.2705073841204855, "percentage": 13.52, "elapsed_time": "1:21:03", "remaining_time": "8:38:20"} +{"current_steps": 926, "total_steps": 6840, "loss": 0.6169984936714172, "lr": 1.9605388028195922e-05, "epoch": 0.27079982453575085, "percentage": 13.54, "elapsed_time": "1:21:08", "remaining_time": "8:38:14"} +{"current_steps": 927, "total_steps": 6840, "loss": 0.6411685943603516, "lr": 1.9604042151120035e-05, "epoch": 0.27109226495101624, "percentage": 13.55, "elapsed_time": "1:21:13", "remaining_time": "8:38:08"} +{"current_steps": 928, "total_steps": 6840, "loss": 0.6802625060081482, "lr": 1.960269402915769e-05, "epoch": 0.2713847053662816, 
"percentage": 13.57, "elapsed_time": "1:21:19", "remaining_time": "8:38:04"} +{"current_steps": 929, "total_steps": 6840, "loss": 0.6321320533752441, "lr": 1.9601343662624e-05, "epoch": 0.271677145781547, "percentage": 13.58, "elapsed_time": "1:21:25", "remaining_time": "8:38:05"} +{"current_steps": 930, "total_steps": 6840, "loss": 0.6242578029632568, "lr": 1.959999105183461e-05, "epoch": 0.2719695861968124, "percentage": 13.6, "elapsed_time": "1:21:32", "remaining_time": "8:38:08"} +{"current_steps": 931, "total_steps": 6840, "loss": 0.8106271624565125, "lr": 1.9598636197105672e-05, "epoch": 0.27226202661207777, "percentage": 13.61, "elapsed_time": "1:21:37", "remaining_time": "8:38:06"} +{"current_steps": 932, "total_steps": 6840, "loss": 0.6810879707336426, "lr": 1.9597279098753893e-05, "epoch": 0.2725544670273432, "percentage": 13.63, "elapsed_time": "1:21:43", "remaining_time": "8:38:05"} +{"current_steps": 933, "total_steps": 6840, "loss": 0.6121781468391418, "lr": 1.959591975709647e-05, "epoch": 0.2728469074426086, "percentage": 13.64, "elapsed_time": "1:21:49", "remaining_time": "8:38:03"} +{"current_steps": 934, "total_steps": 6840, "loss": 0.7347930669784546, "lr": 1.9594558172451153e-05, "epoch": 0.27313934785787397, "percentage": 13.65, "elapsed_time": "1:21:54", "remaining_time": "8:37:58"} +{"current_steps": 935, "total_steps": 6840, "loss": 0.8280940651893616, "lr": 1.9593194345136196e-05, "epoch": 0.27343178827313935, "percentage": 13.67, "elapsed_time": "1:21:59", "remaining_time": "8:37:47"} +{"current_steps": 936, "total_steps": 6840, "loss": 0.8171218633651733, "lr": 1.959182827547039e-05, "epoch": 0.27372422868840474, "percentage": 13.68, "elapsed_time": "1:22:04", "remaining_time": "8:37:39"} +{"current_steps": 937, "total_steps": 6840, "loss": 0.7350337505340576, "lr": 1.9590459963773043e-05, "epoch": 0.2740166691036701, "percentage": 13.7, "elapsed_time": "1:22:09", "remaining_time": "8:37:37"} +{"current_steps": 938, "total_steps": 6840, 
"loss": 0.5648026466369629, "lr": 1.9589089410363992e-05, "epoch": 0.2743091095189355, "percentage": 13.71, "elapsed_time": "1:22:15", "remaining_time": "8:37:31"} +{"current_steps": 939, "total_steps": 6840, "loss": 0.630626916885376, "lr": 1.9587716615563592e-05, "epoch": 0.2746015499342009, "percentage": 13.73, "elapsed_time": "1:22:20", "remaining_time": "8:37:25"} +{"current_steps": 940, "total_steps": 6840, "loss": 0.658649206161499, "lr": 1.9586341579692728e-05, "epoch": 0.2748939903494663, "percentage": 13.74, "elapsed_time": "1:22:25", "remaining_time": "8:37:20"} +{"current_steps": 941, "total_steps": 6840, "loss": 0.6938339471817017, "lr": 1.9584964303072804e-05, "epoch": 0.2751864307647317, "percentage": 13.76, "elapsed_time": "1:22:30", "remaining_time": "8:37:14"} +{"current_steps": 942, "total_steps": 6840, "loss": 0.7124238014221191, "lr": 1.9583584786025755e-05, "epoch": 0.2754788711799971, "percentage": 13.77, "elapsed_time": "1:22:36", "remaining_time": "8:37:11"} +{"current_steps": 943, "total_steps": 6840, "loss": 0.5879669189453125, "lr": 1.9582203028874027e-05, "epoch": 0.27577131159526247, "percentage": 13.79, "elapsed_time": "1:22:40", "remaining_time": "8:37:03"} +{"current_steps": 944, "total_steps": 6840, "loss": 0.6169895529747009, "lr": 1.9580819031940605e-05, "epoch": 0.27606375201052785, "percentage": 13.8, "elapsed_time": "1:22:46", "remaining_time": "8:36:57"} +{"current_steps": 945, "total_steps": 6840, "loss": 0.6367429494857788, "lr": 1.9579432795548986e-05, "epoch": 0.27635619242579323, "percentage": 13.82, "elapsed_time": "1:22:50", "remaining_time": "8:36:47"} +{"current_steps": 946, "total_steps": 6840, "loss": 0.6198331117630005, "lr": 1.9578044320023195e-05, "epoch": 0.2766486328410586, "percentage": 13.83, "elapsed_time": "1:22:56", "remaining_time": "8:36:46"} +{"current_steps": 947, "total_steps": 6840, "loss": 0.6731230616569519, "lr": 1.9576653605687782e-05, "epoch": 0.276941073256324, "percentage": 13.85, 
"elapsed_time": "1:23:01", "remaining_time": "8:36:39"} +{"current_steps": 948, "total_steps": 6840, "loss": 0.7185516953468323, "lr": 1.957526065286781e-05, "epoch": 0.27723351367158944, "percentage": 13.86, "elapsed_time": "1:23:05", "remaining_time": "8:36:28"} +{"current_steps": 949, "total_steps": 6840, "loss": 0.7362357378005981, "lr": 1.9573865461888882e-05, "epoch": 0.2775259540868548, "percentage": 13.87, "elapsed_time": "1:23:10", "remaining_time": "8:36:21"} +{"current_steps": 950, "total_steps": 6840, "loss": 0.7051525712013245, "lr": 1.9572468033077113e-05, "epoch": 0.2778183945021202, "percentage": 13.89, "elapsed_time": "1:23:16", "remaining_time": "8:36:18"} +{"current_steps": 951, "total_steps": 6840, "loss": 0.6267420053482056, "lr": 1.9571068366759143e-05, "epoch": 0.2781108349173856, "percentage": 13.9, "elapsed_time": "1:23:21", "remaining_time": "8:36:09"} +{"current_steps": 952, "total_steps": 6840, "loss": 0.649080753326416, "lr": 1.9569666463262136e-05, "epoch": 0.27840327533265097, "percentage": 13.92, "elapsed_time": "1:23:25", "remaining_time": "8:35:59"} +{"current_steps": 953, "total_steps": 6840, "loss": 0.5700061321258545, "lr": 1.9568262322913777e-05, "epoch": 0.27869571574791635, "percentage": 13.93, "elapsed_time": "1:23:31", "remaining_time": "8:35:55"} +{"current_steps": 954, "total_steps": 6840, "loss": 0.6121870875358582, "lr": 1.9566855946042274e-05, "epoch": 0.27898815616318173, "percentage": 13.95, "elapsed_time": "1:23:36", "remaining_time": "8:35:49"} +{"current_steps": 955, "total_steps": 6840, "loss": 0.8294541239738464, "lr": 1.9565447332976362e-05, "epoch": 0.2792805965784471, "percentage": 13.96, "elapsed_time": "1:23:41", "remaining_time": "8:35:41"} +{"current_steps": 956, "total_steps": 6840, "loss": 0.6979323625564575, "lr": 1.9564036484045295e-05, "epoch": 0.27957303699371255, "percentage": 13.98, "elapsed_time": "1:23:46", "remaining_time": "8:35:35"} +{"current_steps": 957, "total_steps": 6840, "loss": 
0.6847009658813477, "lr": 1.9562623399578853e-05, "epoch": 0.27986547740897794, "percentage": 13.99, "elapsed_time": "1:23:51", "remaining_time": "8:35:29"} +{"current_steps": 958, "total_steps": 6840, "loss": 0.6821733713150024, "lr": 1.956120807990733e-05, "epoch": 0.2801579178242433, "percentage": 14.01, "elapsed_time": "1:23:55", "remaining_time": "8:35:19"} +{"current_steps": 959, "total_steps": 6840, "loss": 0.6943963766098022, "lr": 1.955979052536155e-05, "epoch": 0.2804503582395087, "percentage": 14.02, "elapsed_time": "1:24:01", "remaining_time": "8:35:15"} +{"current_steps": 960, "total_steps": 6840, "loss": 0.5841893553733826, "lr": 1.955837073627286e-05, "epoch": 0.2807427986547741, "percentage": 14.04, "elapsed_time": "1:24:05", "remaining_time": "8:35:00"} +{"current_steps": 961, "total_steps": 6840, "loss": 0.7196778059005737, "lr": 1.955694871297313e-05, "epoch": 0.28103523907003947, "percentage": 14.05, "elapsed_time": "1:24:09", "remaining_time": "8:34:49"} +{"current_steps": 962, "total_steps": 6840, "loss": 0.697501540184021, "lr": 1.9555524455794743e-05, "epoch": 0.28132767948530485, "percentage": 14.06, "elapsed_time": "1:24:13", "remaining_time": "8:34:39"} +{"current_steps": 963, "total_steps": 6840, "loss": 0.7265810966491699, "lr": 1.9554097965070612e-05, "epoch": 0.28162011990057023, "percentage": 14.08, "elapsed_time": "1:24:20", "remaining_time": "8:34:41"} +{"current_steps": 964, "total_steps": 6840, "loss": 0.5766021013259888, "lr": 1.955266924113417e-05, "epoch": 0.28191256031583567, "percentage": 14.09, "elapsed_time": "1:24:25", "remaining_time": "8:34:33"} +{"current_steps": 965, "total_steps": 6840, "loss": 0.6885402202606201, "lr": 1.955123828431938e-05, "epoch": 0.28220500073110105, "percentage": 14.11, "elapsed_time": "1:24:30", "remaining_time": "8:34:28"} +{"current_steps": 966, "total_steps": 6840, "loss": 0.719329297542572, "lr": 1.954980509496071e-05, "epoch": 0.28249744114636643, "percentage": 14.12, "elapsed_time": 
"1:24:34", "remaining_time": "8:34:17"} +{"current_steps": 967, "total_steps": 6840, "loss": 0.5621368885040283, "lr": 1.954836967339316e-05, "epoch": 0.2827898815616318, "percentage": 14.14, "elapsed_time": "1:24:40", "remaining_time": "8:34:14"} +{"current_steps": 968, "total_steps": 6840, "loss": 0.6323715448379517, "lr": 1.954693201995226e-05, "epoch": 0.2830823219768972, "percentage": 14.15, "elapsed_time": "1:24:45", "remaining_time": "8:34:10"} +{"current_steps": 969, "total_steps": 6840, "loss": 0.6265028119087219, "lr": 1.954549213497404e-05, "epoch": 0.2833747623921626, "percentage": 14.17, "elapsed_time": "1:24:50", "remaining_time": "8:34:04"} +{"current_steps": 970, "total_steps": 6840, "loss": 0.6234713792800903, "lr": 1.9544050018795076e-05, "epoch": 0.28366720280742797, "percentage": 14.18, "elapsed_time": "1:24:56", "remaining_time": "8:34:00"} +{"current_steps": 971, "total_steps": 6840, "loss": 0.6505804657936096, "lr": 1.9542605671752447e-05, "epoch": 0.2839596432226934, "percentage": 14.2, "elapsed_time": "1:25:02", "remaining_time": "8:34:00"} +{"current_steps": 972, "total_steps": 6840, "loss": 0.7756558656692505, "lr": 1.954115909418376e-05, "epoch": 0.2842520836379588, "percentage": 14.21, "elapsed_time": "1:25:07", "remaining_time": "8:33:56"} +{"current_steps": 973, "total_steps": 6840, "loss": 0.767257034778595, "lr": 1.953971028642715e-05, "epoch": 0.28454452405322417, "percentage": 14.23, "elapsed_time": "1:25:14", "remaining_time": "8:33:56"} +{"current_steps": 974, "total_steps": 6840, "loss": 0.6702018976211548, "lr": 1.9538259248821265e-05, "epoch": 0.28483696446848955, "percentage": 14.24, "elapsed_time": "1:25:18", "remaining_time": "8:33:48"} +{"current_steps": 975, "total_steps": 6840, "loss": 0.7072827816009521, "lr": 1.953680598170527e-05, "epoch": 0.28512940488375493, "percentage": 14.25, "elapsed_time": "1:25:24", "remaining_time": "8:33:48"} +{"current_steps": 976, "total_steps": 6840, "loss": 0.6343571543693542, "lr": 
1.953535048541886e-05, "epoch": 0.2854218452990203, "percentage": 14.27, "elapsed_time": "1:25:29", "remaining_time": "8:33:40"} +{"current_steps": 977, "total_steps": 6840, "loss": 0.6361520290374756, "lr": 1.953389276030225e-05, "epoch": 0.2857142857142857, "percentage": 14.28, "elapsed_time": "1:25:35", "remaining_time": "8:33:38"} +{"current_steps": 978, "total_steps": 6840, "loss": 0.6757364273071289, "lr": 1.9532432806696178e-05, "epoch": 0.2860067261295511, "percentage": 14.3, "elapsed_time": "1:25:41", "remaining_time": "8:33:38"} +{"current_steps": 979, "total_steps": 6840, "loss": 0.6311759948730469, "lr": 1.9530970624941896e-05, "epoch": 0.2862991665448165, "percentage": 14.31, "elapsed_time": "1:25:47", "remaining_time": "8:33:38"} +{"current_steps": 980, "total_steps": 6840, "loss": 0.6207036972045898, "lr": 1.9529506215381176e-05, "epoch": 0.2865916069600819, "percentage": 14.33, "elapsed_time": "1:25:52", "remaining_time": "8:33:28"} +{"current_steps": 981, "total_steps": 6840, "loss": 0.5154495239257812, "lr": 1.952803957835632e-05, "epoch": 0.2868840473753473, "percentage": 14.34, "elapsed_time": "1:25:57", "remaining_time": "8:33:20"} +{"current_steps": 982, "total_steps": 6840, "loss": 0.797666072845459, "lr": 1.9526570714210146e-05, "epoch": 0.28717648779061267, "percentage": 14.36, "elapsed_time": "1:26:02", "remaining_time": "8:33:14"} +{"current_steps": 983, "total_steps": 6840, "loss": 0.659400224685669, "lr": 1.9525099623285983e-05, "epoch": 0.28746892820587805, "percentage": 14.37, "elapsed_time": "1:26:06", "remaining_time": "8:33:04"} +{"current_steps": 984, "total_steps": 6840, "loss": 0.7638698816299438, "lr": 1.9523626305927706e-05, "epoch": 0.28776136862114343, "percentage": 14.39, "elapsed_time": "1:26:11", "remaining_time": "8:32:54"} +{"current_steps": 985, "total_steps": 6840, "loss": 0.6656497120857239, "lr": 1.952215076247968e-05, "epoch": 0.2880538090364088, "percentage": 14.4, "elapsed_time": "1:26:15", "remaining_time": 
"8:32:45"} +{"current_steps": 986, "total_steps": 6840, "loss": 0.7701614499092102, "lr": 1.9520672993286807e-05, "epoch": 0.2883462494516742, "percentage": 14.42, "elapsed_time": "1:26:20", "remaining_time": "8:32:40"} +{"current_steps": 987, "total_steps": 6840, "loss": 0.6710221767425537, "lr": 1.951919299869451e-05, "epoch": 0.28863868986693964, "percentage": 14.43, "elapsed_time": "1:26:24", "remaining_time": "8:32:27"} +{"current_steps": 988, "total_steps": 6840, "loss": 0.6307191848754883, "lr": 1.951771077904873e-05, "epoch": 0.288931130282205, "percentage": 14.44, "elapsed_time": "1:26:28", "remaining_time": "8:32:13"} +{"current_steps": 989, "total_steps": 6840, "loss": 0.8226636648178101, "lr": 1.951622633469592e-05, "epoch": 0.2892235706974704, "percentage": 14.46, "elapsed_time": "1:26:34", "remaining_time": "8:32:13"} +{"current_steps": 990, "total_steps": 6840, "loss": 0.6286089420318604, "lr": 1.9514739665983065e-05, "epoch": 0.2895160111127358, "percentage": 14.47, "elapsed_time": "1:26:39", "remaining_time": "8:32:06"} +{"current_steps": 991, "total_steps": 6840, "loss": 0.8167316317558289, "lr": 1.9513250773257667e-05, "epoch": 0.28980845152800117, "percentage": 14.49, "elapsed_time": "1:26:44", "remaining_time": "8:31:57"} +{"current_steps": 992, "total_steps": 6840, "loss": 0.6840806603431702, "lr": 1.9511759656867738e-05, "epoch": 0.29010089194326655, "percentage": 14.5, "elapsed_time": "1:26:50", "remaining_time": "8:31:55"} +{"current_steps": 993, "total_steps": 6840, "loss": 0.5731699466705322, "lr": 1.9510266317161823e-05, "epoch": 0.29039333235853193, "percentage": 14.52, "elapsed_time": "1:26:56", "remaining_time": "8:31:56"} +{"current_steps": 994, "total_steps": 6840, "loss": 0.696578860282898, "lr": 1.950877075448898e-05, "epoch": 0.2906857727737973, "percentage": 14.53, "elapsed_time": "1:27:01", "remaining_time": "8:31:50"} +{"current_steps": 995, "total_steps": 6840, "loss": 0.7194398641586304, "lr": 1.9507272969198787e-05, 
"epoch": 0.29097821318906275, "percentage": 14.55, "elapsed_time": "1:27:07", "remaining_time": "8:31:48"} +{"current_steps": 996, "total_steps": 6840, "loss": 0.7041016817092896, "lr": 1.9505772961641342e-05, "epoch": 0.29127065360432813, "percentage": 14.56, "elapsed_time": "1:27:12", "remaining_time": "8:31:41"} +{"current_steps": 997, "total_steps": 6840, "loss": 0.7073841691017151, "lr": 1.9504270732167267e-05, "epoch": 0.2915630940195935, "percentage": 14.58, "elapsed_time": "1:27:17", "remaining_time": "8:31:33"} +{"current_steps": 998, "total_steps": 6840, "loss": 0.5097789764404297, "lr": 1.9502766281127693e-05, "epoch": 0.2918555344348589, "percentage": 14.59, "elapsed_time": "1:27:23", "remaining_time": "8:31:34"} +{"current_steps": 999, "total_steps": 6840, "loss": 0.6522337198257446, "lr": 1.9501259608874276e-05, "epoch": 0.2921479748501243, "percentage": 14.61, "elapsed_time": "1:27:29", "remaining_time": "8:31:31"} +{"current_steps": 1000, "total_steps": 6840, "loss": 0.8276036381721497, "lr": 1.9499750715759197e-05, "epoch": 0.29244041526538966, "percentage": 14.62, "elapsed_time": "1:27:35", "remaining_time": "8:31:32"} +{"current_steps": 1001, "total_steps": 6840, "loss": 0.7701225876808167, "lr": 1.9498239602135145e-05, "epoch": 0.29273285568065505, "percentage": 14.63, "elapsed_time": "1:27:44", "remaining_time": "8:31:50"} +{"current_steps": 1002, "total_steps": 6840, "loss": 0.6112316846847534, "lr": 1.949672626835534e-05, "epoch": 0.29302529609592043, "percentage": 14.65, "elapsed_time": "1:27:49", "remaining_time": "8:31:41"} +{"current_steps": 1003, "total_steps": 6840, "loss": 0.7196093201637268, "lr": 1.9495210714773506e-05, "epoch": 0.29331773651118587, "percentage": 14.66, "elapsed_time": "1:27:54", "remaining_time": "8:31:36"} +{"current_steps": 1004, "total_steps": 6840, "loss": 0.708210825920105, "lr": 1.9493692941743903e-05, "epoch": 0.29361017692645125, "percentage": 14.68, "elapsed_time": "1:28:00", "remaining_time": "8:31:36"} 
+{"current_steps": 1005, "total_steps": 6840, "loss": 0.6156430244445801, "lr": 1.9492172949621298e-05, "epoch": 0.29390261734171663, "percentage": 14.69, "elapsed_time": "1:28:05", "remaining_time": "8:31:29"} +{"current_steps": 1006, "total_steps": 6840, "loss": 0.6125216484069824, "lr": 1.9490650738760977e-05, "epoch": 0.294195057756982, "percentage": 14.71, "elapsed_time": "1:28:11", "remaining_time": "8:31:26"} +{"current_steps": 1007, "total_steps": 6840, "loss": 0.5691695213317871, "lr": 1.9489126309518752e-05, "epoch": 0.2944874981722474, "percentage": 14.72, "elapsed_time": "1:28:17", "remaining_time": "8:31:25"} +{"current_steps": 1008, "total_steps": 6840, "loss": 0.6733062267303467, "lr": 1.9487599662250945e-05, "epoch": 0.2947799385875128, "percentage": 14.74, "elapsed_time": "1:28:23", "remaining_time": "8:31:24"} +{"current_steps": 1009, "total_steps": 6840, "loss": 0.6069025993347168, "lr": 1.94860707973144e-05, "epoch": 0.29507237900277816, "percentage": 14.75, "elapsed_time": "1:28:27", "remaining_time": "8:31:13"} +{"current_steps": 1010, "total_steps": 6840, "loss": 0.6191028356552124, "lr": 1.9484539715066488e-05, "epoch": 0.2953648194180436, "percentage": 14.77, "elapsed_time": "1:28:32", "remaining_time": "8:31:07"} +{"current_steps": 1011, "total_steps": 6840, "loss": 0.7423045635223389, "lr": 1.9483006415865082e-05, "epoch": 0.295657259833309, "percentage": 14.78, "elapsed_time": "1:28:37", "remaining_time": "8:30:59"} +{"current_steps": 1012, "total_steps": 6840, "loss": 0.854878306388855, "lr": 1.9481470900068585e-05, "epoch": 0.29594970024857437, "percentage": 14.8, "elapsed_time": "1:28:42", "remaining_time": "8:30:50"} +{"current_steps": 1013, "total_steps": 6840, "loss": 0.6950500011444092, "lr": 1.9479933168035914e-05, "epoch": 0.29624214066383975, "percentage": 14.81, "elapsed_time": "1:28:46", "remaining_time": "8:30:41"} +{"current_steps": 1014, "total_steps": 6840, "loss": 0.6944484710693359, "lr": 1.9478393220126503e-05, 
"epoch": 0.29653458107910513, "percentage": 14.82, "elapsed_time": "1:28:52", "remaining_time": "8:30:39"} +{"current_steps": 1015, "total_steps": 6840, "loss": 0.7120212316513062, "lr": 1.9476851056700303e-05, "epoch": 0.2968270214943705, "percentage": 14.84, "elapsed_time": "1:28:57", "remaining_time": "8:30:32"} +{"current_steps": 1016, "total_steps": 6840, "loss": 0.6271052956581116, "lr": 1.9475306678117792e-05, "epoch": 0.2971194619096359, "percentage": 14.85, "elapsed_time": "1:29:03", "remaining_time": "8:30:28"} +{"current_steps": 1017, "total_steps": 6840, "loss": 0.6398453712463379, "lr": 1.9473760084739958e-05, "epoch": 0.2974119023249013, "percentage": 14.87, "elapsed_time": "1:29:08", "remaining_time": "8:30:23"} +{"current_steps": 1018, "total_steps": 6840, "loss": 0.5563585758209229, "lr": 1.94722112769283e-05, "epoch": 0.2977043427401667, "percentage": 14.88, "elapsed_time": "1:29:13", "remaining_time": "8:30:15"} +{"current_steps": 1019, "total_steps": 6840, "loss": 0.7895959615707397, "lr": 1.947066025504485e-05, "epoch": 0.2979967831554321, "percentage": 14.9, "elapsed_time": "1:29:18", "remaining_time": "8:30:08"} +{"current_steps": 1020, "total_steps": 6840, "loss": 0.6304349303245544, "lr": 1.9469107019452148e-05, "epoch": 0.2982892235706975, "percentage": 14.91, "elapsed_time": "1:29:24", "remaining_time": "8:30:08"} +{"current_steps": 1021, "total_steps": 6840, "loss": 0.6915549039840698, "lr": 1.9467551570513257e-05, "epoch": 0.29858166398596286, "percentage": 14.93, "elapsed_time": "1:29:29", "remaining_time": "8:30:05"} +{"current_steps": 1022, "total_steps": 6840, "loss": 0.6257511377334595, "lr": 1.9465993908591748e-05, "epoch": 0.29887410440122825, "percentage": 14.94, "elapsed_time": "1:29:35", "remaining_time": "8:30:00"} +{"current_steps": 1023, "total_steps": 6840, "loss": 0.6409085988998413, "lr": 1.9464434034051716e-05, "epoch": 0.29916654481649363, "percentage": 14.96, "elapsed_time": "1:29:41", "remaining_time": "8:30:01"} 
+{"current_steps": 1024, "total_steps": 6840, "loss": 0.7281351089477539, "lr": 1.9462871947257772e-05, "epoch": 0.299458985231759, "percentage": 14.97, "elapsed_time": "1:29:47", "remaining_time": "8:30:00"} +{"current_steps": 1025, "total_steps": 6840, "loss": 0.8016781806945801, "lr": 1.9461307648575047e-05, "epoch": 0.2997514256470244, "percentage": 14.99, "elapsed_time": "1:29:52", "remaining_time": "8:29:51"} +{"current_steps": 1026, "total_steps": 6840, "loss": 0.5883209705352783, "lr": 1.9459741138369186e-05, "epoch": 0.30004386606228983, "percentage": 15.0, "elapsed_time": "1:29:57", "remaining_time": "8:29:45"} +{"current_steps": 1027, "total_steps": 6840, "loss": 0.6414197683334351, "lr": 1.9458172417006347e-05, "epoch": 0.3003363064775552, "percentage": 15.01, "elapsed_time": "1:30:01", "remaining_time": "8:29:34"} +{"current_steps": 1028, "total_steps": 6840, "loss": 0.7076515555381775, "lr": 1.9456601484853218e-05, "epoch": 0.3006287468928206, "percentage": 15.03, "elapsed_time": "1:30:06", "remaining_time": "8:29:27"} +{"current_steps": 1029, "total_steps": 6840, "loss": 0.8102637529373169, "lr": 1.9455028342276984e-05, "epoch": 0.300921187308086, "percentage": 15.04, "elapsed_time": "1:30:10", "remaining_time": "8:29:15"} +{"current_steps": 1030, "total_steps": 6840, "loss": 0.6954574584960938, "lr": 1.9453452989645362e-05, "epoch": 0.30121362772335136, "percentage": 15.06, "elapsed_time": "1:30:16", "remaining_time": "8:29:13"} +{"current_steps": 1031, "total_steps": 6840, "loss": 0.6647125482559204, "lr": 1.9451875427326585e-05, "epoch": 0.30150606813861675, "percentage": 15.07, "elapsed_time": "1:30:21", "remaining_time": "8:29:05"} +{"current_steps": 1032, "total_steps": 6840, "loss": 0.5501933097839355, "lr": 1.9450295655689392e-05, "epoch": 0.3017985085538821, "percentage": 15.09, "elapsed_time": "1:30:25", "remaining_time": "8:28:56"} +{"current_steps": 1033, "total_steps": 6840, "loss": 0.6561415195465088, "lr": 1.944871367510305e-05, 
"epoch": 0.3020909489691475, "percentage": 15.1, "elapsed_time": "1:30:31", "remaining_time": "8:28:55"} +{"current_steps": 1034, "total_steps": 6840, "loss": 0.6768229007720947, "lr": 1.9447129485937335e-05, "epoch": 0.30238338938441295, "percentage": 15.12, "elapsed_time": "1:30:36", "remaining_time": "8:28:48"} +{"current_steps": 1035, "total_steps": 6840, "loss": 0.5693868398666382, "lr": 1.9445543088562543e-05, "epoch": 0.30267582979967833, "percentage": 15.13, "elapsed_time": "1:30:41", "remaining_time": "8:28:39"} +{"current_steps": 1036, "total_steps": 6840, "loss": 0.6165708303451538, "lr": 1.9443954483349485e-05, "epoch": 0.3029682702149437, "percentage": 15.15, "elapsed_time": "1:30:47", "remaining_time": "8:28:37"} +{"current_steps": 1037, "total_steps": 6840, "loss": 0.6116082668304443, "lr": 1.944236367066948e-05, "epoch": 0.3032607106302091, "percentage": 15.16, "elapsed_time": "1:30:52", "remaining_time": "8:28:31"} +{"current_steps": 1038, "total_steps": 6840, "loss": 0.7027714848518372, "lr": 1.9440770650894384e-05, "epoch": 0.3035531510454745, "percentage": 15.18, "elapsed_time": "1:30:57", "remaining_time": "8:28:27"} +{"current_steps": 1039, "total_steps": 6840, "loss": 0.7339189052581787, "lr": 1.943917542439655e-05, "epoch": 0.30384559146073986, "percentage": 15.19, "elapsed_time": "1:31:03", "remaining_time": "8:28:24"} +{"current_steps": 1040, "total_steps": 6840, "loss": 0.7975895404815674, "lr": 1.943757799154885e-05, "epoch": 0.30413803187600524, "percentage": 15.2, "elapsed_time": "1:31:08", "remaining_time": "8:28:19"} +{"current_steps": 1041, "total_steps": 6840, "loss": 0.6421841382980347, "lr": 1.9435978352724673e-05, "epoch": 0.3044304722912706, "percentage": 15.22, "elapsed_time": "1:31:14", "remaining_time": "8:28:18"} +{"current_steps": 1042, "total_steps": 6840, "loss": 0.6731791496276855, "lr": 1.943437650829793e-05, "epoch": 0.30472291270653606, "percentage": 15.23, "elapsed_time": "1:31:20", "remaining_time": "8:28:13"} 
+{"current_steps": 1043, "total_steps": 6840, "loss": 0.7008551359176636, "lr": 1.943277245864304e-05, "epoch": 0.30501535312180145, "percentage": 15.25, "elapsed_time": "1:31:26", "remaining_time": "8:28:11"} +{"current_steps": 1044, "total_steps": 6840, "loss": 0.6777141094207764, "lr": 1.943116620413494e-05, "epoch": 0.30530779353706683, "percentage": 15.26, "elapsed_time": "1:31:31", "remaining_time": "8:28:05"} +{"current_steps": 1045, "total_steps": 6840, "loss": 0.7649033069610596, "lr": 1.9429557745149084e-05, "epoch": 0.3056002339523322, "percentage": 15.28, "elapsed_time": "1:31:36", "remaining_time": "8:27:58"} +{"current_steps": 1046, "total_steps": 6840, "loss": 0.6460477709770203, "lr": 1.9427947082061432e-05, "epoch": 0.3058926743675976, "percentage": 15.29, "elapsed_time": "1:31:41", "remaining_time": "8:27:51"} +{"current_steps": 1047, "total_steps": 6840, "loss": 0.5939697623252869, "lr": 1.942633421524848e-05, "epoch": 0.306185114782863, "percentage": 15.31, "elapsed_time": "1:31:45", "remaining_time": "8:27:42"} +{"current_steps": 1048, "total_steps": 6840, "loss": 0.606407880783081, "lr": 1.9424719145087216e-05, "epoch": 0.30647755519812836, "percentage": 15.32, "elapsed_time": "1:31:52", "remaining_time": "8:27:44"} +{"current_steps": 1049, "total_steps": 6840, "loss": 0.5515298843383789, "lr": 1.9423101871955153e-05, "epoch": 0.3067699956133938, "percentage": 15.34, "elapsed_time": "1:31:57", "remaining_time": "8:27:40"} +{"current_steps": 1050, "total_steps": 6840, "loss": 0.7397217154502869, "lr": 1.942148239623032e-05, "epoch": 0.3070624360286592, "percentage": 15.35, "elapsed_time": "1:32:02", "remaining_time": "8:27:33"} +{"current_steps": 1051, "total_steps": 6840, "loss": 0.6397782564163208, "lr": 1.9419860718291265e-05, "epoch": 0.30735487644392456, "percentage": 15.37, "elapsed_time": "1:32:08", "remaining_time": "8:27:29"} +{"current_steps": 1052, "total_steps": 6840, "loss": 0.589732825756073, "lr": 1.9418236838517036e-05, "epoch": 
0.30764731685918995, "percentage": 15.38, "elapsed_time": "1:32:15", "remaining_time": "8:27:33"} +{"current_steps": 1053, "total_steps": 6840, "loss": 0.7968351244926453, "lr": 1.941661075728721e-05, "epoch": 0.30793975727445533, "percentage": 15.39, "elapsed_time": "1:32:19", "remaining_time": "8:27:25"} +{"current_steps": 1054, "total_steps": 6840, "loss": 0.5740514397621155, "lr": 1.9414982474981877e-05, "epoch": 0.3082321976897207, "percentage": 15.41, "elapsed_time": "1:32:25", "remaining_time": "8:27:21"} +{"current_steps": 1055, "total_steps": 6840, "loss": 0.656599760055542, "lr": 1.9413351991981632e-05, "epoch": 0.3085246381049861, "percentage": 15.42, "elapsed_time": "1:32:30", "remaining_time": "8:27:15"} +{"current_steps": 1056, "total_steps": 6840, "loss": 0.5638262033462524, "lr": 1.9411719308667593e-05, "epoch": 0.3088170785202515, "percentage": 15.44, "elapsed_time": "1:32:34", "remaining_time": "8:27:03"} +{"current_steps": 1057, "total_steps": 6840, "loss": 0.6391294002532959, "lr": 1.9410084425421392e-05, "epoch": 0.3091095189355169, "percentage": 15.45, "elapsed_time": "1:32:40", "remaining_time": "8:27:02"} +{"current_steps": 1058, "total_steps": 6840, "loss": 0.7109906077384949, "lr": 1.9408447342625167e-05, "epoch": 0.3094019593507823, "percentage": 15.47, "elapsed_time": "1:32:46", "remaining_time": "8:27:01"} +{"current_steps": 1059, "total_steps": 6840, "loss": 0.6922626495361328, "lr": 1.9406808060661583e-05, "epoch": 0.3096943997660477, "percentage": 15.48, "elapsed_time": "1:32:52", "remaining_time": "8:26:57"} +{"current_steps": 1060, "total_steps": 6840, "loss": 0.6708151698112488, "lr": 1.9405166579913808e-05, "epoch": 0.30998684018131306, "percentage": 15.5, "elapsed_time": "1:32:57", "remaining_time": "8:26:54"} +{"current_steps": 1061, "total_steps": 6840, "loss": 0.6259905099868774, "lr": 1.940352290076553e-05, "epoch": 0.31027928059657844, "percentage": 15.51, "elapsed_time": "1:33:04", "remaining_time": "8:26:54"} 
+{"current_steps": 1062, "total_steps": 6840, "loss": 0.6590703725814819, "lr": 1.940187702360095e-05, "epoch": 0.3105717210118438, "percentage": 15.53, "elapsed_time": "1:33:07", "remaining_time": "8:26:41"} +{"current_steps": 1063, "total_steps": 6840, "loss": 0.7371482849121094, "lr": 1.9400228948804777e-05, "epoch": 0.3108641614271092, "percentage": 15.54, "elapsed_time": "1:33:13", "remaining_time": "8:26:37"} +{"current_steps": 1064, "total_steps": 6840, "loss": 0.6954984664916992, "lr": 1.9398578676762243e-05, "epoch": 0.3111566018423746, "percentage": 15.56, "elapsed_time": "1:33:18", "remaining_time": "8:26:30"} +{"current_steps": 1065, "total_steps": 6840, "loss": 0.604501485824585, "lr": 1.9396926207859085e-05, "epoch": 0.31144904225764003, "percentage": 15.57, "elapsed_time": "1:33:24", "remaining_time": "8:26:31"} +{"current_steps": 1066, "total_steps": 6840, "loss": 0.7580305337905884, "lr": 1.939527154248156e-05, "epoch": 0.3117414826729054, "percentage": 15.58, "elapsed_time": "1:33:29", "remaining_time": "8:26:24"} +{"current_steps": 1067, "total_steps": 6840, "loss": 0.5996969938278198, "lr": 1.9393614681016443e-05, "epoch": 0.3120339230881708, "percentage": 15.6, "elapsed_time": "1:33:35", "remaining_time": "8:26:22"} +{"current_steps": 1068, "total_steps": 6840, "loss": 0.5939687490463257, "lr": 1.9391955623851e-05, "epoch": 0.3123263635034362, "percentage": 15.61, "elapsed_time": "1:33:41", "remaining_time": "8:26:22"} +{"current_steps": 1069, "total_steps": 6840, "loss": 0.6194947957992554, "lr": 1.939029437137304e-05, "epoch": 0.31261880391870156, "percentage": 15.63, "elapsed_time": "1:33:47", "remaining_time": "8:26:17"} +{"current_steps": 1070, "total_steps": 6840, "loss": 0.7419420480728149, "lr": 1.9388630923970862e-05, "epoch": 0.31291124433396694, "percentage": 15.64, "elapsed_time": "1:33:51", "remaining_time": "8:26:10"} +{"current_steps": 1071, "total_steps": 6840, "loss": 0.6950613856315613, "lr": 1.938696528203329e-05, "epoch": 
0.3132036847492323, "percentage": 15.66, "elapsed_time": "1:33:57", "remaining_time": "8:26:04"} +{"current_steps": 1072, "total_steps": 6840, "loss": 0.7376282215118408, "lr": 1.9385297445949657e-05, "epoch": 0.3134961251644977, "percentage": 15.67, "elapsed_time": "1:34:02", "remaining_time": "8:26:00"} +{"current_steps": 1073, "total_steps": 6840, "loss": 0.800892174243927, "lr": 1.938362741610981e-05, "epoch": 0.31378856557976315, "percentage": 15.69, "elapsed_time": "1:34:06", "remaining_time": "8:25:50"} +{"current_steps": 1074, "total_steps": 6840, "loss": 0.5747013688087463, "lr": 1.938195519290411e-05, "epoch": 0.31408100599502853, "percentage": 15.7, "elapsed_time": "1:34:11", "remaining_time": "8:25:43"} +{"current_steps": 1075, "total_steps": 6840, "loss": 0.7341697216033936, "lr": 1.9380280776723422e-05, "epoch": 0.3143734464102939, "percentage": 15.72, "elapsed_time": "1:34:17", "remaining_time": "8:25:41"} +{"current_steps": 1076, "total_steps": 6840, "loss": 0.6229791641235352, "lr": 1.9378604167959138e-05, "epoch": 0.3146658868255593, "percentage": 15.73, "elapsed_time": "1:34:23", "remaining_time": "8:25:36"} +{"current_steps": 1077, "total_steps": 6840, "loss": 0.7266645431518555, "lr": 1.937692536700315e-05, "epoch": 0.3149583272408247, "percentage": 15.75, "elapsed_time": "1:34:28", "remaining_time": "8:25:32"} +{"current_steps": 1078, "total_steps": 6840, "loss": 0.6163127422332764, "lr": 1.937524437424787e-05, "epoch": 0.31525076765609006, "percentage": 15.76, "elapsed_time": "1:34:34", "remaining_time": "8:25:31"} +{"current_steps": 1079, "total_steps": 6840, "loss": 0.6609925031661987, "lr": 1.9373561190086225e-05, "epoch": 0.31554320807135544, "percentage": 15.77, "elapsed_time": "1:34:40", "remaining_time": "8:25:26"} +{"current_steps": 1080, "total_steps": 6840, "loss": 0.7157741785049438, "lr": 1.937187581491164e-05, "epoch": 0.3158356484866208, "percentage": 15.79, "elapsed_time": "1:34:44", "remaining_time": "8:25:18"} 
+{"current_steps": 1081, "total_steps": 6840, "loss": 0.6486212015151978, "lr": 1.937018824911807e-05, "epoch": 0.31612808890188626, "percentage": 15.8, "elapsed_time": "1:34:50", "remaining_time": "8:25:15"} +{"current_steps": 1082, "total_steps": 6840, "loss": 0.6931928396224976, "lr": 1.9368498493099963e-05, "epoch": 0.31642052931715164, "percentage": 15.82, "elapsed_time": "1:34:55", "remaining_time": "8:25:11"} +{"current_steps": 1083, "total_steps": 6840, "loss": 0.9291354417800903, "lr": 1.9366806547252295e-05, "epoch": 0.316712969732417, "percentage": 15.83, "elapsed_time": "1:35:01", "remaining_time": "8:25:06"} +{"current_steps": 1084, "total_steps": 6840, "loss": 0.6276642084121704, "lr": 1.936511241197055e-05, "epoch": 0.3170054101476824, "percentage": 15.85, "elapsed_time": "1:35:06", "remaining_time": "8:25:01"} +{"current_steps": 1085, "total_steps": 6840, "loss": 0.7836581468582153, "lr": 1.936341608765072e-05, "epoch": 0.3172978505629478, "percentage": 15.86, "elapsed_time": "1:35:11", "remaining_time": "8:24:52"} +{"current_steps": 1086, "total_steps": 6840, "loss": 0.6785084009170532, "lr": 1.9361717574689308e-05, "epoch": 0.3175902909782132, "percentage": 15.88, "elapsed_time": "1:35:16", "remaining_time": "8:24:49"} +{"current_steps": 1087, "total_steps": 6840, "loss": 0.5715218782424927, "lr": 1.936001687348333e-05, "epoch": 0.31788273139347856, "percentage": 15.89, "elapsed_time": "1:35:23", "remaining_time": "8:24:50"} +{"current_steps": 1088, "total_steps": 6840, "loss": 0.8417775630950928, "lr": 1.9358313984430324e-05, "epoch": 0.318175171808744, "percentage": 15.91, "elapsed_time": "1:35:28", "remaining_time": "8:24:44"} +{"current_steps": 1089, "total_steps": 6840, "loss": 0.598076343536377, "lr": 1.935660890792832e-05, "epoch": 0.3184676122240094, "percentage": 15.92, "elapsed_time": "1:35:33", "remaining_time": "8:24:38"} +{"current_steps": 1090, "total_steps": 6840, "loss": 0.5830154418945312, "lr": 1.9354901644375876e-05, "epoch": 
0.31876005263927476, "percentage": 15.94, "elapsed_time": "1:35:38", "remaining_time": "8:24:30"} +{"current_steps": 1091, "total_steps": 6840, "loss": 0.5746437311172485, "lr": 1.935319219417205e-05, "epoch": 0.31905249305454014, "percentage": 15.95, "elapsed_time": "1:35:43", "remaining_time": "8:24:25"} +{"current_steps": 1092, "total_steps": 6840, "loss": 0.5520191788673401, "lr": 1.9351480557716414e-05, "epoch": 0.3193449334698055, "percentage": 15.96, "elapsed_time": "1:35:49", "remaining_time": "8:24:22"} +{"current_steps": 1093, "total_steps": 6840, "loss": 0.5847123861312866, "lr": 1.9349766735409058e-05, "epoch": 0.3196373738850709, "percentage": 15.98, "elapsed_time": "1:35:54", "remaining_time": "8:24:19"} +{"current_steps": 1094, "total_steps": 6840, "loss": 0.7390924692153931, "lr": 1.9348050727650577e-05, "epoch": 0.3199298143003363, "percentage": 15.99, "elapsed_time": "1:36:01", "remaining_time": "8:24:18"} +{"current_steps": 1095, "total_steps": 6840, "loss": 0.5812145471572876, "lr": 1.9346332534842074e-05, "epoch": 0.3202222547156017, "percentage": 16.01, "elapsed_time": "1:36:05", "remaining_time": "8:24:07"} +{"current_steps": 1096, "total_steps": 6840, "loss": 0.6958816647529602, "lr": 1.9344612157385166e-05, "epoch": 0.3205146951308671, "percentage": 16.02, "elapsed_time": "1:36:09", "remaining_time": "8:23:56"} +{"current_steps": 1097, "total_steps": 6840, "loss": 0.5618177652359009, "lr": 1.9342889595681986e-05, "epoch": 0.3208071355461325, "percentage": 16.04, "elapsed_time": "1:36:13", "remaining_time": "8:23:46"} +{"current_steps": 1098, "total_steps": 6840, "loss": 0.6099411845207214, "lr": 1.9341164850135163e-05, "epoch": 0.3210995759613979, "percentage": 16.05, "elapsed_time": "1:36:19", "remaining_time": "8:23:44"} +{"current_steps": 1099, "total_steps": 6840, "loss": 0.6772094964981079, "lr": 1.9339437921147854e-05, "epoch": 0.32139201637666326, "percentage": 16.07, "elapsed_time": "1:36:24", "remaining_time": "8:23:39"} 
+{"current_steps": 1100, "total_steps": 6840, "loss": 0.6916643381118774, "lr": 1.9337708809123718e-05, "epoch": 0.32168445679192864, "percentage": 16.08, "elapsed_time": "1:36:29", "remaining_time": "8:23:28"} +{"current_steps": 1101, "total_steps": 6840, "loss": 0.5716762542724609, "lr": 1.933597751446692e-05, "epoch": 0.321976897207194, "percentage": 16.1, "elapsed_time": "1:36:38", "remaining_time": "8:23:44"} +{"current_steps": 1102, "total_steps": 6840, "loss": 0.68224036693573, "lr": 1.9334244037582143e-05, "epoch": 0.3222693376224594, "percentage": 16.11, "elapsed_time": "1:36:42", "remaining_time": "8:23:33"} +{"current_steps": 1103, "total_steps": 6840, "loss": 0.6888231635093689, "lr": 1.933250837887457e-05, "epoch": 0.3225617780377248, "percentage": 16.13, "elapsed_time": "1:36:48", "remaining_time": "8:23:29"} +{"current_steps": 1104, "total_steps": 6840, "loss": 0.6469036340713501, "lr": 1.933077053874991e-05, "epoch": 0.3228542184529902, "percentage": 16.14, "elapsed_time": "1:36:53", "remaining_time": "8:23:25"} +{"current_steps": 1105, "total_steps": 6840, "loss": 0.6202501058578491, "lr": 1.932903051761437e-05, "epoch": 0.3231466588682556, "percentage": 16.15, "elapsed_time": "1:36:58", "remaining_time": "8:23:16"} +{"current_steps": 1106, "total_steps": 6840, "loss": 0.6041314601898193, "lr": 1.932728831587467e-05, "epoch": 0.323439099283521, "percentage": 16.17, "elapsed_time": "1:37:02", "remaining_time": "8:23:07"} +{"current_steps": 1107, "total_steps": 6840, "loss": 0.7081667184829712, "lr": 1.9325543933938034e-05, "epoch": 0.3237315396987864, "percentage": 16.18, "elapsed_time": "1:37:07", "remaining_time": "8:23:00"} +{"current_steps": 1108, "total_steps": 6840, "loss": 0.7743494510650635, "lr": 1.9323797372212204e-05, "epoch": 0.32402398011405176, "percentage": 16.2, "elapsed_time": "1:37:11", "remaining_time": "8:22:48"} +{"current_steps": 1109, "total_steps": 6840, "loss": 0.6122584342956543, "lr": 1.9322048631105428e-05, "epoch": 
0.32431642052931714, "percentage": 16.21, "elapsed_time": "1:37:17", "remaining_time": "8:22:45"} +{"current_steps": 1110, "total_steps": 6840, "loss": 0.6106122732162476, "lr": 1.932029771102646e-05, "epoch": 0.3246088609445825, "percentage": 16.23, "elapsed_time": "1:37:22", "remaining_time": "8:22:41"} +{"current_steps": 1111, "total_steps": 6840, "loss": 0.5082784295082092, "lr": 1.9318544612384572e-05, "epoch": 0.3249013013598479, "percentage": 16.24, "elapsed_time": "1:37:29", "remaining_time": "8:22:42"} +{"current_steps": 1112, "total_steps": 6840, "loss": 0.6845188140869141, "lr": 1.9316789335589542e-05, "epoch": 0.32519374177511334, "percentage": 16.26, "elapsed_time": "1:37:34", "remaining_time": "8:22:38"} +{"current_steps": 1113, "total_steps": 6840, "loss": 0.5972481966018677, "lr": 1.9315031881051653e-05, "epoch": 0.3254861821903787, "percentage": 16.27, "elapsed_time": "1:37:39", "remaining_time": "8:22:32"} +{"current_steps": 1114, "total_steps": 6840, "loss": 0.6312427520751953, "lr": 1.931327224918169e-05, "epoch": 0.3257786226056441, "percentage": 16.29, "elapsed_time": "1:37:45", "remaining_time": "8:22:31"} +{"current_steps": 1115, "total_steps": 6840, "loss": 0.7904551029205322, "lr": 1.9311510440390973e-05, "epoch": 0.3260710630209095, "percentage": 16.3, "elapsed_time": "1:37:49", "remaining_time": "8:22:18"} +{"current_steps": 1116, "total_steps": 6840, "loss": 0.6513646841049194, "lr": 1.9309746455091302e-05, "epoch": 0.3263635034361749, "percentage": 16.32, "elapsed_time": "1:37:54", "remaining_time": "8:22:10"} +{"current_steps": 1117, "total_steps": 6840, "loss": 0.5349715948104858, "lr": 1.9307980293694997e-05, "epoch": 0.32665594385144026, "percentage": 16.33, "elapsed_time": "1:37:58", "remaining_time": "8:22:00"} +{"current_steps": 1118, "total_steps": 6840, "loss": 0.5815087556838989, "lr": 1.93062119566149e-05, "epoch": 0.32694838426670564, "percentage": 16.35, "elapsed_time": "1:38:05", "remaining_time": "8:22:01"} 
+{"current_steps": 1119, "total_steps": 6840, "loss": 0.6380286812782288, "lr": 1.9304441444264335e-05, "epoch": 0.327240824681971, "percentage": 16.36, "elapsed_time": "1:38:11", "remaining_time": "8:21:59"} +{"current_steps": 1120, "total_steps": 6840, "loss": 0.7222728729248047, "lr": 1.9302668757057157e-05, "epoch": 0.32753326509723646, "percentage": 16.37, "elapsed_time": "1:38:16", "remaining_time": "8:21:53"} +{"current_steps": 1121, "total_steps": 6840, "loss": 0.6262868642807007, "lr": 1.9300893895407715e-05, "epoch": 0.32782570551250184, "percentage": 16.39, "elapsed_time": "1:38:22", "remaining_time": "8:21:50"} +{"current_steps": 1122, "total_steps": 6840, "loss": 0.6387197971343994, "lr": 1.929911685973088e-05, "epoch": 0.3281181459277672, "percentage": 16.4, "elapsed_time": "1:38:27", "remaining_time": "8:21:44"} +{"current_steps": 1123, "total_steps": 6840, "loss": 0.77378249168396, "lr": 1.9297337650442015e-05, "epoch": 0.3284105863430326, "percentage": 16.42, "elapsed_time": "1:38:31", "remaining_time": "8:21:34"} +{"current_steps": 1124, "total_steps": 6840, "loss": 0.7589142322540283, "lr": 1.9295556267957004e-05, "epoch": 0.328703026758298, "percentage": 16.43, "elapsed_time": "1:38:37", "remaining_time": "8:21:30"} +{"current_steps": 1125, "total_steps": 6840, "loss": 0.7153090238571167, "lr": 1.9293772712692233e-05, "epoch": 0.32899546717356337, "percentage": 16.45, "elapsed_time": "1:38:42", "remaining_time": "8:21:23"} +{"current_steps": 1126, "total_steps": 6840, "loss": 0.5738104581832886, "lr": 1.9291986985064595e-05, "epoch": 0.32928790758882875, "percentage": 16.46, "elapsed_time": "1:38:46", "remaining_time": "8:21:16"} +{"current_steps": 1127, "total_steps": 6840, "loss": 0.6530819535255432, "lr": 1.92901990854915e-05, "epoch": 0.3295803480040942, "percentage": 16.48, "elapsed_time": "1:38:52", "remaining_time": "8:21:11"} +{"current_steps": 1128, "total_steps": 6840, "loss": 0.59107506275177, "lr": 1.9288409014390854e-05, "epoch": 
0.3298727884193596, "percentage": 16.49, "elapsed_time": "1:38:57", "remaining_time": "8:21:05"} +{"current_steps": 1129, "total_steps": 6840, "loss": 0.5798863172531128, "lr": 1.9286616772181072e-05, "epoch": 0.33016522883462496, "percentage": 16.51, "elapsed_time": "1:39:01", "remaining_time": "8:20:55"} +{"current_steps": 1130, "total_steps": 6840, "loss": 0.6957223415374756, "lr": 1.9284822359281085e-05, "epoch": 0.33045766924989034, "percentage": 16.52, "elapsed_time": "1:39:05", "remaining_time": "8:20:43"} +{"current_steps": 1131, "total_steps": 6840, "loss": 0.6933379173278809, "lr": 1.9283025776110326e-05, "epoch": 0.3307501096651557, "percentage": 16.54, "elapsed_time": "1:39:09", "remaining_time": "8:20:33"} +{"current_steps": 1132, "total_steps": 6840, "loss": 0.527482271194458, "lr": 1.928122702308873e-05, "epoch": 0.3310425500804211, "percentage": 16.55, "elapsed_time": "1:39:14", "remaining_time": "8:20:25"} +{"current_steps": 1133, "total_steps": 6840, "loss": 0.7244399785995483, "lr": 1.927942610063675e-05, "epoch": 0.3313349904956865, "percentage": 16.56, "elapsed_time": "1:39:19", "remaining_time": "8:20:18"} +{"current_steps": 1134, "total_steps": 6840, "loss": 0.7881563901901245, "lr": 1.9277623009175338e-05, "epoch": 0.33162743091095187, "percentage": 16.58, "elapsed_time": "1:39:25", "remaining_time": "8:20:15"} +{"current_steps": 1135, "total_steps": 6840, "loss": 0.7523232698440552, "lr": 1.9275817749125956e-05, "epoch": 0.3319198713262173, "percentage": 16.59, "elapsed_time": "1:39:30", "remaining_time": "8:20:07"} +{"current_steps": 1136, "total_steps": 6840, "loss": 0.7226657867431641, "lr": 1.9274010320910575e-05, "epoch": 0.3322123117414827, "percentage": 16.61, "elapsed_time": "1:39:34", "remaining_time": "8:20:00"} +{"current_steps": 1137, "total_steps": 6840, "loss": 0.6461686491966248, "lr": 1.9272200724951666e-05, "epoch": 0.3325047521567481, "percentage": 16.62, "elapsed_time": "1:39:39", "remaining_time": "8:19:54"} 
+{"current_steps": 1138, "total_steps": 6840, "loss": 0.6343599557876587, "lr": 1.9270388961672214e-05, "epoch": 0.33279719257201346, "percentage": 16.64, "elapsed_time": "1:39:44", "remaining_time": "8:19:47"} +{"current_steps": 1139, "total_steps": 6840, "loss": 0.5510993599891663, "lr": 1.926857503149571e-05, "epoch": 0.33308963298727884, "percentage": 16.65, "elapsed_time": "1:39:50", "remaining_time": "8:19:43"} +{"current_steps": 1140, "total_steps": 6840, "loss": 0.6022439002990723, "lr": 1.9266758934846142e-05, "epoch": 0.3333820734025442, "percentage": 16.67, "elapsed_time": "1:39:56", "remaining_time": "8:19:41"} +{"current_steps": 1141, "total_steps": 6840, "loss": 0.708207368850708, "lr": 1.9264940672148018e-05, "epoch": 0.3336745138178096, "percentage": 16.68, "elapsed_time": "1:40:02", "remaining_time": "8:19:38"} +{"current_steps": 1142, "total_steps": 6840, "loss": 0.566935122013092, "lr": 1.9263120243826345e-05, "epoch": 0.333966954233075, "percentage": 16.7, "elapsed_time": "1:40:08", "remaining_time": "8:19:37"} +{"current_steps": 1143, "total_steps": 6840, "loss": 0.6848355531692505, "lr": 1.9261297650306635e-05, "epoch": 0.3342593946483404, "percentage": 16.71, "elapsed_time": "1:40:12", "remaining_time": "8:19:28"} +{"current_steps": 1144, "total_steps": 6840, "loss": 0.7335090637207031, "lr": 1.9259472892014907e-05, "epoch": 0.3345518350636058, "percentage": 16.73, "elapsed_time": "1:40:16", "remaining_time": "8:19:16"} +{"current_steps": 1145, "total_steps": 6840, "loss": 0.7323876023292542, "lr": 1.925764596937769e-05, "epoch": 0.3348442754788712, "percentage": 16.74, "elapsed_time": "1:40:21", "remaining_time": "8:19:10"} +{"current_steps": 1146, "total_steps": 6840, "loss": 0.5564731955528259, "lr": 1.9255816882822017e-05, "epoch": 0.33513671589413657, "percentage": 16.75, "elapsed_time": "1:40:26", "remaining_time": "8:19:04"} +{"current_steps": 1147, "total_steps": 6840, "loss": 0.7699049711227417, "lr": 1.925398563277542e-05, "epoch": 
0.33542915630940195, "percentage": 16.77, "elapsed_time": "1:40:31", "remaining_time": "8:18:54"} +{"current_steps": 1148, "total_steps": 6840, "loss": 0.688602089881897, "lr": 1.925215221966595e-05, "epoch": 0.33572159672466734, "percentage": 16.78, "elapsed_time": "1:40:36", "remaining_time": "8:18:48"} +{"current_steps": 1149, "total_steps": 6840, "loss": 0.7103208899497986, "lr": 1.9250316643922153e-05, "epoch": 0.3360140371399327, "percentage": 16.8, "elapsed_time": "1:40:40", "remaining_time": "8:18:37"} +{"current_steps": 1150, "total_steps": 6840, "loss": 0.6313603520393372, "lr": 1.9248478905973078e-05, "epoch": 0.3363064775551981, "percentage": 16.81, "elapsed_time": "1:40:44", "remaining_time": "8:18:26"} +{"current_steps": 1151, "total_steps": 6840, "loss": 0.8420118093490601, "lr": 1.9246639006248294e-05, "epoch": 0.33659891797046354, "percentage": 16.83, "elapsed_time": "1:40:50", "remaining_time": "8:18:26"} +{"current_steps": 1152, "total_steps": 6840, "loss": 0.6566640734672546, "lr": 1.9244796945177864e-05, "epoch": 0.3368913583857289, "percentage": 16.84, "elapsed_time": "1:40:56", "remaining_time": "8:18:22"} +{"current_steps": 1153, "total_steps": 6840, "loss": 0.6455206274986267, "lr": 1.9242952723192357e-05, "epoch": 0.3371837988009943, "percentage": 16.86, "elapsed_time": "1:41:00", "remaining_time": "8:18:14"} +{"current_steps": 1154, "total_steps": 6840, "loss": 0.7348071336746216, "lr": 1.924110634072285e-05, "epoch": 0.3374762392162597, "percentage": 16.87, "elapsed_time": "1:41:05", "remaining_time": "8:18:05"} +{"current_steps": 1155, "total_steps": 6840, "loss": 0.7187973260879517, "lr": 1.9239257798200918e-05, "epoch": 0.33776867963152507, "percentage": 16.89, "elapsed_time": "1:41:10", "remaining_time": "8:17:59"} +{"current_steps": 1156, "total_steps": 6840, "loss": 0.683181643486023, "lr": 1.9237407096058655e-05, "epoch": 0.33806112004679045, "percentage": 16.9, "elapsed_time": "1:41:16", "remaining_time": "8:17:57"} 
+{"current_steps": 1157, "total_steps": 6840, "loss": 0.7296931743621826, "lr": 1.9235554234728646e-05, "epoch": 0.33835356046205584, "percentage": 16.92, "elapsed_time": "1:41:21", "remaining_time": "8:17:50"} +{"current_steps": 1158, "total_steps": 6840, "loss": 0.6656480431556702, "lr": 1.923369921464399e-05, "epoch": 0.3386460008773212, "percentage": 16.93, "elapsed_time": "1:41:26", "remaining_time": "8:17:44"} +{"current_steps": 1159, "total_steps": 6840, "loss": 0.6284874677658081, "lr": 1.923184203623828e-05, "epoch": 0.33893844129258666, "percentage": 16.94, "elapsed_time": "1:41:32", "remaining_time": "8:17:41"} +{"current_steps": 1160, "total_steps": 6840, "loss": 0.7065030336380005, "lr": 1.922998269994563e-05, "epoch": 0.33923088170785204, "percentage": 16.96, "elapsed_time": "1:41:37", "remaining_time": "8:17:36"} +{"current_steps": 1161, "total_steps": 6840, "loss": 0.7077580690383911, "lr": 1.9228121206200637e-05, "epoch": 0.3395233221231174, "percentage": 16.97, "elapsed_time": "1:41:41", "remaining_time": "8:17:27"} +{"current_steps": 1162, "total_steps": 6840, "loss": 0.6012637615203857, "lr": 1.9226257555438428e-05, "epoch": 0.3398157625383828, "percentage": 16.99, "elapsed_time": "1:41:45", "remaining_time": "8:17:16"} +{"current_steps": 1163, "total_steps": 6840, "loss": 0.8166115283966064, "lr": 1.9224391748094607e-05, "epoch": 0.3401082029536482, "percentage": 17.0, "elapsed_time": "1:41:51", "remaining_time": "8:17:12"} +{"current_steps": 1164, "total_steps": 6840, "loss": 0.6066576242446899, "lr": 1.92225237846053e-05, "epoch": 0.34040064336891357, "percentage": 17.02, "elapsed_time": "1:41:56", "remaining_time": "8:17:06"} +{"current_steps": 1165, "total_steps": 6840, "loss": 0.7226361632347107, "lr": 1.922065366540713e-05, "epoch": 0.34069308378417895, "percentage": 17.03, "elapsed_time": "1:42:02", "remaining_time": "8:17:04"} +{"current_steps": 1166, "total_steps": 6840, "loss": 0.7786005139350891, "lr": 1.9218781390937233e-05, 
"epoch": 0.3409855241994444, "percentage": 17.05, "elapsed_time": "1:42:07", "remaining_time": "8:16:56"} +{"current_steps": 1167, "total_steps": 6840, "loss": 0.6534268856048584, "lr": 1.9216906961633234e-05, "epoch": 0.3412779646147098, "percentage": 17.06, "elapsed_time": "1:42:12", "remaining_time": "8:16:52"} +{"current_steps": 1168, "total_steps": 6840, "loss": 0.6048434376716614, "lr": 1.9215030377933274e-05, "epoch": 0.34157040502997515, "percentage": 17.08, "elapsed_time": "1:42:18", "remaining_time": "8:16:48"} +{"current_steps": 1169, "total_steps": 6840, "loss": 0.8321201205253601, "lr": 1.921315164027599e-05, "epoch": 0.34186284544524054, "percentage": 17.09, "elapsed_time": "1:42:24", "remaining_time": "8:16:45"} +{"current_steps": 1170, "total_steps": 6840, "loss": 0.7142379283905029, "lr": 1.9211270749100527e-05, "epoch": 0.3421552858605059, "percentage": 17.11, "elapsed_time": "1:42:28", "remaining_time": "8:16:35"} +{"current_steps": 1171, "total_steps": 6840, "loss": 0.6262812614440918, "lr": 1.9209387704846535e-05, "epoch": 0.3424477262757713, "percentage": 17.12, "elapsed_time": "1:42:34", "remaining_time": "8:16:34"} +{"current_steps": 1172, "total_steps": 6840, "loss": 0.7242386341094971, "lr": 1.920750250795416e-05, "epoch": 0.3427401666910367, "percentage": 17.13, "elapsed_time": "1:42:39", "remaining_time": "8:16:28"} +{"current_steps": 1173, "total_steps": 6840, "loss": 0.6064128875732422, "lr": 1.9205615158864054e-05, "epoch": 0.34303260710630207, "percentage": 17.15, "elapsed_time": "1:42:45", "remaining_time": "8:16:24"} +{"current_steps": 1174, "total_steps": 6840, "loss": 0.6720623970031738, "lr": 1.9203725658017374e-05, "epoch": 0.3433250475215675, "percentage": 17.16, "elapsed_time": "1:42:50", "remaining_time": "8:16:21"} +{"current_steps": 1175, "total_steps": 6840, "loss": 0.745712161064148, "lr": 1.9201834005855785e-05, "epoch": 0.3436174879368329, "percentage": 17.18, "elapsed_time": "1:42:55", "remaining_time": "8:16:14"} 
+{"current_steps": 1176, "total_steps": 6840, "loss": 0.6387969255447388, "lr": 1.9199940202821445e-05, "epoch": 0.34390992835209827, "percentage": 17.19, "elapsed_time": "1:43:01", "remaining_time": "8:16:09"} +{"current_steps": 1177, "total_steps": 6840, "loss": 0.6634984612464905, "lr": 1.9198044249357018e-05, "epoch": 0.34420236876736365, "percentage": 17.21, "elapsed_time": "1:43:06", "remaining_time": "8:16:03"} +{"current_steps": 1178, "total_steps": 6840, "loss": 0.4732145667076111, "lr": 1.919614614590567e-05, "epoch": 0.34449480918262904, "percentage": 17.22, "elapsed_time": "1:43:11", "remaining_time": "8:15:56"} +{"current_steps": 1179, "total_steps": 6840, "loss": 0.6621897220611572, "lr": 1.9194245892911077e-05, "epoch": 0.3447872495978944, "percentage": 17.24, "elapsed_time": "1:43:15", "remaining_time": "8:15:45"} +{"current_steps": 1180, "total_steps": 6840, "loss": 0.5691112279891968, "lr": 1.9192343490817412e-05, "epoch": 0.3450796900131598, "percentage": 17.25, "elapsed_time": "1:43:20", "remaining_time": "8:15:39"} +{"current_steps": 1181, "total_steps": 6840, "loss": 0.6326683759689331, "lr": 1.919043894006934e-05, "epoch": 0.3453721304284252, "percentage": 17.27, "elapsed_time": "1:43:25", "remaining_time": "8:15:34"} +{"current_steps": 1182, "total_steps": 6840, "loss": 0.6068567037582397, "lr": 1.9188532241112047e-05, "epoch": 0.3456645708436906, "percentage": 17.28, "elapsed_time": "1:43:28", "remaining_time": "8:15:21"} +{"current_steps": 1183, "total_steps": 6840, "loss": 0.707065761089325, "lr": 1.918662339439121e-05, "epoch": 0.345957011258956, "percentage": 17.3, "elapsed_time": "1:43:34", "remaining_time": "8:15:14"} +{"current_steps": 1184, "total_steps": 6840, "loss": 0.7821887135505676, "lr": 1.9184712400353008e-05, "epoch": 0.3462494516742214, "percentage": 17.31, "elapsed_time": "1:43:38", "remaining_time": "8:15:06"} +{"current_steps": 1185, "total_steps": 6840, "loss": 0.6759425401687622, "lr": 1.918279925944413e-05, "epoch": 
0.34654189208948677, "percentage": 17.32, "elapsed_time": "1:43:44", "remaining_time": "8:15:03"} +{"current_steps": 1186, "total_steps": 6840, "loss": 0.5660048127174377, "lr": 1.9180883972111756e-05, "epoch": 0.34683433250475215, "percentage": 17.34, "elapsed_time": "1:43:49", "remaining_time": "8:14:59"} +{"current_steps": 1187, "total_steps": 6840, "loss": 0.708798885345459, "lr": 1.9178966538803574e-05, "epoch": 0.34712677292001753, "percentage": 17.35, "elapsed_time": "1:43:56", "remaining_time": "8:15:00"} +{"current_steps": 1188, "total_steps": 6840, "loss": 0.603208065032959, "lr": 1.9177046959967774e-05, "epoch": 0.3474192133352829, "percentage": 17.37, "elapsed_time": "1:44:01", "remaining_time": "8:14:55"} +{"current_steps": 1189, "total_steps": 6840, "loss": 0.8259323835372925, "lr": 1.9175125236053043e-05, "epoch": 0.3477116537505483, "percentage": 17.38, "elapsed_time": "1:44:06", "remaining_time": "8:14:46"} +{"current_steps": 1190, "total_steps": 6840, "loss": 0.573014497756958, "lr": 1.9173201367508572e-05, "epoch": 0.34800409416581374, "percentage": 17.4, "elapsed_time": "1:44:11", "remaining_time": "8:14:40"} +{"current_steps": 1191, "total_steps": 6840, "loss": 0.8202974200248718, "lr": 1.9171275354784062e-05, "epoch": 0.3482965345810791, "percentage": 17.41, "elapsed_time": "1:44:16", "remaining_time": "8:14:36"} +{"current_steps": 1192, "total_steps": 6840, "loss": 0.5352192521095276, "lr": 1.9169347198329693e-05, "epoch": 0.3485889749963445, "percentage": 17.43, "elapsed_time": "1:44:20", "remaining_time": "8:14:25"} +{"current_steps": 1193, "total_steps": 6840, "loss": 0.7303881645202637, "lr": 1.916741689859617e-05, "epoch": 0.3488814154116099, "percentage": 17.44, "elapsed_time": "1:44:27", "remaining_time": "8:14:24"} +{"current_steps": 1194, "total_steps": 6840, "loss": 0.670224666595459, "lr": 1.9165484456034683e-05, "epoch": 0.34917385582687527, "percentage": 17.46, "elapsed_time": "1:44:32", "remaining_time": "8:14:21"} 
+{"current_steps": 1195, "total_steps": 6840, "loss": 0.7311158776283264, "lr": 1.9163549871096934e-05, "epoch": 0.34946629624214065, "percentage": 17.47, "elapsed_time": "1:44:38", "remaining_time": "8:14:18"} +{"current_steps": 1196, "total_steps": 6840, "loss": 0.6346032619476318, "lr": 1.9161613144235117e-05, "epoch": 0.34975873665740603, "percentage": 17.49, "elapsed_time": "1:44:42", "remaining_time": "8:14:08"} +{"current_steps": 1197, "total_steps": 6840, "loss": 0.66914302110672, "lr": 1.9159674275901932e-05, "epoch": 0.3500511770726714, "percentage": 17.5, "elapsed_time": "1:44:46", "remaining_time": "8:13:56"} +{"current_steps": 1198, "total_steps": 6840, "loss": 0.6775194406509399, "lr": 1.9157733266550577e-05, "epoch": 0.35034361748793685, "percentage": 17.51, "elapsed_time": "1:44:52", "remaining_time": "8:13:53"} +{"current_steps": 1199, "total_steps": 6840, "loss": 0.6887085437774658, "lr": 1.915579011663475e-05, "epoch": 0.35063605790320224, "percentage": 17.53, "elapsed_time": "1:44:58", "remaining_time": "8:13:53"} +{"current_steps": 1200, "total_steps": 6840, "loss": 0.7474929690361023, "lr": 1.9153844826608652e-05, "epoch": 0.3509284983184676, "percentage": 17.54, "elapsed_time": "1:45:03", "remaining_time": "8:13:47"} +{"current_steps": 1201, "total_steps": 6840, "loss": 0.5665907859802246, "lr": 1.915189739692698e-05, "epoch": 0.351220938733733, "percentage": 17.56, "elapsed_time": "1:45:12", "remaining_time": "8:13:59"} +{"current_steps": 1202, "total_steps": 6840, "loss": 0.6044580340385437, "lr": 1.9149947828044938e-05, "epoch": 0.3515133791489984, "percentage": 17.57, "elapsed_time": "1:45:16", "remaining_time": "8:13:49"} +{"current_steps": 1203, "total_steps": 6840, "loss": 0.6590601205825806, "lr": 1.914799612041822e-05, "epoch": 0.35180581956426377, "percentage": 17.59, "elapsed_time": "1:45:22", "remaining_time": "8:13:46"} +{"current_steps": 1204, "total_steps": 6840, "loss": 0.5204451084136963, "lr": 1.9146042274503033e-05, 
"epoch": 0.35209825997952915, "percentage": 17.6, "elapsed_time": "1:45:28", "remaining_time": "8:13:42"} +{"current_steps": 1205, "total_steps": 6840, "loss": 0.6036473512649536, "lr": 1.9144086290756077e-05, "epoch": 0.3523907003947946, "percentage": 17.62, "elapsed_time": "1:45:33", "remaining_time": "8:13:36"} +{"current_steps": 1206, "total_steps": 6840, "loss": 0.5652757883071899, "lr": 1.914212816963454e-05, "epoch": 0.35268314081005997, "percentage": 17.63, "elapsed_time": "1:45:38", "remaining_time": "8:13:31"} +{"current_steps": 1207, "total_steps": 6840, "loss": 0.707310676574707, "lr": 1.9140167911596133e-05, "epoch": 0.35297558122532535, "percentage": 17.65, "elapsed_time": "1:45:44", "remaining_time": "8:13:30"} +{"current_steps": 1208, "total_steps": 6840, "loss": 0.8539729714393616, "lr": 1.9138205517099048e-05, "epoch": 0.35326802164059073, "percentage": 17.66, "elapsed_time": "1:45:49", "remaining_time": "8:13:23"} +{"current_steps": 1209, "total_steps": 6840, "loss": 0.6502546072006226, "lr": 1.9136240986601986e-05, "epoch": 0.3535604620558561, "percentage": 17.68, "elapsed_time": "1:45:55", "remaining_time": "8:13:19"} +{"current_steps": 1210, "total_steps": 6840, "loss": 0.7279889583587646, "lr": 1.9134274320564145e-05, "epoch": 0.3538529024711215, "percentage": 17.69, "elapsed_time": "1:46:00", "remaining_time": "8:13:13"} +{"current_steps": 1211, "total_steps": 6840, "loss": 0.6916895508766174, "lr": 1.9132305519445215e-05, "epoch": 0.3541453428863869, "percentage": 17.7, "elapsed_time": "1:46:04", "remaining_time": "8:13:02"} +{"current_steps": 1212, "total_steps": 6840, "loss": 0.6941961050033569, "lr": 1.9130334583705395e-05, "epoch": 0.35443778330165226, "percentage": 17.72, "elapsed_time": "1:46:09", "remaining_time": "8:12:55"} +{"current_steps": 1213, "total_steps": 6840, "loss": 0.6686822175979614, "lr": 1.912836151380538e-05, "epoch": 0.3547302237169177, "percentage": 17.73, "elapsed_time": "1:46:14", "remaining_time": "8:12:48"} 
+{"current_steps": 1214, "total_steps": 6840, "loss": 0.818913459777832, "lr": 1.912638631020636e-05, "epoch": 0.3550226641321831, "percentage": 17.75, "elapsed_time": "1:46:19", "remaining_time": "8:12:46"} +{"current_steps": 1215, "total_steps": 6840, "loss": 0.6461240649223328, "lr": 1.9124408973370034e-05, "epoch": 0.35531510454744847, "percentage": 17.76, "elapsed_time": "1:46:25", "remaining_time": "8:12:43"} +{"current_steps": 1216, "total_steps": 6840, "loss": 0.6982225179672241, "lr": 1.9122429503758586e-05, "epoch": 0.35560754496271385, "percentage": 17.78, "elapsed_time": "1:46:31", "remaining_time": "8:12:39"} +{"current_steps": 1217, "total_steps": 6840, "loss": 0.6319124698638916, "lr": 1.9120447901834708e-05, "epoch": 0.35589998537797923, "percentage": 17.79, "elapsed_time": "1:46:36", "remaining_time": "8:12:32"} +{"current_steps": 1218, "total_steps": 6840, "loss": 0.7092441320419312, "lr": 1.9118464168061584e-05, "epoch": 0.3561924257932446, "percentage": 17.81, "elapsed_time": "1:46:41", "remaining_time": "8:12:26"} +{"current_steps": 1219, "total_steps": 6840, "loss": 0.7696874141693115, "lr": 1.9116478302902904e-05, "epoch": 0.35648486620851, "percentage": 17.82, "elapsed_time": "1:46:45", "remaining_time": "8:12:16"} +{"current_steps": 1220, "total_steps": 6840, "loss": 0.6944275498390198, "lr": 1.9114490306822846e-05, "epoch": 0.3567773066237754, "percentage": 17.84, "elapsed_time": "1:46:50", "remaining_time": "8:12:10"} +{"current_steps": 1221, "total_steps": 6840, "loss": 0.6179015636444092, "lr": 1.9112500180286098e-05, "epoch": 0.3570697470390408, "percentage": 17.85, "elapsed_time": "1:46:55", "remaining_time": "8:12:02"} +{"current_steps": 1222, "total_steps": 6840, "loss": 0.6964149475097656, "lr": 1.911050792375784e-05, "epoch": 0.3573621874543062, "percentage": 17.87, "elapsed_time": "1:47:00", "remaining_time": "8:11:57"} +{"current_steps": 1223, "total_steps": 6840, "loss": 0.6923096776008606, "lr": 1.9108513537703746e-05, 
"epoch": 0.3576546278695716, "percentage": 17.88, "elapsed_time": "1:47:05", "remaining_time": "8:11:50"} +{"current_steps": 1224, "total_steps": 6840, "loss": 0.5205660462379456, "lr": 1.9106517022589993e-05, "epoch": 0.35794706828483697, "percentage": 17.89, "elapsed_time": "1:47:10", "remaining_time": "8:11:45"} +{"current_steps": 1225, "total_steps": 6840, "loss": 0.7488006353378296, "lr": 1.910451837888325e-05, "epoch": 0.35823950870010235, "percentage": 17.91, "elapsed_time": "1:47:15", "remaining_time": "8:11:38"} +{"current_steps": 1226, "total_steps": 6840, "loss": 0.5414390563964844, "lr": 1.91025176070507e-05, "epoch": 0.35853194911536773, "percentage": 17.92, "elapsed_time": "1:47:20", "remaining_time": "8:11:30"} +{"current_steps": 1227, "total_steps": 6840, "loss": 0.6891577839851379, "lr": 1.910051470756e-05, "epoch": 0.3588243895306331, "percentage": 17.94, "elapsed_time": "1:47:25", "remaining_time": "8:11:27"} +{"current_steps": 1228, "total_steps": 6840, "loss": 0.6496376991271973, "lr": 1.9098509680879318e-05, "epoch": 0.3591168299458985, "percentage": 17.95, "elapsed_time": "1:47:30", "remaining_time": "8:11:18"} +{"current_steps": 1229, "total_steps": 6840, "loss": 0.758609414100647, "lr": 1.909650252747732e-05, "epoch": 0.35940927036116394, "percentage": 17.97, "elapsed_time": "1:47:35", "remaining_time": "8:11:11"} +{"current_steps": 1230, "total_steps": 6840, "loss": 0.7509145736694336, "lr": 1.9094493247823164e-05, "epoch": 0.3597017107764293, "percentage": 17.98, "elapsed_time": "1:47:41", "remaining_time": "8:11:10"} +{"current_steps": 1231, "total_steps": 6840, "loss": 0.7432405352592468, "lr": 1.9092481842386506e-05, "epoch": 0.3599941511916947, "percentage": 18.0, "elapsed_time": "1:47:46", "remaining_time": "8:11:03"} +{"current_steps": 1232, "total_steps": 6840, "loss": 0.7208698391914368, "lr": 1.90904683116375e-05, "epoch": 0.3602865916069601, "percentage": 18.01, "elapsed_time": "1:47:50", "remaining_time": "8:10:53"} 
+{"current_steps": 1233, "total_steps": 6840, "loss": 0.638593852519989, "lr": 1.9088452656046798e-05, "epoch": 0.36057903202222547, "percentage": 18.03, "elapsed_time": "1:47:55", "remaining_time": "8:10:46"} +{"current_steps": 1234, "total_steps": 6840, "loss": 0.6663007736206055, "lr": 1.9086434876085548e-05, "epoch": 0.36087147243749085, "percentage": 18.04, "elapsed_time": "1:48:00", "remaining_time": "8:10:38"} +{"current_steps": 1235, "total_steps": 6840, "loss": 0.7132781744003296, "lr": 1.908441497222539e-05, "epoch": 0.36116391285275623, "percentage": 18.06, "elapsed_time": "1:48:04", "remaining_time": "8:10:30"} +{"current_steps": 1236, "total_steps": 6840, "loss": 0.6545308828353882, "lr": 1.9082392944938467e-05, "epoch": 0.3614563532680216, "percentage": 18.07, "elapsed_time": "1:48:09", "remaining_time": "8:10:24"} +{"current_steps": 1237, "total_steps": 6840, "loss": 0.7525626420974731, "lr": 1.908036879469741e-05, "epoch": 0.36174879368328705, "percentage": 18.08, "elapsed_time": "1:48:15", "remaining_time": "8:10:22"} +{"current_steps": 1238, "total_steps": 6840, "loss": 0.7336804866790771, "lr": 1.9078342521975365e-05, "epoch": 0.36204123409855243, "percentage": 18.1, "elapsed_time": "1:48:21", "remaining_time": "8:10:18"} +{"current_steps": 1239, "total_steps": 6840, "loss": 0.5822359323501587, "lr": 1.907631412724595e-05, "epoch": 0.3623336745138178, "percentage": 18.11, "elapsed_time": "1:48:27", "remaining_time": "8:10:18"} +{"current_steps": 1240, "total_steps": 6840, "loss": 0.6110040545463562, "lr": 1.907428361098329e-05, "epoch": 0.3626261149290832, "percentage": 18.13, "elapsed_time": "1:48:33", "remaining_time": "8:10:16"} +{"current_steps": 1241, "total_steps": 6840, "loss": 0.5363205671310425, "lr": 1.9072250973662008e-05, "epoch": 0.3629185553443486, "percentage": 18.14, "elapsed_time": "1:48:38", "remaining_time": "8:10:11"} +{"current_steps": 1242, "total_steps": 6840, "loss": 0.6804911494255066, "lr": 1.9070216215757225e-05, 
"epoch": 0.36321099575961396, "percentage": 18.16, "elapsed_time": "1:48:44", "remaining_time": "8:10:09"} +{"current_steps": 1243, "total_steps": 6840, "loss": 0.5670056343078613, "lr": 1.906817933774455e-05, "epoch": 0.36350343617487935, "percentage": 18.17, "elapsed_time": "1:48:50", "remaining_time": "8:10:03"} +{"current_steps": 1244, "total_steps": 6840, "loss": 0.6839423775672913, "lr": 1.9066140340100086e-05, "epoch": 0.3637958765901448, "percentage": 18.19, "elapsed_time": "1:48:54", "remaining_time": "8:09:55"} +{"current_steps": 1245, "total_steps": 6840, "loss": 0.6512447595596313, "lr": 1.906409922330044e-05, "epoch": 0.36408831700541017, "percentage": 18.2, "elapsed_time": "1:49:01", "remaining_time": "8:09:56"} +{"current_steps": 1246, "total_steps": 6840, "loss": 0.6602088212966919, "lr": 1.9062055987822713e-05, "epoch": 0.36438075742067555, "percentage": 18.22, "elapsed_time": "1:49:06", "remaining_time": "8:09:52"} +{"current_steps": 1247, "total_steps": 6840, "loss": 0.6859074831008911, "lr": 1.9060010634144502e-05, "epoch": 0.36467319783594093, "percentage": 18.23, "elapsed_time": "1:49:11", "remaining_time": "8:09:45"} +{"current_steps": 1248, "total_steps": 6840, "loss": 0.6871531009674072, "lr": 1.9057963162743888e-05, "epoch": 0.3649656382512063, "percentage": 18.25, "elapsed_time": "1:49:16", "remaining_time": "8:09:38"} +{"current_steps": 1249, "total_steps": 6840, "loss": 0.7396048307418823, "lr": 1.9055913574099454e-05, "epoch": 0.3652580786664717, "percentage": 18.26, "elapsed_time": "1:49:22", "remaining_time": "8:09:34"} +{"current_steps": 1250, "total_steps": 6840, "loss": 0.7013602256774902, "lr": 1.9053861868690283e-05, "epoch": 0.3655505190817371, "percentage": 18.27, "elapsed_time": "1:49:27", "remaining_time": "8:09:30"} +{"current_steps": 1251, "total_steps": 6840, "loss": 0.6355527639389038, "lr": 1.905180804699595e-05, "epoch": 0.36584295949700246, "percentage": 18.29, "elapsed_time": "1:49:33", "remaining_time": "8:09:26"} 
+{"current_steps": 1252, "total_steps": 6840, "loss": 0.6869304180145264, "lr": 1.9049752109496526e-05, "epoch": 0.3661353999122679, "percentage": 18.3, "elapsed_time": "1:49:38", "remaining_time": "8:09:22"} +{"current_steps": 1253, "total_steps": 6840, "loss": 0.5267671346664429, "lr": 1.9047694056672566e-05, "epoch": 0.3664278403275333, "percentage": 18.32, "elapsed_time": "1:49:43", "remaining_time": "8:09:16"} +{"current_steps": 1254, "total_steps": 6840, "loss": 0.6586635112762451, "lr": 1.9045633889005134e-05, "epoch": 0.36672028074279867, "percentage": 18.33, "elapsed_time": "1:49:50", "remaining_time": "8:09:16"} +{"current_steps": 1255, "total_steps": 6840, "loss": 0.6743361949920654, "lr": 1.9043571606975776e-05, "epoch": 0.36701272115806405, "percentage": 18.35, "elapsed_time": "1:49:55", "remaining_time": "8:09:13"} +{"current_steps": 1256, "total_steps": 6840, "loss": 0.5779668688774109, "lr": 1.9041507211066543e-05, "epoch": 0.36730516157332943, "percentage": 18.36, "elapsed_time": "1:50:00", "remaining_time": "8:09:06"} +{"current_steps": 1257, "total_steps": 6840, "loss": 0.693313479423523, "lr": 1.9039440701759972e-05, "epoch": 0.3675976019885948, "percentage": 18.38, "elapsed_time": "1:50:06", "remaining_time": "8:09:04"} +{"current_steps": 1258, "total_steps": 6840, "loss": 0.6314960718154907, "lr": 1.9037372079539096e-05, "epoch": 0.3678900424038602, "percentage": 18.39, "elapsed_time": "1:50:11", "remaining_time": "8:08:57"} +{"current_steps": 1259, "total_steps": 6840, "loss": 0.6483266949653625, "lr": 1.9035301344887445e-05, "epoch": 0.3681824828191256, "percentage": 18.41, "elapsed_time": "1:50:17", "remaining_time": "8:08:53"} +{"current_steps": 1260, "total_steps": 6840, "loss": 0.5896739959716797, "lr": 1.903322849828904e-05, "epoch": 0.368474923234391, "percentage": 18.42, "elapsed_time": "1:50:23", "remaining_time": "8:08:51"} +{"current_steps": 1261, "total_steps": 6840, "loss": 0.6760983467102051, "lr": 1.9031153540228398e-05, 
"epoch": 0.3687673636496564, "percentage": 18.44, "elapsed_time": "1:50:27", "remaining_time": "8:08:42"} +{"current_steps": 1262, "total_steps": 6840, "loss": 0.7453440427780151, "lr": 1.9029076471190525e-05, "epoch": 0.3690598040649218, "percentage": 18.45, "elapsed_time": "1:50:32", "remaining_time": "8:08:36"} +{"current_steps": 1263, "total_steps": 6840, "loss": 0.7382408380508423, "lr": 1.9026997291660926e-05, "epoch": 0.36935224448018716, "percentage": 18.46, "elapsed_time": "1:50:38", "remaining_time": "8:08:32"} +{"current_steps": 1264, "total_steps": 6840, "loss": 0.6420471668243408, "lr": 1.9024916002125594e-05, "epoch": 0.36964468489545255, "percentage": 18.48, "elapsed_time": "1:50:42", "remaining_time": "8:08:20"} +{"current_steps": 1265, "total_steps": 6840, "loss": 0.6436389684677124, "lr": 1.9022832603071017e-05, "epoch": 0.36993712531071793, "percentage": 18.49, "elapsed_time": "1:50:48", "remaining_time": "8:08:20"} +{"current_steps": 1266, "total_steps": 6840, "loss": 0.689171314239502, "lr": 1.9020747094984182e-05, "epoch": 0.3702295657259833, "percentage": 18.51, "elapsed_time": "1:50:53", "remaining_time": "8:08:16"} +{"current_steps": 1267, "total_steps": 6840, "loss": 0.6704196333885193, "lr": 1.9018659478352556e-05, "epoch": 0.3705220061412487, "percentage": 18.52, "elapsed_time": "1:50:58", "remaining_time": "8:08:09"} +{"current_steps": 1268, "total_steps": 6840, "loss": 0.6598329544067383, "lr": 1.9016569753664118e-05, "epoch": 0.37081444655651413, "percentage": 18.54, "elapsed_time": "1:51:04", "remaining_time": "8:08:05"} +{"current_steps": 1269, "total_steps": 6840, "loss": 0.7353986501693726, "lr": 1.901447792140732e-05, "epoch": 0.3711068869717795, "percentage": 18.55, "elapsed_time": "1:51:09", "remaining_time": "8:08:01"} +{"current_steps": 1270, "total_steps": 6840, "loss": 0.666167140007019, "lr": 1.9012383982071112e-05, "epoch": 0.3713993273870449, "percentage": 18.57, "elapsed_time": "1:51:15", "remaining_time": "8:07:55"} 
+{"current_steps": 1271, "total_steps": 6840, "loss": 0.6097015738487244, "lr": 1.9010287936144948e-05, "epoch": 0.3716917678023103, "percentage": 18.58, "elapsed_time": "1:51:20", "remaining_time": "8:07:51"} +{"current_steps": 1272, "total_steps": 6840, "loss": 0.6352437138557434, "lr": 1.9008189784118764e-05, "epoch": 0.37198420821757566, "percentage": 18.6, "elapsed_time": "1:51:26", "remaining_time": "8:07:48"} +{"current_steps": 1273, "total_steps": 6840, "loss": 0.6686104536056519, "lr": 1.9006089526482982e-05, "epoch": 0.37227664863284105, "percentage": 18.61, "elapsed_time": "1:51:30", "remaining_time": "8:07:37"} +{"current_steps": 1274, "total_steps": 6840, "loss": 0.6504377126693726, "lr": 1.9003987163728535e-05, "epoch": 0.3725690890481064, "percentage": 18.63, "elapsed_time": "1:51:35", "remaining_time": "8:07:30"} +{"current_steps": 1275, "total_steps": 6840, "loss": 0.5834585428237915, "lr": 1.9001882696346835e-05, "epoch": 0.3728615294633718, "percentage": 18.64, "elapsed_time": "1:51:41", "remaining_time": "8:07:30"} +{"current_steps": 1276, "total_steps": 6840, "loss": 0.665432870388031, "lr": 1.8999776124829788e-05, "epoch": 0.37315396987863725, "percentage": 18.65, "elapsed_time": "1:51:47", "remaining_time": "8:07:29"} +{"current_steps": 1277, "total_steps": 6840, "loss": 0.659697949886322, "lr": 1.899766744966979e-05, "epoch": 0.37344641029390263, "percentage": 18.67, "elapsed_time": "1:51:52", "remaining_time": "8:07:22"} +{"current_steps": 1278, "total_steps": 6840, "loss": 0.5703476071357727, "lr": 1.899555667135973e-05, "epoch": 0.373738850709168, "percentage": 18.68, "elapsed_time": "1:51:58", "remaining_time": "8:07:19"} +{"current_steps": 1279, "total_steps": 6840, "loss": 0.809308648109436, "lr": 1.8993443790392994e-05, "epoch": 0.3740312911244334, "percentage": 18.7, "elapsed_time": "1:52:03", "remaining_time": "8:07:13"} +{"current_steps": 1280, "total_steps": 6840, "loss": 0.7120508551597595, "lr": 1.8991328807263455e-05, "epoch": 
0.3743237315396988, "percentage": 18.71, "elapsed_time": "1:52:08", "remaining_time": "8:07:07"} +{"current_steps": 1281, "total_steps": 6840, "loss": 0.625985860824585, "lr": 1.898921172246547e-05, "epoch": 0.37461617195496416, "percentage": 18.73, "elapsed_time": "1:52:14", "remaining_time": "8:07:05"} +{"current_steps": 1282, "total_steps": 6840, "loss": 0.637261152267456, "lr": 1.898709253649391e-05, "epoch": 0.37490861237022954, "percentage": 18.74, "elapsed_time": "1:52:19", "remaining_time": "8:06:59"} +{"current_steps": 1283, "total_steps": 6840, "loss": 0.7420133352279663, "lr": 1.89849712498441e-05, "epoch": 0.375201052785495, "percentage": 18.76, "elapsed_time": "1:52:25", "remaining_time": "8:06:54"} +{"current_steps": 1284, "total_steps": 6840, "loss": 0.6230417490005493, "lr": 1.8982847863011898e-05, "epoch": 0.37549349320076036, "percentage": 18.77, "elapsed_time": "1:52:30", "remaining_time": "8:06:48"} +{"current_steps": 1285, "total_steps": 6840, "loss": 0.6896604299545288, "lr": 1.8980722376493622e-05, "epoch": 0.37578593361602575, "percentage": 18.79, "elapsed_time": "1:52:35", "remaining_time": "8:06:44"} +{"current_steps": 1286, "total_steps": 6840, "loss": 0.5767710208892822, "lr": 1.8978594790786092e-05, "epoch": 0.37607837403129113, "percentage": 18.8, "elapsed_time": "1:52:39", "remaining_time": "8:06:33"} +{"current_steps": 1287, "total_steps": 6840, "loss": 0.6945392489433289, "lr": 1.8976465106386625e-05, "epoch": 0.3763708144465565, "percentage": 18.82, "elapsed_time": "1:52:44", "remaining_time": "8:06:26"} +{"current_steps": 1288, "total_steps": 6840, "loss": 0.7208314538002014, "lr": 1.8974333323793014e-05, "epoch": 0.3766632548618219, "percentage": 18.83, "elapsed_time": "1:52:50", "remaining_time": "8:06:22"} +{"current_steps": 1289, "total_steps": 6840, "loss": 0.7201139330863953, "lr": 1.8972199443503556e-05, "epoch": 0.3769556952770873, "percentage": 18.85, "elapsed_time": "1:52:55", "remaining_time": "8:06:18"} 
+{"current_steps": 1290, "total_steps": 6840, "loss": 0.6791107654571533, "lr": 1.8970063466017028e-05, "epoch": 0.37724813569235266, "percentage": 18.86, "elapsed_time": "1:53:00", "remaining_time": "8:06:13"} +{"current_steps": 1291, "total_steps": 6840, "loss": 0.6535364389419556, "lr": 1.89679253918327e-05, "epoch": 0.3775405761076181, "percentage": 18.87, "elapsed_time": "1:53:05", "remaining_time": "8:06:05"} +{"current_steps": 1292, "total_steps": 6840, "loss": 0.5910370349884033, "lr": 1.8965785221450343e-05, "epoch": 0.3778330165228835, "percentage": 18.89, "elapsed_time": "1:53:09", "remaining_time": "8:05:54"} +{"current_steps": 1293, "total_steps": 6840, "loss": 0.7025415897369385, "lr": 1.8963642955370203e-05, "epoch": 0.37812545693814886, "percentage": 18.9, "elapsed_time": "1:53:15", "remaining_time": "8:05:54"} +{"current_steps": 1294, "total_steps": 6840, "loss": 0.8007702827453613, "lr": 1.8961498594093018e-05, "epoch": 0.37841789735341425, "percentage": 18.92, "elapsed_time": "1:53:20", "remaining_time": "8:05:45"} +{"current_steps": 1295, "total_steps": 6840, "loss": 0.6947172284126282, "lr": 1.895935213812003e-05, "epoch": 0.37871033776867963, "percentage": 18.93, "elapsed_time": "1:53:26", "remaining_time": "8:05:42"} +{"current_steps": 1296, "total_steps": 6840, "loss": 0.7024818658828735, "lr": 1.895720358795295e-05, "epoch": 0.379002778183945, "percentage": 18.95, "elapsed_time": "1:53:32", "remaining_time": "8:05:42"} +{"current_steps": 1297, "total_steps": 6840, "loss": 0.8202607035636902, "lr": 1.895505294409399e-05, "epoch": 0.3792952185992104, "percentage": 18.96, "elapsed_time": "1:53:37", "remaining_time": "8:05:34"} +{"current_steps": 1298, "total_steps": 6840, "loss": 0.8001795411109924, "lr": 1.8952900207045853e-05, "epoch": 0.3795876590144758, "percentage": 18.98, "elapsed_time": "1:53:42", "remaining_time": "8:05:31"} +{"current_steps": 1299, "total_steps": 6840, "loss": 0.8068668842315674, "lr": 1.895074537731173e-05, "epoch": 
0.3798800994297412, "percentage": 18.99, "elapsed_time": "1:53:48", "remaining_time": "8:05:28"} +{"current_steps": 1300, "total_steps": 6840, "loss": 0.7685220241546631, "lr": 1.8948588455395294e-05, "epoch": 0.3801725398450066, "percentage": 19.01, "elapsed_time": "1:53:53", "remaining_time": "8:05:22"} +{"current_steps": 1301, "total_steps": 6840, "loss": 0.695665717124939, "lr": 1.8946429441800715e-05, "epoch": 0.380464980260272, "percentage": 19.02, "elapsed_time": "1:54:02", "remaining_time": "8:05:33"} +{"current_steps": 1302, "total_steps": 6840, "loss": 0.6073132753372192, "lr": 1.894426833703265e-05, "epoch": 0.38075742067553736, "percentage": 19.04, "elapsed_time": "1:54:07", "remaining_time": "8:05:26"} +{"current_steps": 1303, "total_steps": 6840, "loss": 0.6334577798843384, "lr": 1.894210514159624e-05, "epoch": 0.38104986109080274, "percentage": 19.05, "elapsed_time": "1:54:13", "remaining_time": "8:05:21"} +{"current_steps": 1304, "total_steps": 6840, "loss": 0.6448806524276733, "lr": 1.8939939855997125e-05, "epoch": 0.3813423015060681, "percentage": 19.06, "elapsed_time": "1:54:18", "remaining_time": "8:05:15"} +{"current_steps": 1305, "total_steps": 6840, "loss": 0.7587993144989014, "lr": 1.8937772480741427e-05, "epoch": 0.3816347419213335, "percentage": 19.08, "elapsed_time": "1:54:22", "remaining_time": "8:05:07"} +{"current_steps": 1306, "total_steps": 6840, "loss": 0.6924787759780884, "lr": 1.8935603016335752e-05, "epoch": 0.3819271823365989, "percentage": 19.09, "elapsed_time": "1:54:28", "remaining_time": "8:05:04"} +{"current_steps": 1307, "total_steps": 6840, "loss": 0.678055465221405, "lr": 1.8933431463287197e-05, "epoch": 0.38221962275186433, "percentage": 19.11, "elapsed_time": "1:54:32", "remaining_time": "8:04:55"} +{"current_steps": 1308, "total_steps": 6840, "loss": 0.7519007325172424, "lr": 1.8931257822103357e-05, "epoch": 0.3825120631671297, "percentage": 19.12, "elapsed_time": "1:54:38", "remaining_time": "8:04:51"} 
+{"current_steps": 1309, "total_steps": 6840, "loss": 0.6905468702316284, "lr": 1.8929082093292306e-05, "epoch": 0.3828045035823951, "percentage": 19.14, "elapsed_time": "1:54:44", "remaining_time": "8:04:48"} +{"current_steps": 1310, "total_steps": 6840, "loss": 0.6718122363090515, "lr": 1.8926904277362603e-05, "epoch": 0.3830969439976605, "percentage": 19.15, "elapsed_time": "1:54:50", "remaining_time": "8:04:47"} +{"current_steps": 1311, "total_steps": 6840, "loss": 0.6903961896896362, "lr": 1.89247243748233e-05, "epoch": 0.38338938441292586, "percentage": 19.17, "elapsed_time": "1:54:55", "remaining_time": "8:04:40"} +{"current_steps": 1312, "total_steps": 6840, "loss": 0.6947582960128784, "lr": 1.8922542386183942e-05, "epoch": 0.38368182482819124, "percentage": 19.18, "elapsed_time": "1:55:00", "remaining_time": "8:04:34"} +{"current_steps": 1313, "total_steps": 6840, "loss": 0.5850759148597717, "lr": 1.8920358311954548e-05, "epoch": 0.3839742652434566, "percentage": 19.2, "elapsed_time": "1:55:06", "remaining_time": "8:04:34"} +{"current_steps": 1314, "total_steps": 6840, "loss": 0.6512178778648376, "lr": 1.891817215264564e-05, "epoch": 0.38426670565872206, "percentage": 19.21, "elapsed_time": "1:55:12", "remaining_time": "8:04:29"} +{"current_steps": 1315, "total_steps": 6840, "loss": 0.5910850167274475, "lr": 1.891598390876821e-05, "epoch": 0.38455914607398745, "percentage": 19.23, "elapsed_time": "1:55:17", "remaining_time": "8:04:24"} +{"current_steps": 1316, "total_steps": 6840, "loss": 0.7113536596298218, "lr": 1.891379358083375e-05, "epoch": 0.38485158648925283, "percentage": 19.24, "elapsed_time": "1:55:22", "remaining_time": "8:04:19"} +{"current_steps": 1317, "total_steps": 6840, "loss": 0.705318808555603, "lr": 1.891160116935424e-05, "epoch": 0.3851440269045182, "percentage": 19.25, "elapsed_time": "1:55:28", "remaining_time": "8:04:17"} +{"current_steps": 1318, "total_steps": 6840, "loss": 0.7524716258049011, "lr": 1.890940667484214e-05, "epoch": 
0.3854364673197836, "percentage": 19.27, "elapsed_time": "1:55:34", "remaining_time": "8:04:11"} +{"current_steps": 1319, "total_steps": 6840, "loss": 0.6130248308181763, "lr": 1.89072100978104e-05, "epoch": 0.385728907735049, "percentage": 19.28, "elapsed_time": "1:55:39", "remaining_time": "8:04:08"} +{"current_steps": 1320, "total_steps": 6840, "loss": 0.6535071134567261, "lr": 1.8905011438772455e-05, "epoch": 0.38602134815031436, "percentage": 19.3, "elapsed_time": "1:55:45", "remaining_time": "8:04:04"} +{"current_steps": 1321, "total_steps": 6840, "loss": 0.7027082443237305, "lr": 1.890281069824223e-05, "epoch": 0.38631378856557974, "percentage": 19.31, "elapsed_time": "1:55:50", "remaining_time": "8:04:00"} +{"current_steps": 1322, "total_steps": 6840, "loss": 0.7055719494819641, "lr": 1.8900607876734133e-05, "epoch": 0.3866062289808452, "percentage": 19.33, "elapsed_time": "1:55:55", "remaining_time": "8:03:54"} +{"current_steps": 1323, "total_steps": 6840, "loss": 0.7403384447097778, "lr": 1.8898402974763063e-05, "epoch": 0.38689866939611056, "percentage": 19.34, "elapsed_time": "1:55:59", "remaining_time": "8:03:42"} +{"current_steps": 1324, "total_steps": 6840, "loss": 0.6771470308303833, "lr": 1.88961959928444e-05, "epoch": 0.38719110981137594, "percentage": 19.36, "elapsed_time": "1:56:06", "remaining_time": "8:03:42"} +{"current_steps": 1325, "total_steps": 6840, "loss": 0.7258767485618591, "lr": 1.8893986931494015e-05, "epoch": 0.3874835502266413, "percentage": 19.37, "elapsed_time": "1:56:10", "remaining_time": "8:03:32"} +{"current_steps": 1326, "total_steps": 6840, "loss": 0.7587069869041443, "lr": 1.889177579122826e-05, "epoch": 0.3877759906419067, "percentage": 19.39, "elapsed_time": "1:56:14", "remaining_time": "8:03:20"} +{"current_steps": 1327, "total_steps": 6840, "loss": 0.5434668660163879, "lr": 1.888956257256398e-05, "epoch": 0.3880684310571721, "percentage": 19.4, "elapsed_time": "1:56:20", "remaining_time": "8:03:21"} +{"current_steps": 
1328, "total_steps": 6840, "loss": 0.5311154127120972, "lr": 1.8887347276018496e-05, "epoch": 0.3883608714724375, "percentage": 19.42, "elapsed_time": "1:56:26", "remaining_time": "8:03:16"} +{"current_steps": 1329, "total_steps": 6840, "loss": 0.5651747584342957, "lr": 1.888512990210962e-05, "epoch": 0.38865331188770286, "percentage": 19.43, "elapsed_time": "1:56:31", "remaining_time": "8:03:10"} +{"current_steps": 1330, "total_steps": 6840, "loss": 0.628046989440918, "lr": 1.8882910451355654e-05, "epoch": 0.3889457523029683, "percentage": 19.44, "elapsed_time": "1:56:37", "remaining_time": "8:03:10"} +{"current_steps": 1331, "total_steps": 6840, "loss": 0.644639253616333, "lr": 1.888068892427538e-05, "epoch": 0.3892381927182337, "percentage": 19.46, "elapsed_time": "1:56:42", "remaining_time": "8:03:04"} +{"current_steps": 1332, "total_steps": 6840, "loss": 0.6290382742881775, "lr": 1.887846532138806e-05, "epoch": 0.38953063313349906, "percentage": 19.47, "elapsed_time": "1:56:48", "remaining_time": "8:03:00"} +{"current_steps": 1333, "total_steps": 6840, "loss": 0.6881425380706787, "lr": 1.8876239643213456e-05, "epoch": 0.38982307354876444, "percentage": 19.49, "elapsed_time": "1:56:52", "remaining_time": "8:02:49"} +{"current_steps": 1334, "total_steps": 6840, "loss": 0.645643949508667, "lr": 1.8874011890271807e-05, "epoch": 0.3901155139640298, "percentage": 19.5, "elapsed_time": "1:56:58", "remaining_time": "8:02:48"} +{"current_steps": 1335, "total_steps": 6840, "loss": 0.6965867280960083, "lr": 1.887178206308383e-05, "epoch": 0.3904079543792952, "percentage": 19.52, "elapsed_time": "1:57:03", "remaining_time": "8:02:43"} +{"current_steps": 1336, "total_steps": 6840, "loss": 0.7326550483703613, "lr": 1.886955016217074e-05, "epoch": 0.3907003947945606, "percentage": 19.53, "elapsed_time": "1:57:10", "remaining_time": "8:02:44"} +{"current_steps": 1337, "total_steps": 6840, "loss": 0.6570208668708801, "lr": 1.886731618805422e-05, "epoch": 0.390992835209826, 
"percentage": 19.55, "elapsed_time": "1:57:15", "remaining_time": "8:02:39"} +{"current_steps": 1338, "total_steps": 6840, "loss": 0.7391610145568848, "lr": 1.886508014125646e-05, "epoch": 0.3912852756250914, "percentage": 19.56, "elapsed_time": "1:57:20", "remaining_time": "8:02:33"} +{"current_steps": 1339, "total_steps": 6840, "loss": 0.651665985584259, "lr": 1.8862842022300124e-05, "epoch": 0.3915777160403568, "percentage": 19.58, "elapsed_time": "1:57:27", "remaining_time": "8:02:31"} +{"current_steps": 1340, "total_steps": 6840, "loss": 0.695915699005127, "lr": 1.8860601831708346e-05, "epoch": 0.3918701564556222, "percentage": 19.59, "elapsed_time": "1:57:31", "remaining_time": "8:02:21"} +{"current_steps": 1341, "total_steps": 6840, "loss": 0.7209347486495972, "lr": 1.885835957000476e-05, "epoch": 0.39216259687088756, "percentage": 19.61, "elapsed_time": "1:57:35", "remaining_time": "8:02:10"} +{"current_steps": 1342, "total_steps": 6840, "loss": 0.6083317995071411, "lr": 1.885611523771349e-05, "epoch": 0.39245503728615294, "percentage": 19.62, "elapsed_time": "1:57:40", "remaining_time": "8:02:06"} +{"current_steps": 1343, "total_steps": 6840, "loss": 0.5544713735580444, "lr": 1.8853868835359127e-05, "epoch": 0.3927474777014183, "percentage": 19.63, "elapsed_time": "1:57:46", "remaining_time": "8:02:05"} +{"current_steps": 1344, "total_steps": 6840, "loss": 0.6333836317062378, "lr": 1.8851620363466756e-05, "epoch": 0.3930399181166837, "percentage": 19.65, "elapsed_time": "1:57:51", "remaining_time": "8:01:58"} +{"current_steps": 1345, "total_steps": 6840, "loss": 0.554995059967041, "lr": 1.8849369822561943e-05, "epoch": 0.3933323585319491, "percentage": 19.66, "elapsed_time": "1:57:57", "remaining_time": "8:01:53"} +{"current_steps": 1346, "total_steps": 6840, "loss": 0.6696420907974243, "lr": 1.884711721317074e-05, "epoch": 0.3936247989472145, "percentage": 19.68, "elapsed_time": "1:58:03", "remaining_time": "8:01:52"} +{"current_steps": 1347, 
"total_steps": 6840, "loss": 0.6031695604324341, "lr": 1.8844862535819682e-05, "epoch": 0.3939172393624799, "percentage": 19.69, "elapsed_time": "1:58:07", "remaining_time": "8:01:42"} +{"current_steps": 1348, "total_steps": 6840, "loss": 0.686814546585083, "lr": 1.884260579103578e-05, "epoch": 0.3942096797777453, "percentage": 19.71, "elapsed_time": "1:58:13", "remaining_time": "8:01:39"} +{"current_steps": 1349, "total_steps": 6840, "loss": 0.8113317489624023, "lr": 1.884034697934654e-05, "epoch": 0.3945021201930107, "percentage": 19.72, "elapsed_time": "1:58:18", "remaining_time": "8:01:35"} +{"current_steps": 1350, "total_steps": 6840, "loss": 0.5645952820777893, "lr": 1.8838086101279946e-05, "epoch": 0.39479456060827606, "percentage": 19.74, "elapsed_time": "1:58:24", "remaining_time": "8:01:32"} +{"current_steps": 1351, "total_steps": 6840, "loss": 0.6431643962860107, "lr": 1.883582315736446e-05, "epoch": 0.39508700102354144, "percentage": 19.75, "elapsed_time": "1:58:29", "remaining_time": "8:01:27"} +{"current_steps": 1352, "total_steps": 6840, "loss": 0.7691985368728638, "lr": 1.8833558148129034e-05, "epoch": 0.3953794414388068, "percentage": 19.77, "elapsed_time": "1:58:34", "remaining_time": "8:01:19"} +{"current_steps": 1353, "total_steps": 6840, "loss": 0.5951793789863586, "lr": 1.88312910741031e-05, "epoch": 0.39567188185407226, "percentage": 19.78, "elapsed_time": "1:58:40", "remaining_time": "8:01:16"} +{"current_steps": 1354, "total_steps": 6840, "loss": 0.671844482421875, "lr": 1.8829021935816572e-05, "epoch": 0.39596432226933764, "percentage": 19.8, "elapsed_time": "1:58:46", "remaining_time": "8:01:14"} +{"current_steps": 1355, "total_steps": 6840, "loss": 0.5290843844413757, "lr": 1.8826750733799845e-05, "epoch": 0.396256762684603, "percentage": 19.81, "elapsed_time": "1:58:51", "remaining_time": "8:01:07"} +{"current_steps": 1356, "total_steps": 6840, "loss": 0.6492103934288025, "lr": 1.8824477468583806e-05, "epoch": 0.3965492030998684, 
"percentage": 19.82, "elapsed_time": "1:58:57", "remaining_time": "8:01:05"} +{"current_steps": 1357, "total_steps": 6840, "loss": 0.6111055016517639, "lr": 1.882220214069981e-05, "epoch": 0.3968416435151338, "percentage": 19.84, "elapsed_time": "1:59:03", "remaining_time": "8:01:05"} +{"current_steps": 1358, "total_steps": 6840, "loss": 0.8123398423194885, "lr": 1.8819924750679702e-05, "epoch": 0.3971340839303992, "percentage": 19.85, "elapsed_time": "1:59:08", "remaining_time": "8:00:58"} +{"current_steps": 1359, "total_steps": 6840, "loss": 0.6118077039718628, "lr": 1.8817645299055815e-05, "epoch": 0.39742652434566456, "percentage": 19.87, "elapsed_time": "1:59:15", "remaining_time": "8:00:58"} +{"current_steps": 1360, "total_steps": 6840, "loss": 0.6916248798370361, "lr": 1.8815363786360948e-05, "epoch": 0.39771896476092994, "percentage": 19.88, "elapsed_time": "1:59:21", "remaining_time": "8:00:55"} +{"current_steps": 1361, "total_steps": 6840, "loss": 0.5586028099060059, "lr": 1.8813080213128394e-05, "epoch": 0.3980114051761954, "percentage": 19.9, "elapsed_time": "1:59:26", "remaining_time": "8:00:51"} +{"current_steps": 1362, "total_steps": 6840, "loss": 0.6132841110229492, "lr": 1.8810794579891925e-05, "epoch": 0.39830384559146076, "percentage": 19.91, "elapsed_time": "1:59:31", "remaining_time": "8:00:42"} +{"current_steps": 1363, "total_steps": 6840, "loss": 0.7329133749008179, "lr": 1.8808506887185793e-05, "epoch": 0.39859628600672614, "percentage": 19.93, "elapsed_time": "1:59:35", "remaining_time": "8:00:34"} +{"current_steps": 1364, "total_steps": 6840, "loss": 0.5975138545036316, "lr": 1.8806217135544736e-05, "epoch": 0.3988887264219915, "percentage": 19.94, "elapsed_time": "1:59:40", "remaining_time": "8:00:28"} +{"current_steps": 1365, "total_steps": 6840, "loss": 0.5790295600891113, "lr": 1.8803925325503963e-05, "epoch": 0.3991811668372569, "percentage": 19.96, "elapsed_time": "1:59:46", "remaining_time": "8:00:24"} +{"current_steps": 1366, 
"total_steps": 6840, "loss": 0.6506124138832092, "lr": 1.8801631457599173e-05, "epoch": 0.3994736072525223, "percentage": 19.97, "elapsed_time": "1:59:51", "remaining_time": "8:00:19"} +{"current_steps": 1367, "total_steps": 6840, "loss": 0.5577528476715088, "lr": 1.8799335532366547e-05, "epoch": 0.39976604766778767, "percentage": 19.99, "elapsed_time": "1:59:56", "remaining_time": "8:00:13"} +{"current_steps": 1368, "total_steps": 6840, "loss": 0.675471305847168, "lr": 1.879703755034274e-05, "epoch": 0.40005848808305305, "percentage": 20.0, "elapsed_time": "2:00:02", "remaining_time": "8:00:09"} +{"current_steps": 1369, "total_steps": 6840, "loss": 0.5826357007026672, "lr": 1.879473751206489e-05, "epoch": 0.4003509284983185, "percentage": 20.01, "elapsed_time": "2:00:07", "remaining_time": "8:00:03"} +{"current_steps": 1370, "total_steps": 6840, "loss": 0.5146772265434265, "lr": 1.8792435418070623e-05, "epoch": 0.4006433689135839, "percentage": 20.03, "elapsed_time": "2:00:11", "remaining_time": "7:59:51"} +{"current_steps": 1371, "total_steps": 6840, "loss": 0.6049208641052246, "lr": 1.879013126889804e-05, "epoch": 0.40093580932884926, "percentage": 20.04, "elapsed_time": "2:00:17", "remaining_time": "7:59:50"} +{"current_steps": 1372, "total_steps": 6840, "loss": 0.7058207392692566, "lr": 1.878782506508571e-05, "epoch": 0.40122824974411464, "percentage": 20.06, "elapsed_time": "2:00:23", "remaining_time": "7:59:47"} +{"current_steps": 1373, "total_steps": 6840, "loss": 0.6281940937042236, "lr": 1.8785516807172704e-05, "epoch": 0.40152069015938, "percentage": 20.07, "elapsed_time": "2:00:28", "remaining_time": "7:59:44"} +{"current_steps": 1374, "total_steps": 6840, "loss": 0.6318703889846802, "lr": 1.878320649569856e-05, "epoch": 0.4018131305746454, "percentage": 20.09, "elapsed_time": "2:00:33", "remaining_time": "7:59:37"} +{"current_steps": 1375, "total_steps": 6840, "loss": 0.6595311164855957, "lr": 1.87808941312033e-05, "epoch": 0.4021055709899108, 
"percentage": 20.1, "elapsed_time": "2:00:38", "remaining_time": "7:59:31"} +{"current_steps": 1376, "total_steps": 6840, "loss": 0.6964930295944214, "lr": 1.8778579714227433e-05, "epoch": 0.40239801140517617, "percentage": 20.12, "elapsed_time": "2:00:45", "remaining_time": "7:59:29"} +{"current_steps": 1377, "total_steps": 6840, "loss": 0.6093966364860535, "lr": 1.8776263245311926e-05, "epoch": 0.4026904518204416, "percentage": 20.13, "elapsed_time": "2:00:49", "remaining_time": "7:59:22"} +{"current_steps": 1378, "total_steps": 6840, "loss": 0.5337893962860107, "lr": 1.8773944724998248e-05, "epoch": 0.402982892235707, "percentage": 20.15, "elapsed_time": "2:00:55", "remaining_time": "7:59:17"} +{"current_steps": 1379, "total_steps": 6840, "loss": 0.5899128317832947, "lr": 1.8771624153828336e-05, "epoch": 0.4032753326509724, "percentage": 20.16, "elapsed_time": "2:01:00", "remaining_time": "7:59:12"} +{"current_steps": 1380, "total_steps": 6840, "loss": 0.7054699659347534, "lr": 1.876930153234461e-05, "epoch": 0.40356777306623776, "percentage": 20.18, "elapsed_time": "2:01:05", "remaining_time": "7:59:07"} +{"current_steps": 1381, "total_steps": 6840, "loss": 0.6910602450370789, "lr": 1.876697686108997e-05, "epoch": 0.40386021348150314, "percentage": 20.19, "elapsed_time": "2:01:10", "remaining_time": "7:58:58"} +{"current_steps": 1382, "total_steps": 6840, "loss": 0.605659008026123, "lr": 1.876465014060779e-05, "epoch": 0.4041526538967685, "percentage": 20.2, "elapsed_time": "2:01:15", "remaining_time": "7:58:52"} +{"current_steps": 1383, "total_steps": 6840, "loss": 0.7159937620162964, "lr": 1.8762321371441934e-05, "epoch": 0.4044450943120339, "percentage": 20.22, "elapsed_time": "2:01:20", "remaining_time": "7:58:48"} +{"current_steps": 1384, "total_steps": 6840, "loss": 0.7568333148956299, "lr": 1.8759990554136733e-05, "epoch": 0.4047375347272993, "percentage": 20.23, "elapsed_time": "2:01:25", "remaining_time": "7:58:40"} +{"current_steps": 1385, 
"total_steps": 6840, "loss": 0.7117356657981873, "lr": 1.8757657689236998e-05, "epoch": 0.4050299751425647, "percentage": 20.25, "elapsed_time": "2:01:31", "remaining_time": "7:58:37"} +{"current_steps": 1386, "total_steps": 6840, "loss": 0.6429109573364258, "lr": 1.8755322777288027e-05, "epoch": 0.4053224155578301, "percentage": 20.26, "elapsed_time": "2:01:35", "remaining_time": "7:58:29"} +{"current_steps": 1387, "total_steps": 6840, "loss": 0.643811821937561, "lr": 1.875298581883559e-05, "epoch": 0.4056148559730955, "percentage": 20.28, "elapsed_time": "2:01:40", "remaining_time": "7:58:21"} +{"current_steps": 1388, "total_steps": 6840, "loss": 0.7143295407295227, "lr": 1.875064681442594e-05, "epoch": 0.40590729638836087, "percentage": 20.29, "elapsed_time": "2:01:45", "remaining_time": "7:58:17"} +{"current_steps": 1389, "total_steps": 6840, "loss": 0.732312023639679, "lr": 1.8748305764605798e-05, "epoch": 0.40619973680362625, "percentage": 20.31, "elapsed_time": "2:01:50", "remaining_time": "7:58:07"} +{"current_steps": 1390, "total_steps": 6840, "loss": 0.6878848075866699, "lr": 1.8745962669922375e-05, "epoch": 0.40649217721889164, "percentage": 20.32, "elapsed_time": "2:01:55", "remaining_time": "7:58:03"} +{"current_steps": 1391, "total_steps": 6840, "loss": 0.6716262698173523, "lr": 1.8743617530923356e-05, "epoch": 0.406784617634157, "percentage": 20.34, "elapsed_time": "2:02:01", "remaining_time": "7:57:59"} +{"current_steps": 1392, "total_steps": 6840, "loss": 0.4990834593772888, "lr": 1.87412703481569e-05, "epoch": 0.40707705804942246, "percentage": 20.35, "elapsed_time": "2:02:06", "remaining_time": "7:57:54"} +{"current_steps": 1393, "total_steps": 6840, "loss": 0.6541857719421387, "lr": 1.8738921122171647e-05, "epoch": 0.40736949846468784, "percentage": 20.37, "elapsed_time": "2:02:12", "remaining_time": "7:57:52"} +{"current_steps": 1394, "total_steps": 6840, "loss": 0.6310811042785645, "lr": 1.8736569853516715e-05, "epoch": 0.4076619388799532, 
"percentage": 20.38, "elapsed_time": "2:02:17", "remaining_time": "7:57:44"} +{"current_steps": 1395, "total_steps": 6840, "loss": 0.6335423588752747, "lr": 1.8734216542741702e-05, "epoch": 0.4079543792952186, "percentage": 20.39, "elapsed_time": "2:02:20", "remaining_time": "7:57:33"} +{"current_steps": 1396, "total_steps": 6840, "loss": 0.6315034627914429, "lr": 1.873186119039667e-05, "epoch": 0.408246819710484, "percentage": 20.41, "elapsed_time": "2:02:26", "remaining_time": "7:57:27"} +{"current_steps": 1397, "total_steps": 6840, "loss": 0.6822362542152405, "lr": 1.872950379703218e-05, "epoch": 0.40853926012574937, "percentage": 20.42, "elapsed_time": "2:02:31", "remaining_time": "7:57:21"} +{"current_steps": 1398, "total_steps": 6840, "loss": 0.699965238571167, "lr": 1.8727144363199257e-05, "epoch": 0.40883170054101475, "percentage": 20.44, "elapsed_time": "2:02:37", "remaining_time": "7:57:19"} +{"current_steps": 1399, "total_steps": 6840, "loss": 0.6769841313362122, "lr": 1.8724782889449397e-05, "epoch": 0.40912414095628014, "percentage": 20.45, "elapsed_time": "2:02:41", "remaining_time": "7:57:12"} +{"current_steps": 1400, "total_steps": 6840, "loss": 0.5219473838806152, "lr": 1.8722419376334584e-05, "epoch": 0.4094165813715456, "percentage": 20.47, "elapsed_time": "2:02:47", "remaining_time": "7:57:07"} +{"current_steps": 1401, "total_steps": 6840, "loss": 0.6091574430465698, "lr": 1.872005382440728e-05, "epoch": 0.40970902178681096, "percentage": 20.48, "elapsed_time": "2:02:56", "remaining_time": "7:57:18"} +{"current_steps": 1402, "total_steps": 6840, "loss": 0.7589390277862549, "lr": 1.8717686234220406e-05, "epoch": 0.41000146220207634, "percentage": 20.5, "elapsed_time": "2:03:03", "remaining_time": "7:57:18"} +{"current_steps": 1403, "total_steps": 6840, "loss": 0.7042895555496216, "lr": 1.8715316606327384e-05, "epoch": 0.4102939026173417, "percentage": 20.51, "elapsed_time": "2:03:08", "remaining_time": "7:57:10"} +{"current_steps": 1404, 
"total_steps": 6840, "loss": 0.6490949988365173, "lr": 1.8712944941282095e-05, "epoch": 0.4105863430326071, "percentage": 20.53, "elapsed_time": "2:03:13", "remaining_time": "7:57:07"} +{"current_steps": 1405, "total_steps": 6840, "loss": 0.6614132523536682, "lr": 1.87105712396389e-05, "epoch": 0.4108787834478725, "percentage": 20.54, "elapsed_time": "2:03:19", "remaining_time": "7:57:03"} +{"current_steps": 1406, "total_steps": 6840, "loss": 0.666157603263855, "lr": 1.8708195501952637e-05, "epoch": 0.41117122386313787, "percentage": 20.56, "elapsed_time": "2:03:25", "remaining_time": "7:57:01"} +{"current_steps": 1407, "total_steps": 6840, "loss": 0.7347884178161621, "lr": 1.8705817728778626e-05, "epoch": 0.41146366427840325, "percentage": 20.57, "elapsed_time": "2:03:31", "remaining_time": "7:56:57"} +{"current_steps": 1408, "total_steps": 6840, "loss": 0.8129836320877075, "lr": 1.8703437920672652e-05, "epoch": 0.4117561046936687, "percentage": 20.58, "elapsed_time": "2:03:36", "remaining_time": "7:56:51"} +{"current_steps": 1409, "total_steps": 6840, "loss": 0.645210862159729, "lr": 1.870105607819098e-05, "epoch": 0.4120485451089341, "percentage": 20.6, "elapsed_time": "2:03:41", "remaining_time": "7:56:46"} +{"current_steps": 1410, "total_steps": 6840, "loss": 0.6716916561126709, "lr": 1.8698672201890355e-05, "epoch": 0.41234098552419945, "percentage": 20.61, "elapsed_time": "2:03:47", "remaining_time": "7:56:42"} +{"current_steps": 1411, "total_steps": 6840, "loss": 0.8190855383872986, "lr": 1.869628629232799e-05, "epoch": 0.41263342593946484, "percentage": 20.63, "elapsed_time": "2:03:52", "remaining_time": "7:56:36"} +{"current_steps": 1412, "total_steps": 6840, "loss": 0.7618075609207153, "lr": 1.8693898350061582e-05, "epoch": 0.4129258663547302, "percentage": 20.64, "elapsed_time": "2:03:57", "remaining_time": "7:56:30"} +{"current_steps": 1413, "total_steps": 6840, "loss": 0.719980001449585, "lr": 1.869150837564929e-05, "epoch": 0.4132183067699956, 
"percentage": 20.66, "elapsed_time": "2:04:02", "remaining_time": "7:56:22"} +{"current_steps": 1414, "total_steps": 6840, "loss": 0.6601548194885254, "lr": 1.8689116369649763e-05, "epoch": 0.413510747185261, "percentage": 20.67, "elapsed_time": "2:04:07", "remaining_time": "7:56:18"} +{"current_steps": 1415, "total_steps": 6840, "loss": 0.5991787314414978, "lr": 1.8686722332622112e-05, "epoch": 0.41380318760052637, "percentage": 20.69, "elapsed_time": "2:04:11", "remaining_time": "7:56:08"} +{"current_steps": 1416, "total_steps": 6840, "loss": 0.6089641451835632, "lr": 1.8684326265125935e-05, "epoch": 0.4140956280157918, "percentage": 20.7, "elapsed_time": "2:04:16", "remaining_time": "7:56:01"} +{"current_steps": 1417, "total_steps": 6840, "loss": 0.8143327236175537, "lr": 1.8681928167721297e-05, "epoch": 0.4143880684310572, "percentage": 20.72, "elapsed_time": "2:04:21", "remaining_time": "7:55:55"} +{"current_steps": 1418, "total_steps": 6840, "loss": 0.6127045154571533, "lr": 1.8679528040968733e-05, "epoch": 0.41468050884632257, "percentage": 20.73, "elapsed_time": "2:04:27", "remaining_time": "7:55:53"} +{"current_steps": 1419, "total_steps": 6840, "loss": 0.659069299697876, "lr": 1.8677125885429262e-05, "epoch": 0.41497294926158795, "percentage": 20.75, "elapsed_time": "2:04:32", "remaining_time": "7:55:46"} +{"current_steps": 1420, "total_steps": 6840, "loss": 0.7277505397796631, "lr": 1.8674721701664377e-05, "epoch": 0.41526538967685334, "percentage": 20.76, "elapsed_time": "2:04:37", "remaining_time": "7:55:40"} +{"current_steps": 1421, "total_steps": 6840, "loss": 0.6128710508346558, "lr": 1.8672315490236034e-05, "epoch": 0.4155578300921187, "percentage": 20.77, "elapsed_time": "2:04:41", "remaining_time": "7:55:32"} +{"current_steps": 1422, "total_steps": 6840, "loss": 0.7439340949058533, "lr": 1.866990725170667e-05, "epoch": 0.4158502705073841, "percentage": 20.79, "elapsed_time": "2:04:47", "remaining_time": "7:55:27"} +{"current_steps": 1423, 
"total_steps": 6840, "loss": 0.5855459570884705, "lr": 1.8667496986639206e-05, "epoch": 0.4161427109226495, "percentage": 20.8, "elapsed_time": "2:04:53", "remaining_time": "7:55:23"} +{"current_steps": 1424, "total_steps": 6840, "loss": 0.6865170001983643, "lr": 1.866508469559702e-05, "epoch": 0.4164351513379149, "percentage": 20.82, "elapsed_time": "2:04:58", "remaining_time": "7:55:19"} +{"current_steps": 1425, "total_steps": 6840, "loss": 0.6648446917533875, "lr": 1.866267037914397e-05, "epoch": 0.4167275917531803, "percentage": 20.83, "elapsed_time": "2:05:03", "remaining_time": "7:55:11"} +{"current_steps": 1426, "total_steps": 6840, "loss": 0.6919275522232056, "lr": 1.866025403784439e-05, "epoch": 0.4170200321684457, "percentage": 20.85, "elapsed_time": "2:05:07", "remaining_time": "7:55:04"} +{"current_steps": 1427, "total_steps": 6840, "loss": 0.7270313501358032, "lr": 1.865783567226308e-05, "epoch": 0.41731247258371107, "percentage": 20.86, "elapsed_time": "2:05:12", "remaining_time": "7:54:55"} +{"current_steps": 1428, "total_steps": 6840, "loss": 0.5938387513160706, "lr": 1.8655415282965327e-05, "epoch": 0.41760491299897645, "percentage": 20.88, "elapsed_time": "2:05:17", "remaining_time": "7:54:48"} +{"current_steps": 1429, "total_steps": 6840, "loss": 0.6517149209976196, "lr": 1.8652992870516872e-05, "epoch": 0.41789735341424183, "percentage": 20.89, "elapsed_time": "2:05:23", "remaining_time": "7:54:47"} +{"current_steps": 1430, "total_steps": 6840, "loss": 0.6688356399536133, "lr": 1.8650568435483948e-05, "epoch": 0.4181897938295072, "percentage": 20.91, "elapsed_time": "2:05:27", "remaining_time": "7:54:38"} +{"current_steps": 1431, "total_steps": 6840, "loss": 0.5300855040550232, "lr": 1.864814197843325e-05, "epoch": 0.41848223424477266, "percentage": 20.92, "elapsed_time": "2:05:33", "remaining_time": "7:54:34"} +{"current_steps": 1432, "total_steps": 6840, "loss": 0.6404704451560974, "lr": 1.8645713499931943e-05, "epoch": 0.41877467466003804, 
"percentage": 20.94, "elapsed_time": "2:05:37", "remaining_time": "7:54:27"} +{"current_steps": 1433, "total_steps": 6840, "loss": 0.6758813858032227, "lr": 1.8643283000547673e-05, "epoch": 0.4190671150753034, "percentage": 20.95, "elapsed_time": "2:05:42", "remaining_time": "7:54:19"} +{"current_steps": 1434, "total_steps": 6840, "loss": 0.6328250169754028, "lr": 1.8640850480848552e-05, "epoch": 0.4193595554905688, "percentage": 20.96, "elapsed_time": "2:05:47", "remaining_time": "7:54:12"} +{"current_steps": 1435, "total_steps": 6840, "loss": 0.6747157573699951, "lr": 1.863841594140317e-05, "epoch": 0.4196519959058342, "percentage": 20.98, "elapsed_time": "2:05:53", "remaining_time": "7:54:09"} +{"current_steps": 1436, "total_steps": 6840, "loss": 0.5314475893974304, "lr": 1.8635979382780584e-05, "epoch": 0.41994443632109957, "percentage": 20.99, "elapsed_time": "2:05:58", "remaining_time": "7:54:04"} +{"current_steps": 1437, "total_steps": 6840, "loss": 0.478320837020874, "lr": 1.863354080555033e-05, "epoch": 0.42023687673636495, "percentage": 21.01, "elapsed_time": "2:06:04", "remaining_time": "7:54:00"} +{"current_steps": 1438, "total_steps": 6840, "loss": 0.7389972805976868, "lr": 1.86311002102824e-05, "epoch": 0.42052931715163033, "percentage": 21.02, "elapsed_time": "2:06:08", "remaining_time": "7:53:52"} +{"current_steps": 1439, "total_steps": 6840, "loss": 0.5449938178062439, "lr": 1.8628657597547273e-05, "epoch": 0.42082175756689577, "percentage": 21.04, "elapsed_time": "2:06:13", "remaining_time": "7:53:44"} +{"current_steps": 1440, "total_steps": 6840, "loss": 0.6752811670303345, "lr": 1.8626212967915897e-05, "epoch": 0.42111419798216115, "percentage": 21.05, "elapsed_time": "2:06:17", "remaining_time": "7:53:36"} +{"current_steps": 1441, "total_steps": 6840, "loss": 0.7750412821769714, "lr": 1.862376632195969e-05, "epoch": 0.42140663839742654, "percentage": 21.07, "elapsed_time": "2:06:23", "remaining_time": "7:53:34"} +{"current_steps": 1442, 
"total_steps": 6840, "loss": 0.5967680215835571, "lr": 1.8621317660250535e-05, "epoch": 0.4216990788126919, "percentage": 21.08, "elapsed_time": "2:06:28", "remaining_time": "7:53:27"} +{"current_steps": 1443, "total_steps": 6840, "loss": 0.6781327724456787, "lr": 1.86188669833608e-05, "epoch": 0.4219915192279573, "percentage": 21.1, "elapsed_time": "2:06:33", "remaining_time": "7:53:21"} +{"current_steps": 1444, "total_steps": 6840, "loss": 0.7539681196212769, "lr": 1.8616414291863307e-05, "epoch": 0.4222839596432227, "percentage": 21.11, "elapsed_time": "2:06:38", "remaining_time": "7:53:14"} +{"current_steps": 1445, "total_steps": 6840, "loss": 0.6976957321166992, "lr": 1.8613959586331364e-05, "epoch": 0.42257640005848807, "percentage": 21.13, "elapsed_time": "2:06:43", "remaining_time": "7:53:07"} +{"current_steps": 1446, "total_steps": 6840, "loss": 0.6616528034210205, "lr": 1.861150286733874e-05, "epoch": 0.42286884047375345, "percentage": 21.14, "elapsed_time": "2:06:48", "remaining_time": "7:53:02"} +{"current_steps": 1447, "total_steps": 6840, "loss": 0.6407957077026367, "lr": 1.860904413545968e-05, "epoch": 0.4231612808890189, "percentage": 21.15, "elapsed_time": "2:06:53", "remaining_time": "7:52:56"} +{"current_steps": 1448, "total_steps": 6840, "loss": 0.5918550491333008, "lr": 1.86065833912689e-05, "epoch": 0.42345372130428427, "percentage": 21.17, "elapsed_time": "2:06:59", "remaining_time": "7:52:54"} +{"current_steps": 1449, "total_steps": 6840, "loss": 0.6142056584358215, "lr": 1.8604120635341574e-05, "epoch": 0.42374616171954965, "percentage": 21.18, "elapsed_time": "2:07:05", "remaining_time": "7:52:50"} +{"current_steps": 1450, "total_steps": 6840, "loss": 0.6359597444534302, "lr": 1.8601655868253368e-05, "epoch": 0.42403860213481503, "percentage": 21.2, "elapsed_time": "2:07:11", "remaining_time": "7:52:47"} +{"current_steps": 1451, "total_steps": 6840, "loss": 0.7149467468261719, "lr": 1.8599189090580402e-05, "epoch": 0.4243310425500804, 
"percentage": 21.21, "elapsed_time": "2:07:16", "remaining_time": "7:52:43"} +{"current_steps": 1452, "total_steps": 6840, "loss": 0.6015822887420654, "lr": 1.8596720302899272e-05, "epoch": 0.4246234829653458, "percentage": 21.23, "elapsed_time": "2:07:22", "remaining_time": "7:52:41"} +{"current_steps": 1453, "total_steps": 6840, "loss": 0.6389881372451782, "lr": 1.8594249505787035e-05, "epoch": 0.4249159233806112, "percentage": 21.24, "elapsed_time": "2:07:28", "remaining_time": "7:52:36"} +{"current_steps": 1454, "total_steps": 6840, "loss": 0.7479783892631531, "lr": 1.8591776699821235e-05, "epoch": 0.42520836379587656, "percentage": 21.26, "elapsed_time": "2:07:34", "remaining_time": "7:52:32"} +{"current_steps": 1455, "total_steps": 6840, "loss": 0.6574498414993286, "lr": 1.8589301885579866e-05, "epoch": 0.425500804211142, "percentage": 21.27, "elapsed_time": "2:07:39", "remaining_time": "7:52:27"} +{"current_steps": 1456, "total_steps": 6840, "loss": 0.6314088702201843, "lr": 1.858682506364141e-05, "epoch": 0.4257932446264074, "percentage": 21.29, "elapsed_time": "2:07:44", "remaining_time": "7:52:22"} +{"current_steps": 1457, "total_steps": 6840, "loss": 0.605385959148407, "lr": 1.85843462345848e-05, "epoch": 0.42608568504167277, "percentage": 21.3, "elapsed_time": "2:07:48", "remaining_time": "7:52:11"} +{"current_steps": 1458, "total_steps": 6840, "loss": 0.6355551481246948, "lr": 1.8581865398989452e-05, "epoch": 0.42637812545693815, "percentage": 21.32, "elapsed_time": "2:07:53", "remaining_time": "7:52:03"} +{"current_steps": 1459, "total_steps": 6840, "loss": 0.6303017139434814, "lr": 1.8579382557435247e-05, "epoch": 0.42667056587220353, "percentage": 21.33, "elapsed_time": "2:07:57", "remaining_time": "7:51:57"} +{"current_steps": 1460, "total_steps": 6840, "loss": 0.5916526317596436, "lr": 1.8576897710502532e-05, "epoch": 0.4269630062874689, "percentage": 21.35, "elapsed_time": "2:08:03", "remaining_time": "7:51:53"} +{"current_steps": 1461, 
"total_steps": 6840, "loss": 0.5709279179573059, "lr": 1.8574410858772126e-05, "epoch": 0.4272554467027343, "percentage": 21.36, "elapsed_time": "2:08:09", "remaining_time": "7:51:50"} +{"current_steps": 1462, "total_steps": 6840, "loss": 0.571231484413147, "lr": 1.8571922002825317e-05, "epoch": 0.4275478871179997, "percentage": 21.37, "elapsed_time": "2:08:15", "remaining_time": "7:51:47"} +{"current_steps": 1463, "total_steps": 6840, "loss": 0.6352202892303467, "lr": 1.8569431143243856e-05, "epoch": 0.4278403275332651, "percentage": 21.39, "elapsed_time": "2:08:19", "remaining_time": "7:51:36"} +{"current_steps": 1464, "total_steps": 6840, "loss": 0.553265392780304, "lr": 1.8566938280609965e-05, "epoch": 0.4281327679485305, "percentage": 21.4, "elapsed_time": "2:08:25", "remaining_time": "7:51:35"} +{"current_steps": 1465, "total_steps": 6840, "loss": 0.4913727045059204, "lr": 1.8564443415506343e-05, "epoch": 0.4284252083637959, "percentage": 21.42, "elapsed_time": "2:08:29", "remaining_time": "7:51:26"} +{"current_steps": 1466, "total_steps": 6840, "loss": 0.542539119720459, "lr": 1.8561946548516143e-05, "epoch": 0.42871764877906127, "percentage": 21.43, "elapsed_time": "2:08:35", "remaining_time": "7:51:22"} +{"current_steps": 1467, "total_steps": 6840, "loss": 0.719292163848877, "lr": 1.8559447680222994e-05, "epoch": 0.42901008919432665, "percentage": 21.45, "elapsed_time": "2:08:40", "remaining_time": "7:51:15"} +{"current_steps": 1468, "total_steps": 6840, "loss": 0.8443170785903931, "lr": 1.8556946811210993e-05, "epoch": 0.42930252960959203, "percentage": 21.46, "elapsed_time": "2:08:44", "remaining_time": "7:51:07"} +{"current_steps": 1469, "total_steps": 6840, "loss": 0.7899821996688843, "lr": 1.8554443942064705e-05, "epoch": 0.4295949700248574, "percentage": 21.48, "elapsed_time": "2:08:50", "remaining_time": "7:51:05"} +{"current_steps": 1470, "total_steps": 6840, "loss": 0.617426872253418, "lr": 1.8551939073369155e-05, "epoch": 0.42988741044012285, 
"percentage": 21.49, "elapsed_time": "2:08:56", "remaining_time": "7:51:02"} +{"current_steps": 1471, "total_steps": 6840, "loss": 0.5573505163192749, "lr": 1.8549432205709842e-05, "epoch": 0.43017985085538823, "percentage": 21.51, "elapsed_time": "2:09:01", "remaining_time": "7:50:56"} +{"current_steps": 1472, "total_steps": 6840, "loss": 0.5571975111961365, "lr": 1.8546923339672734e-05, "epoch": 0.4304722912706536, "percentage": 21.52, "elapsed_time": "2:09:07", "remaining_time": "7:50:51"} +{"current_steps": 1473, "total_steps": 6840, "loss": 0.6411981582641602, "lr": 1.854441247584426e-05, "epoch": 0.430764731685919, "percentage": 21.54, "elapsed_time": "2:09:11", "remaining_time": "7:50:44"} +{"current_steps": 1474, "total_steps": 6840, "loss": 0.4766804277896881, "lr": 1.8541899614811323e-05, "epoch": 0.4310571721011844, "percentage": 21.55, "elapsed_time": "2:09:16", "remaining_time": "7:50:36"} +{"current_steps": 1475, "total_steps": 6840, "loss": 0.7479405403137207, "lr": 1.8539384757161285e-05, "epoch": 0.43134961251644977, "percentage": 21.56, "elapsed_time": "2:09:22", "remaining_time": "7:50:33"} +{"current_steps": 1476, "total_steps": 6840, "loss": 0.6848211288452148, "lr": 1.8536867903481983e-05, "epoch": 0.43164205293171515, "percentage": 21.58, "elapsed_time": "2:09:26", "remaining_time": "7:50:24"} +{"current_steps": 1477, "total_steps": 6840, "loss": 0.7413634061813354, "lr": 1.8534349054361708e-05, "epoch": 0.43193449334698053, "percentage": 21.59, "elapsed_time": "2:09:32", "remaining_time": "7:50:22"} +{"current_steps": 1478, "total_steps": 6840, "loss": 0.5880843997001648, "lr": 1.8531828210389236e-05, "epoch": 0.43222693376224597, "percentage": 21.61, "elapsed_time": "2:09:38", "remaining_time": "7:50:21"} +{"current_steps": 1479, "total_steps": 6840, "loss": 0.5885627269744873, "lr": 1.852930537215379e-05, "epoch": 0.43251937417751135, "percentage": 21.62, "elapsed_time": "2:09:45", "remaining_time": "7:50:19"} +{"current_steps": 1480, 
"total_steps": 6840, "loss": 0.706636905670166, "lr": 1.8526780540245077e-05, "epoch": 0.43281181459277673, "percentage": 21.64, "elapsed_time": "2:09:50", "remaining_time": "7:50:13"} +{"current_steps": 1481, "total_steps": 6840, "loss": 0.6521843075752258, "lr": 1.8524253715253255e-05, "epoch": 0.4331042550080421, "percentage": 21.65, "elapsed_time": "2:09:55", "remaining_time": "7:50:08"} +{"current_steps": 1482, "total_steps": 6840, "loss": 0.6231021881103516, "lr": 1.8521724897768955e-05, "epoch": 0.4333966954233075, "percentage": 21.67, "elapsed_time": "2:10:00", "remaining_time": "7:50:00"} +{"current_steps": 1483, "total_steps": 6840, "loss": 0.6859451532363892, "lr": 1.851919408838327e-05, "epoch": 0.4336891358385729, "percentage": 21.68, "elapsed_time": "2:10:03", "remaining_time": "7:49:50"} +{"current_steps": 1484, "total_steps": 6840, "loss": 0.7948323488235474, "lr": 1.851666128768777e-05, "epoch": 0.43398157625383826, "percentage": 21.7, "elapsed_time": "2:10:08", "remaining_time": "7:49:40"} +{"current_steps": 1485, "total_steps": 6840, "loss": 0.7815203070640564, "lr": 1.8514126496274473e-05, "epoch": 0.43427401666910365, "percentage": 21.71, "elapsed_time": "2:10:14", "remaining_time": "7:49:38"} +{"current_steps": 1486, "total_steps": 6840, "loss": 0.6941452622413635, "lr": 1.8511589714735875e-05, "epoch": 0.4345664570843691, "percentage": 21.73, "elapsed_time": "2:10:19", "remaining_time": "7:49:33"} +{"current_steps": 1487, "total_steps": 6840, "loss": 0.5500549674034119, "lr": 1.850905094366493e-05, "epoch": 0.43485889749963447, "percentage": 21.74, "elapsed_time": "2:10:24", "remaining_time": "7:49:25"} +{"current_steps": 1488, "total_steps": 6840, "loss": 0.6616400480270386, "lr": 1.8506510183655066e-05, "epoch": 0.43515133791489985, "percentage": 21.75, "elapsed_time": "2:10:30", "remaining_time": "7:49:23"} +{"current_steps": 1489, "total_steps": 6840, "loss": 0.6920043230056763, "lr": 1.8503967435300166e-05, "epoch": 0.43544377833016523, 
"percentage": 21.77, "elapsed_time": "2:10:36", "remaining_time": "7:49:21"} +{"current_steps": 1490, "total_steps": 6840, "loss": 0.6080813407897949, "lr": 1.8501422699194584e-05, "epoch": 0.4357362187454306, "percentage": 21.78, "elapsed_time": "2:10:42", "remaining_time": "7:49:18"} +{"current_steps": 1491, "total_steps": 6840, "loss": 0.576184868812561, "lr": 1.8498875975933135e-05, "epoch": 0.436028659160696, "percentage": 21.8, "elapsed_time": "2:10:46", "remaining_time": "7:49:10"} +{"current_steps": 1492, "total_steps": 6840, "loss": 0.6647310256958008, "lr": 1.84963272661111e-05, "epoch": 0.4363210995759614, "percentage": 21.81, "elapsed_time": "2:10:50", "remaining_time": "7:49:01"} +{"current_steps": 1493, "total_steps": 6840, "loss": 0.6738306283950806, "lr": 1.8493776570324224e-05, "epoch": 0.43661353999122676, "percentage": 21.83, "elapsed_time": "2:10:55", "remaining_time": "7:48:55"} +{"current_steps": 1494, "total_steps": 6840, "loss": 0.681056022644043, "lr": 1.849122388916872e-05, "epoch": 0.4369059804064922, "percentage": 21.84, "elapsed_time": "2:11:02", "remaining_time": "7:48:53"} +{"current_steps": 1495, "total_steps": 6840, "loss": 0.7844547033309937, "lr": 1.848866922324126e-05, "epoch": 0.4371984208217576, "percentage": 21.86, "elapsed_time": "2:11:08", "remaining_time": "7:48:50"} +{"current_steps": 1496, "total_steps": 6840, "loss": 0.6478928327560425, "lr": 1.8486112573138977e-05, "epoch": 0.43749086123702297, "percentage": 21.87, "elapsed_time": "2:11:14", "remaining_time": "7:48:47"} +{"current_steps": 1497, "total_steps": 6840, "loss": 0.6035341024398804, "lr": 1.8483553939459477e-05, "epoch": 0.43778330165228835, "percentage": 21.89, "elapsed_time": "2:11:19", "remaining_time": "7:48:44"} +{"current_steps": 1498, "total_steps": 6840, "loss": 0.6664912700653076, "lr": 1.8480993322800826e-05, "epoch": 0.43807574206755373, "percentage": 21.9, "elapsed_time": "2:11:24", "remaining_time": "7:48:36"} +{"current_steps": 1499, 
"total_steps": 6840, "loss": 0.7171953916549683, "lr": 1.847843072376155e-05, "epoch": 0.4383681824828191, "percentage": 21.92, "elapsed_time": "2:11:30", "remaining_time": "7:48:32"} +{"current_steps": 1500, "total_steps": 6840, "loss": 0.8400344848632812, "lr": 1.8475866142940646e-05, "epoch": 0.4386606228980845, "percentage": 21.93, "elapsed_time": "2:11:36", "remaining_time": "7:48:30"} +{"current_steps": 1501, "total_steps": 6840, "loss": 0.5119056701660156, "lr": 1.8473299580937563e-05, "epoch": 0.4389530633133499, "percentage": 21.94, "elapsed_time": "2:11:44", "remaining_time": "7:48:36"} +{"current_steps": 1502, "total_steps": 6840, "loss": 0.5864866375923157, "lr": 1.847073103835222e-05, "epoch": 0.4392455037286153, "percentage": 21.96, "elapsed_time": "2:11:49", "remaining_time": "7:48:30"} +{"current_steps": 1503, "total_steps": 6840, "loss": 0.6389576196670532, "lr": 1.8468160515785e-05, "epoch": 0.4395379441438807, "percentage": 21.97, "elapsed_time": "2:11:56", "remaining_time": "7:48:29"} +{"current_steps": 1504, "total_steps": 6840, "loss": 0.6745110750198364, "lr": 1.846558801383675e-05, "epoch": 0.4398303845591461, "percentage": 21.99, "elapsed_time": "2:12:00", "remaining_time": "7:48:21"} +{"current_steps": 1505, "total_steps": 6840, "loss": 0.6207559704780579, "lr": 1.846301353310877e-05, "epoch": 0.44012282497441146, "percentage": 22.0, "elapsed_time": "2:12:05", "remaining_time": "7:48:13"} +{"current_steps": 1506, "total_steps": 6840, "loss": 0.6818139553070068, "lr": 1.8460437074202832e-05, "epoch": 0.44041526538967685, "percentage": 22.02, "elapsed_time": "2:12:10", "remaining_time": "7:48:06"} +{"current_steps": 1507, "total_steps": 6840, "loss": 0.652062714099884, "lr": 1.845785863772117e-05, "epoch": 0.44070770580494223, "percentage": 22.03, "elapsed_time": "2:12:13", "remaining_time": "7:47:56"} +{"current_steps": 1508, "total_steps": 6840, "loss": 0.6842166185379028, "lr": 1.8455278224266476e-05, "epoch": 0.4410001462202076, 
"percentage": 22.05, "elapsed_time": "2:12:18", "remaining_time": "7:47:49"} +{"current_steps": 1509, "total_steps": 6840, "loss": 0.6459342837333679, "lr": 1.8452695834441904e-05, "epoch": 0.44129258663547305, "percentage": 22.06, "elapsed_time": "2:12:23", "remaining_time": "7:47:42"} +{"current_steps": 1510, "total_steps": 6840, "loss": 0.6036739349365234, "lr": 1.8450111468851078e-05, "epoch": 0.44158502705073843, "percentage": 22.08, "elapsed_time": "2:12:28", "remaining_time": "7:47:35"} +{"current_steps": 1511, "total_steps": 6840, "loss": 0.7530199289321899, "lr": 1.844752512809807e-05, "epoch": 0.4418774674660038, "percentage": 22.09, "elapsed_time": "2:12:32", "remaining_time": "7:47:25"} +{"current_steps": 1512, "total_steps": 6840, "loss": 0.6098290085792542, "lr": 1.8444936812787428e-05, "epoch": 0.4421699078812692, "percentage": 22.11, "elapsed_time": "2:12:37", "remaining_time": "7:47:22"} +{"current_steps": 1513, "total_steps": 6840, "loss": 0.7142464518547058, "lr": 1.844234652352415e-05, "epoch": 0.4424623482965346, "percentage": 22.12, "elapsed_time": "2:12:43", "remaining_time": "7:47:18"} +{"current_steps": 1514, "total_steps": 6840, "loss": 0.4895970821380615, "lr": 1.8439754260913703e-05, "epoch": 0.44275478871179996, "percentage": 22.13, "elapsed_time": "2:12:48", "remaining_time": "7:47:13"} +{"current_steps": 1515, "total_steps": 6840, "loss": 0.6166520118713379, "lr": 1.8437160025562012e-05, "epoch": 0.44304722912706535, "percentage": 22.15, "elapsed_time": "2:12:53", "remaining_time": "7:47:07"} +{"current_steps": 1516, "total_steps": 6840, "loss": 0.6020585894584656, "lr": 1.8434563818075462e-05, "epoch": 0.4433396695423307, "percentage": 22.16, "elapsed_time": "2:12:58", "remaining_time": "7:47:00"} +{"current_steps": 1517, "total_steps": 6840, "loss": 0.6879030466079712, "lr": 1.8431965639060904e-05, "epoch": 0.44363210995759617, "percentage": 22.18, "elapsed_time": "2:13:04", "remaining_time": "7:46:58"} +{"current_steps": 1518, 
"total_steps": 6840, "loss": 0.5753897428512573, "lr": 1.8429365489125644e-05, "epoch": 0.44392455037286155, "percentage": 22.19, "elapsed_time": "2:13:09", "remaining_time": "7:46:49"} +{"current_steps": 1519, "total_steps": 6840, "loss": 0.5165301561355591, "lr": 1.8426763368877455e-05, "epoch": 0.44421699078812693, "percentage": 22.21, "elapsed_time": "2:13:14", "remaining_time": "7:46:44"} +{"current_steps": 1520, "total_steps": 6840, "loss": 0.6377310752868652, "lr": 1.842415927892456e-05, "epoch": 0.4445094312033923, "percentage": 22.22, "elapsed_time": "2:13:19", "remaining_time": "7:46:38"} +{"current_steps": 1521, "total_steps": 6840, "loss": 0.7429912090301514, "lr": 1.842155321987566e-05, "epoch": 0.4448018716186577, "percentage": 22.24, "elapsed_time": "2:13:25", "remaining_time": "7:46:36"} +{"current_steps": 1522, "total_steps": 6840, "loss": 0.6177542209625244, "lr": 1.8418945192339892e-05, "epoch": 0.4450943120339231, "percentage": 22.25, "elapsed_time": "2:13:30", "remaining_time": "7:46:29"} +{"current_steps": 1523, "total_steps": 6840, "loss": 0.662541389465332, "lr": 1.8416335196926877e-05, "epoch": 0.44538675244918846, "percentage": 22.27, "elapsed_time": "2:13:36", "remaining_time": "7:46:26"} +{"current_steps": 1524, "total_steps": 6840, "loss": 0.6026759743690491, "lr": 1.841372323424668e-05, "epoch": 0.44567919286445384, "percentage": 22.28, "elapsed_time": "2:13:41", "remaining_time": "7:46:21"} +{"current_steps": 1525, "total_steps": 6840, "loss": 0.7902384400367737, "lr": 1.8411109304909837e-05, "epoch": 0.4459716332797193, "percentage": 22.3, "elapsed_time": "2:13:46", "remaining_time": "7:46:13"} +{"current_steps": 1526, "total_steps": 6840, "loss": 0.6588590145111084, "lr": 1.840849340952733e-05, "epoch": 0.44626407369498466, "percentage": 22.31, "elapsed_time": "2:13:52", "remaining_time": "7:46:12"} +{"current_steps": 1527, "total_steps": 6840, "loss": 0.49133825302124023, "lr": 1.8405875548710614e-05, "epoch": 0.44655651411025005, 
"percentage": 22.32, "elapsed_time": "2:13:57", "remaining_time": "7:46:06"} +{"current_steps": 1528, "total_steps": 6840, "loss": 0.6644654273986816, "lr": 1.8403255723071597e-05, "epoch": 0.44684895452551543, "percentage": 22.34, "elapsed_time": "2:14:02", "remaining_time": "7:46:00"} +{"current_steps": 1529, "total_steps": 6840, "loss": 0.6257454752922058, "lr": 1.8400633933222647e-05, "epoch": 0.4471413949407808, "percentage": 22.35, "elapsed_time": "2:14:09", "remaining_time": "7:45:58"} +{"current_steps": 1530, "total_steps": 6840, "loss": 0.6671919226646423, "lr": 1.8398010179776597e-05, "epoch": 0.4474338353560462, "percentage": 22.37, "elapsed_time": "2:14:14", "remaining_time": "7:45:54"} +{"current_steps": 1531, "total_steps": 6840, "loss": 0.6001447439193726, "lr": 1.839538446334672e-05, "epoch": 0.4477262757713116, "percentage": 22.38, "elapsed_time": "2:14:20", "remaining_time": "7:45:49"} +{"current_steps": 1532, "total_steps": 6840, "loss": 0.8103213310241699, "lr": 1.8392756784546775e-05, "epoch": 0.44801871618657696, "percentage": 22.4, "elapsed_time": "2:14:25", "remaining_time": "7:45:44"} +{"current_steps": 1533, "total_steps": 6840, "loss": 0.7010835409164429, "lr": 1.839012714399096e-05, "epoch": 0.4483111566018424, "percentage": 22.41, "elapsed_time": "2:14:30", "remaining_time": "7:45:40"} +{"current_steps": 1534, "total_steps": 6840, "loss": 0.5709215402603149, "lr": 1.8387495542293935e-05, "epoch": 0.4486035970171078, "percentage": 22.43, "elapsed_time": "2:14:35", "remaining_time": "7:45:32"} +{"current_steps": 1535, "total_steps": 6840, "loss": 0.6410949230194092, "lr": 1.8384861980070826e-05, "epoch": 0.44889603743237316, "percentage": 22.44, "elapsed_time": "2:14:40", "remaining_time": "7:45:26"} +{"current_steps": 1536, "total_steps": 6840, "loss": 0.8036839962005615, "lr": 1.838222645793721e-05, "epoch": 0.44918847784763855, "percentage": 22.46, "elapsed_time": "2:14:46", "remaining_time": "7:45:23"} +{"current_steps": 1537, 
"total_steps": 6840, "loss": 0.49213099479675293, "lr": 1.8379588976509123e-05, "epoch": 0.44948091826290393, "percentage": 22.47, "elapsed_time": "2:14:53", "remaining_time": "7:45:22"} +{"current_steps": 1538, "total_steps": 6840, "loss": 0.7111018896102905, "lr": 1.8376949536403063e-05, "epoch": 0.4497733586781693, "percentage": 22.49, "elapsed_time": "2:14:59", "remaining_time": "7:45:20"} +{"current_steps": 1539, "total_steps": 6840, "loss": 0.8506999015808105, "lr": 1.837430813823598e-05, "epoch": 0.4500657990934347, "percentage": 22.5, "elapsed_time": "2:15:04", "remaining_time": "7:45:15"} +{"current_steps": 1540, "total_steps": 6840, "loss": 0.7369798421859741, "lr": 1.8371664782625287e-05, "epoch": 0.4503582395087001, "percentage": 22.51, "elapsed_time": "2:15:10", "remaining_time": "7:45:13"} +{"current_steps": 1541, "total_steps": 6840, "loss": 0.5982831120491028, "lr": 1.8369019470188855e-05, "epoch": 0.4506506799239655, "percentage": 22.53, "elapsed_time": "2:15:16", "remaining_time": "7:45:08"} +{"current_steps": 1542, "total_steps": 6840, "loss": 0.6129144430160522, "lr": 1.8366372201545002e-05, "epoch": 0.4509431203392309, "percentage": 22.54, "elapsed_time": "2:15:21", "remaining_time": "7:45:05"} +{"current_steps": 1543, "total_steps": 6840, "loss": 0.7142921686172485, "lr": 1.8363722977312512e-05, "epoch": 0.4512355607544963, "percentage": 22.56, "elapsed_time": "2:15:26", "remaining_time": "7:44:57"} +{"current_steps": 1544, "total_steps": 6840, "loss": 0.515651524066925, "lr": 1.8361071798110635e-05, "epoch": 0.45152800116976166, "percentage": 22.57, "elapsed_time": "2:15:31", "remaining_time": "7:44:52"} +{"current_steps": 1545, "total_steps": 6840, "loss": 0.5544168949127197, "lr": 1.8358418664559058e-05, "epoch": 0.45182044158502704, "percentage": 22.59, "elapsed_time": "2:15:36", "remaining_time": "7:44:45"} +{"current_steps": 1546, "total_steps": 6840, "loss": 0.6801918745040894, "lr": 1.8355763577277938e-05, "epoch": 0.4521128820002924, 
"percentage": 22.6, "elapsed_time": "2:15:41", "remaining_time": "7:44:38"} +{"current_steps": 1547, "total_steps": 6840, "loss": 0.683785080909729, "lr": 1.835310653688789e-05, "epoch": 0.4524053224155578, "percentage": 22.62, "elapsed_time": "2:15:47", "remaining_time": "7:44:35"} +{"current_steps": 1548, "total_steps": 6840, "loss": 0.5689892172813416, "lr": 1.835044754400997e-05, "epoch": 0.45269776283082325, "percentage": 22.63, "elapsed_time": "2:15:51", "remaining_time": "7:44:27"} +{"current_steps": 1549, "total_steps": 6840, "loss": 0.5260726809501648, "lr": 1.8347786599265713e-05, "epoch": 0.45299020324608863, "percentage": 22.65, "elapsed_time": "2:15:58", "remaining_time": "7:44:26"} +{"current_steps": 1550, "total_steps": 6840, "loss": 0.6792432069778442, "lr": 1.834512370327709e-05, "epoch": 0.453282643661354, "percentage": 22.66, "elapsed_time": "2:16:02", "remaining_time": "7:44:18"} +{"current_steps": 1551, "total_steps": 6840, "loss": 0.6336524486541748, "lr": 1.8342458856666545e-05, "epoch": 0.4535750840766194, "percentage": 22.68, "elapsed_time": "2:16:08", "remaining_time": "7:44:16"} +{"current_steps": 1552, "total_steps": 6840, "loss": 0.5929614901542664, "lr": 1.8339792060056965e-05, "epoch": 0.4538675244918848, "percentage": 22.69, "elapsed_time": "2:16:14", "remaining_time": "7:44:12"} +{"current_steps": 1553, "total_steps": 6840, "loss": 0.6683382391929626, "lr": 1.8337123314071696e-05, "epoch": 0.45415996490715016, "percentage": 22.7, "elapsed_time": "2:16:20", "remaining_time": "7:44:09"} +{"current_steps": 1554, "total_steps": 6840, "loss": 0.6256811618804932, "lr": 1.833445261933454e-05, "epoch": 0.45445240532241554, "percentage": 22.72, "elapsed_time": "2:16:26", "remaining_time": "7:44:05"} +{"current_steps": 1555, "total_steps": 6840, "loss": 0.5974653959274292, "lr": 1.8331779976469765e-05, "epoch": 0.4547448457376809, "percentage": 22.73, "elapsed_time": "2:16:30", "remaining_time": "7:43:58"} +{"current_steps": 1556, 
"total_steps": 6840, "loss": 0.5471535325050354, "lr": 1.8329105386102074e-05, "epoch": 0.45503728615294636, "percentage": 22.75, "elapsed_time": "2:16:36", "remaining_time": "7:43:53"} +{"current_steps": 1557, "total_steps": 6840, "loss": 0.5751267075538635, "lr": 1.832642884885664e-05, "epoch": 0.45532972656821175, "percentage": 22.76, "elapsed_time": "2:16:42", "remaining_time": "7:43:51"} +{"current_steps": 1558, "total_steps": 6840, "loss": 0.7003380060195923, "lr": 1.8323750365359092e-05, "epoch": 0.45562216698347713, "percentage": 22.78, "elapsed_time": "2:16:48", "remaining_time": "7:43:48"} +{"current_steps": 1559, "total_steps": 6840, "loss": 0.6351351737976074, "lr": 1.8321069936235503e-05, "epoch": 0.4559146073987425, "percentage": 22.79, "elapsed_time": "2:16:53", "remaining_time": "7:43:41"} +{"current_steps": 1560, "total_steps": 6840, "loss": 0.6083345413208008, "lr": 1.8318387562112407e-05, "epoch": 0.4562070478140079, "percentage": 22.81, "elapsed_time": "2:16:58", "remaining_time": "7:43:35"} +{"current_steps": 1561, "total_steps": 6840, "loss": 0.589935302734375, "lr": 1.83157032436168e-05, "epoch": 0.4564994882292733, "percentage": 22.82, "elapsed_time": "2:17:04", "remaining_time": "7:43:32"} +{"current_steps": 1562, "total_steps": 6840, "loss": 0.7648014426231384, "lr": 1.8313016981376116e-05, "epoch": 0.45679192864453866, "percentage": 22.84, "elapsed_time": "2:17:10", "remaining_time": "7:43:29"} +{"current_steps": 1563, "total_steps": 6840, "loss": 0.7309973239898682, "lr": 1.831032877601826e-05, "epoch": 0.45708436905980404, "percentage": 22.85, "elapsed_time": "2:17:16", "remaining_time": "7:43:26"} +{"current_steps": 1564, "total_steps": 6840, "loss": 0.7231593728065491, "lr": 1.8307638628171575e-05, "epoch": 0.4573768094750695, "percentage": 22.87, "elapsed_time": "2:17:20", "remaining_time": "7:43:20"} +{"current_steps": 1565, "total_steps": 6840, "loss": 0.7321262359619141, "lr": 1.8304946538464876e-05, "epoch": 0.45766924989033486, 
"percentage": 22.88, "elapsed_time": "2:17:26", "remaining_time": "7:43:15"} +{"current_steps": 1566, "total_steps": 6840, "loss": 0.5866271257400513, "lr": 1.830225250752742e-05, "epoch": 0.45796169030560024, "percentage": 22.89, "elapsed_time": "2:17:32", "remaining_time": "7:43:12"} +{"current_steps": 1567, "total_steps": 6840, "loss": 0.7146202325820923, "lr": 1.8299556535988917e-05, "epoch": 0.4582541307208656, "percentage": 22.91, "elapsed_time": "2:17:36", "remaining_time": "7:43:03"} +{"current_steps": 1568, "total_steps": 6840, "loss": 0.4600168466567993, "lr": 1.8296858624479536e-05, "epoch": 0.458546571136131, "percentage": 22.92, "elapsed_time": "2:17:41", "remaining_time": "7:42:56"} +{"current_steps": 1569, "total_steps": 6840, "loss": 0.5710705518722534, "lr": 1.8294158773629896e-05, "epoch": 0.4588390115513964, "percentage": 22.94, "elapsed_time": "2:17:46", "remaining_time": "7:42:50"} +{"current_steps": 1570, "total_steps": 6840, "loss": 0.7075216770172119, "lr": 1.8291456984071073e-05, "epoch": 0.4591314519666618, "percentage": 22.95, "elapsed_time": "2:17:51", "remaining_time": "7:42:44"} +{"current_steps": 1571, "total_steps": 6840, "loss": 0.5262739062309265, "lr": 1.828875325643459e-05, "epoch": 0.45942389238192716, "percentage": 22.97, "elapsed_time": "2:17:56", "remaining_time": "7:42:38"} +{"current_steps": 1572, "total_steps": 6840, "loss": 0.724657416343689, "lr": 1.8286047591352436e-05, "epoch": 0.4597163327971926, "percentage": 22.98, "elapsed_time": "2:18:00", "remaining_time": "7:42:30"} +{"current_steps": 1573, "total_steps": 6840, "loss": 0.6047587394714355, "lr": 1.8283339989457033e-05, "epoch": 0.460008773212458, "percentage": 23.0, "elapsed_time": "2:18:04", "remaining_time": "7:42:19"} +{"current_steps": 1574, "total_steps": 6840, "loss": 0.6647980213165283, "lr": 1.828063045138127e-05, "epoch": 0.46030121362772336, "percentage": 23.01, "elapsed_time": "2:18:09", "remaining_time": "7:42:13"} +{"current_steps": 1575, 
"total_steps": 6840, "loss": 0.6081969738006592, "lr": 1.827791897775849e-05, "epoch": 0.46059365404298874, "percentage": 23.03, "elapsed_time": "2:18:15", "remaining_time": "7:42:09"} +{"current_steps": 1576, "total_steps": 6840, "loss": 0.6815003156661987, "lr": 1.827520556922248e-05, "epoch": 0.4608860944582541, "percentage": 23.04, "elapsed_time": "2:18:19", "remaining_time": "7:42:00"} +{"current_steps": 1577, "total_steps": 6840, "loss": 0.5571715235710144, "lr": 1.8272490226407476e-05, "epoch": 0.4611785348735195, "percentage": 23.06, "elapsed_time": "2:18:25", "remaining_time": "7:41:59"} +{"current_steps": 1578, "total_steps": 6840, "loss": 0.7562757730484009, "lr": 1.8269772949948185e-05, "epoch": 0.4614709752887849, "percentage": 23.07, "elapsed_time": "2:18:31", "remaining_time": "7:41:54"} +{"current_steps": 1579, "total_steps": 6840, "loss": 0.6330382227897644, "lr": 1.8267053740479745e-05, "epoch": 0.4617634157040503, "percentage": 23.08, "elapsed_time": "2:18:36", "remaining_time": "7:41:48"} +{"current_steps": 1580, "total_steps": 6840, "loss": 0.7696597576141357, "lr": 1.826433259863776e-05, "epoch": 0.4620558561193157, "percentage": 23.1, "elapsed_time": "2:18:40", "remaining_time": "7:41:39"} +{"current_steps": 1581, "total_steps": 6840, "loss": 0.6953772306442261, "lr": 1.8261609525058275e-05, "epoch": 0.4623482965345811, "percentage": 23.11, "elapsed_time": "2:18:44", "remaining_time": "7:41:31"} +{"current_steps": 1582, "total_steps": 6840, "loss": 0.5856037735939026, "lr": 1.8258884520377797e-05, "epoch": 0.4626407369498465, "percentage": 23.13, "elapsed_time": "2:18:49", "remaining_time": "7:41:24"} +{"current_steps": 1583, "total_steps": 6840, "loss": 0.5988172888755798, "lr": 1.8256157585233277e-05, "epoch": 0.46293317736511186, "percentage": 23.14, "elapsed_time": "2:18:54", "remaining_time": "7:41:19"} +{"current_steps": 1584, "total_steps": 6840, "loss": 0.6320241689682007, "lr": 1.8253428720262117e-05, "epoch": 0.46322561778037724, 
"percentage": 23.16, "elapsed_time": "2:19:00", "remaining_time": "7:41:15"} +{"current_steps": 1585, "total_steps": 6840, "loss": 0.5758935213088989, "lr": 1.8250697926102182e-05, "epoch": 0.4635180581956426, "percentage": 23.17, "elapsed_time": "2:19:06", "remaining_time": "7:41:13"} +{"current_steps": 1586, "total_steps": 6840, "loss": 0.7104986906051636, "lr": 1.8247965203391763e-05, "epoch": 0.463810498610908, "percentage": 23.19, "elapsed_time": "2:19:12", "remaining_time": "7:41:10"} +{"current_steps": 1587, "total_steps": 6840, "loss": 0.6322015523910522, "lr": 1.8245230552769634e-05, "epoch": 0.46410293902617344, "percentage": 23.2, "elapsed_time": "2:19:18", "remaining_time": "7:41:06"} +{"current_steps": 1588, "total_steps": 6840, "loss": 0.5881235003471375, "lr": 1.824249397487499e-05, "epoch": 0.4643953794414388, "percentage": 23.22, "elapsed_time": "2:19:23", "remaining_time": "7:41:00"} +{"current_steps": 1589, "total_steps": 6840, "loss": 0.8097240924835205, "lr": 1.8239755470347497e-05, "epoch": 0.4646878198567042, "percentage": 23.23, "elapsed_time": "2:19:27", "remaining_time": "7:40:50"} +{"current_steps": 1590, "total_steps": 6840, "loss": 0.6538649201393127, "lr": 1.823701503982726e-05, "epoch": 0.4649802602719696, "percentage": 23.25, "elapsed_time": "2:19:31", "remaining_time": "7:40:43"} +{"current_steps": 1591, "total_steps": 6840, "loss": 0.5868922472000122, "lr": 1.8234272683954842e-05, "epoch": 0.465272700687235, "percentage": 23.26, "elapsed_time": "2:19:37", "remaining_time": "7:40:39"} +{"current_steps": 1592, "total_steps": 6840, "loss": 0.6747265458106995, "lr": 1.8231528403371248e-05, "epoch": 0.46556514110250036, "percentage": 23.27, "elapsed_time": "2:19:43", "remaining_time": "7:40:37"} +{"current_steps": 1593, "total_steps": 6840, "loss": 0.6519996523857117, "lr": 1.8228782198717936e-05, "epoch": 0.46585758151776574, "percentage": 23.29, "elapsed_time": "2:19:49", "remaining_time": "7:40:33"} +{"current_steps": 1594, 
"total_steps": 6840, "loss": 0.7268975973129272, "lr": 1.822603407063682e-05, "epoch": 0.4661500219330311, "percentage": 23.3, "elapsed_time": "2:19:56", "remaining_time": "7:40:34"} +{"current_steps": 1595, "total_steps": 6840, "loss": 0.6554980278015137, "lr": 1.8223284019770252e-05, "epoch": 0.46644246234829656, "percentage": 23.32, "elapsed_time": "2:20:01", "remaining_time": "7:40:27"} +{"current_steps": 1596, "total_steps": 6840, "loss": 0.7014105319976807, "lr": 1.8220532046761047e-05, "epoch": 0.46673490276356194, "percentage": 23.33, "elapsed_time": "2:20:06", "remaining_time": "7:40:22"} +{"current_steps": 1597, "total_steps": 6840, "loss": 0.5766602158546448, "lr": 1.821777815225245e-05, "epoch": 0.4670273431788273, "percentage": 23.35, "elapsed_time": "2:20:11", "remaining_time": "7:40:15"} +{"current_steps": 1598, "total_steps": 6840, "loss": 0.5106521844863892, "lr": 1.8215022336888182e-05, "epoch": 0.4673197835940927, "percentage": 23.36, "elapsed_time": "2:20:16", "remaining_time": "7:40:07"} +{"current_steps": 1599, "total_steps": 6840, "loss": 0.801375150680542, "lr": 1.821226460131239e-05, "epoch": 0.4676122240093581, "percentage": 23.38, "elapsed_time": "2:20:21", "remaining_time": "7:40:04"} +{"current_steps": 1600, "total_steps": 6840, "loss": 0.6189062595367432, "lr": 1.8209504946169677e-05, "epoch": 0.4679046644246235, "percentage": 23.39, "elapsed_time": "2:20:27", "remaining_time": "7:39:58"} +{"current_steps": 1601, "total_steps": 6840, "loss": 0.6719359159469604, "lr": 1.8206743372105098e-05, "epoch": 0.46819710483988886, "percentage": 23.41, "elapsed_time": "2:20:37", "remaining_time": "7:40:10"} +{"current_steps": 1602, "total_steps": 6840, "loss": 0.7437123656272888, "lr": 1.8203979879764153e-05, "epoch": 0.46848954525515424, "percentage": 23.42, "elapsed_time": "2:20:43", "remaining_time": "7:40:05"} +{"current_steps": 1603, "total_steps": 6840, "loss": 0.7273217439651489, "lr": 1.8201214469792793e-05, "epoch": 0.4687819856704197, 
"percentage": 23.44, "elapsed_time": "2:20:48", "remaining_time": "7:40:00"} +{"current_steps": 1604, "total_steps": 6840, "loss": 0.6467087268829346, "lr": 1.8198447142837416e-05, "epoch": 0.46907442608568506, "percentage": 23.45, "elapsed_time": "2:20:53", "remaining_time": "7:39:55"} +{"current_steps": 1605, "total_steps": 6840, "loss": 0.5764428973197937, "lr": 1.8195677899544866e-05, "epoch": 0.46936686650095044, "percentage": 23.46, "elapsed_time": "2:20:59", "remaining_time": "7:39:50"} +{"current_steps": 1606, "total_steps": 6840, "loss": 0.5969977378845215, "lr": 1.8192906740562437e-05, "epoch": 0.4696593069162158, "percentage": 23.48, "elapsed_time": "2:21:03", "remaining_time": "7:39:42"} +{"current_steps": 1607, "total_steps": 6840, "loss": 0.7237746119499207, "lr": 1.819013366653787e-05, "epoch": 0.4699517473314812, "percentage": 23.49, "elapsed_time": "2:21:07", "remaining_time": "7:39:34"} +{"current_steps": 1608, "total_steps": 6840, "loss": 0.6289568543434143, "lr": 1.8187358678119355e-05, "epoch": 0.4702441877467466, "percentage": 23.51, "elapsed_time": "2:21:13", "remaining_time": "7:39:28"} +{"current_steps": 1609, "total_steps": 6840, "loss": 0.5773013234138489, "lr": 1.8184581775955533e-05, "epoch": 0.47053662816201197, "percentage": 23.52, "elapsed_time": "2:21:19", "remaining_time": "7:39:26"} +{"current_steps": 1610, "total_steps": 6840, "loss": 0.5940284729003906, "lr": 1.818180296069548e-05, "epoch": 0.47082906857727735, "percentage": 23.54, "elapsed_time": "2:21:24", "remaining_time": "7:39:22"} +{"current_steps": 1611, "total_steps": 6840, "loss": 0.7051881551742554, "lr": 1.8179022232988735e-05, "epoch": 0.4711215089925428, "percentage": 23.55, "elapsed_time": "2:21:29", "remaining_time": "7:39:15"} +{"current_steps": 1612, "total_steps": 6840, "loss": 0.6427813768386841, "lr": 1.8176239593485267e-05, "epoch": 0.4714139494078082, "percentage": 23.57, "elapsed_time": "2:21:34", "remaining_time": "7:39:09"} +{"current_steps": 1613, 
"total_steps": 6840, "loss": 0.7041782736778259, "lr": 1.817345504283551e-05, "epoch": 0.47170638982307356, "percentage": 23.58, "elapsed_time": "2:21:39", "remaining_time": "7:39:02"} +{"current_steps": 1614, "total_steps": 6840, "loss": 0.6568688154220581, "lr": 1.817066858169033e-05, "epoch": 0.47199883023833894, "percentage": 23.6, "elapsed_time": "2:21:44", "remaining_time": "7:38:58"} +{"current_steps": 1615, "total_steps": 6840, "loss": 0.4784452021121979, "lr": 1.816788021070105e-05, "epoch": 0.4722912706536043, "percentage": 23.61, "elapsed_time": "2:21:50", "remaining_time": "7:38:53"} +{"current_steps": 1616, "total_steps": 6840, "loss": 0.6012705564498901, "lr": 1.816508993051943e-05, "epoch": 0.4725837110688697, "percentage": 23.63, "elapsed_time": "2:21:55", "remaining_time": "7:38:46"} +{"current_steps": 1617, "total_steps": 6840, "loss": 0.6414428949356079, "lr": 1.8162297741797685e-05, "epoch": 0.4728761514841351, "percentage": 23.64, "elapsed_time": "2:22:00", "remaining_time": "7:38:41"} +{"current_steps": 1618, "total_steps": 6840, "loss": 0.6446187496185303, "lr": 1.815950364518847e-05, "epoch": 0.47316859189940047, "percentage": 23.65, "elapsed_time": "2:22:05", "remaining_time": "7:38:35"} +{"current_steps": 1619, "total_steps": 6840, "loss": 0.5153034329414368, "lr": 1.8156707641344885e-05, "epoch": 0.4734610323146659, "percentage": 23.67, "elapsed_time": "2:22:11", "remaining_time": "7:38:34"} +{"current_steps": 1620, "total_steps": 6840, "loss": 0.7209463715553284, "lr": 1.8153909730920485e-05, "epoch": 0.4737534727299313, "percentage": 23.68, "elapsed_time": "2:22:16", "remaining_time": "7:38:27"} +{"current_steps": 1621, "total_steps": 6840, "loss": 0.5990744829177856, "lr": 1.8151109914569267e-05, "epoch": 0.4740459131451967, "percentage": 23.7, "elapsed_time": "2:22:21", "remaining_time": "7:38:20"} +{"current_steps": 1622, "total_steps": 6840, "loss": 0.5706672668457031, "lr": 1.814830819294566e-05, "epoch": 0.47433835356046206, 
"percentage": 23.71, "elapsed_time": "2:22:27", "remaining_time": "7:38:17"} +{"current_steps": 1623, "total_steps": 6840, "loss": 0.538548469543457, "lr": 1.814550456670456e-05, "epoch": 0.47463079397572744, "percentage": 23.73, "elapsed_time": "2:22:33", "remaining_time": "7:38:13"} +{"current_steps": 1624, "total_steps": 6840, "loss": 0.6450623273849487, "lr": 1.8142699036501288e-05, "epoch": 0.4749232343909928, "percentage": 23.74, "elapsed_time": "2:22:38", "remaining_time": "7:38:09"} +{"current_steps": 1625, "total_steps": 6840, "loss": 0.6537624597549438, "lr": 1.813989160299163e-05, "epoch": 0.4752156748062582, "percentage": 23.76, "elapsed_time": "2:22:44", "remaining_time": "7:38:03"} +{"current_steps": 1626, "total_steps": 6840, "loss": 0.7126362323760986, "lr": 1.8137082266831794e-05, "epoch": 0.47550811522152364, "percentage": 23.77, "elapsed_time": "2:22:49", "remaining_time": "7:37:59"} +{"current_steps": 1627, "total_steps": 6840, "loss": 0.6686921119689941, "lr": 1.813427102867846e-05, "epoch": 0.475800555636789, "percentage": 23.79, "elapsed_time": "2:22:54", "remaining_time": "7:37:54"} +{"current_steps": 1628, "total_steps": 6840, "loss": 0.5925619602203369, "lr": 1.8131457889188723e-05, "epoch": 0.4760929960520544, "percentage": 23.8, "elapsed_time": "2:23:00", "remaining_time": "7:37:50"} +{"current_steps": 1629, "total_steps": 6840, "loss": 0.7251017689704895, "lr": 1.8128642849020147e-05, "epoch": 0.4763854364673198, "percentage": 23.82, "elapsed_time": "2:23:05", "remaining_time": "7:37:43"} +{"current_steps": 1630, "total_steps": 6840, "loss": 0.7524283528327942, "lr": 1.8125825908830733e-05, "epoch": 0.47667787688258517, "percentage": 23.83, "elapsed_time": "2:23:09", "remaining_time": "7:37:36"} +{"current_steps": 1631, "total_steps": 6840, "loss": 0.7593197226524353, "lr": 1.8123007069278914e-05, "epoch": 0.47697031729785055, "percentage": 23.85, "elapsed_time": "2:23:14", "remaining_time": "7:37:28"} +{"current_steps": 1632, 
"total_steps": 6840, "loss": 0.43353578448295593, "lr": 1.812018633102358e-05, "epoch": 0.47726275771311594, "percentage": 23.86, "elapsed_time": "2:23:18", "remaining_time": "7:37:20"} +{"current_steps": 1633, "total_steps": 6840, "loss": 0.6254708766937256, "lr": 1.8117363694724063e-05, "epoch": 0.4775551981283813, "percentage": 23.87, "elapsed_time": "2:23:24", "remaining_time": "7:37:15"} +{"current_steps": 1634, "total_steps": 6840, "loss": 0.5970091223716736, "lr": 1.811453916104014e-05, "epoch": 0.47784763854364676, "percentage": 23.89, "elapsed_time": "2:23:29", "remaining_time": "7:37:10"} +{"current_steps": 1635, "total_steps": 6840, "loss": 0.6299331188201904, "lr": 1.8111712730632024e-05, "epoch": 0.47814007895891214, "percentage": 23.9, "elapsed_time": "2:23:34", "remaining_time": "7:37:04"} +{"current_steps": 1636, "total_steps": 6840, "loss": 0.7461789846420288, "lr": 1.810888440416038e-05, "epoch": 0.4784325193741775, "percentage": 23.92, "elapsed_time": "2:23:39", "remaining_time": "7:36:57"} +{"current_steps": 1637, "total_steps": 6840, "loss": 0.5053290724754333, "lr": 1.8106054182286305e-05, "epoch": 0.4787249597894429, "percentage": 23.93, "elapsed_time": "2:23:45", "remaining_time": "7:36:56"} +{"current_steps": 1638, "total_steps": 6840, "loss": 0.6853327751159668, "lr": 1.810322206567135e-05, "epoch": 0.4790174002047083, "percentage": 23.95, "elapsed_time": "2:23:50", "remaining_time": "7:36:49"} +{"current_steps": 1639, "total_steps": 6840, "loss": 0.5337134599685669, "lr": 1.8100388054977508e-05, "epoch": 0.47930984061997367, "percentage": 23.96, "elapsed_time": "2:23:55", "remaining_time": "7:36:43"} +{"current_steps": 1640, "total_steps": 6840, "loss": 0.7082560062408447, "lr": 1.809755215086721e-05, "epoch": 0.47960228103523905, "percentage": 23.98, "elapsed_time": "2:24:00", "remaining_time": "7:36:37"} +{"current_steps": 1641, "total_steps": 6840, "loss": 0.680424153804779, "lr": 1.8094714354003325e-05, "epoch": 0.47989472145050444, 
"percentage": 23.99, "elapsed_time": "2:24:05", "remaining_time": "7:36:30"} +{"current_steps": 1642, "total_steps": 6840, "loss": 0.5235139727592468, "lr": 1.8091874665049183e-05, "epoch": 0.4801871618657699, "percentage": 24.01, "elapsed_time": "2:24:11", "remaining_time": "7:36:27"} +{"current_steps": 1643, "total_steps": 6840, "loss": 0.7843992114067078, "lr": 1.8089033084668535e-05, "epoch": 0.48047960228103526, "percentage": 24.02, "elapsed_time": "2:24:15", "remaining_time": "7:36:19"} +{"current_steps": 1644, "total_steps": 6840, "loss": 0.6736497282981873, "lr": 1.8086189613525587e-05, "epoch": 0.48077204269630064, "percentage": 24.04, "elapsed_time": "2:24:21", "remaining_time": "7:36:14"} +{"current_steps": 1645, "total_steps": 6840, "loss": 0.6898948550224304, "lr": 1.808334425228498e-05, "epoch": 0.481064483111566, "percentage": 24.05, "elapsed_time": "2:24:26", "remaining_time": "7:36:10"} +{"current_steps": 1646, "total_steps": 6840, "loss": 0.6719726324081421, "lr": 1.80804970016118e-05, "epoch": 0.4813569235268314, "percentage": 24.06, "elapsed_time": "2:24:31", "remaining_time": "7:36:04"} +{"current_steps": 1647, "total_steps": 6840, "loss": 0.6904356479644775, "lr": 1.807764786217158e-05, "epoch": 0.4816493639420968, "percentage": 24.08, "elapsed_time": "2:24:35", "remaining_time": "7:35:54"} +{"current_steps": 1648, "total_steps": 6840, "loss": 0.5956645011901855, "lr": 1.8074796834630285e-05, "epoch": 0.48194180435736217, "percentage": 24.09, "elapsed_time": "2:24:41", "remaining_time": "7:35:51"} +{"current_steps": 1649, "total_steps": 6840, "loss": 0.5676499009132385, "lr": 1.8071943919654323e-05, "epoch": 0.48223424477262755, "percentage": 24.11, "elapsed_time": "2:24:48", "remaining_time": "7:35:50"} +{"current_steps": 1650, "total_steps": 6840, "loss": 0.6006937026977539, "lr": 1.8069089117910547e-05, "epoch": 0.482526685187893, "percentage": 24.12, "elapsed_time": "2:24:53", "remaining_time": "7:35:46"} +{"current_steps": 1651, 
"total_steps": 6840, "loss": 0.6241977214813232, "lr": 1.806623243006625e-05, "epoch": 0.48281912560315837, "percentage": 24.14, "elapsed_time": "2:24:58", "remaining_time": "7:35:40"} +{"current_steps": 1652, "total_steps": 6840, "loss": 0.7359870672225952, "lr": 1.806337385678917e-05, "epoch": 0.48311156601842375, "percentage": 24.15, "elapsed_time": "2:25:03", "remaining_time": "7:35:32"} +{"current_steps": 1653, "total_steps": 6840, "loss": 0.6113119125366211, "lr": 1.806051339874748e-05, "epoch": 0.48340400643368914, "percentage": 24.17, "elapsed_time": "2:25:08", "remaining_time": "7:35:27"} +{"current_steps": 1654, "total_steps": 6840, "loss": 0.642951488494873, "lr": 1.8057651056609784e-05, "epoch": 0.4836964468489545, "percentage": 24.18, "elapsed_time": "2:25:15", "remaining_time": "7:35:25"} +{"current_steps": 1655, "total_steps": 6840, "loss": 0.7020113468170166, "lr": 1.8054786831045147e-05, "epoch": 0.4839888872642199, "percentage": 24.2, "elapsed_time": "2:25:20", "remaining_time": "7:35:20"} +{"current_steps": 1656, "total_steps": 6840, "loss": 0.678231418132782, "lr": 1.8051920722723063e-05, "epoch": 0.4842813276794853, "percentage": 24.21, "elapsed_time": "2:25:25", "remaining_time": "7:35:13"} +{"current_steps": 1657, "total_steps": 6840, "loss": 0.604765772819519, "lr": 1.8049052732313466e-05, "epoch": 0.48457376809475067, "percentage": 24.23, "elapsed_time": "2:25:29", "remaining_time": "7:35:05"} +{"current_steps": 1658, "total_steps": 6840, "loss": 0.6812270879745483, "lr": 1.8046182860486735e-05, "epoch": 0.4848662085100161, "percentage": 24.24, "elapsed_time": "2:25:34", "remaining_time": "7:34:58"} +{"current_steps": 1659, "total_steps": 6840, "loss": 0.6284930109977722, "lr": 1.8043311107913675e-05, "epoch": 0.4851586489252815, "percentage": 24.25, "elapsed_time": "2:25:39", "remaining_time": "7:34:53"} +{"current_steps": 1660, "total_steps": 6840, "loss": 0.665177583694458, "lr": 1.8040437475265554e-05, "epoch": 0.48545108934054687, 
"percentage": 24.27, "elapsed_time": "2:25:44", "remaining_time": "7:34:47"} +{"current_steps": 1661, "total_steps": 6840, "loss": 0.7628738284111023, "lr": 1.8037561963214058e-05, "epoch": 0.48574352975581225, "percentage": 24.28, "elapsed_time": "2:25:49", "remaining_time": "7:34:40"} +{"current_steps": 1662, "total_steps": 6840, "loss": 0.6372654438018799, "lr": 1.8034684572431322e-05, "epoch": 0.48603597017107764, "percentage": 24.3, "elapsed_time": "2:25:54", "remaining_time": "7:34:33"} +{"current_steps": 1663, "total_steps": 6840, "loss": 0.5915756225585938, "lr": 1.803180530358992e-05, "epoch": 0.486328410586343, "percentage": 24.31, "elapsed_time": "2:25:59", "remaining_time": "7:34:28"} +{"current_steps": 1664, "total_steps": 6840, "loss": 0.6821908950805664, "lr": 1.802892415736286e-05, "epoch": 0.4866208510016084, "percentage": 24.33, "elapsed_time": "2:26:05", "remaining_time": "7:34:25"} +{"current_steps": 1665, "total_steps": 6840, "loss": 0.6418279409408569, "lr": 1.80260411344236e-05, "epoch": 0.48691329141687384, "percentage": 24.34, "elapsed_time": "2:26:10", "remaining_time": "7:34:18"} +{"current_steps": 1666, "total_steps": 6840, "loss": 0.5582526922225952, "lr": 1.802315623544602e-05, "epoch": 0.4872057318321392, "percentage": 24.36, "elapsed_time": "2:26:15", "remaining_time": "7:34:12"} +{"current_steps": 1667, "total_steps": 6840, "loss": 0.7145007848739624, "lr": 1.8020269461104448e-05, "epoch": 0.4874981722474046, "percentage": 24.37, "elapsed_time": "2:26:20", "remaining_time": "7:34:07"} +{"current_steps": 1668, "total_steps": 6840, "loss": 0.5415871739387512, "lr": 1.8017380812073658e-05, "epoch": 0.48779061266267, "percentage": 24.39, "elapsed_time": "2:26:27", "remaining_time": "7:34:06"} +{"current_steps": 1669, "total_steps": 6840, "loss": 0.728327751159668, "lr": 1.801449028902885e-05, "epoch": 0.48808305307793537, "percentage": 24.4, "elapsed_time": "2:26:31", "remaining_time": "7:33:57"} +{"current_steps": 1670, "total_steps": 
6840, "loss": 0.6469160914421082, "lr": 1.8011597892645665e-05, "epoch": 0.48837549349320075, "percentage": 24.42, "elapsed_time": "2:26:37", "remaining_time": "7:33:54"} +{"current_steps": 1671, "total_steps": 6840, "loss": 0.7107353210449219, "lr": 1.8008703623600185e-05, "epoch": 0.48866793390846613, "percentage": 24.43, "elapsed_time": "2:26:42", "remaining_time": "7:33:48"} +{"current_steps": 1672, "total_steps": 6840, "loss": 0.6918982267379761, "lr": 1.8005807482568926e-05, "epoch": 0.4889603743237315, "percentage": 24.44, "elapsed_time": "2:26:46", "remaining_time": "7:33:38"} +{"current_steps": 1673, "total_steps": 6840, "loss": 0.661738932132721, "lr": 1.800290947022884e-05, "epoch": 0.48925281473899696, "percentage": 24.46, "elapsed_time": "2:26:51", "remaining_time": "7:33:34"} +{"current_steps": 1674, "total_steps": 6840, "loss": 0.6816283464431763, "lr": 1.800000958725733e-05, "epoch": 0.48954525515426234, "percentage": 24.47, "elapsed_time": "2:26:55", "remaining_time": "7:33:24"} +{"current_steps": 1675, "total_steps": 6840, "loss": 0.6988941431045532, "lr": 1.7997107834332217e-05, "epoch": 0.4898376955695277, "percentage": 24.49, "elapsed_time": "2:27:01", "remaining_time": "7:33:22"} +{"current_steps": 1676, "total_steps": 6840, "loss": 0.7997519969940186, "lr": 1.799420421213177e-05, "epoch": 0.4901301359847931, "percentage": 24.5, "elapsed_time": "2:27:05", "remaining_time": "7:33:13"} +{"current_steps": 1677, "total_steps": 6840, "loss": 0.6552794575691223, "lr": 1.7991298721334697e-05, "epoch": 0.4904225764000585, "percentage": 24.52, "elapsed_time": "2:27:10", "remaining_time": "7:33:06"} +{"current_steps": 1678, "total_steps": 6840, "loss": 0.6144021153450012, "lr": 1.7988391362620135e-05, "epoch": 0.49071501681532387, "percentage": 24.53, "elapsed_time": "2:27:15", "remaining_time": "7:33:00"} +{"current_steps": 1679, "total_steps": 6840, "loss": 0.5036276578903198, "lr": 1.798548213666766e-05, "epoch": 0.49100745723058925, "percentage": 
24.55, "elapsed_time": "2:27:21", "remaining_time": "7:32:58"} +{"current_steps": 1680, "total_steps": 6840, "loss": 0.5152162313461304, "lr": 1.7982571044157288e-05, "epoch": 0.49129989764585463, "percentage": 24.56, "elapsed_time": "2:27:27", "remaining_time": "7:32:53"} +{"current_steps": 1681, "total_steps": 6840, "loss": 0.7249797582626343, "lr": 1.797965808576947e-05, "epoch": 0.49159233806112007, "percentage": 24.58, "elapsed_time": "2:27:33", "remaining_time": "7:32:52"} +{"current_steps": 1682, "total_steps": 6840, "loss": 0.5769079923629761, "lr": 1.7976743262185094e-05, "epoch": 0.49188477847638545, "percentage": 24.59, "elapsed_time": "2:27:39", "remaining_time": "7:32:49"} +{"current_steps": 1683, "total_steps": 6840, "loss": 0.7017331123352051, "lr": 1.797382657408548e-05, "epoch": 0.49217721889165084, "percentage": 24.61, "elapsed_time": "2:27:45", "remaining_time": "7:32:43"} +{"current_steps": 1684, "total_steps": 6840, "loss": 0.788599967956543, "lr": 1.797090802215238e-05, "epoch": 0.4924696593069162, "percentage": 24.62, "elapsed_time": "2:27:50", "remaining_time": "7:32:39"} +{"current_steps": 1685, "total_steps": 6840, "loss": 0.5716612935066223, "lr": 1.7967987607067997e-05, "epoch": 0.4927620997221816, "percentage": 24.63, "elapsed_time": "2:27:55", "remaining_time": "7:32:34"} +{"current_steps": 1686, "total_steps": 6840, "loss": 0.6808345913887024, "lr": 1.796506532951496e-05, "epoch": 0.493054540137447, "percentage": 24.65, "elapsed_time": "2:28:01", "remaining_time": "7:32:29"} +{"current_steps": 1687, "total_steps": 6840, "loss": 0.6540817022323608, "lr": 1.7962141190176326e-05, "epoch": 0.49334698055271237, "percentage": 24.66, "elapsed_time": "2:28:07", "remaining_time": "7:32:27"} +{"current_steps": 1688, "total_steps": 6840, "loss": 0.6522870063781738, "lr": 1.7959215189735604e-05, "epoch": 0.49363942096797775, "percentage": 24.68, "elapsed_time": "2:28:12", "remaining_time": "7:32:21"} +{"current_steps": 1689, "total_steps": 6840, 
"loss": 0.5217882990837097, "lr": 1.7956287328876724e-05, "epoch": 0.4939318613832432, "percentage": 24.69, "elapsed_time": "2:28:18", "remaining_time": "7:32:17"} +{"current_steps": 1690, "total_steps": 6840, "loss": 0.6985372304916382, "lr": 1.795335760828405e-05, "epoch": 0.49422430179850857, "percentage": 24.71, "elapsed_time": "2:28:24", "remaining_time": "7:32:15"} +{"current_steps": 1691, "total_steps": 6840, "loss": 0.7199063301086426, "lr": 1.7950426028642397e-05, "epoch": 0.49451674221377395, "percentage": 24.72, "elapsed_time": "2:28:28", "remaining_time": "7:32:04"} +{"current_steps": 1692, "total_steps": 6840, "loss": 0.5810575485229492, "lr": 1.7947492590636998e-05, "epoch": 0.49480918262903933, "percentage": 24.74, "elapsed_time": "2:28:33", "remaining_time": "7:31:59"} +{"current_steps": 1693, "total_steps": 6840, "loss": 0.7443726658821106, "lr": 1.7944557294953528e-05, "epoch": 0.4951016230443047, "percentage": 24.75, "elapsed_time": "2:28:37", "remaining_time": "7:31:51"} +{"current_steps": 1694, "total_steps": 6840, "loss": 0.6774560213088989, "lr": 1.7941620142278092e-05, "epoch": 0.4953940634595701, "percentage": 24.77, "elapsed_time": "2:28:42", "remaining_time": "7:31:44"} +{"current_steps": 1695, "total_steps": 6840, "loss": 0.6983137726783752, "lr": 1.793868113329724e-05, "epoch": 0.4956865038748355, "percentage": 24.78, "elapsed_time": "2:28:47", "remaining_time": "7:31:38"} +{"current_steps": 1696, "total_steps": 6840, "loss": 0.6481274366378784, "lr": 1.793574026869793e-05, "epoch": 0.49597894429010086, "percentage": 24.8, "elapsed_time": "2:28:52", "remaining_time": "7:31:32"} +{"current_steps": 1697, "total_steps": 6840, "loss": 0.6489002704620361, "lr": 1.793279754916759e-05, "epoch": 0.4962713847053663, "percentage": 24.81, "elapsed_time": "2:28:56", "remaining_time": "7:31:24"} +{"current_steps": 1698, "total_steps": 6840, "loss": 0.7054505348205566, "lr": 1.7929852975394056e-05, "epoch": 0.4965638251206317, "percentage": 24.82, 
"elapsed_time": "2:29:02", "remaining_time": "7:31:19"} +{"current_steps": 1699, "total_steps": 6840, "loss": 0.5257681608200073, "lr": 1.79269065480656e-05, "epoch": 0.49685626553589707, "percentage": 24.84, "elapsed_time": "2:29:07", "remaining_time": "7:31:12"} +{"current_steps": 1700, "total_steps": 6840, "loss": 0.8625251054763794, "lr": 1.7923958267870936e-05, "epoch": 0.49714870595116245, "percentage": 24.85, "elapsed_time": "2:29:11", "remaining_time": "7:31:06"} +{"current_steps": 1701, "total_steps": 6840, "loss": 0.6736147999763489, "lr": 1.7921008135499205e-05, "epoch": 0.49744114636642783, "percentage": 24.87, "elapsed_time": "2:29:21", "remaining_time": "7:31:14"} +{"current_steps": 1702, "total_steps": 6840, "loss": 0.5079643130302429, "lr": 1.7918056151639985e-05, "epoch": 0.4977335867816932, "percentage": 24.88, "elapsed_time": "2:29:26", "remaining_time": "7:31:07"} +{"current_steps": 1703, "total_steps": 6840, "loss": 0.597242534160614, "lr": 1.791510231698328e-05, "epoch": 0.4980260271969586, "percentage": 24.9, "elapsed_time": "2:29:32", "remaining_time": "7:31:05"} +{"current_steps": 1704, "total_steps": 6840, "loss": 0.6695376038551331, "lr": 1.791214663221953e-05, "epoch": 0.49831846761222404, "percentage": 24.91, "elapsed_time": "2:29:36", "remaining_time": "7:30:56"} +{"current_steps": 1705, "total_steps": 6840, "loss": 0.6411684155464172, "lr": 1.7909189098039616e-05, "epoch": 0.4986109080274894, "percentage": 24.93, "elapsed_time": "2:29:42", "remaining_time": "7:30:52"} +{"current_steps": 1706, "total_steps": 6840, "loss": 0.6671754121780396, "lr": 1.790622971513484e-05, "epoch": 0.4989033484427548, "percentage": 24.94, "elapsed_time": "2:29:46", "remaining_time": "7:30:43"} +{"current_steps": 1707, "total_steps": 6840, "loss": 0.5312573909759521, "lr": 1.7903268484196936e-05, "epoch": 0.4991957888580202, "percentage": 24.96, "elapsed_time": "2:29:52", "remaining_time": "7:30:39"} +{"current_steps": 1708, "total_steps": 6840, "loss": 
0.643236517906189, "lr": 1.7900305405918076e-05, "epoch": 0.49948822927328557, "percentage": 24.97, "elapsed_time": "2:29:58", "remaining_time": "7:30:38"} +{"current_steps": 1709, "total_steps": 6840, "loss": 0.7942951321601868, "lr": 1.7897340480990863e-05, "epoch": 0.49978066968855095, "percentage": 24.99, "elapsed_time": "2:30:03", "remaining_time": "7:30:30"} +{"current_steps": 1710, "total_steps": 6840, "loss": 0.701362133026123, "lr": 1.789437371010833e-05, "epoch": 0.5000731101038164, "percentage": 25.0, "elapsed_time": "2:30:07", "remaining_time": "7:30:22"} +{"current_steps": 1711, "total_steps": 6840, "loss": 0.6993157863616943, "lr": 1.789140509396394e-05, "epoch": 0.5003655505190817, "percentage": 25.01, "elapsed_time": "2:30:12", "remaining_time": "7:30:15"} +{"current_steps": 1712, "total_steps": 6840, "loss": 0.568405270576477, "lr": 1.788843463325159e-05, "epoch": 0.5006579909343472, "percentage": 25.03, "elapsed_time": "2:30:18", "remaining_time": "7:30:13"} +{"current_steps": 1713, "total_steps": 6840, "loss": 0.4948374032974243, "lr": 1.7885462328665605e-05, "epoch": 0.5009504313496125, "percentage": 25.04, "elapsed_time": "2:30:24", "remaining_time": "7:30:11"} +{"current_steps": 1714, "total_steps": 6840, "loss": 0.6679480671882629, "lr": 1.7882488180900743e-05, "epoch": 0.5012428717648779, "percentage": 25.06, "elapsed_time": "2:30:30", "remaining_time": "7:30:08"} +{"current_steps": 1715, "total_steps": 6840, "loss": 0.706131100654602, "lr": 1.78795121906522e-05, "epoch": 0.5015353121801432, "percentage": 25.07, "elapsed_time": "2:30:35", "remaining_time": "7:30:01"} +{"current_steps": 1716, "total_steps": 6840, "loss": 0.6691830158233643, "lr": 1.787653435861559e-05, "epoch": 0.5018277525954087, "percentage": 25.09, "elapsed_time": "2:30:40", "remaining_time": "7:29:56"} +{"current_steps": 1717, "total_steps": 6840, "loss": 0.5624213218688965, "lr": 1.787355468548696e-05, "epoch": 0.5021201930106741, "percentage": 25.1, "elapsed_time": 
"2:30:44", "remaining_time": "7:29:47"} +{"current_steps": 1718, "total_steps": 6840, "loss": 0.4589618444442749, "lr": 1.78705731719628e-05, "epoch": 0.5024126334259394, "percentage": 25.12, "elapsed_time": "2:30:49", "remaining_time": "7:29:39"} +{"current_steps": 1719, "total_steps": 6840, "loss": 0.571403980255127, "lr": 1.7867589818740012e-05, "epoch": 0.5027050738412049, "percentage": 25.13, "elapsed_time": "2:30:54", "remaining_time": "7:29:35"} +{"current_steps": 1720, "total_steps": 6840, "loss": 0.5395561456680298, "lr": 1.786460462651594e-05, "epoch": 0.5029975142564702, "percentage": 25.15, "elapsed_time": "2:30:59", "remaining_time": "7:29:29"} +{"current_steps": 1721, "total_steps": 6840, "loss": 0.6166945695877075, "lr": 1.7861617595988355e-05, "epoch": 0.5032899546717357, "percentage": 25.16, "elapsed_time": "2:31:06", "remaining_time": "7:29:27"} +{"current_steps": 1722, "total_steps": 6840, "loss": 0.6812523603439331, "lr": 1.7858628727855458e-05, "epoch": 0.503582395087001, "percentage": 25.18, "elapsed_time": "2:31:11", "remaining_time": "7:29:20"} +{"current_steps": 1723, "total_steps": 6840, "loss": 0.6602752208709717, "lr": 1.7855638022815872e-05, "epoch": 0.5038748355022664, "percentage": 25.19, "elapsed_time": "2:31:17", "remaining_time": "7:29:17"} +{"current_steps": 1724, "total_steps": 6840, "loss": 0.49925822019577026, "lr": 1.7852645481568665e-05, "epoch": 0.5041672759175319, "percentage": 25.2, "elapsed_time": "2:31:22", "remaining_time": "7:29:13"} +{"current_steps": 1725, "total_steps": 6840, "loss": 0.5557682514190674, "lr": 1.784965110481332e-05, "epoch": 0.5044597163327972, "percentage": 25.22, "elapsed_time": "2:31:28", "remaining_time": "7:29:10"} +{"current_steps": 1726, "total_steps": 6840, "loss": 0.6576372981071472, "lr": 1.7846654893249756e-05, "epoch": 0.5047521567480626, "percentage": 25.23, "elapsed_time": "2:31:34", "remaining_time": "7:29:04"} +{"current_steps": 1727, "total_steps": 6840, "loss": 0.5266367197036743, 
"lr": 1.7843656847578317e-05, "epoch": 0.505044597163328, "percentage": 25.25, "elapsed_time": "2:31:39", "remaining_time": "7:28:59"} +{"current_steps": 1728, "total_steps": 6840, "loss": 0.7368261218070984, "lr": 1.7840656968499782e-05, "epoch": 0.5053370375785934, "percentage": 25.26, "elapsed_time": "2:31:43", "remaining_time": "7:28:52"} +{"current_steps": 1729, "total_steps": 6840, "loss": 0.6583619117736816, "lr": 1.7837655256715355e-05, "epoch": 0.5056294779938587, "percentage": 25.28, "elapsed_time": "2:31:48", "remaining_time": "7:28:44"} +{"current_steps": 1730, "total_steps": 6840, "loss": 0.7323073148727417, "lr": 1.7834651712926662e-05, "epoch": 0.5059219184091241, "percentage": 25.29, "elapsed_time": "2:31:53", "remaining_time": "7:28:40"} +{"current_steps": 1731, "total_steps": 6840, "loss": 0.6059812307357788, "lr": 1.783164633783577e-05, "epoch": 0.5062143588243895, "percentage": 25.31, "elapsed_time": "2:31:58", "remaining_time": "7:28:32"} +{"current_steps": 1732, "total_steps": 6840, "loss": 0.5992608070373535, "lr": 1.782863913214516e-05, "epoch": 0.5065067992396549, "percentage": 25.32, "elapsed_time": "2:32:02", "remaining_time": "7:28:24"} +{"current_steps": 1733, "total_steps": 6840, "loss": 0.5729147791862488, "lr": 1.7825630096557754e-05, "epoch": 0.5067992396549204, "percentage": 25.34, "elapsed_time": "2:32:07", "remaining_time": "7:28:17"} +{"current_steps": 1734, "total_steps": 6840, "loss": 0.6708269119262695, "lr": 1.782261923177689e-05, "epoch": 0.5070916800701857, "percentage": 25.35, "elapsed_time": "2:32:12", "remaining_time": "7:28:12"} +{"current_steps": 1735, "total_steps": 6840, "loss": 0.5377235412597656, "lr": 1.7819606538506347e-05, "epoch": 0.5073841204854511, "percentage": 25.37, "elapsed_time": "2:32:18", "remaining_time": "7:28:08"} +{"current_steps": 1736, "total_steps": 6840, "loss": 0.6899171471595764, "lr": 1.781659201745032e-05, "epoch": 0.5076765609007164, "percentage": 25.38, "elapsed_time": "2:32:23", 
"remaining_time": "7:28:02"} +{"current_steps": 1737, "total_steps": 6840, "loss": 0.6712576150894165, "lr": 1.7813575669313434e-05, "epoch": 0.5079690013159819, "percentage": 25.39, "elapsed_time": "2:32:28", "remaining_time": "7:27:57"} +{"current_steps": 1738, "total_steps": 6840, "loss": 0.6989667415618896, "lr": 1.781055749480074e-05, "epoch": 0.5082614417312472, "percentage": 25.41, "elapsed_time": "2:32:35", "remaining_time": "7:27:55"} +{"current_steps": 1739, "total_steps": 6840, "loss": 0.6103490591049194, "lr": 1.7807537494617723e-05, "epoch": 0.5085538821465126, "percentage": 25.42, "elapsed_time": "2:32:39", "remaining_time": "7:27:48"} +{"current_steps": 1740, "total_steps": 6840, "loss": 0.4882289171218872, "lr": 1.7804515669470287e-05, "epoch": 0.5088463225617781, "percentage": 25.44, "elapsed_time": "2:32:44", "remaining_time": "7:27:40"} +{"current_steps": 1741, "total_steps": 6840, "loss": 0.7244713306427002, "lr": 1.7801492020064764e-05, "epoch": 0.5091387629770434, "percentage": 25.45, "elapsed_time": "2:32:48", "remaining_time": "7:27:31"} +{"current_steps": 1742, "total_steps": 6840, "loss": 0.6055952310562134, "lr": 1.7798466547107918e-05, "epoch": 0.5094312033923089, "percentage": 25.47, "elapsed_time": "2:32:55", "remaining_time": "7:27:31"} +{"current_steps": 1743, "total_steps": 6840, "loss": 0.5893995761871338, "lr": 1.779543925130693e-05, "epoch": 0.5097236438075742, "percentage": 25.48, "elapsed_time": "2:33:01", "remaining_time": "7:27:29"} +{"current_steps": 1744, "total_steps": 6840, "loss": 0.6154330968856812, "lr": 1.7792410133369413e-05, "epoch": 0.5100160842228396, "percentage": 25.5, "elapsed_time": "2:33:07", "remaining_time": "7:27:27"} +{"current_steps": 1745, "total_steps": 6840, "loss": 0.6227806806564331, "lr": 1.778937919400341e-05, "epoch": 0.5103085246381049, "percentage": 25.51, "elapsed_time": "2:33:13", "remaining_time": "7:27:21"} +{"current_steps": 1746, "total_steps": 6840, "loss": 0.6192313432693481, "lr": 
1.7786346433917376e-05, "epoch": 0.5106009650533704, "percentage": 25.53, "elapsed_time": "2:33:19", "remaining_time": "7:27:18"} +{"current_steps": 1747, "total_steps": 6840, "loss": 0.6175359487533569, "lr": 1.7783311853820205e-05, "epoch": 0.5108934054686358, "percentage": 25.54, "elapsed_time": "2:33:25", "remaining_time": "7:27:16"} +{"current_steps": 1748, "total_steps": 6840, "loss": 0.5588991641998291, "lr": 1.7780275454421218e-05, "epoch": 0.5111858458839011, "percentage": 25.56, "elapsed_time": "2:33:31", "remaining_time": "7:27:13"} +{"current_steps": 1749, "total_steps": 6840, "loss": 0.637115478515625, "lr": 1.777723723643014e-05, "epoch": 0.5114782862991666, "percentage": 25.57, "elapsed_time": "2:33:37", "remaining_time": "7:27:10"} +{"current_steps": 1750, "total_steps": 6840, "loss": 0.6762860417366028, "lr": 1.777419720055715e-05, "epoch": 0.5117707267144319, "percentage": 25.58, "elapsed_time": "2:33:42", "remaining_time": "7:27:04"} +{"current_steps": 1751, "total_steps": 6840, "loss": 0.6980293989181519, "lr": 1.7771155347512828e-05, "epoch": 0.5120631671296973, "percentage": 25.6, "elapsed_time": "2:33:47", "remaining_time": "7:26:59"} +{"current_steps": 1752, "total_steps": 6840, "loss": 0.6587250232696533, "lr": 1.7768111678008194e-05, "epoch": 0.5123556075449627, "percentage": 25.61, "elapsed_time": "2:33:52", "remaining_time": "7:26:51"} +{"current_steps": 1753, "total_steps": 6840, "loss": 0.6571120619773865, "lr": 1.776506619275469e-05, "epoch": 0.5126480479602281, "percentage": 25.63, "elapsed_time": "2:33:57", "remaining_time": "7:26:46"} +{"current_steps": 1754, "total_steps": 6840, "loss": 0.8127633333206177, "lr": 1.7762018892464172e-05, "epoch": 0.5129404883754934, "percentage": 25.64, "elapsed_time": "2:34:02", "remaining_time": "7:26:40"} +{"current_steps": 1755, "total_steps": 6840, "loss": 0.6585550308227539, "lr": 1.7758969777848935e-05, "epoch": 0.5132329287907589, "percentage": 25.66, "elapsed_time": "2:34:07", 
"remaining_time": "7:26:33"} +{"current_steps": 1756, "total_steps": 6840, "loss": 0.6347511410713196, "lr": 1.7755918849621686e-05, "epoch": 0.5135253692060243, "percentage": 25.67, "elapsed_time": "2:34:13", "remaining_time": "7:26:29"} +{"current_steps": 1757, "total_steps": 6840, "loss": 0.5918457508087158, "lr": 1.775286610849556e-05, "epoch": 0.5138178096212896, "percentage": 25.69, "elapsed_time": "2:34:17", "remaining_time": "7:26:20"} +{"current_steps": 1758, "total_steps": 6840, "loss": 0.7042769193649292, "lr": 1.774981155518412e-05, "epoch": 0.5141102500365551, "percentage": 25.7, "elapsed_time": "2:34:23", "remaining_time": "7:26:17"} +{"current_steps": 1759, "total_steps": 6840, "loss": 0.8014250993728638, "lr": 1.7746755190401353e-05, "epoch": 0.5144026904518204, "percentage": 25.72, "elapsed_time": "2:34:27", "remaining_time": "7:26:10"} +{"current_steps": 1760, "total_steps": 6840, "loss": 0.6703939437866211, "lr": 1.774369701486166e-05, "epoch": 0.5146951308670858, "percentage": 25.73, "elapsed_time": "2:34:33", "remaining_time": "7:26:05"} +{"current_steps": 1761, "total_steps": 6840, "loss": 0.6189682483673096, "lr": 1.774063702927987e-05, "epoch": 0.5149875712823512, "percentage": 25.75, "elapsed_time": "2:34:38", "remaining_time": "7:26:01"} +{"current_steps": 1762, "total_steps": 6840, "loss": 0.5386991500854492, "lr": 1.7737575234371238e-05, "epoch": 0.5152800116976166, "percentage": 25.76, "elapsed_time": "2:34:43", "remaining_time": "7:25:54"} +{"current_steps": 1763, "total_steps": 6840, "loss": 0.6389357447624207, "lr": 1.773451163085144e-05, "epoch": 0.515572452112882, "percentage": 25.77, "elapsed_time": "2:34:48", "remaining_time": "7:25:47"} +{"current_steps": 1764, "total_steps": 6840, "loss": 0.7247746586799622, "lr": 1.7731446219436577e-05, "epoch": 0.5158648925281474, "percentage": 25.79, "elapsed_time": "2:34:54", "remaining_time": "7:25:45"} +{"current_steps": 1765, "total_steps": 6840, "loss": 0.5538983941078186, "lr": 
1.7728379000843164e-05, "epoch": 0.5161573329434128, "percentage": 25.8, "elapsed_time": "2:35:00", "remaining_time": "7:25:40"} +{"current_steps": 1766, "total_steps": 6840, "loss": 0.6003320813179016, "lr": 1.7725309975788155e-05, "epoch": 0.5164497733586781, "percentage": 25.82, "elapsed_time": "2:35:06", "remaining_time": "7:25:39"} +{"current_steps": 1767, "total_steps": 6840, "loss": 0.603177011013031, "lr": 1.7722239144988908e-05, "epoch": 0.5167422137739436, "percentage": 25.83, "elapsed_time": "2:35:10", "remaining_time": "7:25:31"} +{"current_steps": 1768, "total_steps": 6840, "loss": 0.6071338653564453, "lr": 1.771916650916321e-05, "epoch": 0.5170346541892089, "percentage": 25.85, "elapsed_time": "2:35:17", "remaining_time": "7:25:30"} +{"current_steps": 1769, "total_steps": 6840, "loss": 0.6148535013198853, "lr": 1.7716092069029275e-05, "epoch": 0.5173270946044743, "percentage": 25.86, "elapsed_time": "2:35:23", "remaining_time": "7:25:26"} +{"current_steps": 1770, "total_steps": 6840, "loss": 0.6236969828605652, "lr": 1.7713015825305735e-05, "epoch": 0.5176195350197397, "percentage": 25.88, "elapsed_time": "2:35:27", "remaining_time": "7:25:17"} +{"current_steps": 1771, "total_steps": 6840, "loss": 0.5439775586128235, "lr": 1.770993777871164e-05, "epoch": 0.5179119754350051, "percentage": 25.89, "elapsed_time": "2:35:32", "remaining_time": "7:25:11"} +{"current_steps": 1772, "total_steps": 6840, "loss": 0.6498249769210815, "lr": 1.770685792996647e-05, "epoch": 0.5182044158502705, "percentage": 25.91, "elapsed_time": "2:35:38", "remaining_time": "7:25:08"} +{"current_steps": 1773, "total_steps": 6840, "loss": 0.5838749408721924, "lr": 1.7703776279790113e-05, "epoch": 0.5184968562655359, "percentage": 25.92, "elapsed_time": "2:35:43", "remaining_time": "7:25:01"} +{"current_steps": 1774, "total_steps": 6840, "loss": 0.6467812657356262, "lr": 1.770069282890289e-05, "epoch": 0.5187892966808013, "percentage": 25.94, "elapsed_time": "2:35:48", 
"remaining_time": "7:24:56"} +{"current_steps": 1775, "total_steps": 6840, "loss": 0.5878627896308899, "lr": 1.7697607578025543e-05, "epoch": 0.5190817370960666, "percentage": 25.95, "elapsed_time": "2:35:52", "remaining_time": "7:24:47"} +{"current_steps": 1776, "total_steps": 6840, "loss": 0.6252161264419556, "lr": 1.7694520527879223e-05, "epoch": 0.5193741775113321, "percentage": 25.96, "elapsed_time": "2:35:56", "remaining_time": "7:24:39"} +{"current_steps": 1777, "total_steps": 6840, "loss": 0.6098401546478271, "lr": 1.7691431679185518e-05, "epoch": 0.5196666179265974, "percentage": 25.98, "elapsed_time": "2:36:01", "remaining_time": "7:24:33"} +{"current_steps": 1778, "total_steps": 6840, "loss": 0.7401748299598694, "lr": 1.7688341032666415e-05, "epoch": 0.5199590583418628, "percentage": 25.99, "elapsed_time": "2:36:07", "remaining_time": "7:24:29"} +{"current_steps": 1779, "total_steps": 6840, "loss": 0.5398571491241455, "lr": 1.768524858904435e-05, "epoch": 0.5202514987571283, "percentage": 26.01, "elapsed_time": "2:36:14", "remaining_time": "7:24:28"} +{"current_steps": 1780, "total_steps": 6840, "loss": 0.5565935969352722, "lr": 1.768215434904215e-05, "epoch": 0.5205439391723936, "percentage": 26.02, "elapsed_time": "2:36:20", "remaining_time": "7:24:25"} +{"current_steps": 1781, "total_steps": 6840, "loss": 0.5510461926460266, "lr": 1.7679058313383078e-05, "epoch": 0.520836379587659, "percentage": 26.04, "elapsed_time": "2:36:26", "remaining_time": "7:24:22"} +{"current_steps": 1782, "total_steps": 6840, "loss": 0.670242428779602, "lr": 1.7675960482790818e-05, "epoch": 0.5211288200029244, "percentage": 26.05, "elapsed_time": "2:36:30", "remaining_time": "7:24:13"} +{"current_steps": 1783, "total_steps": 6840, "loss": 0.6556246280670166, "lr": 1.7672860857989463e-05, "epoch": 0.5214212604181898, "percentage": 26.07, "elapsed_time": "2:36:36", "remaining_time": "7:24:09"} +{"current_steps": 1784, "total_steps": 6840, "loss": 0.7133421897888184, "lr": 
1.7669759439703537e-05, "epoch": 0.5217137008334551, "percentage": 26.08, "elapsed_time": "2:36:40", "remaining_time": "7:24:03"} +{"current_steps": 1785, "total_steps": 6840, "loss": 0.5520647168159485, "lr": 1.766665622865797e-05, "epoch": 0.5220061412487206, "percentage": 26.1, "elapsed_time": "2:36:46", "remaining_time": "7:23:57"} +{"current_steps": 1786, "total_steps": 6840, "loss": 0.6906430125236511, "lr": 1.766355122557813e-05, "epoch": 0.522298581663986, "percentage": 26.11, "elapsed_time": "2:36:52", "remaining_time": "7:23:56"} +{"current_steps": 1787, "total_steps": 6840, "loss": 0.6847748756408691, "lr": 1.766044443118978e-05, "epoch": 0.5225910220792513, "percentage": 26.13, "elapsed_time": "2:36:57", "remaining_time": "7:23:49"} +{"current_steps": 1788, "total_steps": 6840, "loss": 0.6690354347229004, "lr": 1.7657335846219125e-05, "epoch": 0.5228834624945168, "percentage": 26.14, "elapsed_time": "2:37:01", "remaining_time": "7:23:40"} +{"current_steps": 1789, "total_steps": 6840, "loss": 0.6508032083511353, "lr": 1.765422547139277e-05, "epoch": 0.5231759029097821, "percentage": 26.15, "elapsed_time": "2:37:06", "remaining_time": "7:23:33"} +{"current_steps": 1790, "total_steps": 6840, "loss": 0.7686585187911987, "lr": 1.7651113307437754e-05, "epoch": 0.5234683433250475, "percentage": 26.17, "elapsed_time": "2:37:10", "remaining_time": "7:23:25"} +{"current_steps": 1791, "total_steps": 6840, "loss": 0.7669490575790405, "lr": 1.764799935508152e-05, "epoch": 0.5237607837403129, "percentage": 26.18, "elapsed_time": "2:37:15", "remaining_time": "7:23:18"} +{"current_steps": 1792, "total_steps": 6840, "loss": 0.6630266308784485, "lr": 1.7644883615051936e-05, "epoch": 0.5240532241555783, "percentage": 26.2, "elapsed_time": "2:37:20", "remaining_time": "7:23:12"} +{"current_steps": 1793, "total_steps": 6840, "loss": 0.6054951548576355, "lr": 1.764176608807729e-05, "epoch": 0.5243456645708436, "percentage": 26.21, "elapsed_time": "2:37:26", "remaining_time": 
"7:23:09"} +{"current_steps": 1794, "total_steps": 6840, "loss": 0.6519330739974976, "lr": 1.7638646774886282e-05, "epoch": 0.5246381049861091, "percentage": 26.23, "elapsed_time": "2:37:32", "remaining_time": "7:23:06"} +{"current_steps": 1795, "total_steps": 6840, "loss": 0.6797915101051331, "lr": 1.7635525676208034e-05, "epoch": 0.5249305454013745, "percentage": 26.24, "elapsed_time": "2:37:37", "remaining_time": "7:23:02"} +{"current_steps": 1796, "total_steps": 6840, "loss": 0.7296736240386963, "lr": 1.7632402792772084e-05, "epoch": 0.5252229858166398, "percentage": 26.26, "elapsed_time": "2:37:42", "remaining_time": "7:22:53"} +{"current_steps": 1797, "total_steps": 6840, "loss": 0.6371006965637207, "lr": 1.7629278125308388e-05, "epoch": 0.5255154262319053, "percentage": 26.27, "elapsed_time": "2:37:47", "remaining_time": "7:22:49"} +{"current_steps": 1798, "total_steps": 6840, "loss": 0.5315746068954468, "lr": 1.762615167454732e-05, "epoch": 0.5258078666471706, "percentage": 26.29, "elapsed_time": "2:37:53", "remaining_time": "7:22:46"} +{"current_steps": 1799, "total_steps": 6840, "loss": 0.5285685062408447, "lr": 1.762302344121966e-05, "epoch": 0.526100307062436, "percentage": 26.3, "elapsed_time": "2:37:58", "remaining_time": "7:22:40"} +{"current_steps": 1800, "total_steps": 6840, "loss": 0.623146653175354, "lr": 1.7619893426056622e-05, "epoch": 0.5263927474777014, "percentage": 26.32, "elapsed_time": "2:38:03", "remaining_time": "7:22:34"} +{"current_steps": 1801, "total_steps": 6840, "loss": 0.5433363318443298, "lr": 1.7616761629789824e-05, "epoch": 0.5266851878929668, "percentage": 26.33, "elapsed_time": "2:38:12", "remaining_time": "7:22:39"} +{"current_steps": 1802, "total_steps": 6840, "loss": 0.5035480260848999, "lr": 1.7613628053151307e-05, "epoch": 0.5269776283082322, "percentage": 26.35, "elapsed_time": "2:38:17", "remaining_time": "7:22:33"} +{"current_steps": 1803, "total_steps": 6840, "loss": 0.678544819355011, "lr": 1.7610492696873523e-05, 
"epoch": 0.5272700687234976, "percentage": 26.36, "elapsed_time": "2:38:21", "remaining_time": "7:22:24"} +{"current_steps": 1804, "total_steps": 6840, "loss": 0.6237714290618896, "lr": 1.7607355561689347e-05, "epoch": 0.527562509138763, "percentage": 26.37, "elapsed_time": "2:38:26", "remaining_time": "7:22:19"} +{"current_steps": 1805, "total_steps": 6840, "loss": 0.6943943500518799, "lr": 1.760421664833206e-05, "epoch": 0.5278549495540283, "percentage": 26.39, "elapsed_time": "2:38:30", "remaining_time": "7:22:10"} +{"current_steps": 1806, "total_steps": 6840, "loss": 0.5477268695831299, "lr": 1.7601075957535366e-05, "epoch": 0.5281473899692938, "percentage": 26.4, "elapsed_time": "2:38:37", "remaining_time": "7:22:08"} +{"current_steps": 1807, "total_steps": 6840, "loss": 0.6627641320228577, "lr": 1.759793349003338e-05, "epoch": 0.5284398303845591, "percentage": 26.42, "elapsed_time": "2:38:42", "remaining_time": "7:22:03"} +{"current_steps": 1808, "total_steps": 6840, "loss": 0.5394496917724609, "lr": 1.7594789246560638e-05, "epoch": 0.5287322707998245, "percentage": 26.43, "elapsed_time": "2:38:48", "remaining_time": "7:21:58"} +{"current_steps": 1809, "total_steps": 6840, "loss": 0.7824013233184814, "lr": 1.759164322785209e-05, "epoch": 0.5290247112150899, "percentage": 26.45, "elapsed_time": "2:38:54", "remaining_time": "7:21:56"} +{"current_steps": 1810, "total_steps": 6840, "loss": 0.6959671974182129, "lr": 1.7588495434643094e-05, "epoch": 0.5293171516303553, "percentage": 26.46, "elapsed_time": "2:38:59", "remaining_time": "7:21:49"} +{"current_steps": 1811, "total_steps": 6840, "loss": 0.7036902904510498, "lr": 1.7585345867669427e-05, "epoch": 0.5296095920456207, "percentage": 26.48, "elapsed_time": "2:39:04", "remaining_time": "7:21:45"} +{"current_steps": 1812, "total_steps": 6840, "loss": 0.6700775623321533, "lr": 1.7582194527667285e-05, "epoch": 0.5299020324608861, "percentage": 26.49, "elapsed_time": "2:39:11", "remaining_time": "7:21:44"} 
+{"current_steps": 1813, "total_steps": 6840, "loss": 0.648280918598175, "lr": 1.7579041415373273e-05, "epoch": 0.5301944728761515, "percentage": 26.51, "elapsed_time": "2:39:17", "remaining_time": "7:21:41"} +{"current_steps": 1814, "total_steps": 6840, "loss": 0.688485324382782, "lr": 1.757588653152441e-05, "epoch": 0.5304869132914168, "percentage": 26.52, "elapsed_time": "2:39:23", "remaining_time": "7:21:37"} +{"current_steps": 1815, "total_steps": 6840, "loss": 0.6743370890617371, "lr": 1.757272987685813e-05, "epoch": 0.5307793537066823, "percentage": 26.54, "elapsed_time": "2:39:28", "remaining_time": "7:21:30"} +{"current_steps": 1816, "total_steps": 6840, "loss": 0.5597015619277954, "lr": 1.7569571452112288e-05, "epoch": 0.5310717941219476, "percentage": 26.55, "elapsed_time": "2:39:34", "remaining_time": "7:21:26"} +{"current_steps": 1817, "total_steps": 6840, "loss": 0.48607051372528076, "lr": 1.756641125802514e-05, "epoch": 0.531364234537213, "percentage": 26.56, "elapsed_time": "2:39:40", "remaining_time": "7:21:24"} +{"current_steps": 1818, "total_steps": 6840, "loss": 0.6712289452552795, "lr": 1.7563249295335366e-05, "epoch": 0.5316566749524785, "percentage": 26.58, "elapsed_time": "2:39:44", "remaining_time": "7:21:16"} +{"current_steps": 1819, "total_steps": 6840, "loss": 0.5937772989273071, "lr": 1.7560085564782057e-05, "epoch": 0.5319491153677438, "percentage": 26.59, "elapsed_time": "2:39:49", "remaining_time": "7:21:11"} +{"current_steps": 1820, "total_steps": 6840, "loss": 0.7416468262672424, "lr": 1.7556920067104714e-05, "epoch": 0.5322415557830092, "percentage": 26.61, "elapsed_time": "2:39:53", "remaining_time": "7:21:01"} +{"current_steps": 1821, "total_steps": 6840, "loss": 0.6302096247673035, "lr": 1.7553752803043247e-05, "epoch": 0.5325339961982746, "percentage": 26.62, "elapsed_time": "2:39:58", "remaining_time": "7:20:55"} +{"current_steps": 1822, "total_steps": 6840, "loss": 0.5576045513153076, "lr": 1.7550583773337992e-05, "epoch": 
0.53282643661354, "percentage": 26.64, "elapsed_time": "2:40:04", "remaining_time": "7:20:50"} +{"current_steps": 1823, "total_steps": 6840, "loss": 0.5436257123947144, "lr": 1.7547412978729688e-05, "epoch": 0.5331188770288053, "percentage": 26.65, "elapsed_time": "2:40:08", "remaining_time": "7:20:43"} +{"current_steps": 1824, "total_steps": 6840, "loss": 0.5674831867218018, "lr": 1.754424041995949e-05, "epoch": 0.5334113174440708, "percentage": 26.67, "elapsed_time": "2:40:12", "remaining_time": "7:20:35"} +{"current_steps": 1825, "total_steps": 6840, "loss": 0.7254515290260315, "lr": 1.7541066097768965e-05, "epoch": 0.5337037578593362, "percentage": 26.68, "elapsed_time": "2:40:17", "remaining_time": "7:20:28"} +{"current_steps": 1826, "total_steps": 6840, "loss": 0.5706701278686523, "lr": 1.7537890012900088e-05, "epoch": 0.5339961982746015, "percentage": 26.7, "elapsed_time": "2:40:23", "remaining_time": "7:20:25"} +{"current_steps": 1827, "total_steps": 6840, "loss": 0.6801357269287109, "lr": 1.7534712166095253e-05, "epoch": 0.534288638689867, "percentage": 26.71, "elapsed_time": "2:40:28", "remaining_time": "7:20:18"} +{"current_steps": 1828, "total_steps": 6840, "loss": 0.6851463317871094, "lr": 1.753153255809726e-05, "epoch": 0.5345810791051323, "percentage": 26.73, "elapsed_time": "2:40:34", "remaining_time": "7:20:16"} +{"current_steps": 1829, "total_steps": 6840, "loss": 0.6475861072540283, "lr": 1.7528351189649324e-05, "epoch": 0.5348735195203977, "percentage": 26.74, "elapsed_time": "2:40:39", "remaining_time": "7:20:09"} +{"current_steps": 1830, "total_steps": 6840, "loss": 0.6953648924827576, "lr": 1.752516806149507e-05, "epoch": 0.5351659599356631, "percentage": 26.75, "elapsed_time": "2:40:45", "remaining_time": "7:20:06"} +{"current_steps": 1831, "total_steps": 6840, "loss": 0.5128777623176575, "lr": 1.7521983174378537e-05, "epoch": 0.5354584003509285, "percentage": 26.77, "elapsed_time": "2:40:51", "remaining_time": "7:20:03"} +{"current_steps": 
1832, "total_steps": 6840, "loss": 0.5780255198478699, "lr": 1.751879652904417e-05, "epoch": 0.5357508407661938, "percentage": 26.78, "elapsed_time": "2:40:55", "remaining_time": "7:19:55"} +{"current_steps": 1833, "total_steps": 6840, "loss": 0.581814169883728, "lr": 1.751560812623683e-05, "epoch": 0.5360432811814593, "percentage": 26.8, "elapsed_time": "2:41:01", "remaining_time": "7:19:50"} +{"current_steps": 1834, "total_steps": 6840, "loss": 0.5609169006347656, "lr": 1.7512417966701788e-05, "epoch": 0.5363357215967247, "percentage": 26.81, "elapsed_time": "2:41:05", "remaining_time": "7:19:43"} +{"current_steps": 1835, "total_steps": 6840, "loss": 0.6029868125915527, "lr": 1.7509226051184716e-05, "epoch": 0.53662816201199, "percentage": 26.83, "elapsed_time": "2:41:11", "remaining_time": "7:19:38"} +{"current_steps": 1836, "total_steps": 6840, "loss": 0.6749545335769653, "lr": 1.7506032380431718e-05, "epoch": 0.5369206024272555, "percentage": 26.84, "elapsed_time": "2:41:17", "remaining_time": "7:19:34"} +{"current_steps": 1837, "total_steps": 6840, "loss": 0.7710991501808167, "lr": 1.750283695518929e-05, "epoch": 0.5372130428425208, "percentage": 26.86, "elapsed_time": "2:41:21", "remaining_time": "7:19:26"} +{"current_steps": 1838, "total_steps": 6840, "loss": 0.6330907940864563, "lr": 1.7499639776204334e-05, "epoch": 0.5375054832577862, "percentage": 26.87, "elapsed_time": "2:41:25", "remaining_time": "7:19:18"} +{"current_steps": 1839, "total_steps": 6840, "loss": 0.655827522277832, "lr": 1.7496440844224186e-05, "epoch": 0.5377979236730516, "percentage": 26.89, "elapsed_time": "2:41:30", "remaining_time": "7:19:11"} +{"current_steps": 1840, "total_steps": 6840, "loss": 0.723412275314331, "lr": 1.7493240159996565e-05, "epoch": 0.538090364088317, "percentage": 26.9, "elapsed_time": "2:41:35", "remaining_time": "7:19:05"} +{"current_steps": 1841, "total_steps": 6840, "loss": 0.5504157543182373, "lr": 1.7490037724269618e-05, "epoch": 0.5383828045035824, 
"percentage": 26.92, "elapsed_time": "2:41:41", "remaining_time": "7:19:02"} +{"current_steps": 1842, "total_steps": 6840, "loss": 0.6258282661437988, "lr": 1.7486833537791895e-05, "epoch": 0.5386752449188478, "percentage": 26.93, "elapsed_time": "2:41:46", "remaining_time": "7:18:56"} +{"current_steps": 1843, "total_steps": 6840, "loss": 0.7044231295585632, "lr": 1.748362760131235e-05, "epoch": 0.5389676853341132, "percentage": 26.94, "elapsed_time": "2:41:52", "remaining_time": "7:18:55"} +{"current_steps": 1844, "total_steps": 6840, "loss": 0.5979568362236023, "lr": 1.7480419915580357e-05, "epoch": 0.5392601257493785, "percentage": 26.96, "elapsed_time": "2:41:57", "remaining_time": "7:18:47"} +{"current_steps": 1845, "total_steps": 6840, "loss": 0.558562159538269, "lr": 1.7477210481345686e-05, "epoch": 0.539552566164644, "percentage": 26.97, "elapsed_time": "2:42:02", "remaining_time": "7:18:40"} +{"current_steps": 1846, "total_steps": 6840, "loss": 0.5965149402618408, "lr": 1.747399929935853e-05, "epoch": 0.5398450065799093, "percentage": 26.99, "elapsed_time": "2:42:07", "remaining_time": "7:18:36"} +{"current_steps": 1847, "total_steps": 6840, "loss": 0.6202878355979919, "lr": 1.7470786370369483e-05, "epoch": 0.5401374469951747, "percentage": 27.0, "elapsed_time": "2:42:13", "remaining_time": "7:18:32"} +{"current_steps": 1848, "total_steps": 6840, "loss": 0.652141273021698, "lr": 1.746757169512954e-05, "epoch": 0.5404298874104401, "percentage": 27.02, "elapsed_time": "2:42:18", "remaining_time": "7:18:26"} +{"current_steps": 1849, "total_steps": 6840, "loss": 0.5713402628898621, "lr": 1.746435527439012e-05, "epoch": 0.5407223278257055, "percentage": 27.03, "elapsed_time": "2:42:22", "remaining_time": "7:18:18"} +{"current_steps": 1850, "total_steps": 6840, "loss": 0.49776554107666016, "lr": 1.7461137108903042e-05, "epoch": 0.541014768240971, "percentage": 27.05, "elapsed_time": "2:42:27", "remaining_time": "7:18:11"} +{"current_steps": 1851, "total_steps": 
6840, "loss": 0.7047991752624512, "lr": 1.7457917199420525e-05, "epoch": 0.5413072086562363, "percentage": 27.06, "elapsed_time": "2:42:32", "remaining_time": "7:18:05"} +{"current_steps": 1852, "total_steps": 6840, "loss": 0.7019875049591064, "lr": 1.7454695546695207e-05, "epoch": 0.5415996490715017, "percentage": 27.08, "elapsed_time": "2:42:37", "remaining_time": "7:17:58"} +{"current_steps": 1853, "total_steps": 6840, "loss": 0.5448435544967651, "lr": 1.745147215148013e-05, "epoch": 0.541892089486767, "percentage": 27.09, "elapsed_time": "2:42:41", "remaining_time": "7:17:50"} +{"current_steps": 1854, "total_steps": 6840, "loss": 0.6042202711105347, "lr": 1.7448247014528745e-05, "epoch": 0.5421845299020325, "percentage": 27.11, "elapsed_time": "2:42:47", "remaining_time": "7:17:48"} +{"current_steps": 1855, "total_steps": 6840, "loss": 0.8448539972305298, "lr": 1.744502013659491e-05, "epoch": 0.5424769703172978, "percentage": 27.12, "elapsed_time": "2:42:53", "remaining_time": "7:17:44"} +{"current_steps": 1856, "total_steps": 6840, "loss": 0.6541755795478821, "lr": 1.7441791518432877e-05, "epoch": 0.5427694107325632, "percentage": 27.13, "elapsed_time": "2:42:57", "remaining_time": "7:17:36"} +{"current_steps": 1857, "total_steps": 6840, "loss": 0.6700184345245361, "lr": 1.7438561160797326e-05, "epoch": 0.5430618511478287, "percentage": 27.15, "elapsed_time": "2:43:03", "remaining_time": "7:17:31"} +{"current_steps": 1858, "total_steps": 6840, "loss": 0.6407896280288696, "lr": 1.7435329064443335e-05, "epoch": 0.543354291563094, "percentage": 27.16, "elapsed_time": "2:43:07", "remaining_time": "7:17:25"} +{"current_steps": 1859, "total_steps": 6840, "loss": 0.5380120277404785, "lr": 1.7432095230126382e-05, "epoch": 0.5436467319783594, "percentage": 27.18, "elapsed_time": "2:43:13", "remaining_time": "7:17:20"} +{"current_steps": 1860, "total_steps": 6840, "loss": 0.6561373472213745, "lr": 1.7428859658602353e-05, "epoch": 0.5439391723936248, "percentage": 27.19, 
"elapsed_time": "2:43:18", "remaining_time": "7:17:13"} +{"current_steps": 1861, "total_steps": 6840, "loss": 0.724541962146759, "lr": 1.7425622350627545e-05, "epoch": 0.5442316128088902, "percentage": 27.21, "elapsed_time": "2:43:23", "remaining_time": "7:17:08"} +{"current_steps": 1862, "total_steps": 6840, "loss": 0.6258946657180786, "lr": 1.7422383306958666e-05, "epoch": 0.5445240532241555, "percentage": 27.22, "elapsed_time": "2:43:29", "remaining_time": "7:17:06"} +{"current_steps": 1863, "total_steps": 6840, "loss": 0.560769259929657, "lr": 1.7419142528352815e-05, "epoch": 0.544816493639421, "percentage": 27.24, "elapsed_time": "2:43:34", "remaining_time": "7:16:58"} +{"current_steps": 1864, "total_steps": 6840, "loss": 0.7782202363014221, "lr": 1.741590001556751e-05, "epoch": 0.5451089340546864, "percentage": 27.25, "elapsed_time": "2:43:38", "remaining_time": "7:16:51"} +{"current_steps": 1865, "total_steps": 6840, "loss": 0.5956888198852539, "lr": 1.7412655769360663e-05, "epoch": 0.5454013744699517, "percentage": 27.27, "elapsed_time": "2:43:45", "remaining_time": "7:16:50"} +{"current_steps": 1866, "total_steps": 6840, "loss": 0.6251999139785767, "lr": 1.7409409790490602e-05, "epoch": 0.5456938148852172, "percentage": 27.28, "elapsed_time": "2:43:50", "remaining_time": "7:16:44"} +{"current_steps": 1867, "total_steps": 6840, "loss": 0.5864061713218689, "lr": 1.740616207971605e-05, "epoch": 0.5459862553004825, "percentage": 27.3, "elapsed_time": "2:43:55", "remaining_time": "7:16:38"} +{"current_steps": 1868, "total_steps": 6840, "loss": 0.6241225004196167, "lr": 1.7402912637796146e-05, "epoch": 0.5462786957157479, "percentage": 27.31, "elapsed_time": "2:44:01", "remaining_time": "7:16:33"} +{"current_steps": 1869, "total_steps": 6840, "loss": 0.7190053462982178, "lr": 1.739966146549042e-05, "epoch": 0.5465711361310133, "percentage": 27.32, "elapsed_time": "2:44:06", "remaining_time": "7:16:29"} +{"current_steps": 1870, "total_steps": 6840, "loss": 
0.6771985292434692, "lr": 1.739640856355882e-05, "epoch": 0.5468635765462787, "percentage": 27.34, "elapsed_time": "2:44:11", "remaining_time": "7:16:21"} +{"current_steps": 1871, "total_steps": 6840, "loss": 0.5480636954307556, "lr": 1.7393153932761687e-05, "epoch": 0.547156016961544, "percentage": 27.35, "elapsed_time": "2:44:16", "remaining_time": "7:16:17"} +{"current_steps": 1872, "total_steps": 6840, "loss": 0.7362977862358093, "lr": 1.7389897573859773e-05, "epoch": 0.5474484573768095, "percentage": 27.37, "elapsed_time": "2:44:21", "remaining_time": "7:16:11"} +{"current_steps": 1873, "total_steps": 6840, "loss": 0.6483198404312134, "lr": 1.7386639487614232e-05, "epoch": 0.5477408977920749, "percentage": 27.38, "elapsed_time": "2:44:26", "remaining_time": "7:16:06"} +{"current_steps": 1874, "total_steps": 6840, "loss": 0.479977548122406, "lr": 1.7383379674786622e-05, "epoch": 0.5480333382073402, "percentage": 27.4, "elapsed_time": "2:44:32", "remaining_time": "7:16:02"} +{"current_steps": 1875, "total_steps": 6840, "loss": 0.6824718117713928, "lr": 1.738011813613891e-05, "epoch": 0.5483257786226057, "percentage": 27.41, "elapsed_time": "2:44:38", "remaining_time": "7:15:57"} +{"current_steps": 1876, "total_steps": 6840, "loss": 0.564873218536377, "lr": 1.737685487243345e-05, "epoch": 0.548618219037871, "percentage": 27.43, "elapsed_time": "2:44:43", "remaining_time": "7:15:51"} +{"current_steps": 1877, "total_steps": 6840, "loss": 0.5748772621154785, "lr": 1.7373589884433015e-05, "epoch": 0.5489106594531364, "percentage": 27.44, "elapsed_time": "2:44:49", "remaining_time": "7:15:48"} +{"current_steps": 1878, "total_steps": 6840, "loss": 0.6403437852859497, "lr": 1.7370323172900778e-05, "epoch": 0.5492030998684018, "percentage": 27.46, "elapsed_time": "2:44:53", "remaining_time": "7:15:40"} +{"current_steps": 1879, "total_steps": 6840, "loss": 0.8253078460693359, "lr": 1.7367054738600312e-05, "epoch": 0.5494955402836672, "percentage": 27.47, "elapsed_time": 
"2:44:57", "remaining_time": "7:15:32"} +{"current_steps": 1880, "total_steps": 6840, "loss": 0.6823058128356934, "lr": 1.7363784582295596e-05, "epoch": 0.5497879806989326, "percentage": 27.49, "elapsed_time": "2:45:02", "remaining_time": "7:15:27"} +{"current_steps": 1881, "total_steps": 6840, "loss": 0.48659563064575195, "lr": 1.7360512704751003e-05, "epoch": 0.550080421114198, "percentage": 27.5, "elapsed_time": "2:45:08", "remaining_time": "7:15:22"} +{"current_steps": 1882, "total_steps": 6840, "loss": 0.6380710601806641, "lr": 1.735723910673132e-05, "epoch": 0.5503728615294634, "percentage": 27.51, "elapsed_time": "2:45:13", "remaining_time": "7:15:17"} +{"current_steps": 1883, "total_steps": 6840, "loss": 0.6956683397293091, "lr": 1.7353963789001723e-05, "epoch": 0.5506653019447287, "percentage": 27.53, "elapsed_time": "2:45:18", "remaining_time": "7:15:10"} +{"current_steps": 1884, "total_steps": 6840, "loss": 0.5751473903656006, "lr": 1.735068675232781e-05, "epoch": 0.5509577423599942, "percentage": 27.54, "elapsed_time": "2:45:22", "remaining_time": "7:15:02"} +{"current_steps": 1885, "total_steps": 6840, "loss": 0.7265490293502808, "lr": 1.734740799747556e-05, "epoch": 0.5512501827752595, "percentage": 27.56, "elapsed_time": "2:45:27", "remaining_time": "7:14:56"} +{"current_steps": 1886, "total_steps": 6840, "loss": 0.7419843673706055, "lr": 1.734412752521136e-05, "epoch": 0.5515426231905249, "percentage": 27.57, "elapsed_time": "2:45:31", "remaining_time": "7:14:47"} +{"current_steps": 1887, "total_steps": 6840, "loss": 0.7381073236465454, "lr": 1.734084533630201e-05, "epoch": 0.5518350636057903, "percentage": 27.59, "elapsed_time": "2:45:36", "remaining_time": "7:14:41"} +{"current_steps": 1888, "total_steps": 6840, "loss": 0.6542054414749146, "lr": 1.7337561431514692e-05, "epoch": 0.5521275040210557, "percentage": 27.6, "elapsed_time": "2:45:40", "remaining_time": "7:14:32"} +{"current_steps": 1889, "total_steps": 6840, "loss": 0.6283866167068481, 
"lr": 1.7334275811617e-05, "epoch": 0.5524199444363211, "percentage": 27.62, "elapsed_time": "2:45:45", "remaining_time": "7:14:27"} +{"current_steps": 1890, "total_steps": 6840, "loss": 0.6171330809593201, "lr": 1.7330988477376935e-05, "epoch": 0.5527123848515865, "percentage": 27.63, "elapsed_time": "2:45:50", "remaining_time": "7:14:22"} +{"current_steps": 1891, "total_steps": 6840, "loss": 0.5181430578231812, "lr": 1.7327699429562887e-05, "epoch": 0.5530048252668519, "percentage": 27.65, "elapsed_time": "2:45:56", "remaining_time": "7:14:16"} +{"current_steps": 1892, "total_steps": 6840, "loss": 0.7337771058082581, "lr": 1.7324408668943645e-05, "epoch": 0.5532972656821172, "percentage": 27.66, "elapsed_time": "2:46:00", "remaining_time": "7:14:08"} +{"current_steps": 1893, "total_steps": 6840, "loss": 0.5193721055984497, "lr": 1.7321116196288413e-05, "epoch": 0.5535897060973827, "percentage": 27.68, "elapsed_time": "2:46:04", "remaining_time": "7:14:00"} +{"current_steps": 1894, "total_steps": 6840, "loss": 0.7743211388587952, "lr": 1.731782201236678e-05, "epoch": 0.553882146512648, "percentage": 27.69, "elapsed_time": "2:46:09", "remaining_time": "7:13:54"} +{"current_steps": 1895, "total_steps": 6840, "loss": 0.8244242072105408, "lr": 1.731452611794875e-05, "epoch": 0.5541745869279134, "percentage": 27.7, "elapsed_time": "2:46:14", "remaining_time": "7:13:48"} +{"current_steps": 1896, "total_steps": 6840, "loss": 0.6276153326034546, "lr": 1.7311228513804712e-05, "epoch": 0.5544670273431789, "percentage": 27.72, "elapsed_time": "2:46:19", "remaining_time": "7:13:41"} +{"current_steps": 1897, "total_steps": 6840, "loss": 0.7919771671295166, "lr": 1.7307929200705463e-05, "epoch": 0.5547594677584442, "percentage": 27.73, "elapsed_time": "2:46:24", "remaining_time": "7:13:37"} +{"current_steps": 1898, "total_steps": 6840, "loss": 0.6187459230422974, "lr": 1.7304628179422192e-05, "epoch": 0.5550519081737096, "percentage": 27.75, "elapsed_time": "2:46:30", 
"remaining_time": "7:13:32"} +{"current_steps": 1899, "total_steps": 6840, "loss": 0.6190363764762878, "lr": 1.7301325450726497e-05, "epoch": 0.555344348588975, "percentage": 27.76, "elapsed_time": "2:46:35", "remaining_time": "7:13:26"} +{"current_steps": 1900, "total_steps": 6840, "loss": 0.5537956953048706, "lr": 1.7298021015390375e-05, "epoch": 0.5556367890042404, "percentage": 27.78, "elapsed_time": "2:46:41", "remaining_time": "7:13:24"} +{"current_steps": 1901, "total_steps": 6840, "loss": 0.7164788246154785, "lr": 1.729471487418621e-05, "epoch": 0.5559292294195057, "percentage": 27.79, "elapsed_time": "2:46:50", "remaining_time": "7:13:28"} +{"current_steps": 1902, "total_steps": 6840, "loss": 0.6101689338684082, "lr": 1.7291407027886796e-05, "epoch": 0.5562216698347712, "percentage": 27.81, "elapsed_time": "2:46:56", "remaining_time": "7:13:24"} +{"current_steps": 1903, "total_steps": 6840, "loss": 0.7112093567848206, "lr": 1.7288097477265322e-05, "epoch": 0.5565141102500366, "percentage": 27.82, "elapsed_time": "2:47:02", "remaining_time": "7:13:21"} +{"current_steps": 1904, "total_steps": 6840, "loss": 0.7807149291038513, "lr": 1.7284786223095376e-05, "epoch": 0.5568065506653019, "percentage": 27.84, "elapsed_time": "2:47:06", "remaining_time": "7:13:13"} +{"current_steps": 1905, "total_steps": 6840, "loss": 0.5723121166229248, "lr": 1.7281473266150942e-05, "epoch": 0.5570989910805674, "percentage": 27.85, "elapsed_time": "2:47:11", "remaining_time": "7:13:07"} +{"current_steps": 1906, "total_steps": 6840, "loss": 0.6901307106018066, "lr": 1.7278158607206402e-05, "epoch": 0.5573914314958327, "percentage": 27.87, "elapsed_time": "2:47:15", "remaining_time": "7:12:59"} +{"current_steps": 1907, "total_steps": 6840, "loss": 0.8247314095497131, "lr": 1.7274842247036547e-05, "epoch": 0.5576838719110981, "percentage": 27.88, "elapsed_time": "2:47:21", "remaining_time": "7:12:55"} +{"current_steps": 1908, "total_steps": 6840, "loss": 0.758405327796936, "lr": 
1.727152418641654e-05, "epoch": 0.5579763123263635, "percentage": 27.89, "elapsed_time": "2:47:28", "remaining_time": "7:12:53"} +{"current_steps": 1909, "total_steps": 6840, "loss": 0.6448276042938232, "lr": 1.7268204426121967e-05, "epoch": 0.5582687527416289, "percentage": 27.91, "elapsed_time": "2:47:33", "remaining_time": "7:12:48"} +{"current_steps": 1910, "total_steps": 6840, "loss": 0.6846790313720703, "lr": 1.7264882966928803e-05, "epoch": 0.5585611931568942, "percentage": 27.92, "elapsed_time": "2:47:37", "remaining_time": "7:12:41"} +{"current_steps": 1911, "total_steps": 6840, "loss": 0.6427637338638306, "lr": 1.726155980961342e-05, "epoch": 0.5588536335721597, "percentage": 27.94, "elapsed_time": "2:47:42", "remaining_time": "7:12:34"} +{"current_steps": 1912, "total_steps": 6840, "loss": 0.7105496525764465, "lr": 1.7258234954952578e-05, "epoch": 0.5591460739874251, "percentage": 27.95, "elapsed_time": "2:47:47", "remaining_time": "7:12:27"} +{"current_steps": 1913, "total_steps": 6840, "loss": 0.6307404041290283, "lr": 1.7254908403723446e-05, "epoch": 0.5594385144026904, "percentage": 27.97, "elapsed_time": "2:47:52", "remaining_time": "7:12:22"} +{"current_steps": 1914, "total_steps": 6840, "loss": 0.7194197177886963, "lr": 1.7251580156703587e-05, "epoch": 0.5597309548179559, "percentage": 27.98, "elapsed_time": "2:47:58", "remaining_time": "7:12:18"} +{"current_steps": 1915, "total_steps": 6840, "loss": 0.676772952079773, "lr": 1.7248250214670955e-05, "epoch": 0.5600233952332212, "percentage": 28.0, "elapsed_time": "2:48:03", "remaining_time": "7:12:13"} +{"current_steps": 1916, "total_steps": 6840, "loss": 0.6047924160957336, "lr": 1.724491857840391e-05, "epoch": 0.5603158356484866, "percentage": 28.01, "elapsed_time": "2:48:07", "remaining_time": "7:12:05"} +{"current_steps": 1917, "total_steps": 6840, "loss": 0.7412474155426025, "lr": 1.7241585248681192e-05, "epoch": 0.560608276063752, "percentage": 28.03, "elapsed_time": "2:48:13", 
"remaining_time": "7:12:01"} +{"current_steps": 1918, "total_steps": 6840, "loss": 0.6337922215461731, "lr": 1.7238250226281952e-05, "epoch": 0.5609007164790174, "percentage": 28.04, "elapsed_time": "2:48:18", "remaining_time": "7:11:55"} +{"current_steps": 1919, "total_steps": 6840, "loss": 0.7192416787147522, "lr": 1.7234913511985733e-05, "epoch": 0.5611931568942828, "percentage": 28.06, "elapsed_time": "2:48:22", "remaining_time": "7:11:46"} +{"current_steps": 1920, "total_steps": 6840, "loss": 0.6576168537139893, "lr": 1.723157510657247e-05, "epoch": 0.5614855973095482, "percentage": 28.07, "elapsed_time": "2:48:27", "remaining_time": "7:11:40"} +{"current_steps": 1921, "total_steps": 6840, "loss": 0.6592451333999634, "lr": 1.722823501082249e-05, "epoch": 0.5617780377248136, "percentage": 28.08, "elapsed_time": "2:48:33", "remaining_time": "7:11:37"} +{"current_steps": 1922, "total_steps": 6840, "loss": 0.8042774796485901, "lr": 1.722489322551653e-05, "epoch": 0.5620704781400789, "percentage": 28.1, "elapsed_time": "2:48:38", "remaining_time": "7:11:31"} +{"current_steps": 1923, "total_steps": 6840, "loss": 0.727135181427002, "lr": 1.7221549751435706e-05, "epoch": 0.5623629185553444, "percentage": 28.11, "elapsed_time": "2:48:43", "remaining_time": "7:11:25"} +{"current_steps": 1924, "total_steps": 6840, "loss": 0.5641134977340698, "lr": 1.7218204589361535e-05, "epoch": 0.5626553589706097, "percentage": 28.13, "elapsed_time": "2:48:48", "remaining_time": "7:11:20"} +{"current_steps": 1925, "total_steps": 6840, "loss": 0.6354084610939026, "lr": 1.7214857740075924e-05, "epoch": 0.5629477993858751, "percentage": 28.14, "elapsed_time": "2:48:55", "remaining_time": "7:11:17"} +{"current_steps": 1926, "total_steps": 6840, "loss": 0.6044377088546753, "lr": 1.7211509204361187e-05, "epoch": 0.5632402398011405, "percentage": 28.16, "elapsed_time": "2:49:00", "remaining_time": "7:11:13"} +{"current_steps": 1927, "total_steps": 6840, "loss": 0.5519559383392334, "lr": 
1.7208158983000022e-05, "epoch": 0.5635326802164059, "percentage": 28.17, "elapsed_time": "2:49:06", "remaining_time": "7:11:08"} +{"current_steps": 1928, "total_steps": 6840, "loss": 0.4480612277984619, "lr": 1.7204807076775514e-05, "epoch": 0.5638251206316713, "percentage": 28.19, "elapsed_time": "2:49:11", "remaining_time": "7:11:04"} +{"current_steps": 1929, "total_steps": 6840, "loss": 0.5929607152938843, "lr": 1.7201453486471167e-05, "epoch": 0.5641175610469367, "percentage": 28.2, "elapsed_time": "2:49:17", "remaining_time": "7:10:59"} +{"current_steps": 1930, "total_steps": 6840, "loss": 0.6863572001457214, "lr": 1.7198098212870847e-05, "epoch": 0.5644100014622021, "percentage": 28.22, "elapsed_time": "2:49:22", "remaining_time": "7:10:54"} +{"current_steps": 1931, "total_steps": 6840, "loss": 0.5551834106445312, "lr": 1.719474125675884e-05, "epoch": 0.5647024418774674, "percentage": 28.23, "elapsed_time": "2:49:28", "remaining_time": "7:10:51"} +{"current_steps": 1932, "total_steps": 6840, "loss": 0.6113166809082031, "lr": 1.7191382618919802e-05, "epoch": 0.5649948822927329, "percentage": 28.25, "elapsed_time": "2:49:33", "remaining_time": "7:10:44"} +{"current_steps": 1933, "total_steps": 6840, "loss": 0.7833362817764282, "lr": 1.7188022300138805e-05, "epoch": 0.5652873227079982, "percentage": 28.26, "elapsed_time": "2:49:38", "remaining_time": "7:10:39"} +{"current_steps": 1934, "total_steps": 6840, "loss": 0.5981882810592651, "lr": 1.71846603012013e-05, "epoch": 0.5655797631232636, "percentage": 28.27, "elapsed_time": "2:49:44", "remaining_time": "7:10:36"} +{"current_steps": 1935, "total_steps": 6840, "loss": 0.6009912490844727, "lr": 1.7181296622893132e-05, "epoch": 0.5658722035385291, "percentage": 28.29, "elapsed_time": "2:49:50", "remaining_time": "7:10:32"} +{"current_steps": 1936, "total_steps": 6840, "loss": 0.5605272054672241, "lr": 1.717793126600054e-05, "epoch": 0.5661646439537944, "percentage": 28.3, "elapsed_time": "2:49:55", 
"remaining_time": "7:10:25"} +{"current_steps": 1937, "total_steps": 6840, "loss": 0.6310821771621704, "lr": 1.717456423131016e-05, "epoch": 0.5664570843690598, "percentage": 28.32, "elapsed_time": "2:50:01", "remaining_time": "7:10:21"} +{"current_steps": 1938, "total_steps": 6840, "loss": 0.6776266694068909, "lr": 1.7171195519609013e-05, "epoch": 0.5667495247843252, "percentage": 28.33, "elapsed_time": "2:50:06", "remaining_time": "7:10:15"} +{"current_steps": 1939, "total_steps": 6840, "loss": 0.6369091868400574, "lr": 1.7167825131684516e-05, "epoch": 0.5670419651995906, "percentage": 28.35, "elapsed_time": "2:50:11", "remaining_time": "7:10:10"} +{"current_steps": 1940, "total_steps": 6840, "loss": 0.6241647005081177, "lr": 1.7164453068324472e-05, "epoch": 0.5673344056148559, "percentage": 28.36, "elapsed_time": "2:50:15", "remaining_time": "7:10:01"} +{"current_steps": 1941, "total_steps": 6840, "loss": 0.6411961317062378, "lr": 1.7161079330317086e-05, "epoch": 0.5676268460301214, "percentage": 28.38, "elapsed_time": "2:50:20", "remaining_time": "7:09:55"} +{"current_steps": 1942, "total_steps": 6840, "loss": 0.6148936152458191, "lr": 1.7157703918450942e-05, "epoch": 0.5679192864453868, "percentage": 28.39, "elapsed_time": "2:50:25", "remaining_time": "7:09:50"} +{"current_steps": 1943, "total_steps": 6840, "loss": 0.5006934404373169, "lr": 1.7154326833515034e-05, "epoch": 0.5682117268606521, "percentage": 28.41, "elapsed_time": "2:50:31", "remaining_time": "7:09:46"} +{"current_steps": 1944, "total_steps": 6840, "loss": 0.7446701526641846, "lr": 1.7150948076298722e-05, "epoch": 0.5685041672759176, "percentage": 28.42, "elapsed_time": "2:50:36", "remaining_time": "7:09:39"} +{"current_steps": 1945, "total_steps": 6840, "loss": 0.6159533262252808, "lr": 1.7147567647591777e-05, "epoch": 0.5687966076911829, "percentage": 28.44, "elapsed_time": "2:50:41", "remaining_time": "7:09:34"} +{"current_steps": 1946, "total_steps": 6840, "loss": 0.6437554359436035, "lr": 
1.7144185548184355e-05, "epoch": 0.5690890481064483, "percentage": 28.45, "elapsed_time": "2:50:46", "remaining_time": "7:09:28"} +{"current_steps": 1947, "total_steps": 6840, "loss": 0.6229397654533386, "lr": 1.7140801778866995e-05, "epoch": 0.5693814885217137, "percentage": 28.46, "elapsed_time": "2:50:52", "remaining_time": "7:09:25"} +{"current_steps": 1948, "total_steps": 6840, "loss": 0.5777184963226318, "lr": 1.7137416340430636e-05, "epoch": 0.5696739289369791, "percentage": 28.48, "elapsed_time": "2:50:56", "remaining_time": "7:09:16"} +{"current_steps": 1949, "total_steps": 6840, "loss": 0.7817827463150024, "lr": 1.7134029233666603e-05, "epoch": 0.5699663693522444, "percentage": 28.49, "elapsed_time": "2:51:00", "remaining_time": "7:09:09"} +{"current_steps": 1950, "total_steps": 6840, "loss": 0.6784861087799072, "lr": 1.713064045936662e-05, "epoch": 0.5702588097675099, "percentage": 28.51, "elapsed_time": "2:51:06", "remaining_time": "7:09:04"} +{"current_steps": 1951, "total_steps": 6840, "loss": 0.6883150339126587, "lr": 1.7127250018322777e-05, "epoch": 0.5705512501827753, "percentage": 28.52, "elapsed_time": "2:51:11", "remaining_time": "7:08:58"} +{"current_steps": 1952, "total_steps": 6840, "loss": 0.5464504957199097, "lr": 1.712385791132758e-05, "epoch": 0.5708436905980406, "percentage": 28.54, "elapsed_time": "2:51:16", "remaining_time": "7:08:54"} +{"current_steps": 1953, "total_steps": 6840, "loss": 0.5950040817260742, "lr": 1.7120464139173908e-05, "epoch": 0.5711361310133061, "percentage": 28.55, "elapsed_time": "2:51:22", "remaining_time": "7:08:49"} +{"current_steps": 1954, "total_steps": 6840, "loss": 0.6381576061248779, "lr": 1.7117068702655034e-05, "epoch": 0.5714285714285714, "percentage": 28.57, "elapsed_time": "2:51:28", "remaining_time": "7:08:46"} +{"current_steps": 1955, "total_steps": 6840, "loss": 0.6611777544021606, "lr": 1.7113671602564628e-05, "epoch": 0.5717210118438368, "percentage": 28.58, "elapsed_time": "2:51:33", 
"remaining_time": "7:08:39"} +{"current_steps": 1956, "total_steps": 6840, "loss": 0.5057446956634521, "lr": 1.7110272839696735e-05, "epoch": 0.5720134522591022, "percentage": 28.6, "elapsed_time": "2:51:38", "remaining_time": "7:08:35"} +{"current_steps": 1957, "total_steps": 6840, "loss": 0.6095671653747559, "lr": 1.7106872414845798e-05, "epoch": 0.5723058926743676, "percentage": 28.61, "elapsed_time": "2:51:43", "remaining_time": "7:08:29"} +{"current_steps": 1958, "total_steps": 6840, "loss": 0.5514808893203735, "lr": 1.710347032880664e-05, "epoch": 0.572598333089633, "percentage": 28.63, "elapsed_time": "2:51:48", "remaining_time": "7:08:21"} +{"current_steps": 1959, "total_steps": 6840, "loss": 0.6491304039955139, "lr": 1.7100066582374487e-05, "epoch": 0.5728907735048984, "percentage": 28.64, "elapsed_time": "2:51:53", "remaining_time": "7:08:16"} +{"current_steps": 1960, "total_steps": 6840, "loss": 0.6759692430496216, "lr": 1.7096661176344936e-05, "epoch": 0.5731832139201638, "percentage": 28.65, "elapsed_time": "2:51:57", "remaining_time": "7:08:08"} +{"current_steps": 1961, "total_steps": 6840, "loss": 0.5897858142852783, "lr": 1.709325411151399e-05, "epoch": 0.5734756543354291, "percentage": 28.67, "elapsed_time": "2:52:02", "remaining_time": "7:08:01"} +{"current_steps": 1962, "total_steps": 6840, "loss": 0.6822922229766846, "lr": 1.7089845388678015e-05, "epoch": 0.5737680947506946, "percentage": 28.68, "elapsed_time": "2:52:07", "remaining_time": "7:07:56"} +{"current_steps": 1963, "total_steps": 6840, "loss": 0.7694820165634155, "lr": 1.7086435008633792e-05, "epoch": 0.5740605351659599, "percentage": 28.7, "elapsed_time": "2:52:13", "remaining_time": "7:07:52"} +{"current_steps": 1964, "total_steps": 6840, "loss": 0.702151358127594, "lr": 1.7083022972178473e-05, "epoch": 0.5743529755812253, "percentage": 28.71, "elapsed_time": "2:52:17", "remaining_time": "7:07:45"} +{"current_steps": 1965, "total_steps": 6840, "loss": 0.768844485282898, "lr": 
1.7079609280109597e-05, "epoch": 0.5746454159964907, "percentage": 28.73, "elapsed_time": "2:52:22", "remaining_time": "7:07:39"} +{"current_steps": 1966, "total_steps": 6840, "loss": 0.6641331911087036, "lr": 1.7076193933225097e-05, "epoch": 0.5749378564117561, "percentage": 28.74, "elapsed_time": "2:52:27", "remaining_time": "7:07:32"} +{"current_steps": 1967, "total_steps": 6840, "loss": 0.7176777124404907, "lr": 1.707277693232329e-05, "epoch": 0.5752302968270215, "percentage": 28.76, "elapsed_time": "2:52:33", "remaining_time": "7:07:29"} +{"current_steps": 1968, "total_steps": 6840, "loss": 0.6543929576873779, "lr": 1.7069358278202877e-05, "epoch": 0.5755227372422869, "percentage": 28.77, "elapsed_time": "2:52:37", "remaining_time": "7:07:21"} +{"current_steps": 1969, "total_steps": 6840, "loss": 0.7501214742660522, "lr": 1.7065937971662953e-05, "epoch": 0.5758151776575523, "percentage": 28.79, "elapsed_time": "2:52:42", "remaining_time": "7:07:14"} +{"current_steps": 1970, "total_steps": 6840, "loss": 0.6013212203979492, "lr": 1.7062516013502984e-05, "epoch": 0.5761076180728176, "percentage": 28.8, "elapsed_time": "2:52:48", "remaining_time": "7:07:10"} +{"current_steps": 1971, "total_steps": 6840, "loss": 0.5920547246932983, "lr": 1.7059092404522843e-05, "epoch": 0.5764000584880831, "percentage": 28.82, "elapsed_time": "2:52:51", "remaining_time": "7:07:02"} +{"current_steps": 1972, "total_steps": 6840, "loss": 0.6720744371414185, "lr": 1.7055667145522767e-05, "epoch": 0.5766924989033484, "percentage": 28.83, "elapsed_time": "2:52:57", "remaining_time": "7:06:57"} +{"current_steps": 1973, "total_steps": 6840, "loss": 0.6938234567642212, "lr": 1.70522402373034e-05, "epoch": 0.5769849393186138, "percentage": 28.85, "elapsed_time": "2:53:03", "remaining_time": "7:06:52"} +{"current_steps": 1974, "total_steps": 6840, "loss": 0.6430555582046509, "lr": 1.704881168066575e-05, "epoch": 0.5772773797338793, "percentage": 28.86, "elapsed_time": "2:53:07", 
"remaining_time": "7:06:46"} +{"current_steps": 1975, "total_steps": 6840, "loss": 0.7738221883773804, "lr": 1.7045381476411234e-05, "epoch": 0.5775698201491446, "percentage": 28.87, "elapsed_time": "2:53:14", "remaining_time": "7:06:43"} +{"current_steps": 1976, "total_steps": 6840, "loss": 0.5335453748703003, "lr": 1.704194962534163e-05, "epoch": 0.57786226056441, "percentage": 28.89, "elapsed_time": "2:53:19", "remaining_time": "7:06:38"} +{"current_steps": 1977, "total_steps": 6840, "loss": 0.691404402256012, "lr": 1.7038516128259118e-05, "epoch": 0.5781547009796754, "percentage": 28.9, "elapsed_time": "2:53:24", "remaining_time": "7:06:33"} +{"current_steps": 1978, "total_steps": 6840, "loss": 0.7371880412101746, "lr": 1.7035080985966253e-05, "epoch": 0.5784471413949408, "percentage": 28.92, "elapsed_time": "2:53:29", "remaining_time": "7:06:26"} +{"current_steps": 1979, "total_steps": 6840, "loss": 0.5661574602127075, "lr": 1.7031644199265987e-05, "epoch": 0.5787395818102061, "percentage": 28.93, "elapsed_time": "2:53:33", "remaining_time": "7:06:19"} +{"current_steps": 1980, "total_steps": 6840, "loss": 0.5823863744735718, "lr": 1.702820576896164e-05, "epoch": 0.5790320222254716, "percentage": 28.95, "elapsed_time": "2:53:40", "remaining_time": "7:06:16"} +{"current_steps": 1981, "total_steps": 6840, "loss": 0.6228796243667603, "lr": 1.7024765695856924e-05, "epoch": 0.579324462640737, "percentage": 28.96, "elapsed_time": "2:53:44", "remaining_time": "7:06:09"} +{"current_steps": 1982, "total_steps": 6840, "loss": 0.5788040161132812, "lr": 1.702132398075594e-05, "epoch": 0.5796169030560023, "percentage": 28.98, "elapsed_time": "2:53:49", "remaining_time": "7:06:03"} +{"current_steps": 1983, "total_steps": 6840, "loss": 0.5950253009796143, "lr": 1.701788062446317e-05, "epoch": 0.5799093434712678, "percentage": 28.99, "elapsed_time": "2:53:54", "remaining_time": "7:05:57"} +{"current_steps": 1984, "total_steps": 6840, "loss": 0.5672034025192261, "lr": 
1.7014435627783466e-05, "epoch": 0.5802017838865331, "percentage": 29.01, "elapsed_time": "2:53:59", "remaining_time": "7:05:51"} +{"current_steps": 1985, "total_steps": 6840, "loss": 0.6646316051483154, "lr": 1.7010988991522085e-05, "epoch": 0.5804942243017985, "percentage": 29.02, "elapsed_time": "2:54:04", "remaining_time": "7:05:45"} +{"current_steps": 1986, "total_steps": 6840, "loss": 0.6430097818374634, "lr": 1.7007540716484657e-05, "epoch": 0.5807866647170639, "percentage": 29.04, "elapsed_time": "2:54:09", "remaining_time": "7:05:40"} +{"current_steps": 1987, "total_steps": 6840, "loss": 0.5803329348564148, "lr": 1.700409080347719e-05, "epoch": 0.5810791051323293, "percentage": 29.05, "elapsed_time": "2:54:14", "remaining_time": "7:05:33"} +{"current_steps": 1988, "total_steps": 6840, "loss": 0.7526525259017944, "lr": 1.7000639253306085e-05, "epoch": 0.5813715455475946, "percentage": 29.06, "elapsed_time": "2:54:19", "remaining_time": "7:05:28"} +{"current_steps": 1989, "total_steps": 6840, "loss": 0.6679468750953674, "lr": 1.6997186066778118e-05, "epoch": 0.5816639859628601, "percentage": 29.08, "elapsed_time": "2:54:24", "remaining_time": "7:05:23"} +{"current_steps": 1990, "total_steps": 6840, "loss": 0.7233256101608276, "lr": 1.6993731244700454e-05, "epoch": 0.5819564263781255, "percentage": 29.09, "elapsed_time": "2:54:28", "remaining_time": "7:05:13"} +{"current_steps": 1991, "total_steps": 6840, "loss": 0.5986290574073792, "lr": 1.6990274787880633e-05, "epoch": 0.5822488667933908, "percentage": 29.11, "elapsed_time": "2:54:33", "remaining_time": "7:05:07"} +{"current_steps": 1992, "total_steps": 6840, "loss": 0.6898672580718994, "lr": 1.6986816697126583e-05, "epoch": 0.5825413072086563, "percentage": 29.12, "elapsed_time": "2:54:38", "remaining_time": "7:05:01"} +{"current_steps": 1993, "total_steps": 6840, "loss": 0.6888613104820251, "lr": 1.698335697324661e-05, "epoch": 0.5828337476239216, "percentage": 29.14, "elapsed_time": "2:54:42", 
"remaining_time": "7:04:53"} +{"current_steps": 1994, "total_steps": 6840, "loss": 0.6002428531646729, "lr": 1.6979895617049404e-05, "epoch": 0.583126188039187, "percentage": 29.15, "elapsed_time": "2:54:47", "remaining_time": "7:04:48"} +{"current_steps": 1995, "total_steps": 6840, "loss": 0.6372438669204712, "lr": 1.6976432629344036e-05, "epoch": 0.5834186284544524, "percentage": 29.17, "elapsed_time": "2:54:53", "remaining_time": "7:04:43"} +{"current_steps": 1996, "total_steps": 6840, "loss": 0.529569149017334, "lr": 1.6972968010939953e-05, "epoch": 0.5837110688697178, "percentage": 29.18, "elapsed_time": "2:54:57", "remaining_time": "7:04:36"} +{"current_steps": 1997, "total_steps": 6840, "loss": 0.5534025430679321, "lr": 1.6969501762647002e-05, "epoch": 0.5840035092849832, "percentage": 29.2, "elapsed_time": "2:55:01", "remaining_time": "7:04:28"} +{"current_steps": 1998, "total_steps": 6840, "loss": 0.8105937242507935, "lr": 1.6966033885275384e-05, "epoch": 0.5842959497002486, "percentage": 29.21, "elapsed_time": "2:55:07", "remaining_time": "7:04:24"} +{"current_steps": 1999, "total_steps": 6840, "loss": 0.7657530903816223, "lr": 1.6962564379635702e-05, "epoch": 0.584588390115514, "percentage": 29.23, "elapsed_time": "2:55:11", "remaining_time": "7:04:16"} +{"current_steps": 2000, "total_steps": 6840, "loss": 0.5941641330718994, "lr": 1.6959093246538927e-05, "epoch": 0.5848808305307793, "percentage": 29.24, "elapsed_time": "2:55:17", "remaining_time": "7:04:11"} +{"current_steps": 2001, "total_steps": 6840, "loss": 0.6130149364471436, "lr": 1.695562048679642e-05, "epoch": 0.5851732709460448, "percentage": 29.25, "elapsed_time": "2:55:27", "remaining_time": "7:04:17"} +{"current_steps": 2002, "total_steps": 6840, "loss": 0.7078043222427368, "lr": 1.6952146101219914e-05, "epoch": 0.5854657113613101, "percentage": 29.27, "elapsed_time": "2:55:33", "remaining_time": "7:04:14"} +{"current_steps": 2003, "total_steps": 6840, "loss": 0.6330863237380981, "lr": 
1.6948670090621528e-05, "epoch": 0.5857581517765755, "percentage": 29.28, "elapsed_time": "2:55:38", "remaining_time": "7:04:10"} +{"current_steps": 2004, "total_steps": 6840, "loss": 0.6631220579147339, "lr": 1.6945192455813755e-05, "epoch": 0.5860505921918409, "percentage": 29.3, "elapsed_time": "2:55:44", "remaining_time": "7:04:06"} +{"current_steps": 2005, "total_steps": 6840, "loss": 0.6669473648071289, "lr": 1.6941713197609476e-05, "epoch": 0.5863430326071063, "percentage": 29.31, "elapsed_time": "2:55:50", "remaining_time": "7:04:03"} +{"current_steps": 2006, "total_steps": 6840, "loss": 0.608252763748169, "lr": 1.6938232316821938e-05, "epoch": 0.5866354730223717, "percentage": 29.33, "elapsed_time": "2:55:55", "remaining_time": "7:03:56"} +{"current_steps": 2007, "total_steps": 6840, "loss": 0.5979427695274353, "lr": 1.6934749814264786e-05, "epoch": 0.5869279134376371, "percentage": 29.34, "elapsed_time": "2:55:59", "remaining_time": "7:03:48"} +{"current_steps": 2008, "total_steps": 6840, "loss": 0.5653454661369324, "lr": 1.6931265690752027e-05, "epoch": 0.5872203538529025, "percentage": 29.36, "elapsed_time": "2:56:04", "remaining_time": "7:03:43"} +{"current_steps": 2009, "total_steps": 6840, "loss": 0.6399147510528564, "lr": 1.6927779947098052e-05, "epoch": 0.5875127942681678, "percentage": 29.37, "elapsed_time": "2:56:10", "remaining_time": "7:03:38"} +{"current_steps": 2010, "total_steps": 6840, "loss": 0.41824793815612793, "lr": 1.6924292584117642e-05, "epoch": 0.5878052346834333, "percentage": 29.39, "elapsed_time": "2:56:14", "remaining_time": "7:03:31"} +{"current_steps": 2011, "total_steps": 6840, "loss": 0.8881042003631592, "lr": 1.6920803602625938e-05, "epoch": 0.5880976750986986, "percentage": 29.4, "elapsed_time": "2:56:20", "remaining_time": "7:03:26"} +{"current_steps": 2012, "total_steps": 6840, "loss": 0.636030912399292, "lr": 1.6917313003438473e-05, "epoch": 0.588390115513964, "percentage": 29.42, "elapsed_time": "2:56:25", 
"remaining_time": "7:03:21"} +{"current_steps": 2013, "total_steps": 6840, "loss": 0.6038305759429932, "lr": 1.6913820787371147e-05, "epoch": 0.5886825559292295, "percentage": 29.43, "elapsed_time": "2:56:30", "remaining_time": "7:03:14"} +{"current_steps": 2014, "total_steps": 6840, "loss": 0.7073840498924255, "lr": 1.6910326955240252e-05, "epoch": 0.5889749963444948, "percentage": 29.44, "elapsed_time": "2:56:36", "remaining_time": "7:03:11"} +{"current_steps": 2015, "total_steps": 6840, "loss": 0.5804994106292725, "lr": 1.6906831507862446e-05, "epoch": 0.5892674367597602, "percentage": 29.46, "elapsed_time": "2:56:40", "remaining_time": "7:03:04"} +{"current_steps": 2016, "total_steps": 6840, "loss": 0.8194780349731445, "lr": 1.6903334446054768e-05, "epoch": 0.5895598771750256, "percentage": 29.47, "elapsed_time": "2:56:45", "remaining_time": "7:02:56"} +{"current_steps": 2017, "total_steps": 6840, "loss": 0.7348685264587402, "lr": 1.689983577063464e-05, "epoch": 0.589852317590291, "percentage": 29.49, "elapsed_time": "2:56:48", "remaining_time": "7:02:47"} +{"current_steps": 2018, "total_steps": 6840, "loss": 0.5855007171630859, "lr": 1.689633548241985e-05, "epoch": 0.5901447580055563, "percentage": 29.5, "elapsed_time": "2:56:54", "remaining_time": "7:02:43"} +{"current_steps": 2019, "total_steps": 6840, "loss": 0.7387616634368896, "lr": 1.689283358222857e-05, "epoch": 0.5904371984208218, "percentage": 29.52, "elapsed_time": "2:56:59", "remaining_time": "7:02:37"} +{"current_steps": 2020, "total_steps": 6840, "loss": 0.688759446144104, "lr": 1.688933007087935e-05, "epoch": 0.5907296388360872, "percentage": 29.53, "elapsed_time": "2:57:05", "remaining_time": "7:02:33"} +{"current_steps": 2021, "total_steps": 6840, "loss": 0.7203953266143799, "lr": 1.6885824949191117e-05, "epoch": 0.5910220792513525, "percentage": 29.55, "elapsed_time": "2:57:10", "remaining_time": "7:02:27"} +{"current_steps": 2022, "total_steps": 6840, "loss": 0.6465663909912109, "lr": 
1.6882318217983165e-05, "epoch": 0.591314519666618, "percentage": 29.56, "elapsed_time": "2:57:15", "remaining_time": "7:02:21"} +{"current_steps": 2023, "total_steps": 6840, "loss": 0.6625394821166992, "lr": 1.6878809878075176e-05, "epoch": 0.5916069600818833, "percentage": 29.58, "elapsed_time": "2:57:22", "remaining_time": "7:02:19"} +{"current_steps": 2024, "total_steps": 6840, "loss": 0.6577074527740479, "lr": 1.68752999302872e-05, "epoch": 0.5918994004971487, "percentage": 29.59, "elapsed_time": "2:57:28", "remaining_time": "7:02:17"} +{"current_steps": 2025, "total_steps": 6840, "loss": 0.50509113073349, "lr": 1.6871788375439667e-05, "epoch": 0.5921918409124141, "percentage": 29.61, "elapsed_time": "2:57:33", "remaining_time": "7:02:11"} +{"current_steps": 2026, "total_steps": 6840, "loss": 0.5723974704742432, "lr": 1.6868275214353387e-05, "epoch": 0.5924842813276795, "percentage": 29.62, "elapsed_time": "2:57:38", "remaining_time": "7:02:05"} +{"current_steps": 2027, "total_steps": 6840, "loss": 0.6383459568023682, "lr": 1.6864760447849533e-05, "epoch": 0.5927767217429448, "percentage": 29.63, "elapsed_time": "2:57:43", "remaining_time": "7:02:00"} +{"current_steps": 2028, "total_steps": 6840, "loss": 0.5307388305664062, "lr": 1.6861244076749663e-05, "epoch": 0.5930691621582103, "percentage": 29.65, "elapsed_time": "2:57:49", "remaining_time": "7:01:57"} +{"current_steps": 2029, "total_steps": 6840, "loss": 0.8009265661239624, "lr": 1.6857726101875706e-05, "epoch": 0.5933616025734757, "percentage": 29.66, "elapsed_time": "2:57:54", "remaining_time": "7:01:50"} +{"current_steps": 2030, "total_steps": 6840, "loss": 0.5505321025848389, "lr": 1.685420652404997e-05, "epoch": 0.593654042988741, "percentage": 29.68, "elapsed_time": "2:57:58", "remaining_time": "7:01:43"} +{"current_steps": 2031, "total_steps": 6840, "loss": 0.680927038192749, "lr": 1.6850685344095134e-05, "epoch": 0.5939464834040065, "percentage": 29.69, "elapsed_time": "2:58:03", 
"remaining_time": "7:01:37"} +{"current_steps": 2032, "total_steps": 6840, "loss": 0.7357309460639954, "lr": 1.684716256283425e-05, "epoch": 0.5942389238192718, "percentage": 29.71, "elapsed_time": "2:58:09", "remaining_time": "7:01:31"} +{"current_steps": 2033, "total_steps": 6840, "loss": 0.5896620750427246, "lr": 1.6843638181090748e-05, "epoch": 0.5945313642345372, "percentage": 29.72, "elapsed_time": "2:58:14", "remaining_time": "7:01:26"} +{"current_steps": 2034, "total_steps": 6840, "loss": 0.5567387342453003, "lr": 1.6840112199688432e-05, "epoch": 0.5948238046498026, "percentage": 29.74, "elapsed_time": "2:58:19", "remaining_time": "7:01:20"} +{"current_steps": 2035, "total_steps": 6840, "loss": 0.6428712606430054, "lr": 1.6836584619451478e-05, "epoch": 0.595116245065068, "percentage": 29.75, "elapsed_time": "2:58:23", "remaining_time": "7:01:13"} +{"current_steps": 2036, "total_steps": 6840, "loss": 0.7430459260940552, "lr": 1.6833055441204436e-05, "epoch": 0.5954086854803334, "percentage": 29.77, "elapsed_time": "2:58:28", "remaining_time": "7:01:06"} +{"current_steps": 2037, "total_steps": 6840, "loss": 0.5982654690742493, "lr": 1.682952466577223e-05, "epoch": 0.5957011258955988, "percentage": 29.78, "elapsed_time": "2:58:34", "remaining_time": "7:01:03"} +{"current_steps": 2038, "total_steps": 6840, "loss": 0.5807450413703918, "lr": 1.6825992293980158e-05, "epoch": 0.5959935663108642, "percentage": 29.8, "elapsed_time": "2:58:38", "remaining_time": "7:00:55"} +{"current_steps": 2039, "total_steps": 6840, "loss": 0.7667814493179321, "lr": 1.6822458326653888e-05, "epoch": 0.5962860067261295, "percentage": 29.81, "elapsed_time": "2:58:44", "remaining_time": "7:00:52"} +{"current_steps": 2040, "total_steps": 6840, "loss": 0.8192781805992126, "lr": 1.6818922764619467e-05, "epoch": 0.596578447141395, "percentage": 29.82, "elapsed_time": "2:58:52", "remaining_time": "7:00:52"} +{"current_steps": 2041, "total_steps": 6840, "loss": 0.6652504205703735, "lr": 
1.681538560870331e-05, "epoch": 0.5968708875566603, "percentage": 29.84, "elapsed_time": "2:58:57", "remaining_time": "7:00:48"} +{"current_steps": 2042, "total_steps": 6840, "loss": 0.6227332353591919, "lr": 1.6811846859732207e-05, "epoch": 0.5971633279719257, "percentage": 29.85, "elapsed_time": "2:59:02", "remaining_time": "7:00:41"} +{"current_steps": 2043, "total_steps": 6840, "loss": 0.5459558963775635, "lr": 1.6808306518533315e-05, "epoch": 0.597455768387191, "percentage": 29.87, "elapsed_time": "2:59:08", "remaining_time": "7:00:37"} +{"current_steps": 2044, "total_steps": 6840, "loss": 0.5176202058792114, "lr": 1.6804764585934167e-05, "epoch": 0.5977482088024565, "percentage": 29.88, "elapsed_time": "2:59:12", "remaining_time": "7:00:29"} +{"current_steps": 2045, "total_steps": 6840, "loss": 0.5818016529083252, "lr": 1.6801221062762677e-05, "epoch": 0.5980406492177219, "percentage": 29.9, "elapsed_time": "2:59:17", "remaining_time": "7:00:22"} +{"current_steps": 2046, "total_steps": 6840, "loss": 0.622256875038147, "lr": 1.679767594984711e-05, "epoch": 0.5983330896329873, "percentage": 29.91, "elapsed_time": "2:59:22", "remaining_time": "7:00:16"} +{"current_steps": 2047, "total_steps": 6840, "loss": 0.5538911819458008, "lr": 1.6794129248016124e-05, "epoch": 0.5986255300482527, "percentage": 29.93, "elapsed_time": "2:59:27", "remaining_time": "7:00:12"} +{"current_steps": 2048, "total_steps": 6840, "loss": 0.4934890568256378, "lr": 1.6790580958098733e-05, "epoch": 0.598917970463518, "percentage": 29.94, "elapsed_time": "2:59:33", "remaining_time": "7:00:07"} +{"current_steps": 2049, "total_steps": 6840, "loss": 0.6754223108291626, "lr": 1.678703108092433e-05, "epoch": 0.5992104108787835, "percentage": 29.96, "elapsed_time": "2:59:38", "remaining_time": "7:00:03"} +{"current_steps": 2050, "total_steps": 6840, "loss": 0.48618268966674805, "lr": 1.678347961732268e-05, "epoch": 0.5995028512940488, "percentage": 29.97, "elapsed_time": "2:59:44", 
"remaining_time": "6:59:59"} +{"current_steps": 2051, "total_steps": 6840, "loss": 0.6844758987426758, "lr": 1.6779926568123913e-05, "epoch": 0.5997952917093142, "percentage": 29.99, "elapsed_time": "2:59:49", "remaining_time": "6:59:52"} +{"current_steps": 2052, "total_steps": 6840, "loss": 0.5258621573448181, "lr": 1.677637193415853e-05, "epoch": 0.6000877321245797, "percentage": 30.0, "elapsed_time": "2:59:54", "remaining_time": "6:59:47"} +{"current_steps": 2053, "total_steps": 6840, "loss": 0.5571128129959106, "lr": 1.6772815716257414e-05, "epoch": 0.600380172539845, "percentage": 30.01, "elapsed_time": "2:59:59", "remaining_time": "6:59:40"} +{"current_steps": 2054, "total_steps": 6840, "loss": 0.5881344079971313, "lr": 1.67692579152518e-05, "epoch": 0.6006726129551104, "percentage": 30.03, "elapsed_time": "3:00:03", "remaining_time": "6:59:32"} +{"current_steps": 2055, "total_steps": 6840, "loss": 0.7162419557571411, "lr": 1.6765698531973305e-05, "epoch": 0.6009650533703758, "percentage": 30.04, "elapsed_time": "3:00:08", "remaining_time": "6:59:26"} +{"current_steps": 2056, "total_steps": 6840, "loss": 0.7470849752426147, "lr": 1.6762137567253917e-05, "epoch": 0.6012574937856412, "percentage": 30.06, "elapsed_time": "3:00:12", "remaining_time": "6:59:19"} +{"current_steps": 2057, "total_steps": 6840, "loss": 0.6043628454208374, "lr": 1.6758575021925987e-05, "epoch": 0.6015499342009065, "percentage": 30.07, "elapsed_time": "3:00:18", "remaining_time": "6:59:15"} +{"current_steps": 2058, "total_steps": 6840, "loss": 0.6574143171310425, "lr": 1.6755010896822237e-05, "epoch": 0.601842374616172, "percentage": 30.09, "elapsed_time": "3:00:22", "remaining_time": "6:59:08"} +{"current_steps": 2059, "total_steps": 6840, "loss": 0.605838418006897, "lr": 1.675144519277576e-05, "epoch": 0.6021348150314374, "percentage": 30.1, "elapsed_time": "3:00:28", "remaining_time": "6:59:03"} +{"current_steps": 2060, "total_steps": 6840, "loss": 0.5859218239784241, "lr": 
1.6747877910620022e-05, "epoch": 0.6024272554467027, "percentage": 30.12, "elapsed_time": "3:00:32", "remaining_time": "6:58:55"} +{"current_steps": 2061, "total_steps": 6840, "loss": 0.7272971868515015, "lr": 1.674430905118885e-05, "epoch": 0.6027196958619682, "percentage": 30.13, "elapsed_time": "3:00:37", "remaining_time": "6:58:48"} +{"current_steps": 2062, "total_steps": 6840, "loss": 0.606023907661438, "lr": 1.674073861531644e-05, "epoch": 0.6030121362772335, "percentage": 30.15, "elapsed_time": "3:00:42", "remaining_time": "6:58:43"} +{"current_steps": 2063, "total_steps": 6840, "loss": 0.6029521822929382, "lr": 1.6737166603837364e-05, "epoch": 0.6033045766924989, "percentage": 30.16, "elapsed_time": "3:00:47", "remaining_time": "6:58:37"} +{"current_steps": 2064, "total_steps": 6840, "loss": 0.7544999122619629, "lr": 1.673359301758656e-05, "epoch": 0.6035970171077643, "percentage": 30.18, "elapsed_time": "3:00:52", "remaining_time": "6:58:32"} +{"current_steps": 2065, "total_steps": 6840, "loss": 0.7487601637840271, "lr": 1.6730017857399327e-05, "epoch": 0.6038894575230297, "percentage": 30.19, "elapsed_time": "3:00:57", "remaining_time": "6:58:25"} +{"current_steps": 2066, "total_steps": 6840, "loss": 0.6429200172424316, "lr": 1.672644112411134e-05, "epoch": 0.604181897938295, "percentage": 30.2, "elapsed_time": "3:01:03", "remaining_time": "6:58:23"} +{"current_steps": 2067, "total_steps": 6840, "loss": 0.7337179183959961, "lr": 1.6722862818558635e-05, "epoch": 0.6044743383535605, "percentage": 30.22, "elapsed_time": "3:01:08", "remaining_time": "6:58:17"} +{"current_steps": 2068, "total_steps": 6840, "loss": 0.6644014120101929, "lr": 1.671928294157762e-05, "epoch": 0.6047667787688259, "percentage": 30.23, "elapsed_time": "3:01:12", "remaining_time": "6:58:09"} +{"current_steps": 2069, "total_steps": 6840, "loss": 0.5987672805786133, "lr": 1.6715701494005078e-05, "epoch": 0.6050592191840912, "percentage": 30.25, "elapsed_time": "3:01:18", 
"remaining_time": "6:58:04"} +{"current_steps": 2070, "total_steps": 6840, "loss": 0.5878695845603943, "lr": 1.671211847667814e-05, "epoch": 0.6053516595993567, "percentage": 30.26, "elapsed_time": "3:01:24", "remaining_time": "6:58:01"} +{"current_steps": 2071, "total_steps": 6840, "loss": 0.540128231048584, "lr": 1.670853389043432e-05, "epoch": 0.605644100014622, "percentage": 30.28, "elapsed_time": "3:01:29", "remaining_time": "6:57:55"} +{"current_steps": 2072, "total_steps": 6840, "loss": 0.667206346988678, "lr": 1.670494773611149e-05, "epoch": 0.6059365404298874, "percentage": 30.29, "elapsed_time": "3:01:34", "remaining_time": "6:57:51"} +{"current_steps": 2073, "total_steps": 6840, "loss": 0.6433641910552979, "lr": 1.6701360014547896e-05, "epoch": 0.6062289808451528, "percentage": 30.31, "elapsed_time": "3:01:39", "remaining_time": "6:57:45"} +{"current_steps": 2074, "total_steps": 6840, "loss": 0.5803529024124146, "lr": 1.669777072658214e-05, "epoch": 0.6065214212604182, "percentage": 30.32, "elapsed_time": "3:01:45", "remaining_time": "6:57:39"} +{"current_steps": 2075, "total_steps": 6840, "loss": 0.6203820705413818, "lr": 1.6694179873053202e-05, "epoch": 0.6068138616756836, "percentage": 30.34, "elapsed_time": "3:01:52", "remaining_time": "6:57:39"} +{"current_steps": 2076, "total_steps": 6840, "loss": 0.6194918155670166, "lr": 1.669058745480042e-05, "epoch": 0.607106302090949, "percentage": 30.35, "elapsed_time": "3:01:58", "remaining_time": "6:57:36"} +{"current_steps": 2077, "total_steps": 6840, "loss": 0.6797547936439514, "lr": 1.66869934726635e-05, "epoch": 0.6073987425062144, "percentage": 30.37, "elapsed_time": "3:02:03", "remaining_time": "6:57:30"} +{"current_steps": 2078, "total_steps": 6840, "loss": 0.6076459884643555, "lr": 1.6683397927482512e-05, "epoch": 0.6076911829214797, "percentage": 30.38, "elapsed_time": "3:02:08", "remaining_time": "6:57:24"} +{"current_steps": 2079, "total_steps": 6840, "loss": 0.6958068609237671, "lr": 
1.6679800820097895e-05, "epoch": 0.6079836233367452, "percentage": 30.39, "elapsed_time": "3:02:13", "remaining_time": "6:57:18"} +{"current_steps": 2080, "total_steps": 6840, "loss": 0.5819929242134094, "lr": 1.6676202151350453e-05, "epoch": 0.6082760637520105, "percentage": 30.41, "elapsed_time": "3:02:19", "remaining_time": "6:57:15"} +{"current_steps": 2081, "total_steps": 6840, "loss": 0.7125047445297241, "lr": 1.6672601922081347e-05, "epoch": 0.6085685041672759, "percentage": 30.42, "elapsed_time": "3:02:24", "remaining_time": "6:57:09"} +{"current_steps": 2082, "total_steps": 6840, "loss": 0.8046560287475586, "lr": 1.6669000133132108e-05, "epoch": 0.6088609445825413, "percentage": 30.44, "elapsed_time": "3:02:29", "remaining_time": "6:57:03"} +{"current_steps": 2083, "total_steps": 6840, "loss": 0.5468478202819824, "lr": 1.666539678534464e-05, "epoch": 0.6091533849978067, "percentage": 30.45, "elapsed_time": "3:02:34", "remaining_time": "6:56:58"} +{"current_steps": 2084, "total_steps": 6840, "loss": 0.6387852430343628, "lr": 1.6661791879561204e-05, "epoch": 0.6094458254130721, "percentage": 30.47, "elapsed_time": "3:02:40", "remaining_time": "6:56:52"} +{"current_steps": 2085, "total_steps": 6840, "loss": 0.643539547920227, "lr": 1.6658185416624415e-05, "epoch": 0.6097382658283375, "percentage": 30.48, "elapsed_time": "3:02:44", "remaining_time": "6:56:46"} +{"current_steps": 2086, "total_steps": 6840, "loss": 0.5031965374946594, "lr": 1.6654577397377266e-05, "epoch": 0.6100307062436029, "percentage": 30.5, "elapsed_time": "3:02:50", "remaining_time": "6:56:41"} +{"current_steps": 2087, "total_steps": 6840, "loss": 0.6690273284912109, "lr": 1.6650967822663115e-05, "epoch": 0.6103231466588682, "percentage": 30.51, "elapsed_time": "3:02:54", "remaining_time": "6:56:34"} +{"current_steps": 2088, "total_steps": 6840, "loss": 0.6396887302398682, "lr": 1.6647356693325672e-05, "epoch": 0.6106155870741337, "percentage": 30.53, "elapsed_time": "3:02:59", 
"remaining_time": "6:56:27"} +{"current_steps": 2089, "total_steps": 6840, "loss": 0.6306549310684204, "lr": 1.664374401020902e-05, "epoch": 0.610908027489399, "percentage": 30.54, "elapsed_time": "3:03:03", "remaining_time": "6:56:20"} +{"current_steps": 2090, "total_steps": 6840, "loss": 0.5936366319656372, "lr": 1.66401297741576e-05, "epoch": 0.6112004679046644, "percentage": 30.56, "elapsed_time": "3:03:08", "remaining_time": "6:56:14"} +{"current_steps": 2091, "total_steps": 6840, "loss": 0.6153277158737183, "lr": 1.6636513986016215e-05, "epoch": 0.6114929083199299, "percentage": 30.57, "elapsed_time": "3:03:13", "remaining_time": "6:56:08"} +{"current_steps": 2092, "total_steps": 6840, "loss": 0.6361621618270874, "lr": 1.663289664663004e-05, "epoch": 0.6117853487351952, "percentage": 30.58, "elapsed_time": "3:03:20", "remaining_time": "6:56:05"} +{"current_steps": 2093, "total_steps": 6840, "loss": 0.6511524319648743, "lr": 1.6629277756844603e-05, "epoch": 0.6120777891504606, "percentage": 30.6, "elapsed_time": "3:03:25", "remaining_time": "6:56:00"} +{"current_steps": 2094, "total_steps": 6840, "loss": 0.5811333656311035, "lr": 1.6625657317505792e-05, "epoch": 0.612370229565726, "percentage": 30.61, "elapsed_time": "3:03:30", "remaining_time": "6:55:55"} +{"current_steps": 2095, "total_steps": 6840, "loss": 0.6935377717018127, "lr": 1.6622035329459872e-05, "epoch": 0.6126626699809914, "percentage": 30.63, "elapsed_time": "3:03:35", "remaining_time": "6:55:48"} +{"current_steps": 2096, "total_steps": 6840, "loss": 0.6363199949264526, "lr": 1.6618411793553455e-05, "epoch": 0.6129551103962567, "percentage": 30.64, "elapsed_time": "3:03:41", "remaining_time": "6:55:44"} +{"current_steps": 2097, "total_steps": 6840, "loss": 0.7325713634490967, "lr": 1.6614786710633525e-05, "epoch": 0.6132475508115222, "percentage": 30.66, "elapsed_time": "3:03:45", "remaining_time": "6:55:36"} +{"current_steps": 2098, "total_steps": 6840, "loss": 0.5739182829856873, "lr": 
1.6611160081547414e-05, "epoch": 0.6135399912267876, "percentage": 30.67, "elapsed_time": "3:03:50", "remaining_time": "6:55:32"} +{"current_steps": 2099, "total_steps": 6840, "loss": 0.611133873462677, "lr": 1.6607531907142835e-05, "epoch": 0.6138324316420529, "percentage": 30.69, "elapsed_time": "3:03:55", "remaining_time": "6:55:26"} +{"current_steps": 2100, "total_steps": 6840, "loss": 0.6419532299041748, "lr": 1.6603902188267842e-05, "epoch": 0.6141248720573184, "percentage": 30.7, "elapsed_time": "3:04:01", "remaining_time": "6:55:22"} +{"current_steps": 2101, "total_steps": 6840, "loss": 0.7736743688583374, "lr": 1.660027092577087e-05, "epoch": 0.6144173124725837, "percentage": 30.72, "elapsed_time": "3:04:10", "remaining_time": "6:55:26"} +{"current_steps": 2102, "total_steps": 6840, "loss": 0.5249119400978088, "lr": 1.6596638120500696e-05, "epoch": 0.6147097528878491, "percentage": 30.73, "elapsed_time": "3:04:16", "remaining_time": "6:55:22"} +{"current_steps": 2103, "total_steps": 6840, "loss": 0.7145636081695557, "lr": 1.6593003773306475e-05, "epoch": 0.6150021933031145, "percentage": 30.75, "elapsed_time": "3:04:22", "remaining_time": "6:55:18"} +{"current_steps": 2104, "total_steps": 6840, "loss": 0.5807666182518005, "lr": 1.65893678850377e-05, "epoch": 0.6152946337183799, "percentage": 30.76, "elapsed_time": "3:04:28", "remaining_time": "6:55:14"} +{"current_steps": 2105, "total_steps": 6840, "loss": 0.5049663782119751, "lr": 1.6585730456544255e-05, "epoch": 0.6155870741336452, "percentage": 30.77, "elapsed_time": "3:04:33", "remaining_time": "6:55:07"} +{"current_steps": 2106, "total_steps": 6840, "loss": 0.6744092702865601, "lr": 1.658209148867635e-05, "epoch": 0.6158795145489107, "percentage": 30.79, "elapsed_time": "3:04:37", "remaining_time": "6:55:01"} +{"current_steps": 2107, "total_steps": 6840, "loss": 0.605404794216156, "lr": 1.6578450982284584e-05, "epoch": 0.6161719549641761, "percentage": 30.8, "elapsed_time": "3:04:42", 
"remaining_time": "6:54:54"} +{"current_steps": 2108, "total_steps": 6840, "loss": 0.6074866056442261, "lr": 1.6574808938219894e-05, "epoch": 0.6164643953794414, "percentage": 30.82, "elapsed_time": "3:04:47", "remaining_time": "6:54:48"} +{"current_steps": 2109, "total_steps": 6840, "loss": 0.6758207082748413, "lr": 1.6571165357333594e-05, "epoch": 0.6167568357947069, "percentage": 30.83, "elapsed_time": "3:04:52", "remaining_time": "6:54:44"} +{"current_steps": 2110, "total_steps": 6840, "loss": 0.7669274806976318, "lr": 1.6567520240477344e-05, "epoch": 0.6170492762099722, "percentage": 30.85, "elapsed_time": "3:04:56", "remaining_time": "6:54:35"} +{"current_steps": 2111, "total_steps": 6840, "loss": 0.497562050819397, "lr": 1.6563873588503173e-05, "epoch": 0.6173417166252376, "percentage": 30.86, "elapsed_time": "3:05:02", "remaining_time": "6:54:30"} +{"current_steps": 2112, "total_steps": 6840, "loss": 0.6398104429244995, "lr": 1.656022540226345e-05, "epoch": 0.617634157040503, "percentage": 30.88, "elapsed_time": "3:05:07", "remaining_time": "6:54:25"} +{"current_steps": 2113, "total_steps": 6840, "loss": 0.6739988327026367, "lr": 1.6556575682610935e-05, "epoch": 0.6179265974557684, "percentage": 30.89, "elapsed_time": "3:05:13", "remaining_time": "6:54:21"} +{"current_steps": 2114, "total_steps": 6840, "loss": 0.5710165500640869, "lr": 1.6552924430398716e-05, "epoch": 0.6182190378710338, "percentage": 30.91, "elapsed_time": "3:05:19", "remaining_time": "6:54:18"} +{"current_steps": 2115, "total_steps": 6840, "loss": 0.6087738871574402, "lr": 1.6549271646480253e-05, "epoch": 0.6185114782862992, "percentage": 30.92, "elapsed_time": "3:05:24", "remaining_time": "6:54:12"} +{"current_steps": 2116, "total_steps": 6840, "loss": 0.5300824642181396, "lr": 1.6545617331709364e-05, "epoch": 0.6188039187015646, "percentage": 30.94, "elapsed_time": "3:05:30", "remaining_time": "6:54:08"} +{"current_steps": 2117, "total_steps": 6840, "loss": 0.7384774684906006, "lr": 
1.6541961486940222e-05, "epoch": 0.6190963591168299, "percentage": 30.95, "elapsed_time": "3:05:34", "remaining_time": "6:54:01"} +{"current_steps": 2118, "total_steps": 6840, "loss": 0.5867838263511658, "lr": 1.6538304113027356e-05, "epoch": 0.6193887995320954, "percentage": 30.96, "elapsed_time": "3:05:39", "remaining_time": "6:53:55"} +{"current_steps": 2119, "total_steps": 6840, "loss": 0.617068886756897, "lr": 1.653464521082566e-05, "epoch": 0.6196812399473607, "percentage": 30.98, "elapsed_time": "3:05:44", "remaining_time": "6:53:50"} +{"current_steps": 2120, "total_steps": 6840, "loss": 0.7316439151763916, "lr": 1.6530984781190374e-05, "epoch": 0.6199736803626261, "percentage": 30.99, "elapsed_time": "3:05:50", "remaining_time": "6:53:44"} +{"current_steps": 2121, "total_steps": 6840, "loss": 0.5469995737075806, "lr": 1.6527322824977104e-05, "epoch": 0.6202661207778914, "percentage": 31.01, "elapsed_time": "3:05:53", "remaining_time": "6:53:35"} +{"current_steps": 2122, "total_steps": 6840, "loss": 0.6577411890029907, "lr": 1.6523659343041815e-05, "epoch": 0.6205585611931569, "percentage": 31.02, "elapsed_time": "3:05:57", "remaining_time": "6:53:28"} +{"current_steps": 2123, "total_steps": 6840, "loss": 0.7425049543380737, "lr": 1.6519994336240816e-05, "epoch": 0.6208510016084223, "percentage": 31.04, "elapsed_time": "3:06:02", "remaining_time": "6:53:22"} +{"current_steps": 2124, "total_steps": 6840, "loss": 0.7894090414047241, "lr": 1.6516327805430785e-05, "epoch": 0.6211434420236877, "percentage": 31.05, "elapsed_time": "3:06:06", "remaining_time": "6:53:14"} +{"current_steps": 2125, "total_steps": 6840, "loss": 0.5739543437957764, "lr": 1.651265975146875e-05, "epoch": 0.6214358824389531, "percentage": 31.07, "elapsed_time": "3:06:12", "remaining_time": "6:53:09"} +{"current_steps": 2126, "total_steps": 6840, "loss": 0.6987308263778687, "lr": 1.6508990175212092e-05, "epoch": 0.6217283228542184, "percentage": 31.08, "elapsed_time": "3:06:18", 
"remaining_time": "6:53:05"} +{"current_steps": 2127, "total_steps": 6840, "loss": 0.5956544280052185, "lr": 1.650531907751856e-05, "epoch": 0.6220207632694839, "percentage": 31.1, "elapsed_time": "3:06:25", "remaining_time": "6:53:03"} +{"current_steps": 2128, "total_steps": 6840, "loss": 0.582348108291626, "lr": 1.6501646459246245e-05, "epoch": 0.6223132036847492, "percentage": 31.11, "elapsed_time": "3:06:30", "remaining_time": "6:52:58"} +{"current_steps": 2129, "total_steps": 6840, "loss": 0.8057917356491089, "lr": 1.64979723212536e-05, "epoch": 0.6226056441000146, "percentage": 31.13, "elapsed_time": "3:06:35", "remaining_time": "6:52:52"} +{"current_steps": 2130, "total_steps": 6840, "loss": 0.6237305402755737, "lr": 1.6494296664399428e-05, "epoch": 0.6228980845152801, "percentage": 31.14, "elapsed_time": "3:06:41", "remaining_time": "6:52:49"} +{"current_steps": 2131, "total_steps": 6840, "loss": 0.6445767879486084, "lr": 1.6490619489542905e-05, "epoch": 0.6231905249305454, "percentage": 31.15, "elapsed_time": "3:06:47", "remaining_time": "6:52:45"} +{"current_steps": 2132, "total_steps": 6840, "loss": 0.6397994160652161, "lr": 1.648694079754354e-05, "epoch": 0.6234829653458108, "percentage": 31.17, "elapsed_time": "3:06:52", "remaining_time": "6:52:40"} +{"current_steps": 2133, "total_steps": 6840, "loss": 0.8216533660888672, "lr": 1.64832605892612e-05, "epoch": 0.6237754057610762, "percentage": 31.18, "elapsed_time": "3:06:58", "remaining_time": "6:52:36"} +{"current_steps": 2134, "total_steps": 6840, "loss": 0.6894406080245972, "lr": 1.6479578865556115e-05, "epoch": 0.6240678461763416, "percentage": 31.2, "elapsed_time": "3:07:02", "remaining_time": "6:52:28"} +{"current_steps": 2135, "total_steps": 6840, "loss": 0.6608946323394775, "lr": 1.6475895627288873e-05, "epoch": 0.6243602865916069, "percentage": 31.21, "elapsed_time": "3:07:08", "remaining_time": "6:52:23"} +{"current_steps": 2136, "total_steps": 6840, "loss": 0.6070076823234558, "lr": 
1.6472210875320397e-05, "epoch": 0.6246527270068724, "percentage": 31.23, "elapsed_time": "3:07:12", "remaining_time": "6:52:15"} +{"current_steps": 2137, "total_steps": 6840, "loss": 0.7357348799705505, "lr": 1.6468524610511982e-05, "epoch": 0.6249451674221378, "percentage": 31.24, "elapsed_time": "3:07:16", "remaining_time": "6:52:09"} +{"current_steps": 2138, "total_steps": 6840, "loss": 0.5959880352020264, "lr": 1.6464836833725267e-05, "epoch": 0.6252376078374031, "percentage": 31.26, "elapsed_time": "3:07:20", "remaining_time": "6:52:01"} +{"current_steps": 2139, "total_steps": 6840, "loss": 0.7812649011611938, "lr": 1.646114754582225e-05, "epoch": 0.6255300482526686, "percentage": 31.27, "elapsed_time": "3:07:27", "remaining_time": "6:51:58"} +{"current_steps": 2140, "total_steps": 6840, "loss": 0.5985091924667358, "lr": 1.6457456747665282e-05, "epoch": 0.6258224886679339, "percentage": 31.29, "elapsed_time": "3:07:32", "remaining_time": "6:51:53"} +{"current_steps": 2141, "total_steps": 6840, "loss": 0.6610564589500427, "lr": 1.645376444011706e-05, "epoch": 0.6261149290831993, "percentage": 31.3, "elapsed_time": "3:07:38", "remaining_time": "6:51:49"} +{"current_steps": 2142, "total_steps": 6840, "loss": 0.6876299381256104, "lr": 1.6450070624040636e-05, "epoch": 0.6264073694984647, "percentage": 31.32, "elapsed_time": "3:07:43", "remaining_time": "6:51:43"} +{"current_steps": 2143, "total_steps": 6840, "loss": 0.6715782284736633, "lr": 1.6446375300299425e-05, "epoch": 0.6266998099137301, "percentage": 31.33, "elapsed_time": "3:07:50", "remaining_time": "6:51:43"} +{"current_steps": 2144, "total_steps": 6840, "loss": 0.6066923141479492, "lr": 1.644267846975718e-05, "epoch": 0.6269922503289954, "percentage": 31.35, "elapsed_time": "3:07:55", "remaining_time": "6:51:37"} +{"current_steps": 2145, "total_steps": 6840, "loss": 0.5642968416213989, "lr": 1.6438980133278017e-05, "epoch": 0.6272846907442609, "percentage": 31.36, "elapsed_time": "3:08:00", 
"remaining_time": "6:51:29"} +{"current_steps": 2146, "total_steps": 6840, "loss": 0.604590654373169, "lr": 1.6435280291726394e-05, "epoch": 0.6275771311595263, "percentage": 31.37, "elapsed_time": "3:08:06", "remaining_time": "6:51:26"} +{"current_steps": 2147, "total_steps": 6840, "loss": 0.6313889026641846, "lr": 1.643157894596713e-05, "epoch": 0.6278695715747916, "percentage": 31.39, "elapsed_time": "3:08:11", "remaining_time": "6:51:22"} +{"current_steps": 2148, "total_steps": 6840, "loss": 0.5084092617034912, "lr": 1.6427876096865394e-05, "epoch": 0.6281620119900571, "percentage": 31.4, "elapsed_time": "3:08:18", "remaining_time": "6:51:18"} +{"current_steps": 2149, "total_steps": 6840, "loss": 0.5191931128501892, "lr": 1.6424171745286704e-05, "epoch": 0.6284544524053224, "percentage": 31.42, "elapsed_time": "3:08:23", "remaining_time": "6:51:15"} +{"current_steps": 2150, "total_steps": 6840, "loss": 0.7397615909576416, "lr": 1.6420465892096924e-05, "epoch": 0.6287468928205878, "percentage": 31.43, "elapsed_time": "3:08:27", "remaining_time": "6:51:07"} +{"current_steps": 2151, "total_steps": 6840, "loss": 0.622586727142334, "lr": 1.641675853816228e-05, "epoch": 0.6290393332358531, "percentage": 31.45, "elapsed_time": "3:08:31", "remaining_time": "6:50:58"} +{"current_steps": 2152, "total_steps": 6840, "loss": 0.7894928455352783, "lr": 1.6413049684349344e-05, "epoch": 0.6293317736511186, "percentage": 31.46, "elapsed_time": "3:08:36", "remaining_time": "6:50:51"} +{"current_steps": 2153, "total_steps": 6840, "loss": 0.5752773284912109, "lr": 1.640933933152504e-05, "epoch": 0.629624214066384, "percentage": 31.48, "elapsed_time": "3:08:41", "remaining_time": "6:50:46"} +{"current_steps": 2154, "total_steps": 6840, "loss": 0.6738473176956177, "lr": 1.640562748055663e-05, "epoch": 0.6299166544816494, "percentage": 31.49, "elapsed_time": "3:08:47", "remaining_time": "6:50:42"} +{"current_steps": 2155, "total_steps": 6840, "loss": 0.5789517164230347, "lr": 
1.6401914132311745e-05, "epoch": 0.6302090948969148, "percentage": 31.51, "elapsed_time": "3:08:53", "remaining_time": "6:50:39"} +{"current_steps": 2156, "total_steps": 6840, "loss": 0.5925524830818176, "lr": 1.6398199287658358e-05, "epoch": 0.6305015353121801, "percentage": 31.52, "elapsed_time": "3:08:57", "remaining_time": "6:50:31"} +{"current_steps": 2157, "total_steps": 6840, "loss": 0.6949414610862732, "lr": 1.6394482947464784e-05, "epoch": 0.6307939757274456, "percentage": 31.54, "elapsed_time": "3:09:02", "remaining_time": "6:50:25"} +{"current_steps": 2158, "total_steps": 6840, "loss": 0.7435301542282104, "lr": 1.6390765112599705e-05, "epoch": 0.6310864161427109, "percentage": 31.55, "elapsed_time": "3:09:07", "remaining_time": "6:50:20"} +{"current_steps": 2159, "total_steps": 6840, "loss": 0.6931856274604797, "lr": 1.6387045783932137e-05, "epoch": 0.6313788565579763, "percentage": 31.56, "elapsed_time": "3:09:11", "remaining_time": "6:50:12"} +{"current_steps": 2160, "total_steps": 6840, "loss": 0.7856471538543701, "lr": 1.638332496233145e-05, "epoch": 0.6316712969732416, "percentage": 31.58, "elapsed_time": "3:09:18", "remaining_time": "6:50:09"} +{"current_steps": 2161, "total_steps": 6840, "loss": 0.6299946308135986, "lr": 1.6379602648667362e-05, "epoch": 0.6319637373885071, "percentage": 31.59, "elapsed_time": "3:09:22", "remaining_time": "6:50:02"} +{"current_steps": 2162, "total_steps": 6840, "loss": 0.6209328174591064, "lr": 1.6375878843809946e-05, "epoch": 0.6322561778037725, "percentage": 31.61, "elapsed_time": "3:09:27", "remaining_time": "6:49:57"} +{"current_steps": 2163, "total_steps": 6840, "loss": 0.6498390436172485, "lr": 1.6372153548629617e-05, "epoch": 0.6325486182190379, "percentage": 31.62, "elapsed_time": "3:09:33", "remaining_time": "6:49:53"} +{"current_steps": 2164, "total_steps": 6840, "loss": 0.6757122278213501, "lr": 1.6368426763997137e-05, "epoch": 0.6328410586343033, "percentage": 31.64, "elapsed_time": "3:09:37", 
"remaining_time": "6:49:45"} +{"current_steps": 2165, "total_steps": 6840, "loss": 0.5137026906013489, "lr": 1.6364698490783623e-05, "epoch": 0.6331334990495686, "percentage": 31.65, "elapsed_time": "3:09:41", "remaining_time": "6:49:36"} +{"current_steps": 2166, "total_steps": 6840, "loss": 0.5876519680023193, "lr": 1.6360968729860536e-05, "epoch": 0.633425939464834, "percentage": 31.67, "elapsed_time": "3:09:44", "remaining_time": "6:49:27"} +{"current_steps": 2167, "total_steps": 6840, "loss": 0.5804057717323303, "lr": 1.6357237482099682e-05, "epoch": 0.6337183798800994, "percentage": 31.68, "elapsed_time": "3:09:50", "remaining_time": "6:49:22"} +{"current_steps": 2168, "total_steps": 6840, "loss": 0.6186444759368896, "lr": 1.635350474837322e-05, "epoch": 0.6340108202953648, "percentage": 31.7, "elapsed_time": "3:09:55", "remaining_time": "6:49:16"} +{"current_steps": 2169, "total_steps": 6840, "loss": 0.6358560919761658, "lr": 1.6349770529553654e-05, "epoch": 0.6343032607106303, "percentage": 31.71, "elapsed_time": "3:10:00", "remaining_time": "6:49:12"} +{"current_steps": 2170, "total_steps": 6840, "loss": 0.64283686876297, "lr": 1.6346034826513834e-05, "epoch": 0.6345957011258956, "percentage": 31.73, "elapsed_time": "3:10:07", "remaining_time": "6:49:09"} +{"current_steps": 2171, "total_steps": 6840, "loss": 0.5269169807434082, "lr": 1.6342297640126955e-05, "epoch": 0.634888141541161, "percentage": 31.74, "elapsed_time": "3:10:13", "remaining_time": "6:49:05"} +{"current_steps": 2172, "total_steps": 6840, "loss": 0.5338561534881592, "lr": 1.6338558971266563e-05, "epoch": 0.6351805819564263, "percentage": 31.75, "elapsed_time": "3:10:18", "remaining_time": "6:48:59"} +{"current_steps": 2173, "total_steps": 6840, "loss": 0.5587184429168701, "lr": 1.6334818820806555e-05, "epoch": 0.6354730223716918, "percentage": 31.77, "elapsed_time": "3:10:24", "remaining_time": "6:48:56"} +{"current_steps": 2174, "total_steps": 6840, "loss": 0.6468764543533325, "lr": 
1.633107718962116e-05, "epoch": 0.6357654627869571, "percentage": 31.78, "elapsed_time": "3:10:30", "remaining_time": "6:48:53"} +{"current_steps": 2175, "total_steps": 6840, "loss": 0.7305203676223755, "lr": 1.6327334078584967e-05, "epoch": 0.6360579032022226, "percentage": 31.8, "elapsed_time": "3:10:35", "remaining_time": "6:48:47"} +{"current_steps": 2176, "total_steps": 6840, "loss": 0.6226189136505127, "lr": 1.6323589488572908e-05, "epoch": 0.636350343617488, "percentage": 31.81, "elapsed_time": "3:10:41", "remaining_time": "6:48:43"} +{"current_steps": 2177, "total_steps": 6840, "loss": 0.6552053093910217, "lr": 1.631984342046025e-05, "epoch": 0.6366427840327533, "percentage": 31.83, "elapsed_time": "3:10:46", "remaining_time": "6:48:37"} +{"current_steps": 2178, "total_steps": 6840, "loss": 0.8121978044509888, "lr": 1.6316095875122617e-05, "epoch": 0.6369352244480188, "percentage": 31.84, "elapsed_time": "3:10:50", "remaining_time": "6:48:29"} +{"current_steps": 2179, "total_steps": 6840, "loss": 0.5826296806335449, "lr": 1.6312346853435976e-05, "epoch": 0.6372276648632841, "percentage": 31.86, "elapsed_time": "3:10:54", "remaining_time": "6:48:21"} +{"current_steps": 2180, "total_steps": 6840, "loss": 0.5862709283828735, "lr": 1.630859635627664e-05, "epoch": 0.6375201052785495, "percentage": 31.87, "elapsed_time": "3:10:58", "remaining_time": "6:48:14"} +{"current_steps": 2181, "total_steps": 6840, "loss": 0.7081524133682251, "lr": 1.6304844384521263e-05, "epoch": 0.6378125456938148, "percentage": 31.89, "elapsed_time": "3:11:05", "remaining_time": "6:48:11"} +{"current_steps": 2182, "total_steps": 6840, "loss": 0.6394449472427368, "lr": 1.6301090939046843e-05, "epoch": 0.6381049861090803, "percentage": 31.9, "elapsed_time": "3:11:11", "remaining_time": "6:48:07"} +{"current_steps": 2183, "total_steps": 6840, "loss": 0.6184799075126648, "lr": 1.6297336020730727e-05, "epoch": 0.6383974265243456, "percentage": 31.92, "elapsed_time": "3:11:17", 
"remaining_time": "6:48:04"} +{"current_steps": 2184, "total_steps": 6840, "loss": 0.6877666711807251, "lr": 1.6293579630450606e-05, "epoch": 0.638689866939611, "percentage": 31.93, "elapsed_time": "3:11:23", "remaining_time": "6:48:00"} +{"current_steps": 2185, "total_steps": 6840, "loss": 0.5596371293067932, "lr": 1.6289821769084512e-05, "epoch": 0.6389823073548765, "percentage": 31.94, "elapsed_time": "3:11:27", "remaining_time": "6:47:54"} +{"current_steps": 2186, "total_steps": 6840, "loss": 0.5378291010856628, "lr": 1.6286062437510823e-05, "epoch": 0.6392747477701418, "percentage": 31.96, "elapsed_time": "3:11:34", "remaining_time": "6:47:50"} +{"current_steps": 2187, "total_steps": 6840, "loss": 0.6965627670288086, "lr": 1.6282301636608256e-05, "epoch": 0.6395671881854073, "percentage": 31.97, "elapsed_time": "3:11:38", "remaining_time": "6:47:44"} +{"current_steps": 2188, "total_steps": 6840, "loss": 0.5939220190048218, "lr": 1.6278539367255885e-05, "epoch": 0.6398596286006726, "percentage": 31.99, "elapsed_time": "3:11:43", "remaining_time": "6:47:38"} +{"current_steps": 2189, "total_steps": 6840, "loss": 0.6225341558456421, "lr": 1.6274775630333104e-05, "epoch": 0.640152069015938, "percentage": 32.0, "elapsed_time": "3:11:48", "remaining_time": "6:47:32"} +{"current_steps": 2190, "total_steps": 6840, "loss": 0.471333384513855, "lr": 1.6271010426719672e-05, "epoch": 0.6404445094312033, "percentage": 32.02, "elapsed_time": "3:11:53", "remaining_time": "6:47:26"} +{"current_steps": 2191, "total_steps": 6840, "loss": 0.6066263914108276, "lr": 1.626724375729568e-05, "epoch": 0.6407369498464688, "percentage": 32.03, "elapsed_time": "3:11:59", "remaining_time": "6:47:22"} +{"current_steps": 2192, "total_steps": 6840, "loss": 0.6525982618331909, "lr": 1.626347562294157e-05, "epoch": 0.6410293902617342, "percentage": 32.05, "elapsed_time": "3:12:04", "remaining_time": "6:47:16"} +{"current_steps": 2193, "total_steps": 6840, "loss": 0.7395817041397095, "lr": 
1.6259706024538113e-05, "epoch": 0.6413218306769995, "percentage": 32.06, "elapsed_time": "3:12:10", "remaining_time": "6:47:14"} +{"current_steps": 2194, "total_steps": 6840, "loss": 0.720014214515686, "lr": 1.6255934962966432e-05, "epoch": 0.641614271092265, "percentage": 32.08, "elapsed_time": "3:12:16", "remaining_time": "6:47:09"} +{"current_steps": 2195, "total_steps": 6840, "loss": 0.6905295252799988, "lr": 1.625216243910799e-05, "epoch": 0.6419067115075303, "percentage": 32.09, "elapsed_time": "3:12:22", "remaining_time": "6:47:05"} +{"current_steps": 2196, "total_steps": 6840, "loss": 0.6877295970916748, "lr": 1.6248388453844596e-05, "epoch": 0.6421991519227958, "percentage": 32.11, "elapsed_time": "3:12:27", "remaining_time": "6:46:59"} +{"current_steps": 2197, "total_steps": 6840, "loss": 0.5782181024551392, "lr": 1.6244613008058386e-05, "epoch": 0.6424915923380611, "percentage": 32.12, "elapsed_time": "3:12:33", "remaining_time": "6:46:56"} +{"current_steps": 2198, "total_steps": 6840, "loss": 0.5253425240516663, "lr": 1.6240836102631856e-05, "epoch": 0.6427840327533265, "percentage": 32.13, "elapsed_time": "3:12:38", "remaining_time": "6:46:51"} +{"current_steps": 2199, "total_steps": 6840, "loss": 0.6631319522857666, "lr": 1.623705773844783e-05, "epoch": 0.6430764731685918, "percentage": 32.15, "elapsed_time": "3:12:44", "remaining_time": "6:46:46"} +{"current_steps": 2200, "total_steps": 6840, "loss": 0.6458526849746704, "lr": 1.6233277916389482e-05, "epoch": 0.6433689135838573, "percentage": 32.16, "elapsed_time": "3:12:47", "remaining_time": "6:46:37"} +{"current_steps": 2201, "total_steps": 6840, "loss": 0.5723023414611816, "lr": 1.622949663734032e-05, "epoch": 0.6436613539991227, "percentage": 32.18, "elapsed_time": "3:12:57", "remaining_time": "6:46:42"} +{"current_steps": 2202, "total_steps": 6840, "loss": 0.6852096319198608, "lr": 1.6225713902184193e-05, "epoch": 0.643953794414388, "percentage": 32.19, "elapsed_time": "3:13:02", 
"remaining_time": "6:46:36"} +{"current_steps": 2203, "total_steps": 6840, "loss": 0.6343507170677185, "lr": 1.6221929711805297e-05, "epoch": 0.6442462348296535, "percentage": 32.21, "elapsed_time": "3:13:08", "remaining_time": "6:46:32"} +{"current_steps": 2204, "total_steps": 6840, "loss": 0.6378631591796875, "lr": 1.6218144067088157e-05, "epoch": 0.6445386752449188, "percentage": 32.22, "elapsed_time": "3:13:13", "remaining_time": "6:46:27"} +{"current_steps": 2205, "total_steps": 6840, "loss": 0.6550023555755615, "lr": 1.621435696891765e-05, "epoch": 0.6448311156601843, "percentage": 32.24, "elapsed_time": "3:13:19", "remaining_time": "6:46:22"} +{"current_steps": 2206, "total_steps": 6840, "loss": 0.5555052757263184, "lr": 1.6210568418178983e-05, "epoch": 0.6451235560754496, "percentage": 32.25, "elapsed_time": "3:13:23", "remaining_time": "6:46:14"} +{"current_steps": 2207, "total_steps": 6840, "loss": 0.7171934247016907, "lr": 1.6206778415757715e-05, "epoch": 0.645415996490715, "percentage": 32.27, "elapsed_time": "3:13:30", "remaining_time": "6:46:13"} +{"current_steps": 2208, "total_steps": 6840, "loss": 0.6464889049530029, "lr": 1.6202986962539726e-05, "epoch": 0.6457084369059805, "percentage": 32.28, "elapsed_time": "3:13:35", "remaining_time": "6:46:08"} +{"current_steps": 2209, "total_steps": 6840, "loss": 0.6316033601760864, "lr": 1.619919405941125e-05, "epoch": 0.6460008773212458, "percentage": 32.3, "elapsed_time": "3:13:41", "remaining_time": "6:46:03"} +{"current_steps": 2210, "total_steps": 6840, "loss": 0.5548732876777649, "lr": 1.6195399707258855e-05, "epoch": 0.6462933177365112, "percentage": 32.31, "elapsed_time": "3:13:46", "remaining_time": "6:45:57"} +{"current_steps": 2211, "total_steps": 6840, "loss": 0.5055203437805176, "lr": 1.6191603906969447e-05, "epoch": 0.6465857581517765, "percentage": 32.32, "elapsed_time": "3:13:51", "remaining_time": "6:45:52"} +{"current_steps": 2212, "total_steps": 6840, "loss": 0.7010073661804199, "lr": 
1.6187806659430268e-05, "epoch": 0.646878198567042, "percentage": 32.34, "elapsed_time": "3:13:56", "remaining_time": "6:45:45"} +{"current_steps": 2213, "total_steps": 6840, "loss": 0.6188487410545349, "lr": 1.6184007965528908e-05, "epoch": 0.6471706389823073, "percentage": 32.35, "elapsed_time": "3:13:59", "remaining_time": "6:45:37"} +{"current_steps": 2214, "total_steps": 6840, "loss": 0.46920153498649597, "lr": 1.6180207826153284e-05, "epoch": 0.6474630793975727, "percentage": 32.37, "elapsed_time": "3:14:05", "remaining_time": "6:45:31"} +{"current_steps": 2215, "total_steps": 6840, "loss": 0.6811172962188721, "lr": 1.617640624219166e-05, "epoch": 0.6477555198128382, "percentage": 32.38, "elapsed_time": "3:14:11", "remaining_time": "6:45:27"} +{"current_steps": 2216, "total_steps": 6840, "loss": 0.6425800323486328, "lr": 1.617260321453263e-05, "epoch": 0.6480479602281035, "percentage": 32.4, "elapsed_time": "3:14:16", "remaining_time": "6:45:21"} +{"current_steps": 2217, "total_steps": 6840, "loss": 0.7020897269248962, "lr": 1.6168798744065123e-05, "epoch": 0.648340400643369, "percentage": 32.41, "elapsed_time": "3:14:21", "remaining_time": "6:45:17"} +{"current_steps": 2218, "total_steps": 6840, "loss": 0.5872179865837097, "lr": 1.6164992831678422e-05, "epoch": 0.6486328410586343, "percentage": 32.43, "elapsed_time": "3:14:27", "remaining_time": "6:45:13"} +{"current_steps": 2219, "total_steps": 6840, "loss": 0.7414118647575378, "lr": 1.6161185478262127e-05, "epoch": 0.6489252814738997, "percentage": 32.44, "elapsed_time": "3:14:31", "remaining_time": "6:45:06"} +{"current_steps": 2220, "total_steps": 6840, "loss": 0.5408385396003723, "lr": 1.615737668470619e-05, "epoch": 0.649217721889165, "percentage": 32.46, "elapsed_time": "3:14:37", "remaining_time": "6:45:02"} +{"current_steps": 2221, "total_steps": 6840, "loss": 0.6145513653755188, "lr": 1.6153566451900887e-05, "epoch": 0.6495101623044305, "percentage": 32.47, "elapsed_time": "3:14:43", 
"remaining_time": "6:44:58"} +{"current_steps": 2222, "total_steps": 6840, "loss": 0.556422233581543, "lr": 1.6149754780736847e-05, "epoch": 0.6498026027196958, "percentage": 32.49, "elapsed_time": "3:14:48", "remaining_time": "6:44:53"} +{"current_steps": 2223, "total_steps": 6840, "loss": 0.7155405282974243, "lr": 1.614594167210501e-05, "epoch": 0.6500950431349612, "percentage": 32.5, "elapsed_time": "3:14:53", "remaining_time": "6:44:46"} +{"current_steps": 2224, "total_steps": 6840, "loss": 0.4988427758216858, "lr": 1.6142127126896682e-05, "epoch": 0.6503874835502267, "percentage": 32.51, "elapsed_time": "3:14:58", "remaining_time": "6:44:40"} +{"current_steps": 2225, "total_steps": 6840, "loss": 0.6187007427215576, "lr": 1.6138311146003477e-05, "epoch": 0.650679923965492, "percentage": 32.53, "elapsed_time": "3:15:03", "remaining_time": "6:44:35"} +{"current_steps": 2226, "total_steps": 6840, "loss": 0.5668798685073853, "lr": 1.6134493730317364e-05, "epoch": 0.6509723643807575, "percentage": 32.54, "elapsed_time": "3:15:08", "remaining_time": "6:44:29"} +{"current_steps": 2227, "total_steps": 6840, "loss": 0.6354215145111084, "lr": 1.6130674880730642e-05, "epoch": 0.6512648047960228, "percentage": 32.56, "elapsed_time": "3:15:13", "remaining_time": "6:44:23"} +{"current_steps": 2228, "total_steps": 6840, "loss": 0.5409573912620544, "lr": 1.612685459813594e-05, "epoch": 0.6515572452112882, "percentage": 32.57, "elapsed_time": "3:15:18", "remaining_time": "6:44:16"} +{"current_steps": 2229, "total_steps": 6840, "loss": 0.5622435808181763, "lr": 1.612303288342623e-05, "epoch": 0.6518496856265535, "percentage": 32.59, "elapsed_time": "3:15:23", "remaining_time": "6:44:11"} +{"current_steps": 2230, "total_steps": 6840, "loss": 0.786159873008728, "lr": 1.6119209737494814e-05, "epoch": 0.652142126041819, "percentage": 32.6, "elapsed_time": "3:15:29", "remaining_time": "6:44:08"} +{"current_steps": 2231, "total_steps": 6840, "loss": 0.6359272003173828, "lr": 
1.611538516123532e-05, "epoch": 0.6524345664570844, "percentage": 32.62, "elapsed_time": "3:15:34", "remaining_time": "6:44:02"} +{"current_steps": 2232, "total_steps": 6840, "loss": 0.5688974261283875, "lr": 1.6111559155541732e-05, "epoch": 0.6527270068723497, "percentage": 32.63, "elapsed_time": "3:15:38", "remaining_time": "6:43:54"} +{"current_steps": 2233, "total_steps": 6840, "loss": 0.581497311592102, "lr": 1.610773172130835e-05, "epoch": 0.6530194472876152, "percentage": 32.65, "elapsed_time": "3:15:44", "remaining_time": "6:43:51"} +{"current_steps": 2234, "total_steps": 6840, "loss": 0.674004316329956, "lr": 1.6103902859429812e-05, "epoch": 0.6533118877028805, "percentage": 32.66, "elapsed_time": "3:15:51", "remaining_time": "6:43:48"} +{"current_steps": 2235, "total_steps": 6840, "loss": 0.6798728108406067, "lr": 1.6100072570801092e-05, "epoch": 0.653604328118146, "percentage": 32.68, "elapsed_time": "3:15:55", "remaining_time": "6:43:41"} +{"current_steps": 2236, "total_steps": 6840, "loss": 0.5742023587226868, "lr": 1.60962408563175e-05, "epoch": 0.6538967685334113, "percentage": 32.69, "elapsed_time": "3:16:00", "remaining_time": "6:43:34"} +{"current_steps": 2237, "total_steps": 6840, "loss": 0.470009446144104, "lr": 1.6092407716874674e-05, "epoch": 0.6541892089486767, "percentage": 32.7, "elapsed_time": "3:16:05", "remaining_time": "6:43:29"} +{"current_steps": 2238, "total_steps": 6840, "loss": 0.8113270998001099, "lr": 1.6088573153368586e-05, "epoch": 0.654481649363942, "percentage": 32.72, "elapsed_time": "3:16:10", "remaining_time": "6:43:23"} +{"current_steps": 2239, "total_steps": 6840, "loss": 0.7737559676170349, "lr": 1.6084737166695542e-05, "epoch": 0.6547740897792075, "percentage": 32.73, "elapsed_time": "3:16:16", "remaining_time": "6:43:19"} +{"current_steps": 2240, "total_steps": 6840, "loss": 0.6499667167663574, "lr": 1.6080899757752183e-05, "epoch": 0.6550665301944729, "percentage": 32.75, "elapsed_time": "3:16:21", "remaining_time": 
"6:43:13"} +{"current_steps": 2241, "total_steps": 6840, "loss": 0.6898500323295593, "lr": 1.6077060927435476e-05, "epoch": 0.6553589706097382, "percentage": 32.76, "elapsed_time": "3:16:26", "remaining_time": "6:43:08"} +{"current_steps": 2242, "total_steps": 6840, "loss": 0.5933262705802917, "lr": 1.6073220676642724e-05, "epoch": 0.6556514110250037, "percentage": 32.78, "elapsed_time": "3:16:33", "remaining_time": "6:43:06"} +{"current_steps": 2243, "total_steps": 6840, "loss": 0.6566172242164612, "lr": 1.606937900627157e-05, "epoch": 0.655943851440269, "percentage": 32.79, "elapsed_time": "3:16:38", "remaining_time": "6:43:01"} +{"current_steps": 2244, "total_steps": 6840, "loss": 0.6955286264419556, "lr": 1.606553591721997e-05, "epoch": 0.6562362918555344, "percentage": 32.81, "elapsed_time": "3:16:43", "remaining_time": "6:42:54"} +{"current_steps": 2245, "total_steps": 6840, "loss": 0.6905182600021362, "lr": 1.6061691410386234e-05, "epoch": 0.6565287322707998, "percentage": 32.82, "elapsed_time": "3:16:49", "remaining_time": "6:42:51"} +{"current_steps": 2246, "total_steps": 6840, "loss": 0.6733677387237549, "lr": 1.6057845486668984e-05, "epoch": 0.6568211726860652, "percentage": 32.84, "elapsed_time": "3:16:53", "remaining_time": "6:42:44"} +{"current_steps": 2247, "total_steps": 6840, "loss": 0.5368545055389404, "lr": 1.6053998146967186e-05, "epoch": 0.6571136131013307, "percentage": 32.85, "elapsed_time": "3:16:59", "remaining_time": "6:42:39"} +{"current_steps": 2248, "total_steps": 6840, "loss": 0.6995619535446167, "lr": 1.6050149392180125e-05, "epoch": 0.657406053516596, "percentage": 32.87, "elapsed_time": "3:17:04", "remaining_time": "6:42:34"} +{"current_steps": 2249, "total_steps": 6840, "loss": 0.6637085676193237, "lr": 1.6046299223207432e-05, "epoch": 0.6576984939318614, "percentage": 32.88, "elapsed_time": "3:17:10", "remaining_time": "6:42:29"} +{"current_steps": 2250, "total_steps": 6840, "loss": 0.5834380388259888, "lr": 
1.6042447640949058e-05, "epoch": 0.6579909343471267, "percentage": 32.89, "elapsed_time": "3:17:15", "remaining_time": "6:42:24"} +{"current_steps": 2251, "total_steps": 6840, "loss": 0.5735288858413696, "lr": 1.6038594646305285e-05, "epoch": 0.6582833747623922, "percentage": 32.91, "elapsed_time": "3:17:21", "remaining_time": "6:42:21"} +{"current_steps": 2252, "total_steps": 6840, "loss": 0.6227413415908813, "lr": 1.6034740240176728e-05, "epoch": 0.6585758151776575, "percentage": 32.92, "elapsed_time": "3:17:27", "remaining_time": "6:42:17"} +{"current_steps": 2253, "total_steps": 6840, "loss": 0.6881246566772461, "lr": 1.6030884423464336e-05, "epoch": 0.658868255592923, "percentage": 32.94, "elapsed_time": "3:17:32", "remaining_time": "6:42:12"} +{"current_steps": 2254, "total_steps": 6840, "loss": 0.6059132814407349, "lr": 1.6027027197069376e-05, "epoch": 0.6591606960081884, "percentage": 32.95, "elapsed_time": "3:17:38", "remaining_time": "6:42:06"} +{"current_steps": 2255, "total_steps": 6840, "loss": 0.5829097032546997, "lr": 1.6023168561893453e-05, "epoch": 0.6594531364234537, "percentage": 32.97, "elapsed_time": "3:17:42", "remaining_time": "6:41:59"} +{"current_steps": 2256, "total_steps": 6840, "loss": 0.5173588991165161, "lr": 1.60193085188385e-05, "epoch": 0.6597455768387191, "percentage": 32.98, "elapsed_time": "3:17:47", "remaining_time": "6:41:54"} +{"current_steps": 2257, "total_steps": 6840, "loss": 0.5128534436225891, "lr": 1.601544706880678e-05, "epoch": 0.6600380172539845, "percentage": 33.0, "elapsed_time": "3:17:53", "remaining_time": "6:41:49"} +{"current_steps": 2258, "total_steps": 6840, "loss": 0.5472848415374756, "lr": 1.601158421270088e-05, "epoch": 0.6603304576692499, "percentage": 33.01, "elapsed_time": "3:17:57", "remaining_time": "6:41:41"} +{"current_steps": 2259, "total_steps": 6840, "loss": 0.5775434970855713, "lr": 1.6007719951423725e-05, "epoch": 0.6606228980845152, "percentage": 33.03, "elapsed_time": "3:18:01", 
"remaining_time": "6:41:35"} +{"current_steps": 2260, "total_steps": 6840, "loss": 0.5529654622077942, "lr": 1.6003854285878558e-05, "epoch": 0.6609153384997807, "percentage": 33.04, "elapsed_time": "3:18:06", "remaining_time": "6:41:29"} +{"current_steps": 2261, "total_steps": 6840, "loss": 0.5295222997665405, "lr": 1.5999987216968954e-05, "epoch": 0.661207778915046, "percentage": 33.06, "elapsed_time": "3:18:12", "remaining_time": "6:41:25"} +{"current_steps": 2262, "total_steps": 6840, "loss": 0.6782759428024292, "lr": 1.5996118745598817e-05, "epoch": 0.6615002193303114, "percentage": 33.07, "elapsed_time": "3:18:19", "remaining_time": "6:41:22"} +{"current_steps": 2263, "total_steps": 6840, "loss": 0.7698723077774048, "lr": 1.5992248872672384e-05, "epoch": 0.6617926597455769, "percentage": 33.08, "elapsed_time": "3:18:24", "remaining_time": "6:41:16"} +{"current_steps": 2264, "total_steps": 6840, "loss": 0.5056325793266296, "lr": 1.5988377599094208e-05, "epoch": 0.6620851001608422, "percentage": 33.1, "elapsed_time": "3:18:29", "remaining_time": "6:41:11"} +{"current_steps": 2265, "total_steps": 6840, "loss": 0.6748740673065186, "lr": 1.598450492576918e-05, "epoch": 0.6623775405761076, "percentage": 33.11, "elapsed_time": "3:18:35", "remaining_time": "6:41:08"} +{"current_steps": 2266, "total_steps": 6840, "loss": 0.6594111919403076, "lr": 1.598063085360251e-05, "epoch": 0.662669980991373, "percentage": 33.13, "elapsed_time": "3:18:41", "remaining_time": "6:41:03"} +{"current_steps": 2267, "total_steps": 6840, "loss": 0.5942472815513611, "lr": 1.5976755383499743e-05, "epoch": 0.6629624214066384, "percentage": 33.14, "elapsed_time": "3:18:45", "remaining_time": "6:40:57"} +{"current_steps": 2268, "total_steps": 6840, "loss": 0.6956725120544434, "lr": 1.5972878516366742e-05, "epoch": 0.6632548618219037, "percentage": 33.16, "elapsed_time": "3:18:51", "remaining_time": "6:40:52"} +{"current_steps": 2269, "total_steps": 6840, "loss": 0.6743103265762329, "lr": 
1.5969000253109707e-05, "epoch": 0.6635473022371692, "percentage": 33.17, "elapsed_time": "3:18:57", "remaining_time": "6:40:49"} +{"current_steps": 2270, "total_steps": 6840, "loss": 0.5452187061309814, "lr": 1.596512059463515e-05, "epoch": 0.6638397426524346, "percentage": 33.19, "elapsed_time": "3:19:03", "remaining_time": "6:40:44"} +{"current_steps": 2271, "total_steps": 6840, "loss": 0.6064754128456116, "lr": 1.5961239541849923e-05, "epoch": 0.6641321830676999, "percentage": 33.2, "elapsed_time": "3:19:07", "remaining_time": "6:40:38"} +{"current_steps": 2272, "total_steps": 6840, "loss": 0.5879498720169067, "lr": 1.59573570956612e-05, "epoch": 0.6644246234829654, "percentage": 33.22, "elapsed_time": "3:19:13", "remaining_time": "6:40:32"} +{"current_steps": 2273, "total_steps": 6840, "loss": 0.6610721945762634, "lr": 1.595347325697648e-05, "epoch": 0.6647170638982307, "percentage": 33.23, "elapsed_time": "3:19:18", "remaining_time": "6:40:27"} +{"current_steps": 2274, "total_steps": 6840, "loss": 0.6674839854240417, "lr": 1.594958802670358e-05, "epoch": 0.6650095043134961, "percentage": 33.25, "elapsed_time": "3:19:23", "remaining_time": "6:40:22"} +{"current_steps": 2275, "total_steps": 6840, "loss": 0.5189186334609985, "lr": 1.5945701405750654e-05, "epoch": 0.6653019447287615, "percentage": 33.26, "elapsed_time": "3:19:29", "remaining_time": "6:40:17"} +{"current_steps": 2276, "total_steps": 6840, "loss": 0.5225304365158081, "lr": 1.5941813395026174e-05, "epoch": 0.6655943851440269, "percentage": 33.27, "elapsed_time": "3:19:34", "remaining_time": "6:40:12"} +{"current_steps": 2277, "total_steps": 6840, "loss": 0.5426747798919678, "lr": 1.5937923995438942e-05, "epoch": 0.6658868255592922, "percentage": 33.29, "elapsed_time": "3:19:39", "remaining_time": "6:40:06"} +{"current_steps": 2278, "total_steps": 6840, "loss": 0.6408158540725708, "lr": 1.593403320789808e-05, "epoch": 0.6661792659745577, "percentage": 33.3, "elapsed_time": "3:19:46", 
"remaining_time": "6:40:05"} +{"current_steps": 2279, "total_steps": 6840, "loss": 0.6213311553001404, "lr": 1.5930141033313034e-05, "epoch": 0.6664717063898231, "percentage": 33.32, "elapsed_time": "3:19:52", "remaining_time": "6:40:01"} +{"current_steps": 2280, "total_steps": 6840, "loss": 0.6538233757019043, "lr": 1.5926247472593575e-05, "epoch": 0.6667641468050884, "percentage": 33.33, "elapsed_time": "3:19:58", "remaining_time": "6:39:56"} +{"current_steps": 2281, "total_steps": 6840, "loss": 0.6714701056480408, "lr": 1.5922352526649803e-05, "epoch": 0.6670565872203539, "percentage": 33.35, "elapsed_time": "3:20:03", "remaining_time": "6:39:50"} +{"current_steps": 2282, "total_steps": 6840, "loss": 0.501068115234375, "lr": 1.5918456196392137e-05, "epoch": 0.6673490276356192, "percentage": 33.36, "elapsed_time": "3:20:08", "remaining_time": "6:39:44"} +{"current_steps": 2283, "total_steps": 6840, "loss": 0.6551339626312256, "lr": 1.5914558482731317e-05, "epoch": 0.6676414680508846, "percentage": 33.38, "elapsed_time": "3:20:14", "remaining_time": "6:39:41"} +{"current_steps": 2284, "total_steps": 6840, "loss": 0.666611909866333, "lr": 1.5910659386578415e-05, "epoch": 0.66793390846615, "percentage": 33.39, "elapsed_time": "3:20:18", "remaining_time": "6:39:34"} +{"current_steps": 2285, "total_steps": 6840, "loss": 0.6612483859062195, "lr": 1.590675890884482e-05, "epoch": 0.6682263488814154, "percentage": 33.41, "elapsed_time": "3:20:25", "remaining_time": "6:39:32"} +{"current_steps": 2286, "total_steps": 6840, "loss": 0.5299272537231445, "lr": 1.590285705044224e-05, "epoch": 0.6685187892966808, "percentage": 33.42, "elapsed_time": "3:20:31", "remaining_time": "6:39:28"} +{"current_steps": 2287, "total_steps": 6840, "loss": 0.6873815655708313, "lr": 1.589895381228272e-05, "epoch": 0.6688112297119462, "percentage": 33.44, "elapsed_time": "3:20:37", "remaining_time": "6:39:24"} +{"current_steps": 2288, "total_steps": 6840, "loss": 0.6473613977432251, "lr": 
1.5895049195278608e-05, "epoch": 0.6691036701272116, "percentage": 33.45, "elapsed_time": "3:20:43", "remaining_time": "6:39:21"} +{"current_steps": 2289, "total_steps": 6840, "loss": 0.6600902080535889, "lr": 1.589114320034259e-05, "epoch": 0.6693961105424769, "percentage": 33.46, "elapsed_time": "3:20:48", "remaining_time": "6:39:15"} +{"current_steps": 2290, "total_steps": 6840, "loss": 0.6066039800643921, "lr": 1.5887235828387667e-05, "epoch": 0.6696885509577424, "percentage": 33.48, "elapsed_time": "3:20:53", "remaining_time": "6:39:09"} +{"current_steps": 2291, "total_steps": 6840, "loss": 0.5411461591720581, "lr": 1.5883327080327165e-05, "epoch": 0.6699809913730077, "percentage": 33.49, "elapsed_time": "3:20:59", "remaining_time": "6:39:04"} +{"current_steps": 2292, "total_steps": 6840, "loss": 0.5678138136863708, "lr": 1.587941695707473e-05, "epoch": 0.6702734317882731, "percentage": 33.51, "elapsed_time": "3:21:03", "remaining_time": "6:38:58"} +{"current_steps": 2293, "total_steps": 6840, "loss": 0.6175323724746704, "lr": 1.5875505459544327e-05, "epoch": 0.6705658722035386, "percentage": 33.52, "elapsed_time": "3:21:10", "remaining_time": "6:38:55"} +{"current_steps": 2294, "total_steps": 6840, "loss": 0.5790976285934448, "lr": 1.587159258865025e-05, "epoch": 0.6708583126188039, "percentage": 33.54, "elapsed_time": "3:21:15", "remaining_time": "6:38:50"} +{"current_steps": 2295, "total_steps": 6840, "loss": 0.5891247391700745, "lr": 1.58676783453071e-05, "epoch": 0.6711507530340693, "percentage": 33.55, "elapsed_time": "3:21:20", "remaining_time": "6:38:44"} +{"current_steps": 2296, "total_steps": 6840, "loss": 0.5604299902915955, "lr": 1.5863762730429817e-05, "epoch": 0.6714431934493347, "percentage": 33.57, "elapsed_time": "3:21:25", "remaining_time": "6:38:37"} +{"current_steps": 2297, "total_steps": 6840, "loss": 0.5402317047119141, "lr": 1.585984574493365e-05, "epoch": 0.6717356338646001, "percentage": 33.58, "elapsed_time": "3:21:31", 
"remaining_time": "6:38:33"} +{"current_steps": 2298, "total_steps": 6840, "loss": 0.5569097995758057, "lr": 1.5855927389734163e-05, "epoch": 0.6720280742798654, "percentage": 33.6, "elapsed_time": "3:21:36", "remaining_time": "6:38:28"} +{"current_steps": 2299, "total_steps": 6840, "loss": 0.6754734516143799, "lr": 1.5852007665747255e-05, "epoch": 0.6723205146951309, "percentage": 33.61, "elapsed_time": "3:21:42", "remaining_time": "6:38:25"} +{"current_steps": 2300, "total_steps": 6840, "loss": 0.5555064678192139, "lr": 1.584808657388914e-05, "epoch": 0.6726129551103962, "percentage": 33.63, "elapsed_time": "3:21:48", "remaining_time": "6:38:20"} +{"current_steps": 2301, "total_steps": 6840, "loss": 0.5735480785369873, "lr": 1.584416411507634e-05, "epoch": 0.6729053955256616, "percentage": 33.64, "elapsed_time": "3:21:58", "remaining_time": "6:38:25"} +{"current_steps": 2302, "total_steps": 6840, "loss": 0.6084697842597961, "lr": 1.5840240290225713e-05, "epoch": 0.6731978359409271, "percentage": 33.65, "elapsed_time": "3:22:03", "remaining_time": "6:38:19"} +{"current_steps": 2303, "total_steps": 6840, "loss": 0.5747361779212952, "lr": 1.5836315100254427e-05, "epoch": 0.6734902763561924, "percentage": 33.67, "elapsed_time": "3:22:09", "remaining_time": "6:38:14"} +{"current_steps": 2304, "total_steps": 6840, "loss": 0.6597394943237305, "lr": 1.583238854607997e-05, "epoch": 0.6737827167714578, "percentage": 33.68, "elapsed_time": "3:22:14", "remaining_time": "6:38:09"} +{"current_steps": 2305, "total_steps": 6840, "loss": 0.6054418087005615, "lr": 1.582846062862016e-05, "epoch": 0.6740751571867232, "percentage": 33.7, "elapsed_time": "3:22:20", "remaining_time": "6:38:06"} +{"current_steps": 2306, "total_steps": 6840, "loss": 0.6897715330123901, "lr": 1.5824531348793106e-05, "epoch": 0.6743675976019886, "percentage": 33.71, "elapsed_time": "3:22:24", "remaining_time": "6:37:58"} +{"current_steps": 2307, "total_steps": 6840, "loss": 0.5438888072967529, "lr": 
1.5820600707517265e-05, "epoch": 0.6746600380172539, "percentage": 33.73, "elapsed_time": "3:22:30", "remaining_time": "6:37:53"} +{"current_steps": 2308, "total_steps": 6840, "loss": 0.5139850378036499, "lr": 1.5816668705711402e-05, "epoch": 0.6749524784325194, "percentage": 33.74, "elapsed_time": "3:22:34", "remaining_time": "6:37:46"} +{"current_steps": 2309, "total_steps": 6840, "loss": 0.5970615744590759, "lr": 1.5812735344294594e-05, "epoch": 0.6752449188477848, "percentage": 33.76, "elapsed_time": "3:22:39", "remaining_time": "6:37:40"} +{"current_steps": 2310, "total_steps": 6840, "loss": 0.6206730604171753, "lr": 1.580880062418624e-05, "epoch": 0.6755373592630501, "percentage": 33.77, "elapsed_time": "3:22:43", "remaining_time": "6:37:32"} +{"current_steps": 2311, "total_steps": 6840, "loss": 0.6545864939689636, "lr": 1.580486454630606e-05, "epoch": 0.6758297996783156, "percentage": 33.79, "elapsed_time": "3:22:48", "remaining_time": "6:37:26"} +{"current_steps": 2312, "total_steps": 6840, "loss": 0.6284571290016174, "lr": 1.5800927111574084e-05, "epoch": 0.6761222400935809, "percentage": 33.8, "elapsed_time": "3:22:52", "remaining_time": "6:37:19"} +{"current_steps": 2313, "total_steps": 6840, "loss": 0.6662822365760803, "lr": 1.5796988320910665e-05, "epoch": 0.6764146805088463, "percentage": 33.82, "elapsed_time": "3:22:58", "remaining_time": "6:37:16"} +{"current_steps": 2314, "total_steps": 6840, "loss": 0.6952080130577087, "lr": 1.5793048175236477e-05, "epoch": 0.6767071209241117, "percentage": 33.83, "elapsed_time": "3:23:04", "remaining_time": "6:37:11"} +{"current_steps": 2315, "total_steps": 6840, "loss": 0.55562424659729, "lr": 1.5789106675472496e-05, "epoch": 0.6769995613393771, "percentage": 33.85, "elapsed_time": "3:23:10", "remaining_time": "6:37:08"} +{"current_steps": 2316, "total_steps": 6840, "loss": 0.696354866027832, "lr": 1.578516382254003e-05, "epoch": 0.6772920017546424, "percentage": 33.86, "elapsed_time": "3:23:17", 
"remaining_time": "6:37:05"} +{"current_steps": 2317, "total_steps": 6840, "loss": 0.5764954686164856, "lr": 1.5781219617360695e-05, "epoch": 0.6775844421699079, "percentage": 33.87, "elapsed_time": "3:23:24", "remaining_time": "6:37:03"} +{"current_steps": 2318, "total_steps": 6840, "loss": 0.6944533586502075, "lr": 1.577727406085642e-05, "epoch": 0.6778768825851733, "percentage": 33.89, "elapsed_time": "3:23:28", "remaining_time": "6:36:56"} +{"current_steps": 2319, "total_steps": 6840, "loss": 0.5517882704734802, "lr": 1.5773327153949465e-05, "epoch": 0.6781693230004386, "percentage": 33.9, "elapsed_time": "3:23:33", "remaining_time": "6:36:49"} +{"current_steps": 2320, "total_steps": 6840, "loss": 0.6151533126831055, "lr": 1.576937889756239e-05, "epoch": 0.6784617634157041, "percentage": 33.92, "elapsed_time": "3:23:38", "remaining_time": "6:36:45"} +{"current_steps": 2321, "total_steps": 6840, "loss": 0.6221417784690857, "lr": 1.5765429292618075e-05, "epoch": 0.6787542038309694, "percentage": 33.93, "elapsed_time": "3:23:44", "remaining_time": "6:36:40"} +{"current_steps": 2322, "total_steps": 6840, "loss": 0.6218827962875366, "lr": 1.576147834003972e-05, "epoch": 0.6790466442462348, "percentage": 33.95, "elapsed_time": "3:23:49", "remaining_time": "6:36:34"} +{"current_steps": 2323, "total_steps": 6840, "loss": 0.689696192741394, "lr": 1.575752604075083e-05, "epoch": 0.6793390846615002, "percentage": 33.96, "elapsed_time": "3:23:54", "remaining_time": "6:36:29"} +{"current_steps": 2324, "total_steps": 6840, "loss": 0.6457825899124146, "lr": 1.5753572395675234e-05, "epoch": 0.6796315250767656, "percentage": 33.98, "elapsed_time": "3:23:58", "remaining_time": "6:36:22"} +{"current_steps": 2325, "total_steps": 6840, "loss": 0.6261845827102661, "lr": 1.5749617405737075e-05, "epoch": 0.679923965492031, "percentage": 33.99, "elapsed_time": "3:24:02", "remaining_time": "6:36:13"} +{"current_steps": 2326, "total_steps": 6840, "loss": 0.6631760597229004, "lr": 
1.5745661071860802e-05, "epoch": 0.6802164059072964, "percentage": 34.01, "elapsed_time": "3:24:07", "remaining_time": "6:36:07"} +{"current_steps": 2327, "total_steps": 6840, "loss": 0.6223125457763672, "lr": 1.574170339497119e-05, "epoch": 0.6805088463225618, "percentage": 34.02, "elapsed_time": "3:24:11", "remaining_time": "6:36:00"} +{"current_steps": 2328, "total_steps": 6840, "loss": 0.5649152398109436, "lr": 1.5737744375993318e-05, "epoch": 0.6808012867378271, "percentage": 34.04, "elapsed_time": "3:24:17", "remaining_time": "6:35:56"} +{"current_steps": 2329, "total_steps": 6840, "loss": 0.6822011470794678, "lr": 1.573378401585259e-05, "epoch": 0.6810937271530926, "percentage": 34.05, "elapsed_time": "3:24:22", "remaining_time": "6:35:51"} +{"current_steps": 2330, "total_steps": 6840, "loss": 0.4853206276893616, "lr": 1.5729822315474704e-05, "epoch": 0.6813861675683579, "percentage": 34.06, "elapsed_time": "3:24:29", "remaining_time": "6:35:48"} +{"current_steps": 2331, "total_steps": 6840, "loss": 0.6410783529281616, "lr": 1.572585927578569e-05, "epoch": 0.6816786079836233, "percentage": 34.08, "elapsed_time": "3:24:33", "remaining_time": "6:35:41"} +{"current_steps": 2332, "total_steps": 6840, "loss": 0.607154369354248, "lr": 1.572189489771189e-05, "epoch": 0.6819710483988888, "percentage": 34.09, "elapsed_time": "3:24:38", "remaining_time": "6:35:36"} +{"current_steps": 2333, "total_steps": 6840, "loss": 0.5079061388969421, "lr": 1.571792918217994e-05, "epoch": 0.6822634888141541, "percentage": 34.11, "elapsed_time": "3:24:44", "remaining_time": "6:35:31"} +{"current_steps": 2334, "total_steps": 6840, "loss": 0.534178614616394, "lr": 1.5713962130116812e-05, "epoch": 0.6825559292294195, "percentage": 34.12, "elapsed_time": "3:24:48", "remaining_time": "6:35:23"} +{"current_steps": 2335, "total_steps": 6840, "loss": 0.6172807812690735, "lr": 1.5709993742449777e-05, "epoch": 0.6828483696446849, "percentage": 34.14, "elapsed_time": "3:24:53", 
"remaining_time": "6:35:19"} +{"current_steps": 2336, "total_steps": 6840, "loss": 0.6863975524902344, "lr": 1.5706024020106425e-05, "epoch": 0.6831408100599503, "percentage": 34.15, "elapsed_time": "3:25:00", "remaining_time": "6:35:15"} +{"current_steps": 2337, "total_steps": 6840, "loss": 0.6314880847930908, "lr": 1.570205296401465e-05, "epoch": 0.6834332504752156, "percentage": 34.17, "elapsed_time": "3:25:05", "remaining_time": "6:35:10"} +{"current_steps": 2338, "total_steps": 6840, "loss": 0.5420910120010376, "lr": 1.5698080575102662e-05, "epoch": 0.6837256908904811, "percentage": 34.18, "elapsed_time": "3:25:09", "remaining_time": "6:35:02"} +{"current_steps": 2339, "total_steps": 6840, "loss": 0.6598352789878845, "lr": 1.5694106854298988e-05, "epoch": 0.6840181313057464, "percentage": 34.2, "elapsed_time": "3:25:15", "remaining_time": "6:34:59"} +{"current_steps": 2340, "total_steps": 6840, "loss": 0.49957770109176636, "lr": 1.5690131802532454e-05, "epoch": 0.6843105717210118, "percentage": 34.21, "elapsed_time": "3:25:20", "remaining_time": "6:34:53"} +{"current_steps": 2341, "total_steps": 6840, "loss": 0.7217017412185669, "lr": 1.568615542073221e-05, "epoch": 0.6846030121362773, "percentage": 34.23, "elapsed_time": "3:25:25", "remaining_time": "6:34:47"} +{"current_steps": 2342, "total_steps": 6840, "loss": 0.5824606418609619, "lr": 1.5682177709827705e-05, "epoch": 0.6848954525515426, "percentage": 34.24, "elapsed_time": "3:25:31", "remaining_time": "6:34:43"} +{"current_steps": 2343, "total_steps": 6840, "loss": 0.5932704210281372, "lr": 1.567819867074871e-05, "epoch": 0.685187892966808, "percentage": 34.25, "elapsed_time": "3:25:35", "remaining_time": "6:34:36"} +{"current_steps": 2344, "total_steps": 6840, "loss": 0.6098836660385132, "lr": 1.5674218304425304e-05, "epoch": 0.6854803333820734, "percentage": 34.27, "elapsed_time": "3:25:40", "remaining_time": "6:34:30"} +{"current_steps": 2345, "total_steps": 6840, "loss": 0.5158270597457886, "lr": 
1.5670236611787865e-05, "epoch": 0.6857727737973388, "percentage": 34.28, "elapsed_time": "3:25:45", "remaining_time": "6:34:24"} +{"current_steps": 2346, "total_steps": 6840, "loss": 0.7840174436569214, "lr": 1.5666253593767095e-05, "epoch": 0.6860652142126041, "percentage": 34.3, "elapsed_time": "3:25:49", "remaining_time": "6:34:17"} +{"current_steps": 2347, "total_steps": 6840, "loss": 0.5665150880813599, "lr": 1.5662269251294e-05, "epoch": 0.6863576546278696, "percentage": 34.31, "elapsed_time": "3:25:55", "remaining_time": "6:34:12"} +{"current_steps": 2348, "total_steps": 6840, "loss": 0.5801588296890259, "lr": 1.5658283585299894e-05, "epoch": 0.686650095043135, "percentage": 34.33, "elapsed_time": "3:26:00", "remaining_time": "6:34:06"} +{"current_steps": 2349, "total_steps": 6840, "loss": 0.759188175201416, "lr": 1.56542965967164e-05, "epoch": 0.6869425354584003, "percentage": 34.34, "elapsed_time": "3:26:03", "remaining_time": "6:33:58"} +{"current_steps": 2350, "total_steps": 6840, "loss": 0.7182703018188477, "lr": 1.565030828647546e-05, "epoch": 0.6872349758736658, "percentage": 34.36, "elapsed_time": "3:26:08", "remaining_time": "6:33:51"} +{"current_steps": 2351, "total_steps": 6840, "loss": 0.7172018885612488, "lr": 1.564631865550931e-05, "epoch": 0.6875274162889311, "percentage": 34.37, "elapsed_time": "3:26:12", "remaining_time": "6:33:44"} +{"current_steps": 2352, "total_steps": 6840, "loss": 0.5959519743919373, "lr": 1.5642327704750502e-05, "epoch": 0.6878198567041965, "percentage": 34.39, "elapsed_time": "3:26:19", "remaining_time": "6:33:41"} +{"current_steps": 2353, "total_steps": 6840, "loss": 0.5531836748123169, "lr": 1.5638335435131902e-05, "epoch": 0.6881122971194619, "percentage": 34.4, "elapsed_time": "3:26:23", "remaining_time": "6:33:35"} +{"current_steps": 2354, "total_steps": 6840, "loss": 0.672225296497345, "lr": 1.5634341847586676e-05, "epoch": 0.6884047375347273, "percentage": 34.42, "elapsed_time": "3:26:29", "remaining_time": 
"6:33:30"} +{"current_steps": 2355, "total_steps": 6840, "loss": 0.5721465349197388, "lr": 1.5630346943048297e-05, "epoch": 0.6886971779499926, "percentage": 34.43, "elapsed_time": "3:26:33", "remaining_time": "6:33:23"} +{"current_steps": 2356, "total_steps": 6840, "loss": 0.6357900500297546, "lr": 1.5626350722450555e-05, "epoch": 0.6889896183652581, "percentage": 34.44, "elapsed_time": "3:26:40", "remaining_time": "6:33:21"} +{"current_steps": 2357, "total_steps": 6840, "loss": 0.6348878145217896, "lr": 1.5622353186727542e-05, "epoch": 0.6892820587805235, "percentage": 34.46, "elapsed_time": "3:26:47", "remaining_time": "6:33:18"} +{"current_steps": 2358, "total_steps": 6840, "loss": 0.5473623275756836, "lr": 1.5618354336813656e-05, "epoch": 0.6895744991957888, "percentage": 34.47, "elapsed_time": "3:26:52", "remaining_time": "6:33:12"} +{"current_steps": 2359, "total_steps": 6840, "loss": 0.8284158706665039, "lr": 1.5614354173643606e-05, "epoch": 0.6898669396110543, "percentage": 34.49, "elapsed_time": "3:26:57", "remaining_time": "6:33:07"} +{"current_steps": 2360, "total_steps": 6840, "loss": 0.5915359854698181, "lr": 1.5610352698152396e-05, "epoch": 0.6901593800263196, "percentage": 34.5, "elapsed_time": "3:27:01", "remaining_time": "6:32:59"} +{"current_steps": 2361, "total_steps": 6840, "loss": 0.6173555254936218, "lr": 1.560634991127536e-05, "epoch": 0.690451820441585, "percentage": 34.52, "elapsed_time": "3:27:06", "remaining_time": "6:32:54"} +{"current_steps": 2362, "total_steps": 6840, "loss": 0.5551577806472778, "lr": 1.560234581394812e-05, "epoch": 0.6907442608568504, "percentage": 34.53, "elapsed_time": "3:27:12", "remaining_time": "6:32:51"} +{"current_steps": 2363, "total_steps": 6840, "loss": 0.7160264253616333, "lr": 1.559834040710661e-05, "epoch": 0.6910367012721158, "percentage": 34.55, "elapsed_time": "3:27:19", "remaining_time": "6:32:47"} +{"current_steps": 2364, "total_steps": 6840, "loss": 0.5986248850822449, "lr": 1.5594333691687062e-05, 
"epoch": 0.6913291416873812, "percentage": 34.56, "elapsed_time": "3:27:24", "remaining_time": "6:32:42"} +{"current_steps": 2365, "total_steps": 6840, "loss": 0.7347019910812378, "lr": 1.559032566862603e-05, "epoch": 0.6916215821026466, "percentage": 34.58, "elapsed_time": "3:27:30", "remaining_time": "6:32:37"} +{"current_steps": 2366, "total_steps": 6840, "loss": 0.502663791179657, "lr": 1.5586316338860363e-05, "epoch": 0.691914022517912, "percentage": 34.59, "elapsed_time": "3:27:36", "remaining_time": "6:32:35"} +{"current_steps": 2367, "total_steps": 6840, "loss": 0.5026617050170898, "lr": 1.558230570332722e-05, "epoch": 0.6922064629331773, "percentage": 34.61, "elapsed_time": "3:27:41", "remaining_time": "6:32:29"} +{"current_steps": 2368, "total_steps": 6840, "loss": 0.6091101169586182, "lr": 1.5578293762964057e-05, "epoch": 0.6924989033484428, "percentage": 34.62, "elapsed_time": "3:27:46", "remaining_time": "6:32:23"} +{"current_steps": 2369, "total_steps": 6840, "loss": 0.6202579736709595, "lr": 1.5574280518708645e-05, "epoch": 0.6927913437637081, "percentage": 34.63, "elapsed_time": "3:27:52", "remaining_time": "6:32:18"} +{"current_steps": 2370, "total_steps": 6840, "loss": 0.6532948017120361, "lr": 1.557026597149905e-05, "epoch": 0.6930837841789735, "percentage": 34.65, "elapsed_time": "3:27:57", "remaining_time": "6:32:14"} +{"current_steps": 2371, "total_steps": 6840, "loss": 0.6197448372840881, "lr": 1.5566250122273658e-05, "epoch": 0.693376224594239, "percentage": 34.66, "elapsed_time": "3:28:04", "remaining_time": "6:32:11"} +{"current_steps": 2372, "total_steps": 6840, "loss": 0.6181553602218628, "lr": 1.556223297197114e-05, "epoch": 0.6936686650095043, "percentage": 34.68, "elapsed_time": "3:28:10", "remaining_time": "6:32:06"} +{"current_steps": 2373, "total_steps": 6840, "loss": 0.6015427112579346, "lr": 1.5558214521530482e-05, "epoch": 0.6939611054247697, "percentage": 34.69, "elapsed_time": "3:28:14", "remaining_time": "6:32:00"} 
+{"current_steps": 2374, "total_steps": 6840, "loss": 0.6204534769058228, "lr": 1.555419477189098e-05, "epoch": 0.6942535458400351, "percentage": 34.71, "elapsed_time": "3:28:19", "remaining_time": "6:31:54"} +{"current_steps": 2375, "total_steps": 6840, "loss": 0.5914584994316101, "lr": 1.5550173723992218e-05, "epoch": 0.6945459862553005, "percentage": 34.72, "elapsed_time": "3:28:25", "remaining_time": "6:31:50"} +{"current_steps": 2376, "total_steps": 6840, "loss": 0.5077188611030579, "lr": 1.554615137877409e-05, "epoch": 0.6948384266705658, "percentage": 34.74, "elapsed_time": "3:28:31", "remaining_time": "6:31:45"} +{"current_steps": 2377, "total_steps": 6840, "loss": 0.5560270547866821, "lr": 1.55421277371768e-05, "epoch": 0.6951308670858313, "percentage": 34.75, "elapsed_time": "3:28:36", "remaining_time": "6:31:40"} +{"current_steps": 2378, "total_steps": 6840, "loss": 0.7064549922943115, "lr": 1.553810280014085e-05, "epoch": 0.6954233075010966, "percentage": 34.77, "elapsed_time": "3:28:41", "remaining_time": "6:31:34"} +{"current_steps": 2379, "total_steps": 6840, "loss": 0.7433110475540161, "lr": 1.5534076568607043e-05, "epoch": 0.695715747916362, "percentage": 34.78, "elapsed_time": "3:28:46", "remaining_time": "6:31:29"} +{"current_steps": 2380, "total_steps": 6840, "loss": 0.6061110496520996, "lr": 1.553004904351648e-05, "epoch": 0.6960081883316275, "percentage": 34.8, "elapsed_time": "3:28:51", "remaining_time": "6:31:22"} +{"current_steps": 2381, "total_steps": 6840, "loss": 0.604006290435791, "lr": 1.5526020225810583e-05, "epoch": 0.6963006287468928, "percentage": 34.81, "elapsed_time": "3:28:55", "remaining_time": "6:31:16"} +{"current_steps": 2382, "total_steps": 6840, "loss": 0.6221635341644287, "lr": 1.5521990116431052e-05, "epoch": 0.6965930691621582, "percentage": 34.82, "elapsed_time": "3:29:00", "remaining_time": "6:31:10"} +{"current_steps": 2383, "total_steps": 6840, "loss": 0.5848093032836914, "lr": 1.551795871631991e-05, "epoch": 
0.6968855095774236, "percentage": 34.84, "elapsed_time": "3:29:07", "remaining_time": "6:31:07"} +{"current_steps": 2384, "total_steps": 6840, "loss": 0.6451606154441833, "lr": 1.5513926026419464e-05, "epoch": 0.697177949992689, "percentage": 34.85, "elapsed_time": "3:29:12", "remaining_time": "6:31:02"} +{"current_steps": 2385, "total_steps": 6840, "loss": 0.7922245264053345, "lr": 1.5509892047672336e-05, "epoch": 0.6974703904079543, "percentage": 34.87, "elapsed_time": "3:29:18", "remaining_time": "6:30:57"} +{"current_steps": 2386, "total_steps": 6840, "loss": 0.6458885073661804, "lr": 1.5505856781021443e-05, "epoch": 0.6977628308232198, "percentage": 34.88, "elapsed_time": "3:29:21", "remaining_time": "6:30:49"} +{"current_steps": 2387, "total_steps": 6840, "loss": 0.5989570617675781, "lr": 1.5501820227410002e-05, "epoch": 0.6980552712384852, "percentage": 34.9, "elapsed_time": "3:29:27", "remaining_time": "6:30:45"} +{"current_steps": 2388, "total_steps": 6840, "loss": 0.740998387336731, "lr": 1.5497782387781536e-05, "epoch": 0.6983477116537505, "percentage": 34.91, "elapsed_time": "3:29:32", "remaining_time": "6:30:38"} +{"current_steps": 2389, "total_steps": 6840, "loss": 0.63981032371521, "lr": 1.5493743263079866e-05, "epoch": 0.698640152069016, "percentage": 34.93, "elapsed_time": "3:29:37", "remaining_time": "6:30:32"} +{"current_steps": 2390, "total_steps": 6840, "loss": 0.766716480255127, "lr": 1.5489702854249106e-05, "epoch": 0.6989325924842813, "percentage": 34.94, "elapsed_time": "3:29:43", "remaining_time": "6:30:29"} +{"current_steps": 2391, "total_steps": 6840, "loss": 0.7879365086555481, "lr": 1.5485661162233684e-05, "epoch": 0.6992250328995467, "percentage": 34.96, "elapsed_time": "3:29:47", "remaining_time": "6:30:22"} +{"current_steps": 2392, "total_steps": 6840, "loss": 0.6005786657333374, "lr": 1.5481618187978322e-05, "epoch": 0.6995174733148121, "percentage": 34.97, "elapsed_time": "3:29:53", "remaining_time": "6:30:17"} +{"current_steps": 
2393, "total_steps": 6840, "loss": 0.6207927465438843, "lr": 1.5477573932428033e-05, "epoch": 0.6998099137300775, "percentage": 34.99, "elapsed_time": "3:29:57", "remaining_time": "6:30:10"} +{"current_steps": 2394, "total_steps": 6840, "loss": 0.5582053661346436, "lr": 1.5473528396528144e-05, "epoch": 0.7001023541453428, "percentage": 35.0, "elapsed_time": "3:30:03", "remaining_time": "6:30:06"} +{"current_steps": 2395, "total_steps": 6840, "loss": 0.5701307058334351, "lr": 1.5469481581224274e-05, "epoch": 0.7003947945606083, "percentage": 35.01, "elapsed_time": "3:30:09", "remaining_time": "6:30:02"} +{"current_steps": 2396, "total_steps": 6840, "loss": 0.6201068162918091, "lr": 1.546543348746233e-05, "epoch": 0.7006872349758737, "percentage": 35.03, "elapsed_time": "3:30:13", "remaining_time": "6:29:55"} +{"current_steps": 2397, "total_steps": 6840, "loss": 0.6102321147918701, "lr": 1.5461384116188546e-05, "epoch": 0.700979675391139, "percentage": 35.04, "elapsed_time": "3:30:19", "remaining_time": "6:29:51"} +{"current_steps": 2398, "total_steps": 6840, "loss": 0.5445820093154907, "lr": 1.545733346834943e-05, "epoch": 0.7012721158064045, "percentage": 35.06, "elapsed_time": "3:30:24", "remaining_time": "6:29:45"} +{"current_steps": 2399, "total_steps": 6840, "loss": 0.5278012752532959, "lr": 1.5453281544891797e-05, "epoch": 0.7015645562216698, "percentage": 35.07, "elapsed_time": "3:30:30", "remaining_time": "6:29:41"} +{"current_steps": 2400, "total_steps": 6840, "loss": 0.7051252126693726, "lr": 1.544922834676276e-05, "epoch": 0.7018569966369352, "percentage": 35.09, "elapsed_time": "3:30:35", "remaining_time": "6:29:35"} +{"current_steps": 2401, "total_steps": 6840, "loss": 0.6024646759033203, "lr": 1.544517387490973e-05, "epoch": 0.7021494370522006, "percentage": 35.1, "elapsed_time": "3:30:44", "remaining_time": "6:29:37"} +{"current_steps": 2402, "total_steps": 6840, "loss": 0.5563746094703674, "lr": 1.5441118130280406e-05, "epoch": 0.702441877467466, 
"percentage": 35.12, "elapsed_time": "3:30:48", "remaining_time": "6:29:30"} +{"current_steps": 2403, "total_steps": 6840, "loss": 0.5971669554710388, "lr": 1.5437061113822805e-05, "epoch": 0.7027343178827314, "percentage": 35.13, "elapsed_time": "3:30:54", "remaining_time": "6:29:24"} +{"current_steps": 2404, "total_steps": 6840, "loss": 0.5846019983291626, "lr": 1.5433002826485234e-05, "epoch": 0.7030267582979968, "percentage": 35.15, "elapsed_time": "3:30:58", "remaining_time": "6:29:17"} +{"current_steps": 2405, "total_steps": 6840, "loss": 0.5571885108947754, "lr": 1.5428943269216278e-05, "epoch": 0.7033191987132622, "percentage": 35.16, "elapsed_time": "3:31:02", "remaining_time": "6:29:10"} +{"current_steps": 2406, "total_steps": 6840, "loss": 0.4770846962928772, "lr": 1.542488244296484e-05, "epoch": 0.7036116391285275, "percentage": 35.18, "elapsed_time": "3:31:07", "remaining_time": "6:29:05"} +{"current_steps": 2407, "total_steps": 6840, "loss": 0.636760950088501, "lr": 1.542082034868012e-05, "epoch": 0.703904079543793, "percentage": 35.19, "elapsed_time": "3:31:11", "remaining_time": "6:28:58"} +{"current_steps": 2408, "total_steps": 6840, "loss": 0.7264662981033325, "lr": 1.5416756987311603e-05, "epoch": 0.7041965199590583, "percentage": 35.2, "elapsed_time": "3:31:17", "remaining_time": "6:28:53"} +{"current_steps": 2409, "total_steps": 6840, "loss": 0.6723978519439697, "lr": 1.5412692359809073e-05, "epoch": 0.7044889603743237, "percentage": 35.22, "elapsed_time": "3:31:21", "remaining_time": "6:28:46"} +{"current_steps": 2410, "total_steps": 6840, "loss": 0.6205083727836609, "lr": 1.5408626467122612e-05, "epoch": 0.7047814007895892, "percentage": 35.23, "elapsed_time": "3:31:26", "remaining_time": "6:28:40"} +{"current_steps": 2411, "total_steps": 6840, "loss": 0.5980903506278992, "lr": 1.54045593102026e-05, "epoch": 0.7050738412048545, "percentage": 35.25, "elapsed_time": "3:31:32", "remaining_time": "6:28:35"} +{"current_steps": 2412, "total_steps": 
6840, "loss": 0.6311691999435425, "lr": 1.540049088999971e-05, "epoch": 0.7053662816201199, "percentage": 35.26, "elapsed_time": "3:31:37", "remaining_time": "6:28:29"} +{"current_steps": 2413, "total_steps": 6840, "loss": 0.5872593522071838, "lr": 1.539642120746491e-05, "epoch": 0.7056587220353853, "percentage": 35.28, "elapsed_time": "3:31:43", "remaining_time": "6:28:26"} +{"current_steps": 2414, "total_steps": 6840, "loss": 0.5037539005279541, "lr": 1.5392350263549462e-05, "epoch": 0.7059511624506507, "percentage": 35.29, "elapsed_time": "3:31:48", "remaining_time": "6:28:21"} +{"current_steps": 2415, "total_steps": 6840, "loss": 0.5917855501174927, "lr": 1.538827805920493e-05, "epoch": 0.706243602865916, "percentage": 35.31, "elapsed_time": "3:31:52", "remaining_time": "6:28:13"} +{"current_steps": 2416, "total_steps": 6840, "loss": 0.6350749731063843, "lr": 1.538420459538316e-05, "epoch": 0.7065360432811815, "percentage": 35.32, "elapsed_time": "3:31:57", "remaining_time": "6:28:08"} +{"current_steps": 2417, "total_steps": 6840, "loss": 0.6828908920288086, "lr": 1.53801298730363e-05, "epoch": 0.7068284836964468, "percentage": 35.34, "elapsed_time": "3:32:02", "remaining_time": "6:28:02"} +{"current_steps": 2418, "total_steps": 6840, "loss": 0.6307995319366455, "lr": 1.5376053893116796e-05, "epoch": 0.7071209241117122, "percentage": 35.35, "elapsed_time": "3:32:06", "remaining_time": "6:27:53"} +{"current_steps": 2419, "total_steps": 6840, "loss": 0.5305014252662659, "lr": 1.5371976656577385e-05, "epoch": 0.7074133645269777, "percentage": 35.37, "elapsed_time": "3:32:11", "remaining_time": "6:27:48"} +{"current_steps": 2420, "total_steps": 6840, "loss": 0.560103178024292, "lr": 1.536789816437109e-05, "epoch": 0.707705804942243, "percentage": 35.38, "elapsed_time": "3:32:17", "remaining_time": "6:27:44"} +{"current_steps": 2421, "total_steps": 6840, "loss": 0.5449249148368835, "lr": 1.5363818417451236e-05, "epoch": 0.7079982453575084, "percentage": 35.39, 
"elapsed_time": "3:32:23", "remaining_time": "6:27:39"} +{"current_steps": 2422, "total_steps": 6840, "loss": 0.7456427812576294, "lr": 1.5359737416771438e-05, "epoch": 0.7082906857727738, "percentage": 35.41, "elapsed_time": "3:32:28", "remaining_time": "6:27:34"} +{"current_steps": 2423, "total_steps": 6840, "loss": 0.5401932597160339, "lr": 1.5355655163285607e-05, "epoch": 0.7085831261880392, "percentage": 35.42, "elapsed_time": "3:32:33", "remaining_time": "6:27:28"} +{"current_steps": 2424, "total_steps": 6840, "loss": 0.6215255856513977, "lr": 1.5351571657947947e-05, "epoch": 0.7088755666033045, "percentage": 35.44, "elapsed_time": "3:32:38", "remaining_time": "6:27:23"} +{"current_steps": 2425, "total_steps": 6840, "loss": 0.724073052406311, "lr": 1.5347486901712946e-05, "epoch": 0.70916800701857, "percentage": 35.45, "elapsed_time": "3:32:44", "remaining_time": "6:27:19"} +{"current_steps": 2426, "total_steps": 6840, "loss": 0.6375223398208618, "lr": 1.5343400895535402e-05, "epoch": 0.7094604474338354, "percentage": 35.47, "elapsed_time": "3:32:51", "remaining_time": "6:27:17"} +{"current_steps": 2427, "total_steps": 6840, "loss": 0.6087045669555664, "lr": 1.533931364037038e-05, "epoch": 0.7097528878491007, "percentage": 35.48, "elapsed_time": "3:32:56", "remaining_time": "6:27:10"} +{"current_steps": 2428, "total_steps": 6840, "loss": 0.7927658557891846, "lr": 1.5335225137173262e-05, "epoch": 0.7100453282643662, "percentage": 35.5, "elapsed_time": "3:33:01", "remaining_time": "6:27:05"} +{"current_steps": 2429, "total_steps": 6840, "loss": 0.6312417387962341, "lr": 1.5331135386899702e-05, "epoch": 0.7103377686796315, "percentage": 35.51, "elapsed_time": "3:33:05", "remaining_time": "6:26:58"} +{"current_steps": 2430, "total_steps": 6840, "loss": 0.6856948137283325, "lr": 1.5327044390505666e-05, "epoch": 0.7106302090948969, "percentage": 35.53, "elapsed_time": "3:33:11", "remaining_time": "6:26:53"} +{"current_steps": 2431, "total_steps": 6840, "loss": 
0.5683865547180176, "lr": 1.532295214894739e-05, "epoch": 0.7109226495101623, "percentage": 35.54, "elapsed_time": "3:33:16", "remaining_time": "6:26:48"} +{"current_steps": 2432, "total_steps": 6840, "loss": 0.6208291053771973, "lr": 1.5318858663181412e-05, "epoch": 0.7112150899254277, "percentage": 35.56, "elapsed_time": "3:33:21", "remaining_time": "6:26:42"} +{"current_steps": 2433, "total_steps": 6840, "loss": 0.6751389503479004, "lr": 1.531476393416456e-05, "epoch": 0.711507530340693, "percentage": 35.57, "elapsed_time": "3:33:27", "remaining_time": "6:26:38"} +{"current_steps": 2434, "total_steps": 6840, "loss": 0.422024667263031, "lr": 1.5310667962853954e-05, "epoch": 0.7117999707559585, "percentage": 35.58, "elapsed_time": "3:33:31", "remaining_time": "6:26:32"} +{"current_steps": 2435, "total_steps": 6840, "loss": 0.6714169979095459, "lr": 1.5306570750207003e-05, "epoch": 0.7120924111712239, "percentage": 35.6, "elapsed_time": "3:33:38", "remaining_time": "6:26:28"} +{"current_steps": 2436, "total_steps": 6840, "loss": 0.5757386088371277, "lr": 1.53024722971814e-05, "epoch": 0.7123848515864892, "percentage": 35.61, "elapsed_time": "3:33:43", "remaining_time": "6:26:23"} +{"current_steps": 2437, "total_steps": 6840, "loss": 0.5686037540435791, "lr": 1.529837260473514e-05, "epoch": 0.7126772920017547, "percentage": 35.63, "elapsed_time": "3:33:48", "remaining_time": "6:26:16"} +{"current_steps": 2438, "total_steps": 6840, "loss": 0.7601959705352783, "lr": 1.5294271673826498e-05, "epoch": 0.71296973241702, "percentage": 35.64, "elapsed_time": "3:33:53", "remaining_time": "6:26:11"} +{"current_steps": 2439, "total_steps": 6840, "loss": 0.5654840469360352, "lr": 1.529016950541404e-05, "epoch": 0.7132621728322854, "percentage": 35.66, "elapsed_time": "3:33:57", "remaining_time": "6:26:05"} +{"current_steps": 2440, "total_steps": 6840, "loss": 0.7009234428405762, "lr": 1.5286066100456623e-05, "epoch": 0.7135546132475508, "percentage": 35.67, "elapsed_time": 
"3:34:01", "remaining_time": "6:25:57"} +{"current_steps": 2441, "total_steps": 6840, "loss": 0.4856370687484741, "lr": 1.52819614599134e-05, "epoch": 0.7138470536628162, "percentage": 35.69, "elapsed_time": "3:34:07", "remaining_time": "6:25:52"} +{"current_steps": 2442, "total_steps": 6840, "loss": 0.5135019421577454, "lr": 1.52778555847438e-05, "epoch": 0.7141394940780816, "percentage": 35.7, "elapsed_time": "3:34:13", "remaining_time": "6:25:48"} +{"current_steps": 2443, "total_steps": 6840, "loss": 0.7350283861160278, "lr": 1.5273748475907542e-05, "epoch": 0.714431934493347, "percentage": 35.72, "elapsed_time": "3:34:19", "remaining_time": "6:25:44"} +{"current_steps": 2444, "total_steps": 6840, "loss": 0.5985803604125977, "lr": 1.5269640134364646e-05, "epoch": 0.7147243749086124, "percentage": 35.73, "elapsed_time": "3:34:24", "remaining_time": "6:25:39"} +{"current_steps": 2445, "total_steps": 6840, "loss": 0.6840892434120178, "lr": 1.5265530561075407e-05, "epoch": 0.7150168153238777, "percentage": 35.75, "elapsed_time": "3:34:30", "remaining_time": "6:25:35"} +{"current_steps": 2446, "total_steps": 6840, "loss": 0.6921327114105225, "lr": 1.5261419757000417e-05, "epoch": 0.7153092557391432, "percentage": 35.76, "elapsed_time": "3:34:36", "remaining_time": "6:25:31"} +{"current_steps": 2447, "total_steps": 6840, "loss": 0.6428500413894653, "lr": 1.525730772310055e-05, "epoch": 0.7156016961544085, "percentage": 35.77, "elapsed_time": "3:34:42", "remaining_time": "6:25:26"} +{"current_steps": 2448, "total_steps": 6840, "loss": 0.645559549331665, "lr": 1.5253194460336964e-05, "epoch": 0.7158941365696739, "percentage": 35.79, "elapsed_time": "3:34:46", "remaining_time": "6:25:19"} +{"current_steps": 2449, "total_steps": 6840, "loss": 0.6211013793945312, "lr": 1.5249079969671114e-05, "epoch": 0.7161865769849394, "percentage": 35.8, "elapsed_time": "3:34:50", "remaining_time": "6:25:11"} +{"current_steps": 2450, "total_steps": 6840, "loss": 0.5709721446037292, 
"lr": 1.5244964252064737e-05, "epoch": 0.7164790174002047, "percentage": 35.82, "elapsed_time": "3:34:56", "remaining_time": "6:25:08"} +{"current_steps": 2451, "total_steps": 6840, "loss": 0.6781377196311951, "lr": 1.5240847308479855e-05, "epoch": 0.7167714578154701, "percentage": 35.83, "elapsed_time": "3:35:00", "remaining_time": "6:25:01"} +{"current_steps": 2452, "total_steps": 6840, "loss": 0.6476876735687256, "lr": 1.523672913987878e-05, "epoch": 0.7170638982307355, "percentage": 35.85, "elapsed_time": "3:35:05", "remaining_time": "6:24:55"} +{"current_steps": 2453, "total_steps": 6840, "loss": 0.6564218997955322, "lr": 1.523260974722411e-05, "epoch": 0.7173563386460009, "percentage": 35.86, "elapsed_time": "3:35:10", "remaining_time": "6:24:49"} +{"current_steps": 2454, "total_steps": 6840, "loss": 0.6455773711204529, "lr": 1.5228489131478722e-05, "epoch": 0.7176487790612662, "percentage": 35.88, "elapsed_time": "3:35:15", "remaining_time": "6:24:43"} +{"current_steps": 2455, "total_steps": 6840, "loss": 0.6039570569992065, "lr": 1.5224367293605791e-05, "epoch": 0.7179412194765317, "percentage": 35.89, "elapsed_time": "3:35:21", "remaining_time": "6:24:39"} +{"current_steps": 2456, "total_steps": 6840, "loss": 0.7060747146606445, "lr": 1.522024423456877e-05, "epoch": 0.718233659891797, "percentage": 35.91, "elapsed_time": "3:35:27", "remaining_time": "6:24:35"} +{"current_steps": 2457, "total_steps": 6840, "loss": 0.561469316482544, "lr": 1.52161199553314e-05, "epoch": 0.7185261003070624, "percentage": 35.92, "elapsed_time": "3:35:34", "remaining_time": "6:24:32"} +{"current_steps": 2458, "total_steps": 6840, "loss": 0.6682697534561157, "lr": 1.5211994456857706e-05, "epoch": 0.7188185407223279, "percentage": 35.94, "elapsed_time": "3:35:38", "remaining_time": "6:24:25"} +{"current_steps": 2459, "total_steps": 6840, "loss": 0.7893983125686646, "lr": 1.5207867740111994e-05, "epoch": 0.7191109811375932, "percentage": 35.95, "elapsed_time": "3:35:42", 
"remaining_time": "6:24:18"} +{"current_steps": 2460, "total_steps": 6840, "loss": 0.617809534072876, "lr": 1.5203739806058863e-05, "epoch": 0.7194034215528586, "percentage": 35.96, "elapsed_time": "3:35:47", "remaining_time": "6:24:13"} +{"current_steps": 2461, "total_steps": 6840, "loss": 0.5444413423538208, "lr": 1.5199610655663193e-05, "epoch": 0.719695861968124, "percentage": 35.98, "elapsed_time": "3:35:52", "remaining_time": "6:24:07"} +{"current_steps": 2462, "total_steps": 6840, "loss": 0.615330696105957, "lr": 1.5195480289890146e-05, "epoch": 0.7199883023833894, "percentage": 35.99, "elapsed_time": "3:35:59", "remaining_time": "6:24:04"} +{"current_steps": 2463, "total_steps": 6840, "loss": 0.6811497211456299, "lr": 1.5191348709705169e-05, "epoch": 0.7202807427986547, "percentage": 36.01, "elapsed_time": "3:36:04", "remaining_time": "6:23:58"} +{"current_steps": 2464, "total_steps": 6840, "loss": 0.612322211265564, "lr": 1.5187215916073997e-05, "epoch": 0.7205731832139202, "percentage": 36.02, "elapsed_time": "3:36:10", "remaining_time": "6:23:54"} +{"current_steps": 2465, "total_steps": 6840, "loss": 0.6106880903244019, "lr": 1.518308190996264e-05, "epoch": 0.7208656236291856, "percentage": 36.04, "elapsed_time": "3:36:15", "remaining_time": "6:23:50"} +{"current_steps": 2466, "total_steps": 6840, "loss": 0.4901464581489563, "lr": 1.5178946692337405e-05, "epoch": 0.7211580640444509, "percentage": 36.05, "elapsed_time": "3:36:22", "remaining_time": "6:23:46"} +{"current_steps": 2467, "total_steps": 6840, "loss": 0.6777167320251465, "lr": 1.5174810264164865e-05, "epoch": 0.7214505044597164, "percentage": 36.07, "elapsed_time": "3:36:28", "remaining_time": "6:23:43"} +{"current_steps": 2468, "total_steps": 6840, "loss": 0.6353746056556702, "lr": 1.5170672626411888e-05, "epoch": 0.7217429448749817, "percentage": 36.08, "elapsed_time": "3:36:36", "remaining_time": "6:23:42"} +{"current_steps": 2469, "total_steps": 6840, "loss": 0.6218847632408142, "lr": 
1.516653378004563e-05, "epoch": 0.7220353852902471, "percentage": 36.1, "elapsed_time": "3:36:41", "remaining_time": "6:23:37"} +{"current_steps": 2470, "total_steps": 6840, "loss": 0.5001585483551025, "lr": 1.5162393726033508e-05, "epoch": 0.7223278257055125, "percentage": 36.11, "elapsed_time": "3:36:46", "remaining_time": "6:23:31"} +{"current_steps": 2471, "total_steps": 6840, "loss": 0.6801280975341797, "lr": 1.5158252465343242e-05, "epoch": 0.7226202661207779, "percentage": 36.13, "elapsed_time": "3:36:52", "remaining_time": "6:23:27"} +{"current_steps": 2472, "total_steps": 6840, "loss": 0.6739565134048462, "lr": 1.5154109998942823e-05, "epoch": 0.7229127065360432, "percentage": 36.14, "elapsed_time": "3:36:58", "remaining_time": "6:23:23"} +{"current_steps": 2473, "total_steps": 6840, "loss": 0.5970213413238525, "lr": 1.5149966327800532e-05, "epoch": 0.7232051469513087, "percentage": 36.15, "elapsed_time": "3:37:02", "remaining_time": "6:23:16"} +{"current_steps": 2474, "total_steps": 6840, "loss": 0.7367317080497742, "lr": 1.5145821452884923e-05, "epoch": 0.7234975873665741, "percentage": 36.17, "elapsed_time": "3:37:09", "remaining_time": "6:23:13"} +{"current_steps": 2475, "total_steps": 6840, "loss": 0.6332153677940369, "lr": 1.5141675375164839e-05, "epoch": 0.7237900277818394, "percentage": 36.18, "elapsed_time": "3:37:15", "remaining_time": "6:23:09"} +{"current_steps": 2476, "total_steps": 6840, "loss": 0.6185739636421204, "lr": 1.5137528095609395e-05, "epoch": 0.7240824681971049, "percentage": 36.2, "elapsed_time": "3:37:20", "remaining_time": "6:23:04"} +{"current_steps": 2477, "total_steps": 6840, "loss": 0.5982746481895447, "lr": 1.5133379615187996e-05, "epoch": 0.7243749086123702, "percentage": 36.21, "elapsed_time": "3:37:25", "remaining_time": "6:22:57"} +{"current_steps": 2478, "total_steps": 6840, "loss": 0.5946815013885498, "lr": 1.512922993487032e-05, "epoch": 0.7246673490276356, "percentage": 36.23, "elapsed_time": "3:37:31", 
"remaining_time": "6:22:53"} +{"current_steps": 2479, "total_steps": 6840, "loss": 0.5645624399185181, "lr": 1.5125079055626337e-05, "epoch": 0.724959789442901, "percentage": 36.24, "elapsed_time": "3:37:37", "remaining_time": "6:22:50"} +{"current_steps": 2480, "total_steps": 6840, "loss": 0.43329858779907227, "lr": 1.5120926978426288e-05, "epoch": 0.7252522298581664, "percentage": 36.26, "elapsed_time": "3:37:43", "remaining_time": "6:22:46"} +{"current_steps": 2481, "total_steps": 6840, "loss": 0.64244544506073, "lr": 1.5116773704240689e-05, "epoch": 0.7255446702734318, "percentage": 36.27, "elapsed_time": "3:37:47", "remaining_time": "6:22:38"} +{"current_steps": 2482, "total_steps": 6840, "loss": 0.6640222072601318, "lr": 1.5112619234040348e-05, "epoch": 0.7258371106886972, "percentage": 36.29, "elapsed_time": "3:37:53", "remaining_time": "6:22:34"} +{"current_steps": 2483, "total_steps": 6840, "loss": 0.6346921324729919, "lr": 1.5108463568796346e-05, "epoch": 0.7261295511039626, "percentage": 36.3, "elapsed_time": "3:37:59", "remaining_time": "6:22:30"} +{"current_steps": 2484, "total_steps": 6840, "loss": 0.5891947746276855, "lr": 1.5104306709480045e-05, "epoch": 0.7264219915192279, "percentage": 36.32, "elapsed_time": "3:38:03", "remaining_time": "6:22:23"} +{"current_steps": 2485, "total_steps": 6840, "loss": 0.616216242313385, "lr": 1.5100148657063089e-05, "epoch": 0.7267144319344934, "percentage": 36.33, "elapsed_time": "3:38:07", "remaining_time": "6:22:16"} +{"current_steps": 2486, "total_steps": 6840, "loss": 0.5961766242980957, "lr": 1.5095989412517389e-05, "epoch": 0.7270068723497587, "percentage": 36.35, "elapsed_time": "3:38:13", "remaining_time": "6:22:11"} +{"current_steps": 2487, "total_steps": 6840, "loss": 0.5629050731658936, "lr": 1.509182897681515e-05, "epoch": 0.7272993127650241, "percentage": 36.36, "elapsed_time": "3:38:19", "remaining_time": "6:22:08"} +{"current_steps": 2488, "total_steps": 6840, "loss": 0.6640661954879761, "lr": 
1.5087667350928844e-05, "epoch": 0.7275917531802896, "percentage": 36.37, "elapsed_time": "3:38:23", "remaining_time": "6:22:00"} +{"current_steps": 2489, "total_steps": 6840, "loss": 0.5884503126144409, "lr": 1.5083504535831233e-05, "epoch": 0.7278841935955549, "percentage": 36.39, "elapsed_time": "3:38:28", "remaining_time": "6:21:54"} +{"current_steps": 2490, "total_steps": 6840, "loss": 0.5395207405090332, "lr": 1.5079340532495344e-05, "epoch": 0.7281766340108203, "percentage": 36.4, "elapsed_time": "3:38:34", "remaining_time": "6:21:50"} +{"current_steps": 2491, "total_steps": 6840, "loss": 0.5713212490081787, "lr": 1.5075175341894487e-05, "epoch": 0.7284690744260857, "percentage": 36.42, "elapsed_time": "3:38:39", "remaining_time": "6:21:44"} +{"current_steps": 2492, "total_steps": 6840, "loss": 0.5732176303863525, "lr": 1.5071008965002252e-05, "epoch": 0.7287615148413511, "percentage": 36.43, "elapsed_time": "3:38:43", "remaining_time": "6:21:37"} +{"current_steps": 2493, "total_steps": 6840, "loss": 0.6381006240844727, "lr": 1.50668414027925e-05, "epoch": 0.7290539552566164, "percentage": 36.45, "elapsed_time": "3:38:48", "remaining_time": "6:21:32"} +{"current_steps": 2494, "total_steps": 6840, "loss": 0.6533833742141724, "lr": 1.5062672656239381e-05, "epoch": 0.7293463956718819, "percentage": 36.46, "elapsed_time": "3:38:54", "remaining_time": "6:21:28"} +{"current_steps": 2495, "total_steps": 6840, "loss": 0.5919456481933594, "lr": 1.5058502726317309e-05, "epoch": 0.7296388360871472, "percentage": 36.48, "elapsed_time": "3:39:01", "remaining_time": "6:21:25"} +{"current_steps": 2496, "total_steps": 6840, "loss": 0.6128921508789062, "lr": 1.5054331614000984e-05, "epoch": 0.7299312765024126, "percentage": 36.49, "elapsed_time": "3:39:07", "remaining_time": "6:21:21"} +{"current_steps": 2497, "total_steps": 6840, "loss": 0.5949394702911377, "lr": 1.5050159320265371e-05, "epoch": 0.7302237169176781, "percentage": 36.51, "elapsed_time": "3:39:13", 
"remaining_time": "6:21:17"} +{"current_steps": 2498, "total_steps": 6840, "loss": 0.6262483596801758, "lr": 1.5045985846085724e-05, "epoch": 0.7305161573329434, "percentage": 36.52, "elapsed_time": "3:39:18", "remaining_time": "6:21:11"} +{"current_steps": 2499, "total_steps": 6840, "loss": 0.5032243728637695, "lr": 1.5041811192437563e-05, "epoch": 0.7308085977482088, "percentage": 36.54, "elapsed_time": "3:39:22", "remaining_time": "6:21:04"} +{"current_steps": 2500, "total_steps": 6840, "loss": 0.6721810102462769, "lr": 1.5037635360296695e-05, "epoch": 0.7311010381634742, "percentage": 36.55, "elapsed_time": "3:39:27", "remaining_time": "6:20:58"} +{"current_steps": 2501, "total_steps": 6840, "loss": 0.7091001272201538, "lr": 1.5033458350639185e-05, "epoch": 0.7313934785787396, "percentage": 36.56, "elapsed_time": "3:39:36", "remaining_time": "6:21:00"} +{"current_steps": 2502, "total_steps": 6840, "loss": 0.5414971113204956, "lr": 1.5029280164441395e-05, "epoch": 0.7316859189940049, "percentage": 36.58, "elapsed_time": "3:39:42", "remaining_time": "6:20:56"} +{"current_steps": 2503, "total_steps": 6840, "loss": 0.6714789271354675, "lr": 1.5025100802679944e-05, "epoch": 0.7319783594092704, "percentage": 36.59, "elapsed_time": "3:39:47", "remaining_time": "6:20:50"} +{"current_steps": 2504, "total_steps": 6840, "loss": 0.5008493065834045, "lr": 1.5020920266331733e-05, "epoch": 0.7322707998245358, "percentage": 36.61, "elapsed_time": "3:39:51", "remaining_time": "6:20:42"} +{"current_steps": 2505, "total_steps": 6840, "loss": 0.563892126083374, "lr": 1.5016738556373936e-05, "epoch": 0.7325632402398011, "percentage": 36.62, "elapsed_time": "3:39:56", "remaining_time": "6:20:37"} +{"current_steps": 2506, "total_steps": 6840, "loss": 0.6371973752975464, "lr": 1.5012555673784004e-05, "epoch": 0.7328556806550666, "percentage": 36.64, "elapsed_time": "3:40:01", "remaining_time": "6:20:31"} +{"current_steps": 2507, "total_steps": 6840, "loss": 0.7365365624427795, "lr": 
1.5008371619539661e-05, "epoch": 0.7331481210703319, "percentage": 36.65, "elapsed_time": "3:40:06", "remaining_time": "6:20:25"} +{"current_steps": 2508, "total_steps": 6840, "loss": 0.5401967763900757, "lr": 1.5004186394618906e-05, "epoch": 0.7334405614855973, "percentage": 36.67, "elapsed_time": "3:40:11", "remaining_time": "6:20:19"} +{"current_steps": 2509, "total_steps": 6840, "loss": 0.5827134847640991, "lr": 1.5000000000000002e-05, "epoch": 0.7337330019008627, "percentage": 36.68, "elapsed_time": "3:40:16", "remaining_time": "6:20:13"} +{"current_steps": 2510, "total_steps": 6840, "loss": 0.7655869126319885, "lr": 1.49958124366615e-05, "epoch": 0.7340254423161281, "percentage": 36.7, "elapsed_time": "3:40:21", "remaining_time": "6:20:08"} +{"current_steps": 2511, "total_steps": 6840, "loss": 0.5410823822021484, "lr": 1.4991623705582216e-05, "epoch": 0.7343178827313934, "percentage": 36.71, "elapsed_time": "3:40:26", "remaining_time": "6:20:02"} +{"current_steps": 2512, "total_steps": 6840, "loss": 0.6831178665161133, "lr": 1.4987433807741242e-05, "epoch": 0.7346103231466589, "percentage": 36.73, "elapsed_time": "3:40:31", "remaining_time": "6:19:56"} +{"current_steps": 2513, "total_steps": 6840, "loss": 0.4952821731567383, "lr": 1.498324274411794e-05, "epoch": 0.7349027635619243, "percentage": 36.74, "elapsed_time": "3:40:38", "remaining_time": "6:19:53"} +{"current_steps": 2514, "total_steps": 6840, "loss": 0.6973339319229126, "lr": 1.4979050515691944e-05, "epoch": 0.7351952039771896, "percentage": 36.75, "elapsed_time": "3:40:43", "remaining_time": "6:19:49"} +{"current_steps": 2515, "total_steps": 6840, "loss": 0.6604373455047607, "lr": 1.4974857123443163e-05, "epoch": 0.7354876443924551, "percentage": 36.77, "elapsed_time": "3:40:49", "remaining_time": "6:19:44"} +{"current_steps": 2516, "total_steps": 6840, "loss": 0.6523034572601318, "lr": 1.4970662568351776e-05, "epoch": 0.7357800848077204, "percentage": 36.78, "elapsed_time": "3:40:54", 
"remaining_time": "6:19:39"} +{"current_steps": 2517, "total_steps": 6840, "loss": 0.6557538509368896, "lr": 1.4966466851398238e-05, "epoch": 0.7360725252229858, "percentage": 36.8, "elapsed_time": "3:41:00", "remaining_time": "6:19:34"} +{"current_steps": 2518, "total_steps": 6840, "loss": 0.6993967294692993, "lr": 1.4962269973563269e-05, "epoch": 0.7363649656382512, "percentage": 36.81, "elapsed_time": "3:41:06", "remaining_time": "6:19:31"} +{"current_steps": 2519, "total_steps": 6840, "loss": 0.611979067325592, "lr": 1.4958071935827862e-05, "epoch": 0.7366574060535166, "percentage": 36.83, "elapsed_time": "3:41:12", "remaining_time": "6:19:26"} +{"current_steps": 2520, "total_steps": 6840, "loss": 0.9108786582946777, "lr": 1.4953872739173289e-05, "epoch": 0.736949846468782, "percentage": 36.84, "elapsed_time": "3:41:17", "remaining_time": "6:19:21"} +{"current_steps": 2521, "total_steps": 6840, "loss": 0.7086392045021057, "lr": 1.4949672384581082e-05, "epoch": 0.7372422868840474, "percentage": 36.86, "elapsed_time": "3:41:23", "remaining_time": "6:19:17"} +{"current_steps": 2522, "total_steps": 6840, "loss": 0.6103025674819946, "lr": 1.494547087303305e-05, "epoch": 0.7375347272993128, "percentage": 36.87, "elapsed_time": "3:41:29", "remaining_time": "6:19:13"} +{"current_steps": 2523, "total_steps": 6840, "loss": 0.5597528219223022, "lr": 1.4941268205511272e-05, "epoch": 0.7378271677145781, "percentage": 36.89, "elapsed_time": "3:41:33", "remaining_time": "6:19:05"} +{"current_steps": 2524, "total_steps": 6840, "loss": 0.6222598552703857, "lr": 1.4937064382998091e-05, "epoch": 0.7381196081298436, "percentage": 36.9, "elapsed_time": "3:41:38", "remaining_time": "6:19:00"} +{"current_steps": 2525, "total_steps": 6840, "loss": 0.6083353757858276, "lr": 1.4932859406476131e-05, "epoch": 0.7384120485451089, "percentage": 36.92, "elapsed_time": "3:41:43", "remaining_time": "6:18:53"} +{"current_steps": 2526, "total_steps": 6840, "loss": 0.47920671105384827, "lr": 
1.4928653276928275e-05, "epoch": 0.7387044889603743, "percentage": 36.93, "elapsed_time": "3:41:47", "remaining_time": "6:18:46"} +{"current_steps": 2527, "total_steps": 6840, "loss": 0.5752983093261719, "lr": 1.4924445995337685e-05, "epoch": 0.7389969293756398, "percentage": 36.94, "elapsed_time": "3:41:53", "remaining_time": "6:18:43"} +{"current_steps": 2528, "total_steps": 6840, "loss": 0.6275333762168884, "lr": 1.4920237562687784e-05, "epoch": 0.7392893697909051, "percentage": 36.96, "elapsed_time": "3:41:59", "remaining_time": "6:18:39"} +{"current_steps": 2529, "total_steps": 6840, "loss": 0.6362103223800659, "lr": 1.4916027979962266e-05, "epoch": 0.7395818102061705, "percentage": 36.97, "elapsed_time": "3:42:04", "remaining_time": "6:18:33"} +{"current_steps": 2530, "total_steps": 6840, "loss": 0.5902664661407471, "lr": 1.49118172481451e-05, "epoch": 0.7398742506214359, "percentage": 36.99, "elapsed_time": "3:42:10", "remaining_time": "6:18:29"} +{"current_steps": 2531, "total_steps": 6840, "loss": 0.5293874740600586, "lr": 1.4907605368220514e-05, "epoch": 0.7401666910367013, "percentage": 37.0, "elapsed_time": "3:42:16", "remaining_time": "6:18:24"} +{"current_steps": 2532, "total_steps": 6840, "loss": 0.7298746109008789, "lr": 1.4903392341173013e-05, "epoch": 0.7404591314519666, "percentage": 37.02, "elapsed_time": "3:42:22", "remaining_time": "6:18:20"} +{"current_steps": 2533, "total_steps": 6840, "loss": 0.6428382396697998, "lr": 1.4899178167987367e-05, "epoch": 0.7407515718672321, "percentage": 37.03, "elapsed_time": "3:42:27", "remaining_time": "6:18:15"} +{"current_steps": 2534, "total_steps": 6840, "loss": 0.6204425096511841, "lr": 1.489496284964861e-05, "epoch": 0.7410440122824974, "percentage": 37.05, "elapsed_time": "3:42:33", "remaining_time": "6:18:10"} +{"current_steps": 2535, "total_steps": 6840, "loss": 0.6025601625442505, "lr": 1.4890746387142052e-05, "epoch": 0.7413364526977628, "percentage": 37.06, "elapsed_time": "3:42:39", 
"remaining_time": "6:18:08"} +{"current_steps": 2536, "total_steps": 6840, "loss": 0.5570085644721985, "lr": 1.4886528781453258e-05, "epoch": 0.7416288931130283, "percentage": 37.08, "elapsed_time": "3:42:45", "remaining_time": "6:18:03"} +{"current_steps": 2537, "total_steps": 6840, "loss": 0.6816439628601074, "lr": 1.4882310033568072e-05, "epoch": 0.7419213335282936, "percentage": 37.09, "elapsed_time": "3:42:50", "remaining_time": "6:17:58"} +{"current_steps": 2538, "total_steps": 6840, "loss": 0.5424396991729736, "lr": 1.4878090144472603e-05, "epoch": 0.742213773943559, "percentage": 37.11, "elapsed_time": "3:42:57", "remaining_time": "6:17:55"} +{"current_steps": 2539, "total_steps": 6840, "loss": 0.58860182762146, "lr": 1.4873869115153223e-05, "epoch": 0.7425062143588244, "percentage": 37.12, "elapsed_time": "3:43:02", "remaining_time": "6:17:49"} +{"current_steps": 2540, "total_steps": 6840, "loss": 0.513140857219696, "lr": 1.4869646946596568e-05, "epoch": 0.7427986547740898, "percentage": 37.13, "elapsed_time": "3:43:07", "remaining_time": "6:17:43"} +{"current_steps": 2541, "total_steps": 6840, "loss": 0.5967035293579102, "lr": 1.486542363978955e-05, "epoch": 0.7430910951893551, "percentage": 37.15, "elapsed_time": "3:43:12", "remaining_time": "6:17:38"} +{"current_steps": 2542, "total_steps": 6840, "loss": 0.6988440752029419, "lr": 1.4861199195719334e-05, "epoch": 0.7433835356046206, "percentage": 37.16, "elapsed_time": "3:43:18", "remaining_time": "6:17:34"} +{"current_steps": 2543, "total_steps": 6840, "loss": 0.6176164746284485, "lr": 1.4856973615373366e-05, "epoch": 0.743675976019886, "percentage": 37.18, "elapsed_time": "3:43:24", "remaining_time": "6:17:29"} +{"current_steps": 2544, "total_steps": 6840, "loss": 0.5616505742073059, "lr": 1.4852746899739346e-05, "epoch": 0.7439684164351513, "percentage": 37.19, "elapsed_time": "3:43:29", "remaining_time": "6:17:23"} +{"current_steps": 2545, "total_steps": 6840, "loss": 0.5470465421676636, "lr": 
1.4848519049805243e-05, "epoch": 0.7442608568504168, "percentage": 37.21, "elapsed_time": "3:43:35", "remaining_time": "6:17:20"} +{"current_steps": 2546, "total_steps": 6840, "loss": 0.6362754106521606, "lr": 1.4844290066559292e-05, "epoch": 0.7445532972656821, "percentage": 37.22, "elapsed_time": "3:43:40", "remaining_time": "6:17:15"} +{"current_steps": 2547, "total_steps": 6840, "loss": 0.6290515661239624, "lr": 1.4840059950989992e-05, "epoch": 0.7448457376809475, "percentage": 37.24, "elapsed_time": "3:43:47", "remaining_time": "6:17:11"} +{"current_steps": 2548, "total_steps": 6840, "loss": 0.7225647568702698, "lr": 1.4835828704086105e-05, "epoch": 0.7451381780962129, "percentage": 37.25, "elapsed_time": "3:43:52", "remaining_time": "6:17:06"} +{"current_steps": 2549, "total_steps": 6840, "loss": 0.6993023157119751, "lr": 1.483159632683666e-05, "epoch": 0.7454306185114783, "percentage": 37.27, "elapsed_time": "3:43:56", "remaining_time": "6:16:59"} +{"current_steps": 2550, "total_steps": 6840, "loss": 0.6960086226463318, "lr": 1.482736282023095e-05, "epoch": 0.7457230589267436, "percentage": 37.28, "elapsed_time": "3:44:01", "remaining_time": "6:16:52"} +{"current_steps": 2551, "total_steps": 6840, "loss": 0.627712607383728, "lr": 1.4823128185258535e-05, "epoch": 0.7460154993420091, "percentage": 37.3, "elapsed_time": "3:44:06", "remaining_time": "6:16:47"} +{"current_steps": 2552, "total_steps": 6840, "loss": 0.6314729452133179, "lr": 1.481889242290923e-05, "epoch": 0.7463079397572745, "percentage": 37.31, "elapsed_time": "3:44:11", "remaining_time": "6:16:42"} +{"current_steps": 2553, "total_steps": 6840, "loss": 0.5948070287704468, "lr": 1.4814655534173121e-05, "epoch": 0.7466003801725398, "percentage": 37.32, "elapsed_time": "3:44:17", "remaining_time": "6:16:37"} +{"current_steps": 2554, "total_steps": 6840, "loss": 0.6227586269378662, "lr": 1.4810417520040551e-05, "epoch": 0.7468928205878053, "percentage": 37.34, "elapsed_time": "3:44:22", 
"remaining_time": "6:16:31"} +{"current_steps": 2555, "total_steps": 6840, "loss": 0.589213490486145, "lr": 1.4806178381502139e-05, "epoch": 0.7471852610030706, "percentage": 37.35, "elapsed_time": "3:44:26", "remaining_time": "6:16:24"} +{"current_steps": 2556, "total_steps": 6840, "loss": 0.6748968362808228, "lr": 1.4801938119548748e-05, "epoch": 0.747477701418336, "percentage": 37.37, "elapsed_time": "3:44:32", "remaining_time": "6:16:20"} +{"current_steps": 2557, "total_steps": 6840, "loss": 0.627450704574585, "lr": 1.4797696735171521e-05, "epoch": 0.7477701418336014, "percentage": 37.38, "elapsed_time": "3:44:37", "remaining_time": "6:16:15"} +{"current_steps": 2558, "total_steps": 6840, "loss": 0.5816184878349304, "lr": 1.479345422936185e-05, "epoch": 0.7480625822488668, "percentage": 37.4, "elapsed_time": "3:44:42", "remaining_time": "6:16:09"} +{"current_steps": 2559, "total_steps": 6840, "loss": 0.5184855461120605, "lr": 1.4789210603111399e-05, "epoch": 0.7483550226641322, "percentage": 37.41, "elapsed_time": "3:44:46", "remaining_time": "6:16:02"} +{"current_steps": 2560, "total_steps": 6840, "loss": 0.5747300982475281, "lr": 1.4784965857412088e-05, "epoch": 0.7486474630793976, "percentage": 37.43, "elapsed_time": "3:44:51", "remaining_time": "6:15:56"} +{"current_steps": 2561, "total_steps": 6840, "loss": 0.6957682371139526, "lr": 1.4780719993256104e-05, "epoch": 0.748939903494663, "percentage": 37.44, "elapsed_time": "3:44:56", "remaining_time": "6:15:51"} +{"current_steps": 2562, "total_steps": 6840, "loss": 0.5711330771446228, "lr": 1.4776473011635886e-05, "epoch": 0.7492323439099283, "percentage": 37.46, "elapsed_time": "3:45:02", "remaining_time": "6:15:46"} +{"current_steps": 2563, "total_steps": 6840, "loss": 0.687350869178772, "lr": 1.4772224913544142e-05, "epoch": 0.7495247843251938, "percentage": 37.47, "elapsed_time": "3:45:07", "remaining_time": "6:15:40"} +{"current_steps": 2564, "total_steps": 6840, "loss": 0.71396803855896, "lr": 
1.476797569997384e-05, "epoch": 0.7498172247404591, "percentage": 37.49, "elapsed_time": "3:45:12", "remaining_time": "6:15:34"} +{"current_steps": 2565, "total_steps": 6840, "loss": 0.5457814335823059, "lr": 1.4763725371918209e-05, "epoch": 0.7501096651557245, "percentage": 37.5, "elapsed_time": "3:45:17", "remaining_time": "6:15:29"} +{"current_steps": 2566, "total_steps": 6840, "loss": 0.5889413952827454, "lr": 1.4759473930370738e-05, "epoch": 0.75040210557099, "percentage": 37.51, "elapsed_time": "3:45:23", "remaining_time": "6:15:24"} +{"current_steps": 2567, "total_steps": 6840, "loss": 0.6222226619720459, "lr": 1.4755221376325171e-05, "epoch": 0.7506945459862553, "percentage": 37.53, "elapsed_time": "3:45:28", "remaining_time": "6:15:18"} +{"current_steps": 2568, "total_steps": 6840, "loss": 0.5273243188858032, "lr": 1.475096771077552e-05, "epoch": 0.7509869864015207, "percentage": 37.54, "elapsed_time": "3:45:33", "remaining_time": "6:15:13"} +{"current_steps": 2569, "total_steps": 6840, "loss": 0.5665162801742554, "lr": 1.4746712934716055e-05, "epoch": 0.7512794268167861, "percentage": 37.56, "elapsed_time": "3:45:38", "remaining_time": "6:15:08"} +{"current_steps": 2570, "total_steps": 6840, "loss": 0.5748391151428223, "lr": 1.4742457049141298e-05, "epoch": 0.7515718672320515, "percentage": 37.57, "elapsed_time": "3:45:44", "remaining_time": "6:15:03"} +{"current_steps": 2571, "total_steps": 6840, "loss": 0.7002041339874268, "lr": 1.4738200055046044e-05, "epoch": 0.7518643076473168, "percentage": 37.59, "elapsed_time": "3:45:48", "remaining_time": "6:14:56"} +{"current_steps": 2572, "total_steps": 6840, "loss": 0.6841630935668945, "lr": 1.4733941953425337e-05, "epoch": 0.7521567480625823, "percentage": 37.6, "elapsed_time": "3:45:53", "remaining_time": "6:14:51"} +{"current_steps": 2573, "total_steps": 6840, "loss": 0.7047172784805298, "lr": 1.4729682745274478e-05, "epoch": 0.7524491884778476, "percentage": 37.62, "elapsed_time": "3:45:59", 
"remaining_time": "6:14:46"} +{"current_steps": 2574, "total_steps": 6840, "loss": 0.6979919672012329, "lr": 1.4725422431589035e-05, "epoch": 0.752741628893113, "percentage": 37.63, "elapsed_time": "3:46:05", "remaining_time": "6:14:42"} +{"current_steps": 2575, "total_steps": 6840, "loss": 0.6437125205993652, "lr": 1.4721161013364829e-05, "epoch": 0.7530340693083785, "percentage": 37.65, "elapsed_time": "3:46:10", "remaining_time": "6:14:37"} +{"current_steps": 2576, "total_steps": 6840, "loss": 0.591254711151123, "lr": 1.4716898491597942e-05, "epoch": 0.7533265097236438, "percentage": 37.66, "elapsed_time": "3:46:15", "remaining_time": "6:14:31"} +{"current_steps": 2577, "total_steps": 6840, "loss": 0.6276297569274902, "lr": 1.4712634867284714e-05, "epoch": 0.7536189501389092, "percentage": 37.68, "elapsed_time": "3:46:19", "remaining_time": "6:14:24"} +{"current_steps": 2578, "total_steps": 6840, "loss": 0.5310626029968262, "lr": 1.4708370141421737e-05, "epoch": 0.7539113905541746, "percentage": 37.69, "elapsed_time": "3:46:24", "remaining_time": "6:14:18"} +{"current_steps": 2579, "total_steps": 6840, "loss": 0.5256849527359009, "lr": 1.4704104315005864e-05, "epoch": 0.75420383096944, "percentage": 37.7, "elapsed_time": "3:46:29", "remaining_time": "6:14:12"} +{"current_steps": 2580, "total_steps": 6840, "loss": 0.6050584316253662, "lr": 1.4699837389034212e-05, "epoch": 0.7544962713847053, "percentage": 37.72, "elapsed_time": "3:46:34", "remaining_time": "6:14:06"} +{"current_steps": 2581, "total_steps": 6840, "loss": 0.5124386548995972, "lr": 1.4695569364504144e-05, "epoch": 0.7547887117999708, "percentage": 37.73, "elapsed_time": "3:46:38", "remaining_time": "6:13:59"} +{"current_steps": 2582, "total_steps": 6840, "loss": 0.5631951093673706, "lr": 1.4691300242413289e-05, "epoch": 0.7550811522152362, "percentage": 37.75, "elapsed_time": "3:46:44", "remaining_time": "6:13:54"} +{"current_steps": 2583, "total_steps": 6840, "loss": 0.6352444291114807, "lr": 
1.4687030023759527e-05, "epoch": 0.7553735926305015, "percentage": 37.76, "elapsed_time": "3:46:50", "remaining_time": "6:13:50"} +{"current_steps": 2584, "total_steps": 6840, "loss": 0.6717500686645508, "lr": 1.4682758709540992e-05, "epoch": 0.755666033045767, "percentage": 37.78, "elapsed_time": "3:46:54", "remaining_time": "6:13:44"} +{"current_steps": 2585, "total_steps": 6840, "loss": 0.5889217853546143, "lr": 1.467848630075608e-05, "epoch": 0.7559584734610323, "percentage": 37.79, "elapsed_time": "3:46:59", "remaining_time": "6:13:37"} +{"current_steps": 2586, "total_steps": 6840, "loss": 0.49069908261299133, "lr": 1.4674212798403443e-05, "epoch": 0.7562509138762977, "percentage": 37.81, "elapsed_time": "3:47:04", "remaining_time": "6:13:33"} +{"current_steps": 2587, "total_steps": 6840, "loss": 0.6272397041320801, "lr": 1.4669938203481982e-05, "epoch": 0.756543354291563, "percentage": 37.82, "elapsed_time": "3:47:09", "remaining_time": "6:13:27"} +{"current_steps": 2588, "total_steps": 6840, "loss": 0.6218451261520386, "lr": 1.466566251699086e-05, "epoch": 0.7568357947068285, "percentage": 37.84, "elapsed_time": "3:47:14", "remaining_time": "6:13:21"} +{"current_steps": 2589, "total_steps": 6840, "loss": 0.6174849271774292, "lr": 1.4661385739929492e-05, "epoch": 0.7571282351220939, "percentage": 37.85, "elapsed_time": "3:47:19", "remaining_time": "6:13:15"} +{"current_steps": 2590, "total_steps": 6840, "loss": 0.5595160126686096, "lr": 1.465710787329755e-05, "epoch": 0.7574206755373593, "percentage": 37.87, "elapsed_time": "3:47:25", "remaining_time": "6:13:10"} +{"current_steps": 2591, "total_steps": 6840, "loss": 0.757240891456604, "lr": 1.4652828918094954e-05, "epoch": 0.7577131159526247, "percentage": 37.88, "elapsed_time": "3:47:30", "remaining_time": "6:13:05"} +{"current_steps": 2592, "total_steps": 6840, "loss": 0.630811333656311, "lr": 1.4648548875321893e-05, "epoch": 0.75800555636789, "percentage": 37.89, "elapsed_time": "3:47:35", 
"remaining_time": "6:13:00"} +{"current_steps": 2593, "total_steps": 6840, "loss": 0.5857812762260437, "lr": 1.4644267745978797e-05, "epoch": 0.7582979967831555, "percentage": 37.91, "elapsed_time": "3:47:40", "remaining_time": "6:12:54"} +{"current_steps": 2594, "total_steps": 6840, "loss": 0.5869519710540771, "lr": 1.463998553106635e-05, "epoch": 0.7585904371984208, "percentage": 37.92, "elapsed_time": "3:47:47", "remaining_time": "6:12:51"} +{"current_steps": 2595, "total_steps": 6840, "loss": 0.5610413551330566, "lr": 1.4635702231585498e-05, "epoch": 0.7588828776136862, "percentage": 37.94, "elapsed_time": "3:47:53", "remaining_time": "6:12:47"} +{"current_steps": 2596, "total_steps": 6840, "loss": 0.5634676218032837, "lr": 1.4631417848537435e-05, "epoch": 0.7591753180289516, "percentage": 37.95, "elapsed_time": "3:47:59", "remaining_time": "6:12:43"} +{"current_steps": 2597, "total_steps": 6840, "loss": 0.6813392639160156, "lr": 1.4627132382923607e-05, "epoch": 0.759467758444217, "percentage": 37.97, "elapsed_time": "3:48:05", "remaining_time": "6:12:39"} +{"current_steps": 2598, "total_steps": 6840, "loss": 0.644945502281189, "lr": 1.4622845835745723e-05, "epoch": 0.7597601988594824, "percentage": 37.98, "elapsed_time": "3:48:10", "remaining_time": "6:12:34"} +{"current_steps": 2599, "total_steps": 6840, "loss": 0.7432133555412292, "lr": 1.461855820800573e-05, "epoch": 0.7600526392747478, "percentage": 38.0, "elapsed_time": "3:48:15", "remaining_time": "6:12:28"} +{"current_steps": 2600, "total_steps": 6840, "loss": 0.4729112982749939, "lr": 1.4614269500705832e-05, "epoch": 0.7603450796900132, "percentage": 38.01, "elapsed_time": "3:48:20", "remaining_time": "6:12:23"} +{"current_steps": 2601, "total_steps": 6840, "loss": 0.7146443128585815, "lr": 1.4609979714848499e-05, "epoch": 0.7606375201052785, "percentage": 38.03, "elapsed_time": "3:48:29", "remaining_time": "6:12:23"} +{"current_steps": 2602, "total_steps": 6840, "loss": 0.5959945917129517, "lr": 
1.4605688851436436e-05, "epoch": 0.760929960520544, "percentage": 38.04, "elapsed_time": "3:48:34", "remaining_time": "6:12:16"} +{"current_steps": 2603, "total_steps": 6840, "loss": 0.6091525554656982, "lr": 1.4601396911472605e-05, "epoch": 0.7612224009358093, "percentage": 38.06, "elapsed_time": "3:48:39", "remaining_time": "6:12:11"} +{"current_steps": 2604, "total_steps": 6840, "loss": 0.5101523399353027, "lr": 1.4597103895960228e-05, "epoch": 0.7615148413510747, "percentage": 38.07, "elapsed_time": "3:48:44", "remaining_time": "6:12:05"} +{"current_steps": 2605, "total_steps": 6840, "loss": 0.6036165952682495, "lr": 1.4592809805902762e-05, "epoch": 0.7618072817663402, "percentage": 38.08, "elapsed_time": "3:48:49", "remaining_time": "6:11:59"} +{"current_steps": 2606, "total_steps": 6840, "loss": 0.6094970703125, "lr": 1.4588514642303928e-05, "epoch": 0.7620997221816055, "percentage": 38.1, "elapsed_time": "3:48:53", "remaining_time": "6:11:53"} +{"current_steps": 2607, "total_steps": 6840, "loss": 0.49754881858825684, "lr": 1.4584218406167697e-05, "epoch": 0.7623921625968709, "percentage": 38.11, "elapsed_time": "3:48:57", "remaining_time": "6:11:45"} +{"current_steps": 2608, "total_steps": 6840, "loss": 0.6066807508468628, "lr": 1.4579921098498285e-05, "epoch": 0.7626846030121363, "percentage": 38.13, "elapsed_time": "3:49:02", "remaining_time": "6:11:39"} +{"current_steps": 2609, "total_steps": 6840, "loss": 0.5758910179138184, "lr": 1.4575622720300162e-05, "epoch": 0.7629770434274017, "percentage": 38.14, "elapsed_time": "3:49:07", "remaining_time": "6:11:34"} +{"current_steps": 2610, "total_steps": 6840, "loss": 0.6641621589660645, "lr": 1.457132327257805e-05, "epoch": 0.763269483842667, "percentage": 38.16, "elapsed_time": "3:49:12", "remaining_time": "6:11:28"} +{"current_steps": 2611, "total_steps": 6840, "loss": 0.7024788856506348, "lr": 1.4567022756336916e-05, "epoch": 0.7635619242579325, "percentage": 38.17, "elapsed_time": "3:49:17", 
"remaining_time": "6:11:22"} +{"current_steps": 2612, "total_steps": 6840, "loss": 0.6066344380378723, "lr": 1.4562721172581982e-05, "epoch": 0.7638543646731978, "percentage": 38.19, "elapsed_time": "3:49:23", "remaining_time": "6:11:18"} +{"current_steps": 2613, "total_steps": 6840, "loss": 0.566038966178894, "lr": 1.4558418522318713e-05, "epoch": 0.7641468050884632, "percentage": 38.2, "elapsed_time": "3:49:28", "remaining_time": "6:11:13"} +{"current_steps": 2614, "total_steps": 6840, "loss": 0.5817335844039917, "lr": 1.4554114806552833e-05, "epoch": 0.7644392455037287, "percentage": 38.22, "elapsed_time": "3:49:33", "remaining_time": "6:11:07"} +{"current_steps": 2615, "total_steps": 6840, "loss": 0.6001763343811035, "lr": 1.4549810026290305e-05, "epoch": 0.764731685918994, "percentage": 38.23, "elapsed_time": "3:49:40", "remaining_time": "6:11:04"} +{"current_steps": 2616, "total_steps": 6840, "loss": 0.6363068222999573, "lr": 1.4545504182537346e-05, "epoch": 0.7650241263342594, "percentage": 38.25, "elapsed_time": "3:49:46", "remaining_time": "6:11:00"} +{"current_steps": 2617, "total_steps": 6840, "loss": 0.669566810131073, "lr": 1.4541197276300424e-05, "epoch": 0.7653165667495248, "percentage": 38.26, "elapsed_time": "3:49:50", "remaining_time": "6:10:53"} +{"current_steps": 2618, "total_steps": 6840, "loss": 0.47967004776000977, "lr": 1.4536889308586245e-05, "epoch": 0.7656090071647902, "percentage": 38.27, "elapsed_time": "3:49:56", "remaining_time": "6:10:49"} +{"current_steps": 2619, "total_steps": 6840, "loss": 0.5803399085998535, "lr": 1.4532580280401777e-05, "epoch": 0.7659014475800555, "percentage": 38.29, "elapsed_time": "3:50:02", "remaining_time": "6:10:44"} +{"current_steps": 2620, "total_steps": 6840, "loss": 0.6870115995407104, "lr": 1.452827019275423e-05, "epoch": 0.766193887995321, "percentage": 38.3, "elapsed_time": "3:50:06", "remaining_time": "6:10:37"} +{"current_steps": 2621, "total_steps": 6840, "loss": 0.6190885901451111, "lr": 
1.4523959046651058e-05, "epoch": 0.7664863284105864, "percentage": 38.32, "elapsed_time": "3:50:11", "remaining_time": "6:10:32"} +{"current_steps": 2622, "total_steps": 6840, "loss": 0.6624859571456909, "lr": 1.4519646843099961e-05, "epoch": 0.7667787688258517, "percentage": 38.33, "elapsed_time": "3:50:16", "remaining_time": "6:10:26"} +{"current_steps": 2623, "total_steps": 6840, "loss": 0.5770546197891235, "lr": 1.4515333583108896e-05, "epoch": 0.7670712092411172, "percentage": 38.35, "elapsed_time": "3:50:20", "remaining_time": "6:10:19"} +{"current_steps": 2624, "total_steps": 6840, "loss": 0.6843355894088745, "lr": 1.451101926768606e-05, "epoch": 0.7673636496563825, "percentage": 38.36, "elapsed_time": "3:50:25", "remaining_time": "6:10:13"} +{"current_steps": 2625, "total_steps": 6840, "loss": 0.5293717384338379, "lr": 1.4506703897839895e-05, "epoch": 0.7676560900716479, "percentage": 38.38, "elapsed_time": "3:50:31", "remaining_time": "6:10:09"} +{"current_steps": 2626, "total_steps": 6840, "loss": 0.44534316658973694, "lr": 1.45023874745791e-05, "epoch": 0.7679485304869133, "percentage": 38.39, "elapsed_time": "3:50:37", "remaining_time": "6:10:05"} +{"current_steps": 2627, "total_steps": 6840, "loss": 0.7279446721076965, "lr": 1.4498069998912603e-05, "epoch": 0.7682409709021787, "percentage": 38.41, "elapsed_time": "3:50:42", "remaining_time": "6:09:59"} +{"current_steps": 2628, "total_steps": 6840, "loss": 0.6990453600883484, "lr": 1.4493751471849596e-05, "epoch": 0.7685334113174441, "percentage": 38.42, "elapsed_time": "3:50:46", "remaining_time": "6:09:53"} +{"current_steps": 2629, "total_steps": 6840, "loss": 0.6610965728759766, "lr": 1.44894318943995e-05, "epoch": 0.7688258517327095, "percentage": 38.44, "elapsed_time": "3:50:52", "remaining_time": "6:09:48"} +{"current_steps": 2630, "total_steps": 6840, "loss": 0.5124749541282654, "lr": 1.4485111267571999e-05, "epoch": 0.7691182921479749, "percentage": 38.45, "elapsed_time": "3:50:56", 
"remaining_time": "6:09:41"} +{"current_steps": 2631, "total_steps": 6840, "loss": 0.7191518545150757, "lr": 1.448078959237701e-05, "epoch": 0.7694107325632402, "percentage": 38.46, "elapsed_time": "3:51:02", "remaining_time": "6:09:36"} +{"current_steps": 2632, "total_steps": 6840, "loss": 0.5798880457878113, "lr": 1.4476466869824694e-05, "epoch": 0.7697031729785057, "percentage": 38.48, "elapsed_time": "3:51:08", "remaining_time": "6:09:32"} +{"current_steps": 2633, "total_steps": 6840, "loss": 0.5187106728553772, "lr": 1.4472143100925467e-05, "epoch": 0.769995613393771, "percentage": 38.49, "elapsed_time": "3:51:13", "remaining_time": "6:09:27"} +{"current_steps": 2634, "total_steps": 6840, "loss": 0.5794588327407837, "lr": 1.4467818286689981e-05, "epoch": 0.7702880538090364, "percentage": 38.51, "elapsed_time": "3:51:20", "remaining_time": "6:09:23"} +{"current_steps": 2635, "total_steps": 6840, "loss": 0.4884936809539795, "lr": 1.4463492428129133e-05, "epoch": 0.7705804942243017, "percentage": 38.52, "elapsed_time": "3:51:26", "remaining_time": "6:09:20"} +{"current_steps": 2636, "total_steps": 6840, "loss": 0.5782946348190308, "lr": 1.4459165526254074e-05, "epoch": 0.7708729346395672, "percentage": 38.54, "elapsed_time": "3:51:31", "remaining_time": "6:09:14"} +{"current_steps": 2637, "total_steps": 6840, "loss": 0.5173349380493164, "lr": 1.445483758207618e-05, "epoch": 0.7711653750548326, "percentage": 38.55, "elapsed_time": "3:51:36", "remaining_time": "6:09:08"} +{"current_steps": 2638, "total_steps": 6840, "loss": 0.616407573223114, "lr": 1.4450508596607087e-05, "epoch": 0.771457815470098, "percentage": 38.57, "elapsed_time": "3:51:40", "remaining_time": "6:09:02"} +{"current_steps": 2639, "total_steps": 6840, "loss": 0.537878155708313, "lr": 1.4446178570858672e-05, "epoch": 0.7717502558853634, "percentage": 38.58, "elapsed_time": "3:51:44", "remaining_time": "6:08:54"} +{"current_steps": 2640, "total_steps": 6840, "loss": 0.674277663230896, "lr": 
1.4441847505843048e-05, "epoch": 0.7720426963006287, "percentage": 38.6, "elapsed_time": "3:51:50", "remaining_time": "6:08:49"} +{"current_steps": 2641, "total_steps": 6840, "loss": 0.5064860582351685, "lr": 1.4437515402572576e-05, "epoch": 0.7723351367158942, "percentage": 38.61, "elapsed_time": "3:51:55", "remaining_time": "6:08:45"} +{"current_steps": 2642, "total_steps": 6840, "loss": 0.6256883144378662, "lr": 1.4433182262059861e-05, "epoch": 0.7726275771311595, "percentage": 38.63, "elapsed_time": "3:52:02", "remaining_time": "6:08:42"} +{"current_steps": 2643, "total_steps": 6840, "loss": 0.6023700833320618, "lr": 1.4428848085317744e-05, "epoch": 0.7729200175464249, "percentage": 38.64, "elapsed_time": "3:52:07", "remaining_time": "6:08:36"} +{"current_steps": 2644, "total_steps": 6840, "loss": 0.5670932531356812, "lr": 1.4424512873359316e-05, "epoch": 0.7732124579616904, "percentage": 38.65, "elapsed_time": "3:52:12", "remaining_time": "6:08:30"} +{"current_steps": 2645, "total_steps": 6840, "loss": 0.760460376739502, "lr": 1.4420176627197906e-05, "epoch": 0.7735048983769557, "percentage": 38.67, "elapsed_time": "3:52:17", "remaining_time": "6:08:24"} +{"current_steps": 2646, "total_steps": 6840, "loss": 0.5680848956108093, "lr": 1.4415839347847082e-05, "epoch": 0.7737973387922211, "percentage": 38.68, "elapsed_time": "3:52:23", "remaining_time": "6:08:20"} +{"current_steps": 2647, "total_steps": 6840, "loss": 0.5962368249893188, "lr": 1.4411501036320661e-05, "epoch": 0.7740897792074865, "percentage": 38.7, "elapsed_time": "3:52:27", "remaining_time": "6:08:13"} +{"current_steps": 2648, "total_steps": 6840, "loss": 0.7149791121482849, "lr": 1.4407161693632697e-05, "epoch": 0.7743822196227519, "percentage": 38.71, "elapsed_time": "3:52:32", "remaining_time": "6:08:07"} +{"current_steps": 2649, "total_steps": 6840, "loss": 0.5943992733955383, "lr": 1.440282132079748e-05, "epoch": 0.7746746600380172, "percentage": 38.73, "elapsed_time": "3:52:36", 
"remaining_time": "6:08:00"} +{"current_steps": 2650, "total_steps": 6840, "loss": 0.7265899181365967, "lr": 1.439847991882955e-05, "epoch": 0.7749671004532827, "percentage": 38.74, "elapsed_time": "3:52:41", "remaining_time": "6:07:54"} +{"current_steps": 2651, "total_steps": 6840, "loss": 0.6011309027671814, "lr": 1.4394137488743682e-05, "epoch": 0.775259540868548, "percentage": 38.76, "elapsed_time": "3:52:46", "remaining_time": "6:07:49"} +{"current_steps": 2652, "total_steps": 6840, "loss": 0.6853964328765869, "lr": 1.4389794031554894e-05, "epoch": 0.7755519812838134, "percentage": 38.77, "elapsed_time": "3:52:51", "remaining_time": "6:07:43"} +{"current_steps": 2653, "total_steps": 6840, "loss": 0.6598547697067261, "lr": 1.438544954827844e-05, "epoch": 0.7758444216990789, "percentage": 38.79, "elapsed_time": "3:52:56", "remaining_time": "6:07:38"} +{"current_steps": 2654, "total_steps": 6840, "loss": 0.5776119232177734, "lr": 1.4381104039929819e-05, "epoch": 0.7761368621143442, "percentage": 38.8, "elapsed_time": "3:53:02", "remaining_time": "6:07:33"} +{"current_steps": 2655, "total_steps": 6840, "loss": 0.6026376485824585, "lr": 1.4376757507524766e-05, "epoch": 0.7764293025296096, "percentage": 38.82, "elapsed_time": "3:53:06", "remaining_time": "6:07:27"} +{"current_steps": 2656, "total_steps": 6840, "loss": 0.5776997804641724, "lr": 1.4372409952079256e-05, "epoch": 0.776721742944875, "percentage": 38.83, "elapsed_time": "3:53:12", "remaining_time": "6:07:22"} +{"current_steps": 2657, "total_steps": 6840, "loss": 0.5766068696975708, "lr": 1.4368061374609505e-05, "epoch": 0.7770141833601404, "percentage": 38.85, "elapsed_time": "3:53:18", "remaining_time": "6:07:18"} +{"current_steps": 2658, "total_steps": 6840, "loss": 0.4783105254173279, "lr": 1.4363711776131966e-05, "epoch": 0.7773066237754057, "percentage": 38.86, "elapsed_time": "3:53:23", "remaining_time": "6:07:13"} +{"current_steps": 2659, "total_steps": 6840, "loss": 0.6563695073127747, "lr": 
1.4359361157663332e-05, "epoch": 0.7775990641906712, "percentage": 38.87, "elapsed_time": "3:53:28", "remaining_time": "6:07:07"} +{"current_steps": 2660, "total_steps": 6840, "loss": 0.5177119374275208, "lr": 1.4355009520220531e-05, "epoch": 0.7778915046059366, "percentage": 38.89, "elapsed_time": "3:53:35", "remaining_time": "6:07:03"} +{"current_steps": 2661, "total_steps": 6840, "loss": 0.6590641736984253, "lr": 1.4350656864820733e-05, "epoch": 0.7781839450212019, "percentage": 38.9, "elapsed_time": "3:53:40", "remaining_time": "6:06:59"} +{"current_steps": 2662, "total_steps": 6840, "loss": 0.6012274622917175, "lr": 1.4346303192481348e-05, "epoch": 0.7784763854364674, "percentage": 38.92, "elapsed_time": "3:53:46", "remaining_time": "6:06:54"} +{"current_steps": 2663, "total_steps": 6840, "loss": 0.6731704473495483, "lr": 1.4341948504220016e-05, "epoch": 0.7787688258517327, "percentage": 38.93, "elapsed_time": "3:53:52", "remaining_time": "6:06:49"} +{"current_steps": 2664, "total_steps": 6840, "loss": 0.6827171444892883, "lr": 1.4337592801054623e-05, "epoch": 0.7790612662669981, "percentage": 38.95, "elapsed_time": "3:53:56", "remaining_time": "6:06:42"} +{"current_steps": 2665, "total_steps": 6840, "loss": 0.6654937267303467, "lr": 1.4333236084003282e-05, "epoch": 0.7793537066822634, "percentage": 38.96, "elapsed_time": "3:54:01", "remaining_time": "6:06:37"} +{"current_steps": 2666, "total_steps": 6840, "loss": 0.5673532485961914, "lr": 1.4328878354084355e-05, "epoch": 0.7796461470975289, "percentage": 38.98, "elapsed_time": "3:54:06", "remaining_time": "6:06:32"} +{"current_steps": 2667, "total_steps": 6840, "loss": 0.5401986241340637, "lr": 1.432451961231643e-05, "epoch": 0.7799385875127943, "percentage": 38.99, "elapsed_time": "3:54:12", "remaining_time": "6:06:27"} +{"current_steps": 2668, "total_steps": 6840, "loss": 0.6134701371192932, "lr": 1.4320159859718341e-05, "epoch": 0.7802310279280597, "percentage": 39.01, "elapsed_time": "3:54:17", 
"remaining_time": "6:06:21"} +{"current_steps": 2669, "total_steps": 6840, "loss": 0.6913554668426514, "lr": 1.4315799097309152e-05, "epoch": 0.7805234683433251, "percentage": 39.02, "elapsed_time": "3:54:22", "remaining_time": "6:06:16"} +{"current_steps": 2670, "total_steps": 6840, "loss": 0.6969482898712158, "lr": 1.4311437326108167e-05, "epoch": 0.7808159087585904, "percentage": 39.04, "elapsed_time": "3:54:28", "remaining_time": "6:06:11"} +{"current_steps": 2671, "total_steps": 6840, "loss": 0.6612537503242493, "lr": 1.4307074547134918e-05, "epoch": 0.7811083491738559, "percentage": 39.05, "elapsed_time": "3:54:33", "remaining_time": "6:06:06"} +{"current_steps": 2672, "total_steps": 6840, "loss": 0.5545899868011475, "lr": 1.430271076140918e-05, "epoch": 0.7814007895891212, "percentage": 39.06, "elapsed_time": "3:54:39", "remaining_time": "6:06:01"} +{"current_steps": 2673, "total_steps": 6840, "loss": 0.6635574698448181, "lr": 1.4298345969950965e-05, "epoch": 0.7816932300043866, "percentage": 39.08, "elapsed_time": "3:54:44", "remaining_time": "6:05:57"} +{"current_steps": 2674, "total_steps": 6840, "loss": 0.5859510898590088, "lr": 1.4293980173780514e-05, "epoch": 0.781985670419652, "percentage": 39.09, "elapsed_time": "3:54:49", "remaining_time": "6:05:51"} +{"current_steps": 2675, "total_steps": 6840, "loss": 0.5839825868606567, "lr": 1.4289613373918304e-05, "epoch": 0.7822781108349174, "percentage": 39.11, "elapsed_time": "3:54:54", "remaining_time": "6:05:45"} +{"current_steps": 2676, "total_steps": 6840, "loss": 0.6376889944076538, "lr": 1.428524557138505e-05, "epoch": 0.7825705512501828, "percentage": 39.12, "elapsed_time": "3:54:59", "remaining_time": "6:05:40"} +{"current_steps": 2677, "total_steps": 6840, "loss": 0.5473129749298096, "lr": 1.4280876767201696e-05, "epoch": 0.7828629916654481, "percentage": 39.14, "elapsed_time": "3:55:06", "remaining_time": "6:05:36"} +{"current_steps": 2678, "total_steps": 6840, "loss": 0.6723904609680176, "lr": 
1.4276506962389429e-05, "epoch": 0.7831554320807136, "percentage": 39.15, "elapsed_time": "3:55:13", "remaining_time": "6:05:33"} +{"current_steps": 2679, "total_steps": 6840, "loss": 0.6036845445632935, "lr": 1.4272136157969658e-05, "epoch": 0.7834478724959789, "percentage": 39.17, "elapsed_time": "3:55:17", "remaining_time": "6:05:27"} +{"current_steps": 2680, "total_steps": 6840, "loss": 0.5993655920028687, "lr": 1.4267764354964038e-05, "epoch": 0.7837403129112444, "percentage": 39.18, "elapsed_time": "3:55:22", "remaining_time": "6:05:21"} +{"current_steps": 2681, "total_steps": 6840, "loss": 0.6678075194358826, "lr": 1.4263391554394448e-05, "epoch": 0.7840327533265097, "percentage": 39.2, "elapsed_time": "3:55:28", "remaining_time": "6:05:16"} +{"current_steps": 2682, "total_steps": 6840, "loss": 0.5627151727676392, "lr": 1.4259017757283003e-05, "epoch": 0.7843251937417751, "percentage": 39.21, "elapsed_time": "3:55:33", "remaining_time": "6:05:11"} +{"current_steps": 2683, "total_steps": 6840, "loss": 0.6060316562652588, "lr": 1.4254642964652053e-05, "epoch": 0.7846176341570406, "percentage": 39.23, "elapsed_time": "3:55:39", "remaining_time": "6:05:07"} +{"current_steps": 2684, "total_steps": 6840, "loss": 0.6535854935646057, "lr": 1.4250267177524177e-05, "epoch": 0.7849100745723059, "percentage": 39.24, "elapsed_time": "3:55:45", "remaining_time": "6:05:02"} +{"current_steps": 2685, "total_steps": 6840, "loss": 0.7141643762588501, "lr": 1.4245890396922195e-05, "epoch": 0.7852025149875713, "percentage": 39.25, "elapsed_time": "3:55:50", "remaining_time": "6:04:58"} +{"current_steps": 2686, "total_steps": 6840, "loss": 0.6685847640037537, "lr": 1.4241512623869143e-05, "epoch": 0.7854949554028366, "percentage": 39.27, "elapsed_time": "3:55:57", "remaining_time": "6:04:54"} +{"current_steps": 2687, "total_steps": 6840, "loss": 0.6745196580886841, "lr": 1.4237133859388305e-05, "epoch": 0.7857873958181021, "percentage": 39.28, "elapsed_time": "3:56:01", 
"remaining_time": "6:04:47"} +{"current_steps": 2688, "total_steps": 6840, "loss": 0.6891968250274658, "lr": 1.423275410450319e-05, "epoch": 0.7860798362333674, "percentage": 39.3, "elapsed_time": "3:56:05", "remaining_time": "6:04:41"} +{"current_steps": 2689, "total_steps": 6840, "loss": 0.5614763498306274, "lr": 1.422837336023754e-05, "epoch": 0.7863722766486329, "percentage": 39.31, "elapsed_time": "3:56:10", "remaining_time": "6:04:35"} +{"current_steps": 2690, "total_steps": 6840, "loss": 0.5867494344711304, "lr": 1.4223991627615324e-05, "epoch": 0.7866647170638982, "percentage": 39.33, "elapsed_time": "3:56:15", "remaining_time": "6:04:29"} +{"current_steps": 2691, "total_steps": 6840, "loss": 0.644777774810791, "lr": 1.421960890766075e-05, "epoch": 0.7869571574791636, "percentage": 39.34, "elapsed_time": "3:56:19", "remaining_time": "6:04:21"} +{"current_steps": 2692, "total_steps": 6840, "loss": 0.7237588167190552, "lr": 1.4215225201398249e-05, "epoch": 0.787249597894429, "percentage": 39.36, "elapsed_time": "3:56:25", "remaining_time": "6:04:17"} +{"current_steps": 2693, "total_steps": 6840, "loss": 0.6314423680305481, "lr": 1.4210840509852484e-05, "epoch": 0.7875420383096944, "percentage": 39.37, "elapsed_time": "3:56:31", "remaining_time": "6:04:13"} +{"current_steps": 2694, "total_steps": 6840, "loss": 0.5298433303833008, "lr": 1.4206454834048353e-05, "epoch": 0.7878344787249598, "percentage": 39.39, "elapsed_time": "3:56:35", "remaining_time": "6:04:07"} +{"current_steps": 2695, "total_steps": 6840, "loss": 0.507548451423645, "lr": 1.420206817501098e-05, "epoch": 0.7881269191402251, "percentage": 39.4, "elapsed_time": "3:56:42", "remaining_time": "6:04:03"} +{"current_steps": 2696, "total_steps": 6840, "loss": 0.7742520570755005, "lr": 1.4197680533765721e-05, "epoch": 0.7884193595554906, "percentage": 39.42, "elapsed_time": "3:56:46", "remaining_time": "6:03:56"} +{"current_steps": 2697, "total_steps": 6840, "loss": 0.6261187195777893, "lr": 
1.4193291911338161e-05, "epoch": 0.7887117999707559, "percentage": 39.43, "elapsed_time": "3:56:53", "remaining_time": "6:03:53"} +{"current_steps": 2698, "total_steps": 6840, "loss": 0.7501171827316284, "lr": 1.4188902308754108e-05, "epoch": 0.7890042403860213, "percentage": 39.44, "elapsed_time": "3:56:59", "remaining_time": "6:03:49"} +{"current_steps": 2699, "total_steps": 6840, "loss": 0.5590647459030151, "lr": 1.4184511727039612e-05, "epoch": 0.7892966808012868, "percentage": 39.46, "elapsed_time": "3:57:04", "remaining_time": "6:03:44"} +{"current_steps": 2700, "total_steps": 6840, "loss": 0.586786150932312, "lr": 1.4180120167220941e-05, "epoch": 0.7895891212165521, "percentage": 39.47, "elapsed_time": "3:57:08", "remaining_time": "6:03:37"} +{"current_steps": 2701, "total_steps": 6840, "loss": 0.5208219289779663, "lr": 1.4175727630324598e-05, "epoch": 0.7898815616318176, "percentage": 39.49, "elapsed_time": "3:57:17", "remaining_time": "6:03:37"} +{"current_steps": 2702, "total_steps": 6840, "loss": 0.5925623178482056, "lr": 1.4171334117377312e-05, "epoch": 0.7901740020470829, "percentage": 39.5, "elapsed_time": "3:57:24", "remaining_time": "6:03:35"} +{"current_steps": 2703, "total_steps": 6840, "loss": 0.7095032930374146, "lr": 1.4166939629406034e-05, "epoch": 0.7904664424623483, "percentage": 39.52, "elapsed_time": "3:57:30", "remaining_time": "6:03:30"} +{"current_steps": 2704, "total_steps": 6840, "loss": 0.5683872699737549, "lr": 1.4162544167437955e-05, "epoch": 0.7907588828776136, "percentage": 39.53, "elapsed_time": "3:57:35", "remaining_time": "6:03:25"} +{"current_steps": 2705, "total_steps": 6840, "loss": 0.7079274654388428, "lr": 1.4158147732500482e-05, "epoch": 0.7910513232928791, "percentage": 39.55, "elapsed_time": "3:57:41", "remaining_time": "6:03:20"} +{"current_steps": 2706, "total_steps": 6840, "loss": 0.6336439847946167, "lr": 1.415375032562126e-05, "epoch": 0.7913437637081445, "percentage": 39.56, "elapsed_time": "3:57:46", 
"remaining_time": "6:03:15"} +{"current_steps": 2707, "total_steps": 6840, "loss": 0.4842381477355957, "lr": 1.414935194782816e-05, "epoch": 0.7916362041234098, "percentage": 39.58, "elapsed_time": "3:57:51", "remaining_time": "6:03:10"} +{"current_steps": 2708, "total_steps": 6840, "loss": 0.5439653396606445, "lr": 1.4144952600149267e-05, "epoch": 0.7919286445386753, "percentage": 39.59, "elapsed_time": "3:57:55", "remaining_time": "6:03:02"} +{"current_steps": 2709, "total_steps": 6840, "loss": 0.6365468502044678, "lr": 1.4140552283612906e-05, "epoch": 0.7922210849539406, "percentage": 39.61, "elapsed_time": "3:58:01", "remaining_time": "6:02:58"} +{"current_steps": 2710, "total_steps": 6840, "loss": 0.6192438006401062, "lr": 1.4136150999247623e-05, "epoch": 0.792513525369206, "percentage": 39.62, "elapsed_time": "3:58:06", "remaining_time": "6:02:53"} +{"current_steps": 2711, "total_steps": 6840, "loss": 0.5695269703865051, "lr": 1.4131748748082191e-05, "epoch": 0.7928059657844714, "percentage": 39.63, "elapsed_time": "3:58:12", "remaining_time": "6:02:49"} +{"current_steps": 2712, "total_steps": 6840, "loss": 0.6892319321632385, "lr": 1.4127345531145614e-05, "epoch": 0.7930984061997368, "percentage": 39.65, "elapsed_time": "3:58:18", "remaining_time": "6:02:43"} +{"current_steps": 2713, "total_steps": 6840, "loss": 0.6294678449630737, "lr": 1.4122941349467109e-05, "epoch": 0.7933908466150021, "percentage": 39.66, "elapsed_time": "3:58:23", "remaining_time": "6:02:38"} +{"current_steps": 2714, "total_steps": 6840, "loss": 0.6666272878646851, "lr": 1.4118536204076135e-05, "epoch": 0.7936832870302676, "percentage": 39.68, "elapsed_time": "3:58:28", "remaining_time": "6:02:33"} +{"current_steps": 2715, "total_steps": 6840, "loss": 0.5981796383857727, "lr": 1.4114130096002363e-05, "epoch": 0.793975727445533, "percentage": 39.69, "elapsed_time": "3:58:36", "remaining_time": "6:02:30"} +{"current_steps": 2716, "total_steps": 6840, "loss": 0.6120023131370544, "lr": 
1.4109723026275695e-05, "epoch": 0.7942681678607983, "percentage": 39.71, "elapsed_time": "3:58:40", "remaining_time": "6:02:25"} +{"current_steps": 2717, "total_steps": 6840, "loss": 0.5892866849899292, "lr": 1.4105314995926257e-05, "epoch": 0.7945606082760638, "percentage": 39.72, "elapsed_time": "3:58:44", "remaining_time": "6:02:17"} +{"current_steps": 2718, "total_steps": 6840, "loss": 0.7625553607940674, "lr": 1.4100906005984404e-05, "epoch": 0.7948530486913291, "percentage": 39.74, "elapsed_time": "3:58:49", "remaining_time": "6:02:11"} +{"current_steps": 2719, "total_steps": 6840, "loss": 0.643633246421814, "lr": 1.40964960574807e-05, "epoch": 0.7951454891065945, "percentage": 39.75, "elapsed_time": "3:58:55", "remaining_time": "6:02:07"} +{"current_steps": 2720, "total_steps": 6840, "loss": 0.46422284841537476, "lr": 1.4092085151445953e-05, "epoch": 0.7954379295218599, "percentage": 39.77, "elapsed_time": "3:59:00", "remaining_time": "6:02:01"} +{"current_steps": 2721, "total_steps": 6840, "loss": 0.6290001273155212, "lr": 1.4087673288911182e-05, "epoch": 0.7957303699371253, "percentage": 39.78, "elapsed_time": "3:59:07", "remaining_time": "6:01:58"} +{"current_steps": 2722, "total_steps": 6840, "loss": 0.5175197124481201, "lr": 1.4083260470907632e-05, "epoch": 0.7960228103523908, "percentage": 39.8, "elapsed_time": "3:59:11", "remaining_time": "6:01:52"} +{"current_steps": 2723, "total_steps": 6840, "loss": 0.6475427150726318, "lr": 1.4078846698466776e-05, "epoch": 0.7963152507676561, "percentage": 39.81, "elapsed_time": "3:59:17", "remaining_time": "6:01:47"} +{"current_steps": 2724, "total_steps": 6840, "loss": 0.5978254079818726, "lr": 1.40744319726203e-05, "epoch": 0.7966076911829215, "percentage": 39.82, "elapsed_time": "3:59:23", "remaining_time": "6:01:43"} +{"current_steps": 2725, "total_steps": 6840, "loss": 0.5738629102706909, "lr": 1.4070016294400124e-05, "epoch": 0.7969001315981868, "percentage": 39.84, "elapsed_time": "3:59:27", 
"remaining_time": "6:01:36"} +{"current_steps": 2726, "total_steps": 6840, "loss": 0.5809024572372437, "lr": 1.4065599664838388e-05, "epoch": 0.7971925720134523, "percentage": 39.85, "elapsed_time": "3:59:34", "remaining_time": "6:01:32"} +{"current_steps": 2727, "total_steps": 6840, "loss": 0.5907782316207886, "lr": 1.4061182084967446e-05, "epoch": 0.7974850124287176, "percentage": 39.87, "elapsed_time": "3:59:39", "remaining_time": "6:01:27"} +{"current_steps": 2728, "total_steps": 6840, "loss": 0.7640036344528198, "lr": 1.4056763555819887e-05, "epoch": 0.797777452843983, "percentage": 39.88, "elapsed_time": "3:59:44", "remaining_time": "6:01:22"} +{"current_steps": 2729, "total_steps": 6840, "loss": 0.7472168207168579, "lr": 1.4052344078428513e-05, "epoch": 0.7980698932592484, "percentage": 39.9, "elapsed_time": "3:59:50", "remaining_time": "6:01:17"} +{"current_steps": 2730, "total_steps": 6840, "loss": 0.6726990342140198, "lr": 1.4047923653826347e-05, "epoch": 0.7983623336745138, "percentage": 39.91, "elapsed_time": "3:59:55", "remaining_time": "6:01:11"} +{"current_steps": 2731, "total_steps": 6840, "loss": 0.5949650406837463, "lr": 1.404350228304664e-05, "epoch": 0.7986547740897793, "percentage": 39.93, "elapsed_time": "4:00:01", "remaining_time": "6:01:08"} +{"current_steps": 2732, "total_steps": 6840, "loss": 0.5578774213790894, "lr": 1.403907996712286e-05, "epoch": 0.7989472145050446, "percentage": 39.94, "elapsed_time": "4:00:06", "remaining_time": "6:01:02"} +{"current_steps": 2733, "total_steps": 6840, "loss": 0.6092333197593689, "lr": 1.4034656707088692e-05, "epoch": 0.79923965492031, "percentage": 39.96, "elapsed_time": "4:00:09", "remaining_time": "6:00:54"} +{"current_steps": 2734, "total_steps": 6840, "loss": 0.5095718502998352, "lr": 1.4030232503978053e-05, "epoch": 0.7995320953355753, "percentage": 39.97, "elapsed_time": "4:00:14", "remaining_time": "6:00:47"} +{"current_steps": 2735, "total_steps": 6840, "loss": 0.5155727863311768, "lr": 
1.4025807358825072e-05, "epoch": 0.7998245357508408, "percentage": 39.99, "elapsed_time": "4:00:19", "remaining_time": "6:00:42"} +{"current_steps": 2736, "total_steps": 6840, "loss": 0.5752589702606201, "lr": 1.4021381272664094e-05, "epoch": 0.8001169761661061, "percentage": 40.0, "elapsed_time": "4:00:24", "remaining_time": "6:00:37"} +{"current_steps": 2737, "total_steps": 6840, "loss": 0.6334787607192993, "lr": 1.4016954246529697e-05, "epoch": 0.8004094165813715, "percentage": 40.01, "elapsed_time": "4:00:29", "remaining_time": "6:00:30"} +{"current_steps": 2738, "total_steps": 6840, "loss": 0.7406032085418701, "lr": 1.4012526281456666e-05, "epoch": 0.800701856996637, "percentage": 40.03, "elapsed_time": "4:00:35", "remaining_time": "6:00:26"} +{"current_steps": 2739, "total_steps": 6840, "loss": 0.5805078744888306, "lr": 1.4008097378480014e-05, "epoch": 0.8009942974119023, "percentage": 40.04, "elapsed_time": "4:00:39", "remaining_time": "6:00:20"} +{"current_steps": 2740, "total_steps": 6840, "loss": 0.6849163770675659, "lr": 1.4003667538634972e-05, "epoch": 0.8012867378271677, "percentage": 40.06, "elapsed_time": "4:00:45", "remaining_time": "6:00:15"} +{"current_steps": 2741, "total_steps": 6840, "loss": 0.7707695960998535, "lr": 1.3999236762956985e-05, "epoch": 0.8015791782424331, "percentage": 40.07, "elapsed_time": "4:00:49", "remaining_time": "6:00:09"} +{"current_steps": 2742, "total_steps": 6840, "loss": 0.6253059506416321, "lr": 1.3994805052481715e-05, "epoch": 0.8018716186576985, "percentage": 40.09, "elapsed_time": "4:00:54", "remaining_time": "6:00:02"} +{"current_steps": 2743, "total_steps": 6840, "loss": 0.6450316905975342, "lr": 1.3990372408245057e-05, "epoch": 0.8021640590729638, "percentage": 40.1, "elapsed_time": "4:01:00", "remaining_time": "5:59:57"} +{"current_steps": 2744, "total_steps": 6840, "loss": 0.672899603843689, "lr": 1.398593883128311e-05, "epoch": 0.8024564994882293, "percentage": 40.12, "elapsed_time": "4:01:05", 
"remaining_time": "5:59:53"} +{"current_steps": 2745, "total_steps": 6840, "loss": 0.6203787326812744, "lr": 1.3981504322632198e-05, "epoch": 0.8027489399034947, "percentage": 40.13, "elapsed_time": "4:01:10", "remaining_time": "5:59:47"} +{"current_steps": 2746, "total_steps": 6840, "loss": 0.541740894317627, "lr": 1.3977068883328854e-05, "epoch": 0.80304138031876, "percentage": 40.15, "elapsed_time": "4:01:16", "remaining_time": "5:59:42"} +{"current_steps": 2747, "total_steps": 6840, "loss": 0.5566504001617432, "lr": 1.3972632514409843e-05, "epoch": 0.8033338207340255, "percentage": 40.16, "elapsed_time": "4:01:20", "remaining_time": "5:59:36"} +{"current_steps": 2748, "total_steps": 6840, "loss": 0.6911404728889465, "lr": 1.3968195216912135e-05, "epoch": 0.8036262611492908, "percentage": 40.18, "elapsed_time": "4:01:26", "remaining_time": "5:59:31"} +{"current_steps": 2749, "total_steps": 6840, "loss": 0.6744735240936279, "lr": 1.3963756991872921e-05, "epoch": 0.8039187015645562, "percentage": 40.19, "elapsed_time": "4:01:31", "remaining_time": "5:59:25"} +{"current_steps": 2750, "total_steps": 6840, "loss": 0.6660502552986145, "lr": 1.3959317840329613e-05, "epoch": 0.8042111419798216, "percentage": 40.2, "elapsed_time": "4:01:34", "remaining_time": "5:59:17"} +{"current_steps": 2751, "total_steps": 6840, "loss": 0.607395589351654, "lr": 1.3954877763319832e-05, "epoch": 0.804503582395087, "percentage": 40.22, "elapsed_time": "4:01:40", "remaining_time": "5:59:13"} +{"current_steps": 2752, "total_steps": 6840, "loss": 0.53249192237854, "lr": 1.395043676188142e-05, "epoch": 0.8047960228103523, "percentage": 40.23, "elapsed_time": "4:01:45", "remaining_time": "5:59:07"} +{"current_steps": 2753, "total_steps": 6840, "loss": 0.5728630423545837, "lr": 1.394599483705243e-05, "epoch": 0.8050884632256178, "percentage": 40.25, "elapsed_time": "4:01:49", "remaining_time": "5:59:00"} +{"current_steps": 2754, "total_steps": 6840, "loss": 0.6912537813186646, "lr": 
1.3941551989871142e-05, "epoch": 0.8053809036408832, "percentage": 40.26, "elapsed_time": "4:01:54", "remaining_time": "5:58:54"} +{"current_steps": 2755, "total_steps": 6840, "loss": 0.6002523899078369, "lr": 1.3937108221376041e-05, "epoch": 0.8056733440561485, "percentage": 40.28, "elapsed_time": "4:01:59", "remaining_time": "5:58:49"} +{"current_steps": 2756, "total_steps": 6840, "loss": 0.6573797464370728, "lr": 1.3932663532605832e-05, "epoch": 0.805965784471414, "percentage": 40.29, "elapsed_time": "4:02:06", "remaining_time": "5:58:45"} +{"current_steps": 2757, "total_steps": 6840, "loss": 0.6997278928756714, "lr": 1.3928217924599433e-05, "epoch": 0.8062582248866793, "percentage": 40.31, "elapsed_time": "4:02:11", "remaining_time": "5:58:40"} +{"current_steps": 2758, "total_steps": 6840, "loss": 0.565264105796814, "lr": 1.3923771398395978e-05, "epoch": 0.8065506653019447, "percentage": 40.32, "elapsed_time": "4:02:16", "remaining_time": "5:58:34"} +{"current_steps": 2759, "total_steps": 6840, "loss": 0.8065239191055298, "lr": 1.3919323955034815e-05, "epoch": 0.8068431057172101, "percentage": 40.34, "elapsed_time": "4:02:21", "remaining_time": "5:58:28"} +{"current_steps": 2760, "total_steps": 6840, "loss": 0.556678056716919, "lr": 1.3914875595555509e-05, "epoch": 0.8071355461324755, "percentage": 40.35, "elapsed_time": "4:02:26", "remaining_time": "5:58:23"} +{"current_steps": 2761, "total_steps": 6840, "loss": 0.5528635382652283, "lr": 1.3910426320997834e-05, "epoch": 0.807427986547741, "percentage": 40.37, "elapsed_time": "4:02:30", "remaining_time": "5:58:16"} +{"current_steps": 2762, "total_steps": 6840, "loss": 0.6127038598060608, "lr": 1.3905976132401785e-05, "epoch": 0.8077204269630063, "percentage": 40.38, "elapsed_time": "4:02:36", "remaining_time": "5:58:12"} +{"current_steps": 2763, "total_steps": 6840, "loss": 0.6311757564544678, "lr": 1.390152503080756e-05, "epoch": 0.8080128673782717, "percentage": 40.39, "elapsed_time": "4:02:41", 
"remaining_time": "5:58:06"} +{"current_steps": 2764, "total_steps": 6840, "loss": 0.669788122177124, "lr": 1.389707301725558e-05, "epoch": 0.808305307793537, "percentage": 40.41, "elapsed_time": "4:02:46", "remaining_time": "5:58:01"} +{"current_steps": 2765, "total_steps": 6840, "loss": 0.48408570885658264, "lr": 1.3892620092786477e-05, "epoch": 0.8085977482088025, "percentage": 40.42, "elapsed_time": "4:02:52", "remaining_time": "5:57:57"} +{"current_steps": 2766, "total_steps": 6840, "loss": 0.5648288726806641, "lr": 1.3888166258441098e-05, "epoch": 0.8088901886240678, "percentage": 40.44, "elapsed_time": "4:02:57", "remaining_time": "5:57:51"} +{"current_steps": 2767, "total_steps": 6840, "loss": 0.5894806385040283, "lr": 1.3883711515260497e-05, "epoch": 0.8091826290393332, "percentage": 40.45, "elapsed_time": "4:03:03", "remaining_time": "5:57:46"} +{"current_steps": 2768, "total_steps": 6840, "loss": 0.8325392603874207, "lr": 1.3879255864285939e-05, "epoch": 0.8094750694545986, "percentage": 40.47, "elapsed_time": "4:03:07", "remaining_time": "5:57:39"} +{"current_steps": 2769, "total_steps": 6840, "loss": 0.5282119512557983, "lr": 1.387479930655891e-05, "epoch": 0.809767509869864, "percentage": 40.48, "elapsed_time": "4:03:13", "remaining_time": "5:57:34"} +{"current_steps": 2770, "total_steps": 6840, "loss": 0.7565277218818665, "lr": 1.3870341843121104e-05, "epoch": 0.8100599502851294, "percentage": 40.5, "elapsed_time": "4:03:19", "remaining_time": "5:57:31"} +{"current_steps": 2771, "total_steps": 6840, "loss": 0.5767146944999695, "lr": 1.3865883475014424e-05, "epoch": 0.8103523907003948, "percentage": 40.51, "elapsed_time": "4:03:25", "remaining_time": "5:57:27"} +{"current_steps": 2772, "total_steps": 6840, "loss": 0.5988898873329163, "lr": 1.3861424203280987e-05, "epoch": 0.8106448311156602, "percentage": 40.53, "elapsed_time": "4:03:30", "remaining_time": "5:57:22"} +{"current_steps": 2773, "total_steps": 6840, "loss": 0.5752500295639038, "lr": 
1.3856964028963119e-05, "epoch": 0.8109372715309255, "percentage": 40.54, "elapsed_time": "4:03:36", "remaining_time": "5:57:17"} +{"current_steps": 2774, "total_steps": 6840, "loss": 0.6834297776222229, "lr": 1.385250295310336e-05, "epoch": 0.811229711946191, "percentage": 40.56, "elapsed_time": "4:03:42", "remaining_time": "5:57:12"} +{"current_steps": 2775, "total_steps": 6840, "loss": 0.5667037963867188, "lr": 1.3848040976744459e-05, "epoch": 0.8115221523614563, "percentage": 40.57, "elapsed_time": "4:03:48", "remaining_time": "5:57:08"} +{"current_steps": 2776, "total_steps": 6840, "loss": 0.5618781447410583, "lr": 1.3843578100929375e-05, "epoch": 0.8118145927767217, "percentage": 40.58, "elapsed_time": "4:03:53", "remaining_time": "5:57:02"} +{"current_steps": 2777, "total_steps": 6840, "loss": 0.538033664226532, "lr": 1.3839114326701281e-05, "epoch": 0.8121070331919872, "percentage": 40.6, "elapsed_time": "4:03:57", "remaining_time": "5:56:56"} +{"current_steps": 2778, "total_steps": 6840, "loss": 0.7218335270881653, "lr": 1.3834649655103556e-05, "epoch": 0.8123994736072525, "percentage": 40.61, "elapsed_time": "4:04:03", "remaining_time": "5:56:51"} +{"current_steps": 2779, "total_steps": 6840, "loss": 0.5979611873626709, "lr": 1.383018408717979e-05, "epoch": 0.812691914022518, "percentage": 40.63, "elapsed_time": "4:04:09", "remaining_time": "5:56:48"} +{"current_steps": 2780, "total_steps": 6840, "loss": 0.4958215355873108, "lr": 1.3825717623973775e-05, "epoch": 0.8129843544377833, "percentage": 40.64, "elapsed_time": "4:04:15", "remaining_time": "5:56:43"} +{"current_steps": 2781, "total_steps": 6840, "loss": 0.6759654879570007, "lr": 1.3821250266529531e-05, "epoch": 0.8132767948530487, "percentage": 40.66, "elapsed_time": "4:04:21", "remaining_time": "5:56:39"} +{"current_steps": 2782, "total_steps": 6840, "loss": 0.5499521493911743, "lr": 1.3816782015891272e-05, "epoch": 0.813569235268314, "percentage": 40.67, "elapsed_time": "4:04:27", 
"remaining_time": "5:56:35"} +{"current_steps": 2783, "total_steps": 6840, "loss": 0.5308753252029419, "lr": 1.3812312873103425e-05, "epoch": 0.8138616756835795, "percentage": 40.69, "elapsed_time": "4:04:32", "remaining_time": "5:56:29"} +{"current_steps": 2784, "total_steps": 6840, "loss": 0.585492730140686, "lr": 1.3807842839210617e-05, "epoch": 0.8141541160988449, "percentage": 40.7, "elapsed_time": "4:04:38", "remaining_time": "5:56:24"} +{"current_steps": 2785, "total_steps": 6840, "loss": 0.6598281860351562, "lr": 1.3803371915257702e-05, "epoch": 0.8144465565141102, "percentage": 40.72, "elapsed_time": "4:04:42", "remaining_time": "5:56:17"} +{"current_steps": 2786, "total_steps": 6840, "loss": 0.6819334030151367, "lr": 1.3798900102289726e-05, "epoch": 0.8147389969293757, "percentage": 40.73, "elapsed_time": "4:04:48", "remaining_time": "5:56:14"} +{"current_steps": 2787, "total_steps": 6840, "loss": 0.6548545360565186, "lr": 1.3794427401351946e-05, "epoch": 0.815031437344641, "percentage": 40.75, "elapsed_time": "4:04:53", "remaining_time": "5:56:08"} +{"current_steps": 2788, "total_steps": 6840, "loss": 0.7836263179779053, "lr": 1.3789953813489834e-05, "epoch": 0.8153238777599064, "percentage": 40.76, "elapsed_time": "4:04:59", "remaining_time": "5:56:03"} +{"current_steps": 2789, "total_steps": 6840, "loss": 0.6108324527740479, "lr": 1.3785479339749062e-05, "epoch": 0.8156163181751718, "percentage": 40.77, "elapsed_time": "4:05:04", "remaining_time": "5:55:57"} +{"current_steps": 2790, "total_steps": 6840, "loss": 0.7079485058784485, "lr": 1.378100398117551e-05, "epoch": 0.8159087585904372, "percentage": 40.79, "elapsed_time": "4:05:08", "remaining_time": "5:55:50"} +{"current_steps": 2791, "total_steps": 6840, "loss": 0.5935578346252441, "lr": 1.3776527738815264e-05, "epoch": 0.8162011990057025, "percentage": 40.8, "elapsed_time": "4:05:14", "remaining_time": "5:55:47"} +{"current_steps": 2792, "total_steps": 6840, "loss": 0.5559983253479004, "lr": 
1.3772050613714623e-05, "epoch": 0.816493639420968, "percentage": 40.82, "elapsed_time": "4:05:19", "remaining_time": "5:55:41"} +{"current_steps": 2793, "total_steps": 6840, "loss": 0.6230447292327881, "lr": 1.3767572606920083e-05, "epoch": 0.8167860798362334, "percentage": 40.83, "elapsed_time": "4:05:25", "remaining_time": "5:55:36"} +{"current_steps": 2794, "total_steps": 6840, "loss": 0.5672184824943542, "lr": 1.3763093719478357e-05, "epoch": 0.8170785202514987, "percentage": 40.85, "elapsed_time": "4:05:29", "remaining_time": "5:55:29"} +{"current_steps": 2795, "total_steps": 6840, "loss": 0.6933468580245972, "lr": 1.3758613952436353e-05, "epoch": 0.8173709606667642, "percentage": 40.86, "elapsed_time": "4:05:33", "remaining_time": "5:55:23"} +{"current_steps": 2796, "total_steps": 6840, "loss": 0.5873827934265137, "lr": 1.3754133306841188e-05, "epoch": 0.8176634010820295, "percentage": 40.88, "elapsed_time": "4:05:39", "remaining_time": "5:55:18"} +{"current_steps": 2797, "total_steps": 6840, "loss": 0.6061393022537231, "lr": 1.3749651783740188e-05, "epoch": 0.8179558414972949, "percentage": 40.89, "elapsed_time": "4:05:44", "remaining_time": "5:55:12"} +{"current_steps": 2798, "total_steps": 6840, "loss": 0.6218947768211365, "lr": 1.3745169384180886e-05, "epoch": 0.8182482819125603, "percentage": 40.91, "elapsed_time": "4:05:49", "remaining_time": "5:55:07"} +{"current_steps": 2799, "total_steps": 6840, "loss": 0.6092264652252197, "lr": 1.3740686109211008e-05, "epoch": 0.8185407223278257, "percentage": 40.92, "elapsed_time": "4:05:55", "remaining_time": "5:55:02"} +{"current_steps": 2800, "total_steps": 6840, "loss": 0.6145539283752441, "lr": 1.3736201959878497e-05, "epoch": 0.8188331627430911, "percentage": 40.94, "elapsed_time": "4:05:59", "remaining_time": "5:54:56"} +{"current_steps": 2801, "total_steps": 6840, "loss": 0.4637746214866638, "lr": 1.3731716937231493e-05, "epoch": 0.8191256031583565, "percentage": 40.95, "elapsed_time": "4:06:09", 
"remaining_time": "5:54:58"} +{"current_steps": 2802, "total_steps": 6840, "loss": 0.6102726459503174, "lr": 1.3727231042318345e-05, "epoch": 0.8194180435736219, "percentage": 40.96, "elapsed_time": "4:06:16", "remaining_time": "5:54:54"} +{"current_steps": 2803, "total_steps": 6840, "loss": 0.5885297060012817, "lr": 1.3722744276187603e-05, "epoch": 0.8197104839888872, "percentage": 40.98, "elapsed_time": "4:06:22", "remaining_time": "5:54:49"} +{"current_steps": 2804, "total_steps": 6840, "loss": 0.592369019985199, "lr": 1.3718256639888021e-05, "epoch": 0.8200029244041527, "percentage": 40.99, "elapsed_time": "4:06:26", "remaining_time": "5:54:42"} +{"current_steps": 2805, "total_steps": 6840, "loss": 0.5194098949432373, "lr": 1.3713768134468557e-05, "epoch": 0.820295364819418, "percentage": 41.01, "elapsed_time": "4:06:31", "remaining_time": "5:54:37"} +{"current_steps": 2806, "total_steps": 6840, "loss": 0.6033506393432617, "lr": 1.370927876097837e-05, "epoch": 0.8205878052346834, "percentage": 41.02, "elapsed_time": "4:06:37", "remaining_time": "5:54:33"} +{"current_steps": 2807, "total_steps": 6840, "loss": 0.6866108179092407, "lr": 1.3704788520466828e-05, "epoch": 0.8208802456499488, "percentage": 41.04, "elapsed_time": "4:06:42", "remaining_time": "5:54:27"} +{"current_steps": 2808, "total_steps": 6840, "loss": 0.7325261831283569, "lr": 1.3700297413983492e-05, "epoch": 0.8211726860652142, "percentage": 41.05, "elapsed_time": "4:06:46", "remaining_time": "5:54:21"} +{"current_steps": 2809, "total_steps": 6840, "loss": 0.5422608852386475, "lr": 1.3695805442578136e-05, "epoch": 0.8214651264804796, "percentage": 41.07, "elapsed_time": "4:06:51", "remaining_time": "5:54:14"} +{"current_steps": 2810, "total_steps": 6840, "loss": 0.6124732494354248, "lr": 1.369131260730073e-05, "epoch": 0.821757566895745, "percentage": 41.08, "elapsed_time": "4:06:55", "remaining_time": "5:54:07"} +{"current_steps": 2811, "total_steps": 6840, "loss": 0.6097716093063354, "lr": 
1.3686818909201442e-05, "epoch": 0.8220500073110104, "percentage": 41.1, "elapsed_time": "4:07:01", "remaining_time": "5:54:03"} +{"current_steps": 2812, "total_steps": 6840, "loss": 0.6283478140830994, "lr": 1.3682324349330652e-05, "epoch": 0.8223424477262757, "percentage": 41.11, "elapsed_time": "4:07:07", "remaining_time": "5:53:59"} +{"current_steps": 2813, "total_steps": 6840, "loss": 0.6590027213096619, "lr": 1.3677828928738934e-05, "epoch": 0.8226348881415412, "percentage": 41.13, "elapsed_time": "4:07:12", "remaining_time": "5:53:53"} +{"current_steps": 2814, "total_steps": 6840, "loss": 0.6417049169540405, "lr": 1.3673332648477065e-05, "epoch": 0.8229273285568065, "percentage": 41.14, "elapsed_time": "4:07:18", "remaining_time": "5:53:49"} +{"current_steps": 2815, "total_steps": 6840, "loss": 0.6217149496078491, "lr": 1.3668835509596023e-05, "epoch": 0.8232197689720719, "percentage": 41.15, "elapsed_time": "4:07:22", "remaining_time": "5:53:42"} +{"current_steps": 2816, "total_steps": 6840, "loss": 0.7530043125152588, "lr": 1.3664337513146993e-05, "epoch": 0.8235122093873374, "percentage": 41.17, "elapsed_time": "4:07:27", "remaining_time": "5:53:37"} +{"current_steps": 2817, "total_steps": 6840, "loss": 0.6690578460693359, "lr": 1.3659838660181341e-05, "epoch": 0.8238046498026027, "percentage": 41.18, "elapsed_time": "4:07:33", "remaining_time": "5:53:32"} +{"current_steps": 2818, "total_steps": 6840, "loss": 0.5348777174949646, "lr": 1.3655338951750657e-05, "epoch": 0.8240970902178681, "percentage": 41.2, "elapsed_time": "4:07:38", "remaining_time": "5:53:26"} +{"current_steps": 2819, "total_steps": 6840, "loss": 0.7076361179351807, "lr": 1.3650838388906718e-05, "epoch": 0.8243895306331335, "percentage": 41.21, "elapsed_time": "4:07:44", "remaining_time": "5:53:22"} +{"current_steps": 2820, "total_steps": 6840, "loss": 0.6649855375289917, "lr": 1.3646336972701507e-05, "epoch": 0.8246819710483989, "percentage": 41.23, "elapsed_time": "4:07:48", 
"remaining_time": "5:53:15"} +{"current_steps": 2821, "total_steps": 6840, "loss": 0.6484942436218262, "lr": 1.3641834704187194e-05, "epoch": 0.8249744114636642, "percentage": 41.24, "elapsed_time": "4:07:54", "remaining_time": "5:53:11"} +{"current_steps": 2822, "total_steps": 6840, "loss": 0.5167717337608337, "lr": 1.3637331584416163e-05, "epoch": 0.8252668518789297, "percentage": 41.26, "elapsed_time": "4:08:00", "remaining_time": "5:53:06"} +{"current_steps": 2823, "total_steps": 6840, "loss": 0.7808440327644348, "lr": 1.3632827614440988e-05, "epoch": 0.8255592922941951, "percentage": 41.27, "elapsed_time": "4:08:05", "remaining_time": "5:53:00"} +{"current_steps": 2824, "total_steps": 6840, "loss": 0.551183819770813, "lr": 1.3628322795314449e-05, "epoch": 0.8258517327094604, "percentage": 41.29, "elapsed_time": "4:08:09", "remaining_time": "5:52:53"} +{"current_steps": 2825, "total_steps": 6840, "loss": 0.6084691286087036, "lr": 1.3623817128089513e-05, "epoch": 0.8261441731247259, "percentage": 41.3, "elapsed_time": "4:08:14", "remaining_time": "5:52:48"} +{"current_steps": 2826, "total_steps": 6840, "loss": 0.6251019239425659, "lr": 1.3619310613819363e-05, "epoch": 0.8264366135399912, "percentage": 41.32, "elapsed_time": "4:08:18", "remaining_time": "5:52:42"} +{"current_steps": 2827, "total_steps": 6840, "loss": 0.5037761926651001, "lr": 1.3614803253557358e-05, "epoch": 0.8267290539552566, "percentage": 41.33, "elapsed_time": "4:08:23", "remaining_time": "5:52:36"} +{"current_steps": 2828, "total_steps": 6840, "loss": 0.5606831312179565, "lr": 1.3610295048357072e-05, "epoch": 0.827021494370522, "percentage": 41.35, "elapsed_time": "4:08:28", "remaining_time": "5:52:30"} +{"current_steps": 2829, "total_steps": 6840, "loss": 0.6664785146713257, "lr": 1.360578599927227e-05, "epoch": 0.8273139347857874, "percentage": 41.36, "elapsed_time": "4:08:33", "remaining_time": "5:52:24"} +{"current_steps": 2830, "total_steps": 6840, "loss": 0.7105492353439331, "lr": 
1.360127610735691e-05, "epoch": 0.8276063752010527, "percentage": 41.37, "elapsed_time": "4:08:38", "remaining_time": "5:52:19"} +{"current_steps": 2831, "total_steps": 6840, "loss": 0.6255359053611755, "lr": 1.3596765373665162e-05, "epoch": 0.8278988156163182, "percentage": 41.39, "elapsed_time": "4:08:43", "remaining_time": "5:52:13"} +{"current_steps": 2832, "total_steps": 6840, "loss": 0.5422149300575256, "lr": 1.3592253799251377e-05, "epoch": 0.8281912560315836, "percentage": 41.4, "elapsed_time": "4:08:48", "remaining_time": "5:52:07"} +{"current_steps": 2833, "total_steps": 6840, "loss": 0.6044044494628906, "lr": 1.3587741385170104e-05, "epoch": 0.8284836964468489, "percentage": 41.42, "elapsed_time": "4:08:53", "remaining_time": "5:52:01"} +{"current_steps": 2834, "total_steps": 6840, "loss": 0.6256763935089111, "lr": 1.3583228132476094e-05, "epoch": 0.8287761368621144, "percentage": 41.43, "elapsed_time": "4:08:59", "remaining_time": "5:51:57"} +{"current_steps": 2835, "total_steps": 6840, "loss": 0.6759064793586731, "lr": 1.3578714042224297e-05, "epoch": 0.8290685772773797, "percentage": 41.45, "elapsed_time": "4:09:04", "remaining_time": "5:51:52"} +{"current_steps": 2836, "total_steps": 6840, "loss": 0.5819023251533508, "lr": 1.3574199115469852e-05, "epoch": 0.8293610176926451, "percentage": 41.46, "elapsed_time": "4:09:09", "remaining_time": "5:51:46"} +{"current_steps": 2837, "total_steps": 6840, "loss": 0.5412642359733582, "lr": 1.3569683353268098e-05, "epoch": 0.8296534581079105, "percentage": 41.48, "elapsed_time": "4:09:15", "remaining_time": "5:51:41"} +{"current_steps": 2838, "total_steps": 6840, "loss": 0.5129171013832092, "lr": 1.356516675667456e-05, "epoch": 0.8299458985231759, "percentage": 41.49, "elapsed_time": "4:09:21", "remaining_time": "5:51:37"} +{"current_steps": 2839, "total_steps": 6840, "loss": 0.5165198445320129, "lr": 1.356064932674497e-05, "epoch": 0.8302383389384413, "percentage": 41.51, "elapsed_time": "4:09:26", 
"remaining_time": "5:51:32"} +{"current_steps": 2840, "total_steps": 6840, "loss": 0.6545724272727966, "lr": 1.3556131064535249e-05, "epoch": 0.8305307793537067, "percentage": 41.52, "elapsed_time": "4:09:30", "remaining_time": "5:51:25"} +{"current_steps": 2841, "total_steps": 6840, "loss": 0.5715968608856201, "lr": 1.3551611971101513e-05, "epoch": 0.8308232197689721, "percentage": 41.54, "elapsed_time": "4:09:36", "remaining_time": "5:51:20"} +{"current_steps": 2842, "total_steps": 6840, "loss": 0.7063779830932617, "lr": 1.3547092047500074e-05, "epoch": 0.8311156601842374, "percentage": 41.55, "elapsed_time": "4:09:42", "remaining_time": "5:51:16"} +{"current_steps": 2843, "total_steps": 6840, "loss": 0.6391212940216064, "lr": 1.3542571294787437e-05, "epoch": 0.8314081005995029, "percentage": 41.56, "elapsed_time": "4:09:47", "remaining_time": "5:51:11"} +{"current_steps": 2844, "total_steps": 6840, "loss": 0.7145380973815918, "lr": 1.3538049714020298e-05, "epoch": 0.8317005410147682, "percentage": 41.58, "elapsed_time": "4:09:55", "remaining_time": "5:51:08"} +{"current_steps": 2845, "total_steps": 6840, "loss": 0.7262213230133057, "lr": 1.3533527306255547e-05, "epoch": 0.8319929814300336, "percentage": 41.59, "elapsed_time": "4:09:59", "remaining_time": "5:51:02"} +{"current_steps": 2846, "total_steps": 6840, "loss": 0.7621959447860718, "lr": 1.3529004072550276e-05, "epoch": 0.832285421845299, "percentage": 41.61, "elapsed_time": "4:10:03", "remaining_time": "5:50:56"} +{"current_steps": 2847, "total_steps": 6840, "loss": 0.6372592449188232, "lr": 1.3524480013961757e-05, "epoch": 0.8325778622605644, "percentage": 41.62, "elapsed_time": "4:10:08", "remaining_time": "5:50:49"} +{"current_steps": 2848, "total_steps": 6840, "loss": 0.6223774552345276, "lr": 1.3519955131547469e-05, "epoch": 0.8328703026758298, "percentage": 41.64, "elapsed_time": "4:10:12", "remaining_time": "5:50:42"} +{"current_steps": 2849, "total_steps": 6840, "loss": 0.6500433683395386, "lr": 
1.3515429426365066e-05, "epoch": 0.8331627430910952, "percentage": 41.65, "elapsed_time": "4:10:17", "remaining_time": "5:50:37"} +{"current_steps": 2850, "total_steps": 6840, "loss": 0.6136040687561035, "lr": 1.3510902899472408e-05, "epoch": 0.8334551835063606, "percentage": 41.67, "elapsed_time": "4:10:22", "remaining_time": "5:50:31"} +{"current_steps": 2851, "total_steps": 6840, "loss": 0.5297173261642456, "lr": 1.3506375551927546e-05, "epoch": 0.8337476239216259, "percentage": 41.68, "elapsed_time": "4:10:26", "remaining_time": "5:50:25"} +{"current_steps": 2852, "total_steps": 6840, "loss": 0.6215870976448059, "lr": 1.3501847384788718e-05, "epoch": 0.8340400643368914, "percentage": 41.7, "elapsed_time": "4:10:33", "remaining_time": "5:50:21"} +{"current_steps": 2853, "total_steps": 6840, "loss": 0.5507583618164062, "lr": 1.3497318399114354e-05, "epoch": 0.8343325047521567, "percentage": 41.71, "elapsed_time": "4:10:39", "remaining_time": "5:50:17"} +{"current_steps": 2854, "total_steps": 6840, "loss": 0.6348794102668762, "lr": 1.349278859596308e-05, "epoch": 0.8346249451674221, "percentage": 41.73, "elapsed_time": "4:10:45", "remaining_time": "5:50:13"} +{"current_steps": 2855, "total_steps": 6840, "loss": 0.7009605765342712, "lr": 1.3488257976393708e-05, "epoch": 0.8349173855826876, "percentage": 41.74, "elapsed_time": "4:10:50", "remaining_time": "5:50:07"} +{"current_steps": 2856, "total_steps": 6840, "loss": 0.6268658638000488, "lr": 1.3483726541465238e-05, "epoch": 0.8352098259979529, "percentage": 41.75, "elapsed_time": "4:10:56", "remaining_time": "5:50:02"} +{"current_steps": 2857, "total_steps": 6840, "loss": 0.7187683582305908, "lr": 1.3479194292236875e-05, "epoch": 0.8355022664132183, "percentage": 41.77, "elapsed_time": "4:11:01", "remaining_time": "5:49:56"} +{"current_steps": 2858, "total_steps": 6840, "loss": 0.7016449570655823, "lr": 1.3474661229768002e-05, "epoch": 0.8357947068284837, "percentage": 41.78, "elapsed_time": "4:11:06", 
"remaining_time": "5:49:51"} +{"current_steps": 2859, "total_steps": 6840, "loss": 0.5852428674697876, "lr": 1.347012735511819e-05, "epoch": 0.8360871472437491, "percentage": 41.8, "elapsed_time": "4:11:13", "remaining_time": "5:49:48"} +{"current_steps": 2860, "total_steps": 6840, "loss": 0.6232450008392334, "lr": 1.3465592669347207e-05, "epoch": 0.8363795876590144, "percentage": 41.81, "elapsed_time": "4:11:17", "remaining_time": "5:49:42"} +{"current_steps": 2861, "total_steps": 6840, "loss": 0.526097297668457, "lr": 1.346105717351501e-05, "epoch": 0.8366720280742799, "percentage": 41.83, "elapsed_time": "4:11:23", "remaining_time": "5:49:37"} +{"current_steps": 2862, "total_steps": 6840, "loss": 0.6065535545349121, "lr": 1.3456520868681741e-05, "epoch": 0.8369644684895453, "percentage": 41.84, "elapsed_time": "4:11:29", "remaining_time": "5:49:33"} +{"current_steps": 2863, "total_steps": 6840, "loss": 0.5836296677589417, "lr": 1.3451983755907736e-05, "epoch": 0.8372569089048106, "percentage": 41.86, "elapsed_time": "4:11:33", "remaining_time": "5:49:26"} +{"current_steps": 2864, "total_steps": 6840, "loss": 0.678827166557312, "lr": 1.3447445836253519e-05, "epoch": 0.8375493493200761, "percentage": 41.87, "elapsed_time": "4:11:39", "remaining_time": "5:49:21"} +{"current_steps": 2865, "total_steps": 6840, "loss": 0.5206096172332764, "lr": 1.3442907110779794e-05, "epoch": 0.8378417897353414, "percentage": 41.89, "elapsed_time": "4:11:45", "remaining_time": "5:49:17"} +{"current_steps": 2866, "total_steps": 6840, "loss": 0.6424980163574219, "lr": 1.3438367580547468e-05, "epoch": 0.8381342301506068, "percentage": 41.9, "elapsed_time": "4:11:49", "remaining_time": "5:49:11"} +{"current_steps": 2867, "total_steps": 6840, "loss": 0.6293484568595886, "lr": 1.3433827246617624e-05, "epoch": 0.8384266705658722, "percentage": 41.92, "elapsed_time": "4:11:55", "remaining_time": "5:49:05"} +{"current_steps": 2868, "total_steps": 6840, "loss": 0.5912167429924011, "lr": 
1.3429286110051539e-05, "epoch": 0.8387191109811376, "percentage": 41.93, "elapsed_time": "4:11:59", "remaining_time": "5:49:00"} +{"current_steps": 2869, "total_steps": 6840, "loss": 0.6571674346923828, "lr": 1.342474417191068e-05, "epoch": 0.8390115513964029, "percentage": 41.94, "elapsed_time": "4:12:04", "remaining_time": "5:48:54"} +{"current_steps": 2870, "total_steps": 6840, "loss": 0.5519720911979675, "lr": 1.342020143325669e-05, "epoch": 0.8393039918116684, "percentage": 41.96, "elapsed_time": "4:12:10", "remaining_time": "5:48:49"} +{"current_steps": 2871, "total_steps": 6840, "loss": 0.6465001106262207, "lr": 1.341565789515141e-05, "epoch": 0.8395964322269338, "percentage": 41.97, "elapsed_time": "4:12:15", "remaining_time": "5:48:44"} +{"current_steps": 2872, "total_steps": 6840, "loss": 0.6022073030471802, "lr": 1.3411113558656865e-05, "epoch": 0.8398888726421991, "percentage": 41.99, "elapsed_time": "4:12:21", "remaining_time": "5:48:39"} +{"current_steps": 2873, "total_steps": 6840, "loss": 0.610893726348877, "lr": 1.3406568424835264e-05, "epoch": 0.8401813130574646, "percentage": 42.0, "elapsed_time": "4:12:26", "remaining_time": "5:48:33"} +{"current_steps": 2874, "total_steps": 6840, "loss": 0.5296563506126404, "lr": 1.340202249474901e-05, "epoch": 0.8404737534727299, "percentage": 42.02, "elapsed_time": "4:12:31", "remaining_time": "5:48:28"} +{"current_steps": 2875, "total_steps": 6840, "loss": 0.6327008605003357, "lr": 1.3397475769460679e-05, "epoch": 0.8407661938879953, "percentage": 42.03, "elapsed_time": "4:12:37", "remaining_time": "5:48:24"} +{"current_steps": 2876, "total_steps": 6840, "loss": 0.6437617540359497, "lr": 1.3392928250033045e-05, "epoch": 0.8410586343032607, "percentage": 42.05, "elapsed_time": "4:12:41", "remaining_time": "5:48:17"} +{"current_steps": 2877, "total_steps": 6840, "loss": 0.5627291202545166, "lr": 1.3388379937529063e-05, "epoch": 0.8413510747185261, "percentage": 42.06, "elapsed_time": "4:12:46", 
"remaining_time": "5:48:12"} +{"current_steps": 2878, "total_steps": 6840, "loss": 0.5921163558959961, "lr": 1.3383830833011871e-05, "epoch": 0.8416435151337915, "percentage": 42.08, "elapsed_time": "4:12:52", "remaining_time": "5:48:07"} +{"current_steps": 2879, "total_steps": 6840, "loss": 0.5749082565307617, "lr": 1.3379280937544797e-05, "epoch": 0.8419359555490569, "percentage": 42.09, "elapsed_time": "4:12:59", "remaining_time": "5:48:03"} +{"current_steps": 2880, "total_steps": 6840, "loss": 0.6294553279876709, "lr": 1.3374730252191347e-05, "epoch": 0.8422283959643223, "percentage": 42.11, "elapsed_time": "4:13:05", "remaining_time": "5:47:59"} +{"current_steps": 2881, "total_steps": 6840, "loss": 0.5172078609466553, "lr": 1.3370178778015223e-05, "epoch": 0.8425208363795876, "percentage": 42.12, "elapsed_time": "4:13:10", "remaining_time": "5:47:54"} +{"current_steps": 2882, "total_steps": 6840, "loss": 0.44069811701774597, "lr": 1.3365626516080301e-05, "epoch": 0.8428132767948531, "percentage": 42.13, "elapsed_time": "4:13:15", "remaining_time": "5:47:48"} +{"current_steps": 2883, "total_steps": 6840, "loss": 0.72663813829422, "lr": 1.336107346745064e-05, "epoch": 0.8431057172101184, "percentage": 42.15, "elapsed_time": "4:13:21", "remaining_time": "5:47:44"} +{"current_steps": 2884, "total_steps": 6840, "loss": 0.6562269926071167, "lr": 1.3356519633190495e-05, "epoch": 0.8433981576253838, "percentage": 42.16, "elapsed_time": "4:13:25", "remaining_time": "5:47:37"} +{"current_steps": 2885, "total_steps": 6840, "loss": 0.699925422668457, "lr": 1.3351965014364293e-05, "epoch": 0.8436905980406492, "percentage": 42.18, "elapsed_time": "4:13:29", "remaining_time": "5:47:30"} +{"current_steps": 2886, "total_steps": 6840, "loss": 0.5902425646781921, "lr": 1.3347409612036651e-05, "epoch": 0.8439830384559146, "percentage": 42.19, "elapsed_time": "4:13:34", "remaining_time": "5:47:24"} +{"current_steps": 2887, "total_steps": 6840, "loss": 0.613966703414917, "lr": 
1.3342853427272362e-05, "epoch": 0.84427547887118, "percentage": 42.21, "elapsed_time": "4:13:41", "remaining_time": "5:47:21"} +{"current_steps": 2888, "total_steps": 6840, "loss": 0.5864139199256897, "lr": 1.333829646113641e-05, "epoch": 0.8445679192864454, "percentage": 42.22, "elapsed_time": "4:13:46", "remaining_time": "5:47:15"} +{"current_steps": 2889, "total_steps": 6840, "loss": 0.5851572751998901, "lr": 1.3333738714693958e-05, "epoch": 0.8448603597017108, "percentage": 42.24, "elapsed_time": "4:13:51", "remaining_time": "5:47:10"} +{"current_steps": 2890, "total_steps": 6840, "loss": 0.6564328074455261, "lr": 1.3329180189010348e-05, "epoch": 0.8451528001169761, "percentage": 42.25, "elapsed_time": "4:13:56", "remaining_time": "5:47:04"} +{"current_steps": 2891, "total_steps": 6840, "loss": 0.6745615005493164, "lr": 1.3324620885151115e-05, "epoch": 0.8454452405322416, "percentage": 42.27, "elapsed_time": "4:14:01", "remaining_time": "5:46:59"} +{"current_steps": 2892, "total_steps": 6840, "loss": 0.5003606081008911, "lr": 1.3320060804181962e-05, "epoch": 0.8457376809475069, "percentage": 42.28, "elapsed_time": "4:14:06", "remaining_time": "5:46:53"} +{"current_steps": 2893, "total_steps": 6840, "loss": 0.6646369695663452, "lr": 1.3315499947168781e-05, "epoch": 0.8460301213627723, "percentage": 42.3, "elapsed_time": "4:14:11", "remaining_time": "5:46:48"} +{"current_steps": 2894, "total_steps": 6840, "loss": 0.6903572082519531, "lr": 1.3310938315177647e-05, "epoch": 0.8463225617780378, "percentage": 42.31, "elapsed_time": "4:14:16", "remaining_time": "5:46:42"} +{"current_steps": 2895, "total_steps": 6840, "loss": 0.6221956610679626, "lr": 1.330637590927481e-05, "epoch": 0.8466150021933031, "percentage": 42.32, "elapsed_time": "4:14:21", "remaining_time": "5:46:36"} +{"current_steps": 2896, "total_steps": 6840, "loss": 0.5602666139602661, "lr": 1.3301812730526713e-05, "epoch": 0.8469074426085685, "percentage": 42.34, "elapsed_time": "4:14:27", 
"remaining_time": "5:46:32"} +{"current_steps": 2897, "total_steps": 6840, "loss": 0.5843783617019653, "lr": 1.3297248779999963e-05, "epoch": 0.8471998830238339, "percentage": 42.35, "elapsed_time": "4:14:34", "remaining_time": "5:46:29"} +{"current_steps": 2898, "total_steps": 6840, "loss": 0.5040254592895508, "lr": 1.3292684058761357e-05, "epoch": 0.8474923234390993, "percentage": 42.37, "elapsed_time": "4:14:38", "remaining_time": "5:46:22"} +{"current_steps": 2899, "total_steps": 6840, "loss": 0.6180210709571838, "lr": 1.3288118567877874e-05, "epoch": 0.8477847638543646, "percentage": 42.38, "elapsed_time": "4:14:42", "remaining_time": "5:46:15"} +{"current_steps": 2900, "total_steps": 6840, "loss": 0.5050851106643677, "lr": 1.3283552308416668e-05, "epoch": 0.8480772042696301, "percentage": 42.4, "elapsed_time": "4:14:47", "remaining_time": "5:46:09"} +{"current_steps": 2901, "total_steps": 6840, "loss": 0.6627126932144165, "lr": 1.3278985281445072e-05, "epoch": 0.8483696446848955, "percentage": 42.41, "elapsed_time": "4:14:57", "remaining_time": "5:46:11"} +{"current_steps": 2902, "total_steps": 6840, "loss": 0.5984441041946411, "lr": 1.3274417488030607e-05, "epoch": 0.8486620851001608, "percentage": 42.43, "elapsed_time": "4:15:03", "remaining_time": "5:46:07"} +{"current_steps": 2903, "total_steps": 6840, "loss": 0.611599326133728, "lr": 1.3269848929240958e-05, "epoch": 0.8489545255154263, "percentage": 42.44, "elapsed_time": "4:15:09", "remaining_time": "5:46:02"} +{"current_steps": 2904, "total_steps": 6840, "loss": 0.6057847142219543, "lr": 1.3265279606144006e-05, "epoch": 0.8492469659306916, "percentage": 42.46, "elapsed_time": "4:15:13", "remaining_time": "5:45:55"} +{"current_steps": 2905, "total_steps": 6840, "loss": 0.7123644948005676, "lr": 1.3260709519807797e-05, "epoch": 0.849539406345957, "percentage": 42.47, "elapsed_time": "4:15:18", "remaining_time": "5:45:50"} +{"current_steps": 2906, "total_steps": 6840, "loss": 0.6193811893463135, "lr": 
1.3256138671300564e-05, "epoch": 0.8498318467612224, "percentage": 42.49, "elapsed_time": "4:15:23", "remaining_time": "5:45:44"} +{"current_steps": 2907, "total_steps": 6840, "loss": 0.5775484442710876, "lr": 1.3251567061690717e-05, "epoch": 0.8501242871764878, "percentage": 42.5, "elapsed_time": "4:15:29", "remaining_time": "5:45:39"} +{"current_steps": 2908, "total_steps": 6840, "loss": 0.5655511617660522, "lr": 1.3246994692046837e-05, "epoch": 0.8504167275917531, "percentage": 42.51, "elapsed_time": "4:15:32", "remaining_time": "5:45:31"} +{"current_steps": 2909, "total_steps": 6840, "loss": 0.6216102838516235, "lr": 1.3242421563437688e-05, "epoch": 0.8507091680070186, "percentage": 42.53, "elapsed_time": "4:15:38", "remaining_time": "5:45:27"} +{"current_steps": 2910, "total_steps": 6840, "loss": 0.649554967880249, "lr": 1.3237847676932217e-05, "epoch": 0.851001608422284, "percentage": 42.54, "elapsed_time": "4:15:44", "remaining_time": "5:45:22"} +{"current_steps": 2911, "total_steps": 6840, "loss": 0.6688281297683716, "lr": 1.3233273033599534e-05, "epoch": 0.8512940488375493, "percentage": 42.56, "elapsed_time": "4:15:47", "remaining_time": "5:45:15"} +{"current_steps": 2912, "total_steps": 6840, "loss": 0.664188027381897, "lr": 1.322869763450894e-05, "epoch": 0.8515864892528148, "percentage": 42.57, "elapsed_time": "4:15:54", "remaining_time": "5:45:11"} +{"current_steps": 2913, "total_steps": 6840, "loss": 0.47189265489578247, "lr": 1.3224121480729905e-05, "epoch": 0.8518789296680801, "percentage": 42.59, "elapsed_time": "4:15:59", "remaining_time": "5:45:06"} +{"current_steps": 2914, "total_steps": 6840, "loss": 0.6190480589866638, "lr": 1.3219544573332075e-05, "epoch": 0.8521713700833455, "percentage": 42.6, "elapsed_time": "4:16:05", "remaining_time": "5:45:01"} +{"current_steps": 2915, "total_steps": 6840, "loss": 0.6564091444015503, "lr": 1.3214966913385277e-05, "epoch": 0.8524638104986109, "percentage": 42.62, "elapsed_time": "4:16:11", 
"remaining_time": "5:44:56"} +{"current_steps": 2916, "total_steps": 6840, "loss": 0.6083766222000122, "lr": 1.321038850195951e-05, "epoch": 0.8527562509138763, "percentage": 42.63, "elapsed_time": "4:16:15", "remaining_time": "5:44:50"} +{"current_steps": 2917, "total_steps": 6840, "loss": 0.5262473821640015, "lr": 1.3205809340124951e-05, "epoch": 0.8530486913291417, "percentage": 42.65, "elapsed_time": "4:16:21", "remaining_time": "5:44:46"} +{"current_steps": 2918, "total_steps": 6840, "loss": 0.6170297861099243, "lr": 1.320122942895195e-05, "epoch": 0.8533411317444071, "percentage": 42.66, "elapsed_time": "4:16:25", "remaining_time": "5:44:39"} +{"current_steps": 2919, "total_steps": 6840, "loss": 0.5791536569595337, "lr": 1.3196648769511036e-05, "epoch": 0.8536335721596725, "percentage": 42.68, "elapsed_time": "4:16:30", "remaining_time": "5:44:33"} +{"current_steps": 2920, "total_steps": 6840, "loss": 0.5870766639709473, "lr": 1.3192067362872904e-05, "epoch": 0.8539260125749378, "percentage": 42.69, "elapsed_time": "4:16:35", "remaining_time": "5:44:28"} +{"current_steps": 2921, "total_steps": 6840, "loss": 0.604548990726471, "lr": 1.3187485210108438e-05, "epoch": 0.8542184529902033, "percentage": 42.7, "elapsed_time": "4:16:40", "remaining_time": "5:44:22"} +{"current_steps": 2922, "total_steps": 6840, "loss": 0.5292568206787109, "lr": 1.3182902312288682e-05, "epoch": 0.8545108934054686, "percentage": 42.72, "elapsed_time": "4:16:44", "remaining_time": "5:44:15"} +{"current_steps": 2923, "total_steps": 6840, "loss": 0.5638582706451416, "lr": 1.3178318670484862e-05, "epoch": 0.854803333820734, "percentage": 42.73, "elapsed_time": "4:16:50", "remaining_time": "5:44:10"} +{"current_steps": 2924, "total_steps": 6840, "loss": 0.5730164051055908, "lr": 1.317373428576838e-05, "epoch": 0.8550957742359994, "percentage": 42.75, "elapsed_time": "4:16:54", "remaining_time": "5:44:03"} +{"current_steps": 2925, "total_steps": 6840, "loss": 0.6170799732208252, "lr": 
1.3169149159210803e-05, "epoch": 0.8553882146512648, "percentage": 42.76, "elapsed_time": "4:17:00", "remaining_time": "5:43:59"} +{"current_steps": 2926, "total_steps": 6840, "loss": 0.591925323009491, "lr": 1.3164563291883879e-05, "epoch": 0.8556806550665302, "percentage": 42.78, "elapsed_time": "4:17:06", "remaining_time": "5:43:55"} +{"current_steps": 2927, "total_steps": 6840, "loss": 0.7269439697265625, "lr": 1.3159976684859528e-05, "epoch": 0.8559730954817956, "percentage": 42.79, "elapsed_time": "4:17:10", "remaining_time": "5:43:49"} +{"current_steps": 2928, "total_steps": 6840, "loss": 0.615471363067627, "lr": 1.3155389339209839e-05, "epoch": 0.856265535897061, "percentage": 42.81, "elapsed_time": "4:17:16", "remaining_time": "5:43:43"} +{"current_steps": 2929, "total_steps": 6840, "loss": 0.6264692544937134, "lr": 1.3150801256007076e-05, "epoch": 0.8565579763123263, "percentage": 42.82, "elapsed_time": "4:17:21", "remaining_time": "5:43:38"} +{"current_steps": 2930, "total_steps": 6840, "loss": 0.5729779005050659, "lr": 1.314621243632368e-05, "epoch": 0.8568504167275918, "percentage": 42.84, "elapsed_time": "4:17:26", "remaining_time": "5:43:33"} +{"current_steps": 2931, "total_steps": 6840, "loss": 0.6462980508804321, "lr": 1.314162288123225e-05, "epoch": 0.8571428571428571, "percentage": 42.85, "elapsed_time": "4:17:32", "remaining_time": "5:43:28"} +{"current_steps": 2932, "total_steps": 6840, "loss": 0.5493176579475403, "lr": 1.3137032591805577e-05, "epoch": 0.8574352975581225, "percentage": 42.87, "elapsed_time": "4:17:39", "remaining_time": "5:43:25"} +{"current_steps": 2933, "total_steps": 6840, "loss": 0.49161234498023987, "lr": 1.3132441569116608e-05, "epoch": 0.857727737973388, "percentage": 42.88, "elapsed_time": "4:17:45", "remaining_time": "5:43:21"} +{"current_steps": 2934, "total_steps": 6840, "loss": 0.6724506616592407, "lr": 1.312784981423847e-05, "epoch": 0.8580201783886533, "percentage": 42.89, "elapsed_time": "4:17:50", 
"remaining_time": "5:43:16"} +{"current_steps": 2935, "total_steps": 6840, "loss": 0.6180965900421143, "lr": 1.3123257328244455e-05, "epoch": 0.8583126188039187, "percentage": 42.91, "elapsed_time": "4:17:56", "remaining_time": "5:43:10"} +{"current_steps": 2936, "total_steps": 6840, "loss": 0.6676491498947144, "lr": 1.3118664112208027e-05, "epoch": 0.8586050592191841, "percentage": 42.92, "elapsed_time": "4:18:00", "remaining_time": "5:43:04"} +{"current_steps": 2937, "total_steps": 6840, "loss": 0.5964041948318481, "lr": 1.3114070167202827e-05, "epoch": 0.8588974996344495, "percentage": 42.94, "elapsed_time": "4:18:06", "remaining_time": "5:42:59"} +{"current_steps": 2938, "total_steps": 6840, "loss": 0.708328366279602, "lr": 1.3109475494302657e-05, "epoch": 0.8591899400497148, "percentage": 42.95, "elapsed_time": "4:18:12", "remaining_time": "5:42:55"} +{"current_steps": 2939, "total_steps": 6840, "loss": 0.6360403299331665, "lr": 1.3104880094581495e-05, "epoch": 0.8594823804649803, "percentage": 42.97, "elapsed_time": "4:18:18", "remaining_time": "5:42:51"} +{"current_steps": 2940, "total_steps": 6840, "loss": 0.5450131893157959, "lr": 1.3100283969113494e-05, "epoch": 0.8597748208802457, "percentage": 42.98, "elapsed_time": "4:18:22", "remaining_time": "5:42:45"} +{"current_steps": 2941, "total_steps": 6840, "loss": 0.4472329020500183, "lr": 1.3095687118972962e-05, "epoch": 0.860067261295511, "percentage": 43.0, "elapsed_time": "4:18:27", "remaining_time": "5:42:39"} +{"current_steps": 2942, "total_steps": 6840, "loss": 0.6853972673416138, "lr": 1.3091089545234387e-05, "epoch": 0.8603597017107765, "percentage": 43.01, "elapsed_time": "4:18:33", "remaining_time": "5:42:35"} +{"current_steps": 2943, "total_steps": 6840, "loss": 0.6547979116439819, "lr": 1.3086491248972429e-05, "epoch": 0.8606521421260418, "percentage": 43.03, "elapsed_time": "4:18:38", "remaining_time": "5:42:28"} +{"current_steps": 2944, "total_steps": 6840, "loss": 0.46194693446159363, "lr": 
1.3081892231261903e-05, "epoch": 0.8609445825413072, "percentage": 43.04, "elapsed_time": "4:18:43", "remaining_time": "5:42:23"} +{"current_steps": 2945, "total_steps": 6840, "loss": 0.5715345144271851, "lr": 1.307729249317781e-05, "epoch": 0.8612370229565726, "percentage": 43.06, "elapsed_time": "4:18:47", "remaining_time": "5:42:16"} +{"current_steps": 2946, "total_steps": 6840, "loss": 0.5590982437133789, "lr": 1.3072692035795305e-05, "epoch": 0.861529463371838, "percentage": 43.07, "elapsed_time": "4:18:52", "remaining_time": "5:42:10"} +{"current_steps": 2947, "total_steps": 6840, "loss": 0.5435009002685547, "lr": 1.3068090860189719e-05, "epoch": 0.8618219037871033, "percentage": 43.08, "elapsed_time": "4:18:56", "remaining_time": "5:42:03"} +{"current_steps": 2948, "total_steps": 6840, "loss": 0.4528965651988983, "lr": 1.3063488967436548e-05, "epoch": 0.8621143442023688, "percentage": 43.1, "elapsed_time": "4:19:02", "remaining_time": "5:41:59"} +{"current_steps": 2949, "total_steps": 6840, "loss": 0.5520291328430176, "lr": 1.3058886358611457e-05, "epoch": 0.8624067846176342, "percentage": 43.11, "elapsed_time": "4:19:08", "remaining_time": "5:41:55"} +{"current_steps": 2950, "total_steps": 6840, "loss": 0.6444021463394165, "lr": 1.305428303479028e-05, "epoch": 0.8626992250328995, "percentage": 43.13, "elapsed_time": "4:19:14", "remaining_time": "5:41:51"} +{"current_steps": 2951, "total_steps": 6840, "loss": 0.7808041572570801, "lr": 1.3049678997049016e-05, "epoch": 0.862991665448165, "percentage": 43.14, "elapsed_time": "4:19:20", "remaining_time": "5:41:47"} +{"current_steps": 2952, "total_steps": 6840, "loss": 0.6297428607940674, "lr": 1.3045074246463825e-05, "epoch": 0.8632841058634303, "percentage": 43.16, "elapsed_time": "4:19:26", "remaining_time": "5:41:41"} +{"current_steps": 2953, "total_steps": 6840, "loss": 0.5776612162590027, "lr": 1.3040468784111045e-05, "epoch": 0.8635765462786957, "percentage": 43.17, "elapsed_time": "4:19:30", 
"remaining_time": "5:41:35"} +{"current_steps": 2954, "total_steps": 6840, "loss": 0.49298524856567383, "lr": 1.3035862611067169e-05, "epoch": 0.8638689866939611, "percentage": 43.19, "elapsed_time": "4:19:36", "remaining_time": "5:41:30"} +{"current_steps": 2955, "total_steps": 6840, "loss": 0.8061650991439819, "lr": 1.303125572840887e-05, "epoch": 0.8641614271092265, "percentage": 43.2, "elapsed_time": "4:19:41", "remaining_time": "5:41:25"} +{"current_steps": 2956, "total_steps": 6840, "loss": 0.7741662859916687, "lr": 1.3026648137212976e-05, "epoch": 0.8644538675244919, "percentage": 43.22, "elapsed_time": "4:19:45", "remaining_time": "5:41:18"} +{"current_steps": 2957, "total_steps": 6840, "loss": 0.5589889287948608, "lr": 1.302203983855648e-05, "epoch": 0.8647463079397573, "percentage": 43.23, "elapsed_time": "4:19:50", "remaining_time": "5:41:12"} +{"current_steps": 2958, "total_steps": 6840, "loss": 0.5801941752433777, "lr": 1.3017430833516547e-05, "epoch": 0.8650387483550227, "percentage": 43.25, "elapsed_time": "4:19:55", "remaining_time": "5:41:06"} +{"current_steps": 2959, "total_steps": 6840, "loss": 0.5874185562133789, "lr": 1.30128211231705e-05, "epoch": 0.865331188770288, "percentage": 43.26, "elapsed_time": "4:20:01", "remaining_time": "5:41:02"} +{"current_steps": 2960, "total_steps": 6840, "loss": 0.6062727570533752, "lr": 1.3008210708595837e-05, "epoch": 0.8656236291855535, "percentage": 43.27, "elapsed_time": "4:20:06", "remaining_time": "5:40:57"} +{"current_steps": 2961, "total_steps": 6840, "loss": 0.571448802947998, "lr": 1.3003599590870209e-05, "epoch": 0.8659160696008188, "percentage": 43.29, "elapsed_time": "4:20:11", "remaining_time": "5:40:51"} +{"current_steps": 2962, "total_steps": 6840, "loss": 0.7001944780349731, "lr": 1.2998987771071442e-05, "epoch": 0.8662085100160842, "percentage": 43.3, "elapsed_time": "4:20:17", "remaining_time": "5:40:47"} +{"current_steps": 2963, "total_steps": 6840, "loss": 0.49182790517807007, "lr": 
1.2994375250277516e-05, "epoch": 0.8665009504313496, "percentage": 43.32, "elapsed_time": "4:20:22", "remaining_time": "5:40:41"} +{"current_steps": 2964, "total_steps": 6840, "loss": 0.5299041271209717, "lr": 1.298976202956658e-05, "epoch": 0.866793390846615, "percentage": 43.33, "elapsed_time": "4:20:26", "remaining_time": "5:40:34"} +{"current_steps": 2965, "total_steps": 6840, "loss": 0.4955265522003174, "lr": 1.2985148110016947e-05, "epoch": 0.8670858312618804, "percentage": 43.35, "elapsed_time": "4:20:32", "remaining_time": "5:40:30"} +{"current_steps": 2966, "total_steps": 6840, "loss": 0.6395630836486816, "lr": 1.2980533492707094e-05, "epoch": 0.8673782716771458, "percentage": 43.36, "elapsed_time": "4:20:39", "remaining_time": "5:40:26"} +{"current_steps": 2967, "total_steps": 6840, "loss": 0.5926274061203003, "lr": 1.2975918178715661e-05, "epoch": 0.8676707120924112, "percentage": 43.38, "elapsed_time": "4:20:44", "remaining_time": "5:40:22"} +{"current_steps": 2968, "total_steps": 6840, "loss": 0.5556914806365967, "lr": 1.2971302169121447e-05, "epoch": 0.8679631525076765, "percentage": 43.39, "elapsed_time": "4:20:50", "remaining_time": "5:40:16"} +{"current_steps": 2969, "total_steps": 6840, "loss": 0.5347195863723755, "lr": 1.2966685465003415e-05, "epoch": 0.868255592922942, "percentage": 43.41, "elapsed_time": "4:20:56", "remaining_time": "5:40:12"} +{"current_steps": 2970, "total_steps": 6840, "loss": 0.6839208006858826, "lr": 1.2962068067440694e-05, "epoch": 0.8685480333382073, "percentage": 43.42, "elapsed_time": "4:21:01", "remaining_time": "5:40:07"} +{"current_steps": 2971, "total_steps": 6840, "loss": 0.5741337537765503, "lr": 1.295744997751257e-05, "epoch": 0.8688404737534727, "percentage": 43.44, "elapsed_time": "4:21:06", "remaining_time": "5:40:01"} +{"current_steps": 2972, "total_steps": 6840, "loss": 0.7383404970169067, "lr": 1.29528311962985e-05, "epoch": 0.8691329141687382, "percentage": 43.45, "elapsed_time": "4:21:12", 
"remaining_time": "5:39:57"} +{"current_steps": 2973, "total_steps": 6840, "loss": 0.5075374245643616, "lr": 1.294821172487809e-05, "epoch": 0.8694253545840035, "percentage": 43.46, "elapsed_time": "4:21:16", "remaining_time": "5:39:50"} +{"current_steps": 2974, "total_steps": 6840, "loss": 0.557248592376709, "lr": 1.2943591564331113e-05, "epoch": 0.8697177949992689, "percentage": 43.48, "elapsed_time": "4:21:22", "remaining_time": "5:39:46"} +{"current_steps": 2975, "total_steps": 6840, "loss": 0.5687203407287598, "lr": 1.2938970715737506e-05, "epoch": 0.8700102354145343, "percentage": 43.49, "elapsed_time": "4:21:28", "remaining_time": "5:39:42"} +{"current_steps": 2976, "total_steps": 6840, "loss": 0.5946108102798462, "lr": 1.2934349180177364e-05, "epoch": 0.8703026758297997, "percentage": 43.51, "elapsed_time": "4:21:33", "remaining_time": "5:39:35"} +{"current_steps": 2977, "total_steps": 6840, "loss": 0.6103173494338989, "lr": 1.2929726958730942e-05, "epoch": 0.870595116245065, "percentage": 43.52, "elapsed_time": "4:21:37", "remaining_time": "5:39:28"} +{"current_steps": 2978, "total_steps": 6840, "loss": 0.7007244825363159, "lr": 1.2925104052478657e-05, "epoch": 0.8708875566603305, "percentage": 43.54, "elapsed_time": "4:21:42", "remaining_time": "5:39:23"} +{"current_steps": 2979, "total_steps": 6840, "loss": 0.6157742142677307, "lr": 1.2920480462501082e-05, "epoch": 0.8711799970755959, "percentage": 43.55, "elapsed_time": "4:21:48", "remaining_time": "5:39:19"} +{"current_steps": 2980, "total_steps": 6840, "loss": 0.6501113176345825, "lr": 1.2915856189878956e-05, "epoch": 0.8714724374908612, "percentage": 43.57, "elapsed_time": "4:21:53", "remaining_time": "5:39:14"} +{"current_steps": 2981, "total_steps": 6840, "loss": 0.5084626078605652, "lr": 1.2911231235693178e-05, "epoch": 0.8717648779061267, "percentage": 43.58, "elapsed_time": "4:21:59", "remaining_time": "5:39:08"} +{"current_steps": 2982, "total_steps": 6840, "loss": 0.5953651666641235, "lr": 
1.2906605601024796e-05, "epoch": 0.872057318321392, "percentage": 43.6, "elapsed_time": "4:22:03", "remaining_time": "5:39:02"} +{"current_steps": 2983, "total_steps": 6840, "loss": 0.5733205676078796, "lr": 1.290197928695503e-05, "epoch": 0.8723497587366574, "percentage": 43.61, "elapsed_time": "4:22:09", "remaining_time": "5:38:57"} +{"current_steps": 2984, "total_steps": 6840, "loss": 0.5976133942604065, "lr": 1.2897352294565248e-05, "epoch": 0.8726421991519228, "percentage": 43.63, "elapsed_time": "4:22:14", "remaining_time": "5:38:52"} +{"current_steps": 2985, "total_steps": 6840, "loss": 0.5092414617538452, "lr": 1.2892724624936983e-05, "epoch": 0.8729346395671882, "percentage": 43.64, "elapsed_time": "4:22:20", "remaining_time": "5:38:47"} +{"current_steps": 2986, "total_steps": 6840, "loss": 0.7244688272476196, "lr": 1.2888096279151926e-05, "epoch": 0.8732270799824535, "percentage": 43.65, "elapsed_time": "4:22:23", "remaining_time": "5:38:40"} +{"current_steps": 2987, "total_steps": 6840, "loss": 0.6943881511688232, "lr": 1.2883467258291922e-05, "epoch": 0.873519520397719, "percentage": 43.67, "elapsed_time": "4:22:28", "remaining_time": "5:38:34"} +{"current_steps": 2988, "total_steps": 6840, "loss": 0.6484338641166687, "lr": 1.287883756343898e-05, "epoch": 0.8738119608129844, "percentage": 43.68, "elapsed_time": "4:22:33", "remaining_time": "5:38:29"} +{"current_steps": 2989, "total_steps": 6840, "loss": 0.620865523815155, "lr": 1.2874207195675262e-05, "epoch": 0.8741044012282497, "percentage": 43.7, "elapsed_time": "4:22:39", "remaining_time": "5:38:24"} +{"current_steps": 2990, "total_steps": 6840, "loss": 0.5290236473083496, "lr": 1.2869576156083085e-05, "epoch": 0.8743968416435152, "percentage": 43.71, "elapsed_time": "4:22:45", "remaining_time": "5:38:19"} +{"current_steps": 2991, "total_steps": 6840, "loss": 0.7140257358551025, "lr": 1.2864944445744932e-05, "epoch": 0.8746892820587805, "percentage": 43.73, "elapsed_time": "4:22:51", 
"remaining_time": "5:38:15"} +{"current_steps": 2992, "total_steps": 6840, "loss": 0.7167611122131348, "lr": 1.286031206574343e-05, "epoch": 0.8749817224740459, "percentage": 43.74, "elapsed_time": "4:22:56", "remaining_time": "5:38:10"} +{"current_steps": 2993, "total_steps": 6840, "loss": 0.5631322860717773, "lr": 1.2855679017161372e-05, "epoch": 0.8752741628893113, "percentage": 43.76, "elapsed_time": "4:23:01", "remaining_time": "5:38:04"} +{"current_steps": 2994, "total_steps": 6840, "loss": 0.6250770092010498, "lr": 1.2851045301081714e-05, "epoch": 0.8755666033045767, "percentage": 43.77, "elapsed_time": "4:23:07", "remaining_time": "5:37:59"} +{"current_steps": 2995, "total_steps": 6840, "loss": 0.5121266841888428, "lr": 1.2846410918587546e-05, "epoch": 0.8758590437198421, "percentage": 43.79, "elapsed_time": "4:23:12", "remaining_time": "5:37:54"} +{"current_steps": 2996, "total_steps": 6840, "loss": 0.6075780987739563, "lr": 1.2841775870762134e-05, "epoch": 0.8761514841351075, "percentage": 43.8, "elapsed_time": "4:23:17", "remaining_time": "5:37:48"} +{"current_steps": 2997, "total_steps": 6840, "loss": 0.516838014125824, "lr": 1.283714015868889e-05, "epoch": 0.8764439245503729, "percentage": 43.82, "elapsed_time": "4:23:22", "remaining_time": "5:37:42"} +{"current_steps": 2998, "total_steps": 6840, "loss": 0.6952051520347595, "lr": 1.2832503783451384e-05, "epoch": 0.8767363649656382, "percentage": 43.83, "elapsed_time": "4:23:26", "remaining_time": "5:37:36"} +{"current_steps": 2999, "total_steps": 6840, "loss": 0.8039685487747192, "lr": 1.2827866746133342e-05, "epoch": 0.8770288053809037, "percentage": 43.85, "elapsed_time": "4:23:33", "remaining_time": "5:37:32"} +{"current_steps": 3000, "total_steps": 6840, "loss": 0.6200549602508545, "lr": 1.2823229047818642e-05, "epoch": 0.877321245796169, "percentage": 43.86, "elapsed_time": "4:23:38", "remaining_time": "5:37:27"} +{"current_steps": 3001, "total_steps": 6840, "loss": 0.6666116714477539, "lr": 
1.2818590689591315e-05, "epoch": 0.8776136862114344, "percentage": 43.87, "elapsed_time": "4:23:48", "remaining_time": "5:37:27"} +{"current_steps": 3002, "total_steps": 6840, "loss": 0.566741943359375, "lr": 1.2813951672535551e-05, "epoch": 0.8779061266266998, "percentage": 43.89, "elapsed_time": "4:23:52", "remaining_time": "5:37:21"} +{"current_steps": 3003, "total_steps": 6840, "loss": 0.6103402376174927, "lr": 1.2809311997735697e-05, "epoch": 0.8781985670419652, "percentage": 43.9, "elapsed_time": "4:23:56", "remaining_time": "5:37:15"} +{"current_steps": 3004, "total_steps": 6840, "loss": 0.48296916484832764, "lr": 1.280467166627624e-05, "epoch": 0.8784910074572306, "percentage": 43.92, "elapsed_time": "4:24:02", "remaining_time": "5:37:10"} +{"current_steps": 3005, "total_steps": 6840, "loss": 0.5995723605155945, "lr": 1.2800030679241834e-05, "epoch": 0.878783447872496, "percentage": 43.93, "elapsed_time": "4:24:08", "remaining_time": "5:37:06"} +{"current_steps": 3006, "total_steps": 6840, "loss": 0.6199642419815063, "lr": 1.2795389037717286e-05, "epoch": 0.8790758882877614, "percentage": 43.95, "elapsed_time": "4:24:13", "remaining_time": "5:36:59"} +{"current_steps": 3007, "total_steps": 6840, "loss": 0.6740807294845581, "lr": 1.279074674278754e-05, "epoch": 0.8793683287030267, "percentage": 43.96, "elapsed_time": "4:24:18", "remaining_time": "5:36:54"} +{"current_steps": 3008, "total_steps": 6840, "loss": 0.7330688238143921, "lr": 1.2786103795537714e-05, "epoch": 0.8796607691182922, "percentage": 43.98, "elapsed_time": "4:24:22", "remaining_time": "5:36:47"} +{"current_steps": 3009, "total_steps": 6840, "loss": 0.5048441290855408, "lr": 1.2781460197053066e-05, "epoch": 0.8799532095335575, "percentage": 43.99, "elapsed_time": "4:24:28", "remaining_time": "5:36:43"} +{"current_steps": 3010, "total_steps": 6840, "loss": 0.6103702187538147, "lr": 1.277681594841901e-05, "epoch": 0.8802456499488229, "percentage": 44.01, "elapsed_time": "4:24:35", 
"remaining_time": "5:36:40"} +{"current_steps": 3011, "total_steps": 6840, "loss": 0.5223366022109985, "lr": 1.2772171050721107e-05, "epoch": 0.8805380903640884, "percentage": 44.02, "elapsed_time": "4:24:40", "remaining_time": "5:36:34"} +{"current_steps": 3012, "total_steps": 6840, "loss": 0.708305835723877, "lr": 1.2767525505045078e-05, "epoch": 0.8808305307793537, "percentage": 44.04, "elapsed_time": "4:24:45", "remaining_time": "5:36:28"} +{"current_steps": 3013, "total_steps": 6840, "loss": 0.6827911734580994, "lr": 1.2762879312476785e-05, "epoch": 0.8811229711946191, "percentage": 44.05, "elapsed_time": "4:24:51", "remaining_time": "5:36:24"} +{"current_steps": 3014, "total_steps": 6840, "loss": 0.6977027654647827, "lr": 1.2758232474102254e-05, "epoch": 0.8814154116098845, "percentage": 44.06, "elapsed_time": "4:24:57", "remaining_time": "5:36:19"} +{"current_steps": 3015, "total_steps": 6840, "loss": 0.5534720420837402, "lr": 1.2753584991007654e-05, "epoch": 0.8817078520251499, "percentage": 44.08, "elapsed_time": "4:25:02", "remaining_time": "5:36:14"} +{"current_steps": 3016, "total_steps": 6840, "loss": 0.541682243347168, "lr": 1.2748936864279305e-05, "epoch": 0.8820002924404152, "percentage": 44.09, "elapsed_time": "4:25:08", "remaining_time": "5:36:10"} +{"current_steps": 3017, "total_steps": 6840, "loss": 0.6195456981658936, "lr": 1.2744288095003674e-05, "epoch": 0.8822927328556807, "percentage": 44.11, "elapsed_time": "4:25:14", "remaining_time": "5:36:06"} +{"current_steps": 3018, "total_steps": 6840, "loss": 0.5050234794616699, "lr": 1.2739638684267387e-05, "epoch": 0.8825851732709461, "percentage": 44.12, "elapsed_time": "4:25:20", "remaining_time": "5:36:01"} +{"current_steps": 3019, "total_steps": 6840, "loss": 0.5397066473960876, "lr": 1.2734988633157218e-05, "epoch": 0.8828776136862114, "percentage": 44.14, "elapsed_time": "4:25:25", "remaining_time": "5:35:56"} +{"current_steps": 3020, "total_steps": 6840, "loss": 0.5932190418243408, "lr": 
1.273033794276008e-05, "epoch": 0.8831700541014769, "percentage": 44.15, "elapsed_time": "4:25:30", "remaining_time": "5:35:51"} +{"current_steps": 3021, "total_steps": 6840, "loss": 0.5780059099197388, "lr": 1.2725686614163055e-05, "epoch": 0.8834624945167422, "percentage": 44.17, "elapsed_time": "4:25:35", "remaining_time": "5:35:45"} +{"current_steps": 3022, "total_steps": 6840, "loss": 0.5850226879119873, "lr": 1.2721034648453353e-05, "epoch": 0.8837549349320076, "percentage": 44.18, "elapsed_time": "4:25:39", "remaining_time": "5:35:38"} +{"current_steps": 3023, "total_steps": 6840, "loss": 0.6684393882751465, "lr": 1.2716382046718346e-05, "epoch": 0.884047375347273, "percentage": 44.2, "elapsed_time": "4:25:45", "remaining_time": "5:35:34"} +{"current_steps": 3024, "total_steps": 6840, "loss": 0.6045842170715332, "lr": 1.271172881004555e-05, "epoch": 0.8843398157625384, "percentage": 44.21, "elapsed_time": "4:25:51", "remaining_time": "5:35:29"} +{"current_steps": 3025, "total_steps": 6840, "loss": 0.6769551038742065, "lr": 1.2707074939522633e-05, "epoch": 0.8846322561778037, "percentage": 44.23, "elapsed_time": "4:25:56", "remaining_time": "5:35:24"} +{"current_steps": 3026, "total_steps": 6840, "loss": 0.5581091642379761, "lr": 1.2702420436237408e-05, "epoch": 0.8849246965930692, "percentage": 44.24, "elapsed_time": "4:26:00", "remaining_time": "5:35:16"} +{"current_steps": 3027, "total_steps": 6840, "loss": 0.5010186433792114, "lr": 1.269776530127784e-05, "epoch": 0.8852171370083346, "percentage": 44.25, "elapsed_time": "4:26:05", "remaining_time": "5:35:11"} +{"current_steps": 3028, "total_steps": 6840, "loss": 0.4537884294986725, "lr": 1.2693109535732034e-05, "epoch": 0.8855095774235999, "percentage": 44.27, "elapsed_time": "4:26:10", "remaining_time": "5:35:05"} +{"current_steps": 3029, "total_steps": 6840, "loss": 0.5920443534851074, "lr": 1.2688453140688246e-05, "epoch": 0.8858020178388654, "percentage": 44.28, "elapsed_time": "4:26:15", 
"remaining_time": "5:34:59"} +{"current_steps": 3030, "total_steps": 6840, "loss": 0.564072847366333, "lr": 1.2683796117234884e-05, "epoch": 0.8860944582541307, "percentage": 44.3, "elapsed_time": "4:26:19", "remaining_time": "5:34:53"} +{"current_steps": 3031, "total_steps": 6840, "loss": 0.657585620880127, "lr": 1.26791384664605e-05, "epoch": 0.8863868986693961, "percentage": 44.31, "elapsed_time": "4:26:25", "remaining_time": "5:34:48"} +{"current_steps": 3032, "total_steps": 6840, "loss": 0.6864298582077026, "lr": 1.2674480189453786e-05, "epoch": 0.8866793390846615, "percentage": 44.33, "elapsed_time": "4:26:30", "remaining_time": "5:34:43"} +{"current_steps": 3033, "total_steps": 6840, "loss": 0.6416069865226746, "lr": 1.266982128730359e-05, "epoch": 0.8869717794999269, "percentage": 44.34, "elapsed_time": "4:26:35", "remaining_time": "5:34:37"} +{"current_steps": 3034, "total_steps": 6840, "loss": 0.6405118703842163, "lr": 1.2665161761098899e-05, "epoch": 0.8872642199151923, "percentage": 44.36, "elapsed_time": "4:26:40", "remaining_time": "5:34:31"} +{"current_steps": 3035, "total_steps": 6840, "loss": 0.649673342704773, "lr": 1.266050161192885e-05, "epoch": 0.8875566603304577, "percentage": 44.37, "elapsed_time": "4:26:46", "remaining_time": "5:34:27"} +{"current_steps": 3036, "total_steps": 6840, "loss": 0.5914620161056519, "lr": 1.2655840840882729e-05, "epoch": 0.8878491007457231, "percentage": 44.39, "elapsed_time": "4:26:50", "remaining_time": "5:34:20"} +{"current_steps": 3037, "total_steps": 6840, "loss": 0.6080621480941772, "lr": 1.2651179449049958e-05, "epoch": 0.8881415411609884, "percentage": 44.4, "elapsed_time": "4:26:54", "remaining_time": "5:34:13"} +{"current_steps": 3038, "total_steps": 6840, "loss": 0.657015860080719, "lr": 1.264651743752011e-05, "epoch": 0.8884339815762539, "percentage": 44.42, "elapsed_time": "4:27:00", "remaining_time": "5:34:09"} +{"current_steps": 3039, "total_steps": 6840, "loss": 0.5384848713874817, "lr": 
1.26418548073829e-05, "epoch": 0.8887264219915192, "percentage": 44.43, "elapsed_time": "4:27:05", "remaining_time": "5:34:03"} +{"current_steps": 3040, "total_steps": 6840, "loss": 0.7452554106712341, "lr": 1.2637191559728195e-05, "epoch": 0.8890188624067846, "percentage": 44.44, "elapsed_time": "4:27:09", "remaining_time": "5:33:57"} +{"current_steps": 3041, "total_steps": 6840, "loss": 0.743236780166626, "lr": 1.2632527695645993e-05, "epoch": 0.88931130282205, "percentage": 44.46, "elapsed_time": "4:27:15", "remaining_time": "5:33:51"} +{"current_steps": 3042, "total_steps": 6840, "loss": 0.557692289352417, "lr": 1.2627863216226453e-05, "epoch": 0.8896037432373154, "percentage": 44.47, "elapsed_time": "4:27:18", "remaining_time": "5:33:44"} +{"current_steps": 3043, "total_steps": 6840, "loss": 0.5637259483337402, "lr": 1.2623198122559863e-05, "epoch": 0.8898961836525808, "percentage": 44.49, "elapsed_time": "4:27:24", "remaining_time": "5:33:39"} +{"current_steps": 3044, "total_steps": 6840, "loss": 0.5217350721359253, "lr": 1.261853241573666e-05, "epoch": 0.8901886240678462, "percentage": 44.5, "elapsed_time": "4:27:28", "remaining_time": "5:33:32"} +{"current_steps": 3045, "total_steps": 6840, "loss": 0.5971624255180359, "lr": 1.2613866096847423e-05, "epoch": 0.8904810644831116, "percentage": 44.52, "elapsed_time": "4:27:31", "remaining_time": "5:33:25"} +{"current_steps": 3046, "total_steps": 6840, "loss": 0.6586427092552185, "lr": 1.260919916698288e-05, "epoch": 0.8907735048983769, "percentage": 44.53, "elapsed_time": "4:27:37", "remaining_time": "5:33:20"} +{"current_steps": 3047, "total_steps": 6840, "loss": 0.7059915661811829, "lr": 1.2604531627233895e-05, "epoch": 0.8910659453136424, "percentage": 44.55, "elapsed_time": "4:27:42", "remaining_time": "5:33:15"} +{"current_steps": 3048, "total_steps": 6840, "loss": 0.582252025604248, "lr": 1.2599863478691483e-05, "epoch": 0.8913583857289077, "percentage": 44.56, "elapsed_time": "4:27:47", "remaining_time": 
"5:33:10"} +{"current_steps": 3049, "total_steps": 6840, "loss": 0.6901981830596924, "lr": 1.2595194722446786e-05, "epoch": 0.8916508261441731, "percentage": 44.58, "elapsed_time": "4:27:52", "remaining_time": "5:33:04"} +{"current_steps": 3050, "total_steps": 6840, "loss": 0.7462388873100281, "lr": 1.2590525359591101e-05, "epoch": 0.8919432665594386, "percentage": 44.59, "elapsed_time": "4:27:57", "remaining_time": "5:32:57"} +{"current_steps": 3051, "total_steps": 6840, "loss": 0.4963245391845703, "lr": 1.2585855391215866e-05, "epoch": 0.8922357069747039, "percentage": 44.61, "elapsed_time": "4:28:02", "remaining_time": "5:32:53"} +{"current_steps": 3052, "total_steps": 6840, "loss": 0.6408337354660034, "lr": 1.2581184818412655e-05, "epoch": 0.8925281473899693, "percentage": 44.62, "elapsed_time": "4:28:07", "remaining_time": "5:32:47"} +{"current_steps": 3053, "total_steps": 6840, "loss": 0.44528326392173767, "lr": 1.257651364227319e-05, "epoch": 0.8928205878052347, "percentage": 44.63, "elapsed_time": "4:28:13", "remaining_time": "5:32:42"} +{"current_steps": 3054, "total_steps": 6840, "loss": 0.4595017731189728, "lr": 1.2571841863889322e-05, "epoch": 0.8931130282205001, "percentage": 44.65, "elapsed_time": "4:28:17", "remaining_time": "5:32:35"} +{"current_steps": 3055, "total_steps": 6840, "loss": 0.6934910416603088, "lr": 1.2567169484353057e-05, "epoch": 0.8934054686357654, "percentage": 44.66, "elapsed_time": "4:28:23", "remaining_time": "5:32:31"} +{"current_steps": 3056, "total_steps": 6840, "loss": 0.6392845511436462, "lr": 1.2562496504756535e-05, "epoch": 0.8936979090510309, "percentage": 44.68, "elapsed_time": "4:28:27", "remaining_time": "5:32:24"} +{"current_steps": 3057, "total_steps": 6840, "loss": 0.5506458878517151, "lr": 1.255782292619203e-05, "epoch": 0.8939903494662963, "percentage": 44.69, "elapsed_time": "4:28:32", "remaining_time": "5:32:19"} +{"current_steps": 3058, "total_steps": 6840, "loss": 0.5871223211288452, "lr": 
1.255314874975197e-05, "epoch": 0.8942827898815616, "percentage": 44.71, "elapsed_time": "4:28:37", "remaining_time": "5:32:13"} +{"current_steps": 3059, "total_steps": 6840, "loss": 0.603033185005188, "lr": 1.254847397652892e-05, "epoch": 0.8945752302968271, "percentage": 44.72, "elapsed_time": "4:28:42", "remaining_time": "5:32:07"} +{"current_steps": 3060, "total_steps": 6840, "loss": 0.667452335357666, "lr": 1.2543798607615566e-05, "epoch": 0.8948676707120924, "percentage": 44.74, "elapsed_time": "4:28:48", "remaining_time": "5:32:03"} +{"current_steps": 3061, "total_steps": 6840, "loss": 0.6264449954032898, "lr": 1.2539122644104755e-05, "epoch": 0.8951601111273578, "percentage": 44.75, "elapsed_time": "4:28:53", "remaining_time": "5:31:57"} +{"current_steps": 3062, "total_steps": 6840, "loss": 0.6085609793663025, "lr": 1.2534446087089465e-05, "epoch": 0.8954525515426232, "percentage": 44.77, "elapsed_time": "4:28:59", "remaining_time": "5:31:53"} +{"current_steps": 3063, "total_steps": 6840, "loss": 0.6414828896522522, "lr": 1.252976893766281e-05, "epoch": 0.8957449919578886, "percentage": 44.78, "elapsed_time": "4:29:04", "remaining_time": "5:31:48"} +{"current_steps": 3064, "total_steps": 6840, "loss": 0.714614987373352, "lr": 1.2525091196918049e-05, "epoch": 0.8960374323731539, "percentage": 44.8, "elapsed_time": "4:29:09", "remaining_time": "5:31:42"} +{"current_steps": 3065, "total_steps": 6840, "loss": 0.5966176986694336, "lr": 1.2520412865948574e-05, "epoch": 0.8963298727884194, "percentage": 44.81, "elapsed_time": "4:29:15", "remaining_time": "5:31:37"} +{"current_steps": 3066, "total_steps": 6840, "loss": 0.5162957906723022, "lr": 1.2515733945847914e-05, "epoch": 0.8966223132036848, "percentage": 44.82, "elapsed_time": "4:29:19", "remaining_time": "5:31:30"} +{"current_steps": 3067, "total_steps": 6840, "loss": 0.6460821628570557, "lr": 1.2511054437709743e-05, "epoch": 0.8969147536189501, "percentage": 44.84, "elapsed_time": "4:29:23", 
"remaining_time": "5:31:24"} +{"current_steps": 3068, "total_steps": 6840, "loss": 0.6802507638931274, "lr": 1.2506374342627861e-05, "epoch": 0.8972071940342156, "percentage": 44.85, "elapsed_time": "4:29:28", "remaining_time": "5:31:19"} +{"current_steps": 3069, "total_steps": 6840, "loss": 0.5966957807540894, "lr": 1.2501693661696218e-05, "epoch": 0.8974996344494809, "percentage": 44.87, "elapsed_time": "4:29:33", "remaining_time": "5:31:12"} +{"current_steps": 3070, "total_steps": 6840, "loss": 0.607227087020874, "lr": 1.2497012396008893e-05, "epoch": 0.8977920748647463, "percentage": 44.88, "elapsed_time": "4:29:38", "remaining_time": "5:31:07"} +{"current_steps": 3071, "total_steps": 6840, "loss": 0.6544637084007263, "lr": 1.2492330546660098e-05, "epoch": 0.8980845152800117, "percentage": 44.9, "elapsed_time": "4:29:43", "remaining_time": "5:31:01"} +{"current_steps": 3072, "total_steps": 6840, "loss": 0.5896593332290649, "lr": 1.2487648114744196e-05, "epoch": 0.8983769556952771, "percentage": 44.91, "elapsed_time": "4:29:48", "remaining_time": "5:30:56"} +{"current_steps": 3073, "total_steps": 6840, "loss": 0.5710231065750122, "lr": 1.248296510135567e-05, "epoch": 0.8986693961105425, "percentage": 44.93, "elapsed_time": "4:29:54", "remaining_time": "5:30:51"} +{"current_steps": 3074, "total_steps": 6840, "loss": 0.5918926000595093, "lr": 1.2478281507589147e-05, "epoch": 0.8989618365258079, "percentage": 44.94, "elapsed_time": "4:29:59", "remaining_time": "5:30:45"} +{"current_steps": 3075, "total_steps": 6840, "loss": 0.681663453578949, "lr": 1.2473597334539392e-05, "epoch": 0.8992542769410733, "percentage": 44.96, "elapsed_time": "4:30:04", "remaining_time": "5:30:41"} +{"current_steps": 3076, "total_steps": 6840, "loss": 0.5229436159133911, "lr": 1.24689125833013e-05, "epoch": 0.8995467173563386, "percentage": 44.97, "elapsed_time": "4:30:11", "remaining_time": "5:30:36"} +{"current_steps": 3077, "total_steps": 6840, "loss": 0.7165119051933289, "lr": 
1.2464227254969903e-05, "epoch": 0.8998391577716041, "percentage": 44.99, "elapsed_time": "4:30:15", "remaining_time": "5:30:30"} +{"current_steps": 3078, "total_steps": 6840, "loss": 0.514594554901123, "lr": 1.2459541350640368e-05, "epoch": 0.9001315981868694, "percentage": 45.0, "elapsed_time": "4:30:19", "remaining_time": "5:30:23"} +{"current_steps": 3079, "total_steps": 6840, "loss": 0.6173784732818604, "lr": 1.2454854871407993e-05, "epoch": 0.9004240386021348, "percentage": 45.01, "elapsed_time": "4:30:24", "remaining_time": "5:30:18"} +{"current_steps": 3080, "total_steps": 6840, "loss": 0.6796407103538513, "lr": 1.245016781836822e-05, "epoch": 0.9007164790174002, "percentage": 45.03, "elapsed_time": "4:30:29", "remaining_time": "5:30:12"} +{"current_steps": 3081, "total_steps": 6840, "loss": 0.6901683807373047, "lr": 1.2445480192616619e-05, "epoch": 0.9010089194326656, "percentage": 45.04, "elapsed_time": "4:30:36", "remaining_time": "5:30:09"} +{"current_steps": 3082, "total_steps": 6840, "loss": 0.6215920448303223, "lr": 1.2440791995248886e-05, "epoch": 0.901301359847931, "percentage": 45.06, "elapsed_time": "4:30:41", "remaining_time": "5:30:04"} +{"current_steps": 3083, "total_steps": 6840, "loss": 0.6109690070152283, "lr": 1.243610322736087e-05, "epoch": 0.9015938002631964, "percentage": 45.07, "elapsed_time": "4:30:46", "remaining_time": "5:29:58"} +{"current_steps": 3084, "total_steps": 6840, "loss": 0.5273362398147583, "lr": 1.2431413890048534e-05, "epoch": 0.9018862406784618, "percentage": 45.09, "elapsed_time": "4:30:51", "remaining_time": "5:29:52"} +{"current_steps": 3085, "total_steps": 6840, "loss": 0.5219408273696899, "lr": 1.2426723984407982e-05, "epoch": 0.9021786810937271, "percentage": 45.1, "elapsed_time": "4:30:57", "remaining_time": "5:29:47"} +{"current_steps": 3086, "total_steps": 6840, "loss": 0.6894690990447998, "lr": 1.2422033511535458e-05, "epoch": 0.9024711215089926, "percentage": 45.12, "elapsed_time": "4:31:02", 
"remaining_time": "5:29:42"} +{"current_steps": 3087, "total_steps": 6840, "loss": 0.6135656833648682, "lr": 1.2417342472527325e-05, "epoch": 0.9027635619242579, "percentage": 45.13, "elapsed_time": "4:31:06", "remaining_time": "5:29:36"} +{"current_steps": 3088, "total_steps": 6840, "loss": 0.595108151435852, "lr": 1.2412650868480088e-05, "epoch": 0.9030560023395233, "percentage": 45.15, "elapsed_time": "4:31:12", "remaining_time": "5:29:31"} +{"current_steps": 3089, "total_steps": 6840, "loss": 0.6445261240005493, "lr": 1.2407958700490376e-05, "epoch": 0.9033484427547888, "percentage": 45.16, "elapsed_time": "4:31:18", "remaining_time": "5:29:26"} +{"current_steps": 3090, "total_steps": 6840, "loss": 0.5601890087127686, "lr": 1.240326596965496e-05, "epoch": 0.9036408831700541, "percentage": 45.18, "elapsed_time": "4:31:24", "remaining_time": "5:29:22"} +{"current_steps": 3091, "total_steps": 6840, "loss": 0.6229134798049927, "lr": 1.239857267707074e-05, "epoch": 0.9039333235853195, "percentage": 45.19, "elapsed_time": "4:31:28", "remaining_time": "5:29:15"} +{"current_steps": 3092, "total_steps": 6840, "loss": 0.5769803524017334, "lr": 1.2393878823834737e-05, "epoch": 0.9042257640005849, "percentage": 45.2, "elapsed_time": "4:31:33", "remaining_time": "5:29:10"} +{"current_steps": 3093, "total_steps": 6840, "loss": 0.8101233243942261, "lr": 1.2389184411044113e-05, "epoch": 0.9045182044158503, "percentage": 45.22, "elapsed_time": "4:31:38", "remaining_time": "5:29:04"} +{"current_steps": 3094, "total_steps": 6840, "loss": 0.5562945604324341, "lr": 1.2384489439796159e-05, "epoch": 0.9048106448311156, "percentage": 45.23, "elapsed_time": "4:31:42", "remaining_time": "5:28:58"} +{"current_steps": 3095, "total_steps": 6840, "loss": 0.5764975547790527, "lr": 1.2379793911188299e-05, "epoch": 0.9051030852463811, "percentage": 45.25, "elapsed_time": "4:31:49", "remaining_time": "5:28:54"} +{"current_steps": 3096, "total_steps": 6840, "loss": 0.5951659083366394, "lr": 
1.2375097826318079e-05, "epoch": 0.9053955256616465, "percentage": 45.26, "elapsed_time": "4:31:55", "remaining_time": "5:28:50"} +{"current_steps": 3097, "total_steps": 6840, "loss": 0.5550940632820129, "lr": 1.2370401186283186e-05, "epoch": 0.9056879660769118, "percentage": 45.28, "elapsed_time": "4:32:00", "remaining_time": "5:28:44"} +{"current_steps": 3098, "total_steps": 6840, "loss": 0.5423737168312073, "lr": 1.2365703992181425e-05, "epoch": 0.9059804064921773, "percentage": 45.29, "elapsed_time": "4:32:05", "remaining_time": "5:28:39"} +{"current_steps": 3099, "total_steps": 6840, "loss": 0.633366048336029, "lr": 1.236100624511074e-05, "epoch": 0.9062728469074426, "percentage": 45.31, "elapsed_time": "4:32:11", "remaining_time": "5:28:34"} +{"current_steps": 3100, "total_steps": 6840, "loss": 0.6067361831665039, "lr": 1.2356307946169202e-05, "epoch": 0.906565287322708, "percentage": 45.32, "elapsed_time": "4:32:17", "remaining_time": "5:28:30"} +{"current_steps": 3101, "total_steps": 6840, "loss": 0.6039519309997559, "lr": 1.2351609096455006e-05, "epoch": 0.9068577277379734, "percentage": 45.34, "elapsed_time": "4:32:27", "remaining_time": "5:28:30"} +{"current_steps": 3102, "total_steps": 6840, "loss": 0.5643757581710815, "lr": 1.2346909697066486e-05, "epoch": 0.9071501681532388, "percentage": 45.35, "elapsed_time": "4:32:33", "remaining_time": "5:28:26"} +{"current_steps": 3103, "total_steps": 6840, "loss": 0.5406394004821777, "lr": 1.2342209749102088e-05, "epoch": 0.9074426085685041, "percentage": 45.37, "elapsed_time": "4:32:40", "remaining_time": "5:28:22"} +{"current_steps": 3104, "total_steps": 6840, "loss": 0.5845915079116821, "lr": 1.2337509253660404e-05, "epoch": 0.9077350489837696, "percentage": 45.38, "elapsed_time": "4:32:45", "remaining_time": "5:28:17"} +{"current_steps": 3105, "total_steps": 6840, "loss": 0.6912981271743774, "lr": 1.2332808211840147e-05, "epoch": 0.908027489399035, "percentage": 45.39, "elapsed_time": "4:32:51", 
"remaining_time": "5:28:13"} +{"current_steps": 3106, "total_steps": 6840, "loss": 0.5571672320365906, "lr": 1.2328106624740151e-05, "epoch": 0.9083199298143003, "percentage": 45.41, "elapsed_time": "4:32:57", "remaining_time": "5:28:08"} +{"current_steps": 3107, "total_steps": 6840, "loss": 0.5219087600708008, "lr": 1.2323404493459386e-05, "epoch": 0.9086123702295658, "percentage": 45.42, "elapsed_time": "4:33:02", "remaining_time": "5:28:02"} +{"current_steps": 3108, "total_steps": 6840, "loss": 0.5780971050262451, "lr": 1.2318701819096952e-05, "epoch": 0.9089048106448311, "percentage": 45.44, "elapsed_time": "4:33:08", "remaining_time": "5:27:58"} +{"current_steps": 3109, "total_steps": 6840, "loss": 0.6206589937210083, "lr": 1.2313998602752063e-05, "epoch": 0.9091972510600965, "percentage": 45.45, "elapsed_time": "4:33:12", "remaining_time": "5:27:52"} +{"current_steps": 3110, "total_steps": 6840, "loss": 0.6063584089279175, "lr": 1.2309294845524068e-05, "epoch": 0.9094896914753618, "percentage": 45.47, "elapsed_time": "4:33:17", "remaining_time": "5:27:46"} +{"current_steps": 3111, "total_steps": 6840, "loss": 0.5733555555343628, "lr": 1.2304590548512445e-05, "epoch": 0.9097821318906273, "percentage": 45.48, "elapsed_time": "4:33:22", "remaining_time": "5:27:40"} +{"current_steps": 3112, "total_steps": 6840, "loss": 0.5227848887443542, "lr": 1.2299885712816792e-05, "epoch": 0.9100745723058927, "percentage": 45.5, "elapsed_time": "4:33:27", "remaining_time": "5:27:35"} +{"current_steps": 3113, "total_steps": 6840, "loss": 0.6357969045639038, "lr": 1.2295180339536839e-05, "epoch": 0.910367012721158, "percentage": 45.51, "elapsed_time": "4:33:31", "remaining_time": "5:27:28"} +{"current_steps": 3114, "total_steps": 6840, "loss": 0.6194056272506714, "lr": 1.2290474429772438e-05, "epoch": 0.9106594531364235, "percentage": 45.53, "elapsed_time": "4:33:37", "remaining_time": "5:27:24"} +{"current_steps": 3115, "total_steps": 6840, "loss": 0.5274733304977417, "lr": 
1.2285767984623563e-05, "epoch": 0.9109518935516888, "percentage": 45.54, "elapsed_time": "4:33:41", "remaining_time": "5:27:17"} +{"current_steps": 3116, "total_steps": 6840, "loss": 0.5612698197364807, "lr": 1.228106100519032e-05, "epoch": 0.9112443339669543, "percentage": 45.56, "elapsed_time": "4:33:46", "remaining_time": "5:27:11"} +{"current_steps": 3117, "total_steps": 6840, "loss": 0.6261074542999268, "lr": 1.2276353492572937e-05, "epoch": 0.9115367743822196, "percentage": 45.57, "elapsed_time": "4:33:52", "remaining_time": "5:27:07"} +{"current_steps": 3118, "total_steps": 6840, "loss": 0.6407681703567505, "lr": 1.2271645447871764e-05, "epoch": 0.911829214797485, "percentage": 45.58, "elapsed_time": "4:33:58", "remaining_time": "5:27:02"} +{"current_steps": 3119, "total_steps": 6840, "loss": 0.7862328290939331, "lr": 1.226693687218728e-05, "epoch": 0.9121216552127503, "percentage": 45.6, "elapsed_time": "4:34:03", "remaining_time": "5:26:57"} +{"current_steps": 3120, "total_steps": 6840, "loss": 0.5079205632209778, "lr": 1.2262227766620083e-05, "epoch": 0.9124140956280158, "percentage": 45.61, "elapsed_time": "4:34:09", "remaining_time": "5:26:52"} +{"current_steps": 3121, "total_steps": 6840, "loss": 0.6074210405349731, "lr": 1.2257518132270903e-05, "epoch": 0.9127065360432812, "percentage": 45.63, "elapsed_time": "4:34:13", "remaining_time": "5:26:45"} +{"current_steps": 3122, "total_steps": 6840, "loss": 0.642460823059082, "lr": 1.2252807970240582e-05, "epoch": 0.9129989764585466, "percentage": 45.64, "elapsed_time": "4:34:18", "remaining_time": "5:26:40"} +{"current_steps": 3123, "total_steps": 6840, "loss": 0.5996612310409546, "lr": 1.22480972816301e-05, "epoch": 0.913291416873812, "percentage": 45.66, "elapsed_time": "4:34:22", "remaining_time": "5:26:34"} +{"current_steps": 3124, "total_steps": 6840, "loss": 0.5629523992538452, "lr": 1.2243386067540548e-05, "epoch": 0.9135838572890773, "percentage": 45.67, "elapsed_time": "4:34:27", 
"remaining_time": "5:26:27"} +{"current_steps": 3125, "total_steps": 6840, "loss": 0.5794960260391235, "lr": 1.223867432907314e-05, "epoch": 0.9138762977043428, "percentage": 45.69, "elapsed_time": "4:34:32", "remaining_time": "5:26:22"} +{"current_steps": 3126, "total_steps": 6840, "loss": 0.6665213108062744, "lr": 1.2233962067329217e-05, "epoch": 0.9141687381196081, "percentage": 45.7, "elapsed_time": "4:34:37", "remaining_time": "5:26:17"} +{"current_steps": 3127, "total_steps": 6840, "loss": 0.6834249496459961, "lr": 1.2229249283410245e-05, "epoch": 0.9144611785348735, "percentage": 45.72, "elapsed_time": "4:34:42", "remaining_time": "5:26:11"} +{"current_steps": 3128, "total_steps": 6840, "loss": 0.5709845423698425, "lr": 1.2224535978417809e-05, "epoch": 0.914753618950139, "percentage": 45.73, "elapsed_time": "4:34:47", "remaining_time": "5:26:05"} +{"current_steps": 3129, "total_steps": 6840, "loss": 0.5455344915390015, "lr": 1.2219822153453613e-05, "epoch": 0.9150460593654043, "percentage": 45.75, "elapsed_time": "4:34:53", "remaining_time": "5:26:01"} +{"current_steps": 3130, "total_steps": 6840, "loss": 0.6291406154632568, "lr": 1.2215107809619483e-05, "epoch": 0.9153384997806697, "percentage": 45.76, "elapsed_time": "4:34:58", "remaining_time": "5:25:56"} +{"current_steps": 3131, "total_steps": 6840, "loss": 0.5953069925308228, "lr": 1.2210392948017371e-05, "epoch": 0.915630940195935, "percentage": 45.77, "elapsed_time": "4:35:03", "remaining_time": "5:25:50"} +{"current_steps": 3132, "total_steps": 6840, "loss": 0.6958901882171631, "lr": 1.2205677569749347e-05, "epoch": 0.9159233806112005, "percentage": 45.79, "elapsed_time": "4:35:09", "remaining_time": "5:25:45"} +{"current_steps": 3133, "total_steps": 6840, "loss": 0.5867033004760742, "lr": 1.2200961675917605e-05, "epoch": 0.9162158210264658, "percentage": 45.8, "elapsed_time": "4:35:15", "remaining_time": "5:25:41"} +{"current_steps": 3134, "total_steps": 6840, "loss": 0.5364042520523071, "lr": 
1.2196245267624449e-05, "epoch": 0.9165082614417313, "percentage": 45.82, "elapsed_time": "4:35:21", "remaining_time": "5:25:36"} +{"current_steps": 3135, "total_steps": 6840, "loss": 0.5141438841819763, "lr": 1.2191528345972318e-05, "epoch": 0.9168007018569967, "percentage": 45.83, "elapsed_time": "4:35:27", "remaining_time": "5:25:33"} +{"current_steps": 3136, "total_steps": 6840, "loss": 0.5024605393409729, "lr": 1.218681091206376e-05, "epoch": 0.917093142272262, "percentage": 45.85, "elapsed_time": "4:35:34", "remaining_time": "5:25:28"} +{"current_steps": 3137, "total_steps": 6840, "loss": 0.567114531993866, "lr": 1.2182092967001447e-05, "epoch": 0.9173855826875275, "percentage": 45.86, "elapsed_time": "4:35:38", "remaining_time": "5:25:22"} +{"current_steps": 3138, "total_steps": 6840, "loss": 0.7224113941192627, "lr": 1.217737451188817e-05, "epoch": 0.9176780231027928, "percentage": 45.88, "elapsed_time": "4:35:43", "remaining_time": "5:25:17"} +{"current_steps": 3139, "total_steps": 6840, "loss": 0.6033936738967896, "lr": 1.2172655547826839e-05, "epoch": 0.9179704635180582, "percentage": 45.89, "elapsed_time": "4:35:49", "remaining_time": "5:25:12"} +{"current_steps": 3140, "total_steps": 6840, "loss": 0.5555745363235474, "lr": 1.2167936075920486e-05, "epoch": 0.9182629039333235, "percentage": 45.91, "elapsed_time": "4:35:54", "remaining_time": "5:25:06"} +{"current_steps": 3141, "total_steps": 6840, "loss": 0.5939170718193054, "lr": 1.2163216097272255e-05, "epoch": 0.918555344348589, "percentage": 45.92, "elapsed_time": "4:35:58", "remaining_time": "5:24:59"} +{"current_steps": 3142, "total_steps": 6840, "loss": 0.7141895294189453, "lr": 1.2158495612985415e-05, "epoch": 0.9188477847638543, "percentage": 45.94, "elapsed_time": "4:36:03", "remaining_time": "5:24:54"} +{"current_steps": 3143, "total_steps": 6840, "loss": 0.585646390914917, "lr": 1.2153774624163345e-05, "epoch": 0.9191402251791198, "percentage": 45.95, "elapsed_time": "4:36:09", 
"remaining_time": "5:24:50"} +{"current_steps": 3144, "total_steps": 6840, "loss": 0.5378825068473816, "lr": 1.2149053131909556e-05, "epoch": 0.9194326655943852, "percentage": 45.96, "elapsed_time": "4:36:16", "remaining_time": "5:24:46"} +{"current_steps": 3145, "total_steps": 6840, "loss": 0.569821834564209, "lr": 1.2144331137327663e-05, "epoch": 0.9197251060096505, "percentage": 45.98, "elapsed_time": "4:36:21", "remaining_time": "5:24:41"} +{"current_steps": 3146, "total_steps": 6840, "loss": 0.6101462244987488, "lr": 1.2139608641521406e-05, "epoch": 0.920017546424916, "percentage": 45.99, "elapsed_time": "4:36:26", "remaining_time": "5:24:36"} +{"current_steps": 3147, "total_steps": 6840, "loss": 0.5481746792793274, "lr": 1.2134885645594637e-05, "epoch": 0.9203099868401813, "percentage": 46.01, "elapsed_time": "4:36:33", "remaining_time": "5:24:31"} +{"current_steps": 3148, "total_steps": 6840, "loss": 0.7075197696685791, "lr": 1.2130162150651326e-05, "epoch": 0.9206024272554467, "percentage": 46.02, "elapsed_time": "4:36:37", "remaining_time": "5:24:25"} +{"current_steps": 3149, "total_steps": 6840, "loss": 0.6375464200973511, "lr": 1.2125438157795567e-05, "epoch": 0.920894867670712, "percentage": 46.04, "elapsed_time": "4:36:42", "remaining_time": "5:24:20"} +{"current_steps": 3150, "total_steps": 6840, "loss": 0.6954327821731567, "lr": 1.2120713668131558e-05, "epoch": 0.9211873080859775, "percentage": 46.05, "elapsed_time": "4:36:48", "remaining_time": "5:24:15"} +{"current_steps": 3151, "total_steps": 6840, "loss": 0.5855636596679688, "lr": 1.2115988682763626e-05, "epoch": 0.9214797485012429, "percentage": 46.07, "elapsed_time": "4:36:54", "remaining_time": "5:24:11"} +{"current_steps": 3152, "total_steps": 6840, "loss": 0.6056143641471863, "lr": 1.2111263202796206e-05, "epoch": 0.9217721889165083, "percentage": 46.08, "elapsed_time": "4:36:59", "remaining_time": "5:24:05"} +{"current_steps": 3153, "total_steps": 6840, "loss": 0.7918239831924438, "lr": 
1.2106537229333848e-05, "epoch": 0.9220646293317737, "percentage": 46.1, "elapsed_time": "4:37:05", "remaining_time": "5:24:01"} +{"current_steps": 3154, "total_steps": 6840, "loss": 0.7772212028503418, "lr": 1.2101810763481218e-05, "epoch": 0.922357069747039, "percentage": 46.11, "elapsed_time": "4:37:10", "remaining_time": "5:23:55"} +{"current_steps": 3155, "total_steps": 6840, "loss": 0.6332443356513977, "lr": 1.2097083806343104e-05, "epoch": 0.9226495101623045, "percentage": 46.13, "elapsed_time": "4:37:16", "remaining_time": "5:23:50"} +{"current_steps": 3156, "total_steps": 6840, "loss": 0.6254568099975586, "lr": 1.2092356359024399e-05, "epoch": 0.9229419505775698, "percentage": 46.14, "elapsed_time": "4:37:21", "remaining_time": "5:23:45"} +{"current_steps": 3157, "total_steps": 6840, "loss": 0.6178697347640991, "lr": 1.208762842263012e-05, "epoch": 0.9232343909928352, "percentage": 46.15, "elapsed_time": "4:37:26", "remaining_time": "5:23:40"} +{"current_steps": 3158, "total_steps": 6840, "loss": 0.5049355030059814, "lr": 1.2082899998265387e-05, "epoch": 0.9235268314081005, "percentage": 46.17, "elapsed_time": "4:37:33", "remaining_time": "5:23:36"} +{"current_steps": 3159, "total_steps": 6840, "loss": 0.7013234496116638, "lr": 1.2078171087035444e-05, "epoch": 0.923819271823366, "percentage": 46.18, "elapsed_time": "4:37:38", "remaining_time": "5:23:31"} +{"current_steps": 3160, "total_steps": 6840, "loss": 0.576643705368042, "lr": 1.2073441690045647e-05, "epoch": 0.9241117122386314, "percentage": 46.2, "elapsed_time": "4:37:44", "remaining_time": "5:23:26"} +{"current_steps": 3161, "total_steps": 6840, "loss": 0.5163617134094238, "lr": 1.2068711808401459e-05, "epoch": 0.9244041526538967, "percentage": 46.21, "elapsed_time": "4:37:49", "remaining_time": "5:23:20"} +{"current_steps": 3162, "total_steps": 6840, "loss": 0.571370005607605, "lr": 1.2063981443208466e-05, "epoch": 0.9246965930691622, "percentage": 46.23, "elapsed_time": "4:37:54", 
"remaining_time": "5:23:16"} +{"current_steps": 3163, "total_steps": 6840, "loss": 0.7424927949905396, "lr": 1.2059250595572358e-05, "epoch": 0.9249890334844275, "percentage": 46.24, "elapsed_time": "4:37:59", "remaining_time": "5:23:10"} +{"current_steps": 3164, "total_steps": 6840, "loss": 0.6661131381988525, "lr": 1.2054519266598946e-05, "epoch": 0.925281473899693, "percentage": 46.26, "elapsed_time": "4:38:05", "remaining_time": "5:23:05"} +{"current_steps": 3165, "total_steps": 6840, "loss": 0.6416351795196533, "lr": 1.2049787457394145e-05, "epoch": 0.9255739143149583, "percentage": 46.27, "elapsed_time": "4:38:10", "remaining_time": "5:23:00"} +{"current_steps": 3166, "total_steps": 6840, "loss": 0.6708394289016724, "lr": 1.2045055169063988e-05, "epoch": 0.9258663547302237, "percentage": 46.29, "elapsed_time": "4:38:15", "remaining_time": "5:22:53"} +{"current_steps": 3167, "total_steps": 6840, "loss": 0.536340057849884, "lr": 1.2040322402714624e-05, "epoch": 0.9261587951454892, "percentage": 46.3, "elapsed_time": "4:38:19", "remaining_time": "5:22:47"} +{"current_steps": 3168, "total_steps": 6840, "loss": 0.5621340274810791, "lr": 1.20355891594523e-05, "epoch": 0.9264512355607545, "percentage": 46.32, "elapsed_time": "4:38:23", "remaining_time": "5:22:40"} +{"current_steps": 3169, "total_steps": 6840, "loss": 0.5972496271133423, "lr": 1.2030855440383387e-05, "epoch": 0.9267436759760199, "percentage": 46.33, "elapsed_time": "4:38:28", "remaining_time": "5:22:35"} +{"current_steps": 3170, "total_steps": 6840, "loss": 0.567542314529419, "lr": 1.2026121246614362e-05, "epoch": 0.9270361163912852, "percentage": 46.35, "elapsed_time": "4:38:33", "remaining_time": "5:22:29"} +{"current_steps": 3171, "total_steps": 6840, "loss": 0.5487483739852905, "lr": 1.2021386579251814e-05, "epoch": 0.9273285568065507, "percentage": 46.36, "elapsed_time": "4:38:40", "remaining_time": "5:22:26"} +{"current_steps": 3172, "total_steps": 6840, "loss": 0.7988057136535645, "lr": 
1.2016651439402445e-05, "epoch": 0.927620997221816, "percentage": 46.37, "elapsed_time": "4:38:45", "remaining_time": "5:22:20"} +{"current_steps": 3173, "total_steps": 6840, "loss": 0.5333850979804993, "lr": 1.2011915828173066e-05, "epoch": 0.9279134376370815, "percentage": 46.39, "elapsed_time": "4:38:51", "remaining_time": "5:22:16"} +{"current_steps": 3174, "total_steps": 6840, "loss": 0.5640296936035156, "lr": 1.2007179746670592e-05, "epoch": 0.9282058780523469, "percentage": 46.4, "elapsed_time": "4:38:57", "remaining_time": "5:22:12"} +{"current_steps": 3175, "total_steps": 6840, "loss": 0.7154449820518494, "lr": 1.2002443196002057e-05, "epoch": 0.9284983184676122, "percentage": 46.42, "elapsed_time": "4:39:02", "remaining_time": "5:22:06"} +{"current_steps": 3176, "total_steps": 6840, "loss": 0.8660446405410767, "lr": 1.1997706177274597e-05, "epoch": 0.9287907588828777, "percentage": 46.43, "elapsed_time": "4:39:07", "remaining_time": "5:22:00"} +{"current_steps": 3177, "total_steps": 6840, "loss": 0.601166307926178, "lr": 1.1992968691595465e-05, "epoch": 0.929083199298143, "percentage": 46.45, "elapsed_time": "4:39:13", "remaining_time": "5:21:55"} +{"current_steps": 3178, "total_steps": 6840, "loss": 0.6197638511657715, "lr": 1.1988230740072022e-05, "epoch": 0.9293756397134084, "percentage": 46.46, "elapsed_time": "4:39:18", "remaining_time": "5:21:51"} +{"current_steps": 3179, "total_steps": 6840, "loss": 0.5716423988342285, "lr": 1.198349232381173e-05, "epoch": 0.9296680801286737, "percentage": 46.48, "elapsed_time": "4:39:25", "remaining_time": "5:21:46"} +{"current_steps": 3180, "total_steps": 6840, "loss": 0.4319373071193695, "lr": 1.197875344392217e-05, "epoch": 0.9299605205439392, "percentage": 46.49, "elapsed_time": "4:39:31", "remaining_time": "5:21:43"} +{"current_steps": 3181, "total_steps": 6840, "loss": 0.5299028158187866, "lr": 1.1974014101511018e-05, "epoch": 0.9302529609592045, "percentage": 46.51, "elapsed_time": "4:39:35", 
"remaining_time": "5:21:36"} +{"current_steps": 3182, "total_steps": 6840, "loss": 0.7085509300231934, "lr": 1.1969274297686075e-05, "epoch": 0.93054540137447, "percentage": 46.52, "elapsed_time": "4:39:41", "remaining_time": "5:21:32"} +{"current_steps": 3183, "total_steps": 6840, "loss": 0.6025770902633667, "lr": 1.1964534033555237e-05, "epoch": 0.9308378417897354, "percentage": 46.54, "elapsed_time": "4:39:47", "remaining_time": "5:21:27"} +{"current_steps": 3184, "total_steps": 6840, "loss": 0.5624677538871765, "lr": 1.1959793310226518e-05, "epoch": 0.9311302822050007, "percentage": 46.55, "elapsed_time": "4:39:51", "remaining_time": "5:21:20"} +{"current_steps": 3185, "total_steps": 6840, "loss": 0.602645754814148, "lr": 1.1955052128808025e-05, "epoch": 0.9314227226202662, "percentage": 46.56, "elapsed_time": "4:39:56", "remaining_time": "5:21:15"} +{"current_steps": 3186, "total_steps": 6840, "loss": 0.6495026350021362, "lr": 1.1950310490407984e-05, "epoch": 0.9317151630355315, "percentage": 46.58, "elapsed_time": "4:40:00", "remaining_time": "5:21:08"} +{"current_steps": 3187, "total_steps": 6840, "loss": 0.50370192527771, "lr": 1.1945568396134721e-05, "epoch": 0.9320076034507969, "percentage": 46.59, "elapsed_time": "4:40:06", "remaining_time": "5:21:04"} +{"current_steps": 3188, "total_steps": 6840, "loss": 0.5717373490333557, "lr": 1.1940825847096677e-05, "epoch": 0.9323000438660622, "percentage": 46.61, "elapsed_time": "4:40:12", "remaining_time": "5:20:59"} +{"current_steps": 3189, "total_steps": 6840, "loss": 0.5863519310951233, "lr": 1.1936082844402395e-05, "epoch": 0.9325924842813277, "percentage": 46.62, "elapsed_time": "4:40:17", "remaining_time": "5:20:54"} +{"current_steps": 3190, "total_steps": 6840, "loss": 0.6607284545898438, "lr": 1.1931339389160516e-05, "epoch": 0.9328849246965931, "percentage": 46.64, "elapsed_time": "4:40:22", "remaining_time": "5:20:48"} +{"current_steps": 3191, "total_steps": 6840, "loss": 0.5578058958053589, "lr": 
1.1926595482479799e-05, "epoch": 0.9331773651118584, "percentage": 46.65, "elapsed_time": "4:40:28", "remaining_time": "5:20:43"} +{"current_steps": 3192, "total_steps": 6840, "loss": 0.6839171648025513, "lr": 1.19218511254691e-05, "epoch": 0.9334698055271239, "percentage": 46.67, "elapsed_time": "4:40:33", "remaining_time": "5:20:37"} +{"current_steps": 3193, "total_steps": 6840, "loss": 0.5071141719818115, "lr": 1.1917106319237386e-05, "epoch": 0.9337622459423892, "percentage": 46.68, "elapsed_time": "4:40:37", "remaining_time": "5:20:32"} +{"current_steps": 3194, "total_steps": 6840, "loss": 0.5112525820732117, "lr": 1.1912361064893726e-05, "epoch": 0.9340546863576547, "percentage": 46.7, "elapsed_time": "4:40:42", "remaining_time": "5:20:26"} +{"current_steps": 3195, "total_steps": 6840, "loss": 0.5661873817443848, "lr": 1.1907615363547299e-05, "epoch": 0.93434712677292, "percentage": 46.71, "elapsed_time": "4:40:49", "remaining_time": "5:20:22"} +{"current_steps": 3196, "total_steps": 6840, "loss": 0.5520195364952087, "lr": 1.190286921630737e-05, "epoch": 0.9346395671881854, "percentage": 46.73, "elapsed_time": "4:40:54", "remaining_time": "5:20:17"} +{"current_steps": 3197, "total_steps": 6840, "loss": 0.560089111328125, "lr": 1.1898122624283337e-05, "epoch": 0.9349320076034507, "percentage": 46.74, "elapsed_time": "4:41:01", "remaining_time": "5:20:13"} +{"current_steps": 3198, "total_steps": 6840, "loss": 0.6431207656860352, "lr": 1.1893375588584681e-05, "epoch": 0.9352244480187162, "percentage": 46.75, "elapsed_time": "4:41:06", "remaining_time": "5:20:07"} +{"current_steps": 3199, "total_steps": 6840, "loss": 0.7365666031837463, "lr": 1.1888628110320995e-05, "epoch": 0.9355168884339816, "percentage": 46.77, "elapsed_time": "4:41:11", "remaining_time": "5:20:02"} +{"current_steps": 3200, "total_steps": 6840, "loss": 0.5455417633056641, "lr": 1.1883880190601968e-05, "epoch": 0.935809328849247, "percentage": 46.78, "elapsed_time": "4:41:16", 
"remaining_time": "5:19:57"} +{"current_steps": 3201, "total_steps": 6840, "loss": 0.5749938488006592, "lr": 1.1879131830537403e-05, "epoch": 0.9361017692645124, "percentage": 46.8, "elapsed_time": "4:41:26", "remaining_time": "5:19:57"} +{"current_steps": 3202, "total_steps": 6840, "loss": 0.588424563407898, "lr": 1.1874383031237196e-05, "epoch": 0.9363942096797777, "percentage": 46.81, "elapsed_time": "4:41:30", "remaining_time": "5:19:50"} +{"current_steps": 3203, "total_steps": 6840, "loss": 0.7039792537689209, "lr": 1.1869633793811352e-05, "epoch": 0.9366866500950431, "percentage": 46.83, "elapsed_time": "4:41:34", "remaining_time": "5:19:44"} +{"current_steps": 3204, "total_steps": 6840, "loss": 0.5972777009010315, "lr": 1.1864884119369977e-05, "epoch": 0.9369790905103085, "percentage": 46.84, "elapsed_time": "4:41:40", "remaining_time": "5:19:39"} +{"current_steps": 3205, "total_steps": 6840, "loss": 0.6510647535324097, "lr": 1.1860134009023281e-05, "epoch": 0.9372715309255739, "percentage": 46.86, "elapsed_time": "4:41:45", "remaining_time": "5:19:33"} +{"current_steps": 3206, "total_steps": 6840, "loss": 0.606874406337738, "lr": 1.1855383463881566e-05, "epoch": 0.9375639713408394, "percentage": 46.87, "elapsed_time": "4:41:51", "remaining_time": "5:19:29"} +{"current_steps": 3207, "total_steps": 6840, "loss": 0.5527048110961914, "lr": 1.1850632485055247e-05, "epoch": 0.9378564117561047, "percentage": 46.89, "elapsed_time": "4:41:57", "remaining_time": "5:19:24"} +{"current_steps": 3208, "total_steps": 6840, "loss": 0.6297399997711182, "lr": 1.1845881073654838e-05, "epoch": 0.9381488521713701, "percentage": 46.9, "elapsed_time": "4:42:02", "remaining_time": "5:19:19"} +{"current_steps": 3209, "total_steps": 6840, "loss": 0.5852634310722351, "lr": 1.184112923079095e-05, "epoch": 0.9384412925866354, "percentage": 46.92, "elapsed_time": "4:42:06", "remaining_time": "5:19:12"} +{"current_steps": 3210, "total_steps": 6840, "loss": 0.5648211240768433, "lr": 
1.1836376957574301e-05, "epoch": 0.9387337330019009, "percentage": 46.93, "elapsed_time": "4:42:11", "remaining_time": "5:19:06"} +{"current_steps": 3211, "total_steps": 6840, "loss": 0.5547506213188171, "lr": 1.1831624255115703e-05, "epoch": 0.9390261734171662, "percentage": 46.94, "elapsed_time": "4:42:17", "remaining_time": "5:19:02"} +{"current_steps": 3212, "total_steps": 6840, "loss": 0.5927829146385193, "lr": 1.1826871124526072e-05, "epoch": 0.9393186138324316, "percentage": 46.96, "elapsed_time": "4:42:22", "remaining_time": "5:18:57"} +{"current_steps": 3213, "total_steps": 6840, "loss": 0.5705278515815735, "lr": 1.182211756691642e-05, "epoch": 0.9396110542476971, "percentage": 46.97, "elapsed_time": "4:42:29", "remaining_time": "5:18:53"} +{"current_steps": 3214, "total_steps": 6840, "loss": 0.547038197517395, "lr": 1.1817363583397868e-05, "epoch": 0.9399034946629624, "percentage": 46.99, "elapsed_time": "4:42:35", "remaining_time": "5:18:48"} +{"current_steps": 3215, "total_steps": 6840, "loss": 0.6136760115623474, "lr": 1.1812609175081626e-05, "epoch": 0.9401959350782279, "percentage": 47.0, "elapsed_time": "4:42:39", "remaining_time": "5:18:42"} +{"current_steps": 3216, "total_steps": 6840, "loss": 0.5784845352172852, "lr": 1.1807854343079015e-05, "epoch": 0.9404883754934932, "percentage": 47.02, "elapsed_time": "4:42:45", "remaining_time": "5:18:37"} +{"current_steps": 3217, "total_steps": 6840, "loss": 0.6629599332809448, "lr": 1.1803099088501439e-05, "epoch": 0.9407808159087586, "percentage": 47.03, "elapsed_time": "4:42:50", "remaining_time": "5:18:32"} +{"current_steps": 3218, "total_steps": 6840, "loss": 0.6058052778244019, "lr": 1.1798343412460416e-05, "epoch": 0.9410732563240239, "percentage": 47.05, "elapsed_time": "4:42:55", "remaining_time": "5:18:26"} +{"current_steps": 3219, "total_steps": 6840, "loss": 0.5689725875854492, "lr": 1.1793587316067552e-05, "epoch": 0.9413656967392894, "percentage": 47.06, "elapsed_time": "4:43:01", 
"remaining_time": "5:18:21"} +{"current_steps": 3220, "total_steps": 6840, "loss": 0.5718861818313599, "lr": 1.1788830800434561e-05, "epoch": 0.9416581371545547, "percentage": 47.08, "elapsed_time": "4:43:07", "remaining_time": "5:18:18"} +{"current_steps": 3221, "total_steps": 6840, "loss": 0.6061254739761353, "lr": 1.1784073866673245e-05, "epoch": 0.9419505775698201, "percentage": 47.09, "elapsed_time": "4:43:12", "remaining_time": "5:18:11"} +{"current_steps": 3222, "total_steps": 6840, "loss": 0.6805517077445984, "lr": 1.1779316515895511e-05, "epoch": 0.9422430179850856, "percentage": 47.11, "elapsed_time": "4:43:17", "remaining_time": "5:18:06"} +{"current_steps": 3223, "total_steps": 6840, "loss": 0.5553466081619263, "lr": 1.1774558749213358e-05, "epoch": 0.9425354584003509, "percentage": 47.12, "elapsed_time": "4:43:22", "remaining_time": "5:18:01"} +{"current_steps": 3224, "total_steps": 6840, "loss": 0.6408798694610596, "lr": 1.176980056773889e-05, "epoch": 0.9428278988156163, "percentage": 47.13, "elapsed_time": "4:43:28", "remaining_time": "5:17:56"} +{"current_steps": 3225, "total_steps": 6840, "loss": 0.5269505381584167, "lr": 1.1765041972584296e-05, "epoch": 0.9431203392308817, "percentage": 47.15, "elapsed_time": "4:43:34", "remaining_time": "5:17:51"} +{"current_steps": 3226, "total_steps": 6840, "loss": 0.682415246963501, "lr": 1.1760282964861873e-05, "epoch": 0.9434127796461471, "percentage": 47.16, "elapsed_time": "4:43:39", "remaining_time": "5:17:46"} +{"current_steps": 3227, "total_steps": 6840, "loss": 0.507567286491394, "lr": 1.1755523545684016e-05, "epoch": 0.9437052200614124, "percentage": 47.18, "elapsed_time": "4:43:43", "remaining_time": "5:17:39"} +{"current_steps": 3228, "total_steps": 6840, "loss": 0.6977763175964355, "lr": 1.1750763716163199e-05, "epoch": 0.9439976604766779, "percentage": 47.19, "elapsed_time": "4:43:48", "remaining_time": "5:17:34"} +{"current_steps": 3229, "total_steps": 6840, "loss": 0.5626407861709595, "lr": 
1.1746003477412007e-05, "epoch": 0.9442901008919433, "percentage": 47.21, "elapsed_time": "4:43:53", "remaining_time": "5:17:28"} +{"current_steps": 3230, "total_steps": 6840, "loss": 0.5280323624610901, "lr": 1.1741242830543118e-05, "epoch": 0.9445825413072086, "percentage": 47.22, "elapsed_time": "4:43:57", "remaining_time": "5:17:22"} +{"current_steps": 3231, "total_steps": 6840, "loss": 0.6236885190010071, "lr": 1.1736481776669307e-05, "epoch": 0.9448749817224741, "percentage": 47.24, "elapsed_time": "4:44:02", "remaining_time": "5:17:15"} +{"current_steps": 3232, "total_steps": 6840, "loss": 0.5250823497772217, "lr": 1.1731720316903435e-05, "epoch": 0.9451674221377394, "percentage": 47.25, "elapsed_time": "4:44:06", "remaining_time": "5:17:09"} +{"current_steps": 3233, "total_steps": 6840, "loss": 0.5885770320892334, "lr": 1.1726958452358472e-05, "epoch": 0.9454598625530048, "percentage": 47.27, "elapsed_time": "4:44:11", "remaining_time": "5:17:03"} +{"current_steps": 3234, "total_steps": 6840, "loss": 0.7812498807907104, "lr": 1.1722196184147467e-05, "epoch": 0.9457523029682702, "percentage": 47.28, "elapsed_time": "4:44:16", "remaining_time": "5:16:58"} +{"current_steps": 3235, "total_steps": 6840, "loss": 0.6763796210289001, "lr": 1.1717433513383575e-05, "epoch": 0.9460447433835356, "percentage": 47.3, "elapsed_time": "4:44:20", "remaining_time": "5:16:51"} +{"current_steps": 3236, "total_steps": 6840, "loss": 0.5983982682228088, "lr": 1.1712670441180045e-05, "epoch": 0.9463371837988009, "percentage": 47.31, "elapsed_time": "4:44:26", "remaining_time": "5:16:47"} +{"current_steps": 3237, "total_steps": 6840, "loss": 0.6665002107620239, "lr": 1.1707906968650214e-05, "epoch": 0.9466296242140664, "percentage": 47.32, "elapsed_time": "4:44:31", "remaining_time": "5:16:41"} +{"current_steps": 3238, "total_steps": 6840, "loss": 0.7676652669906616, "lr": 1.1703143096907507e-05, "epoch": 0.9469220646293318, "percentage": 47.34, "elapsed_time": "4:44:36", 
"remaining_time": "5:16:36"} +{"current_steps": 3239, "total_steps": 6840, "loss": 0.710014820098877, "lr": 1.1698378827065461e-05, "epoch": 0.9472145050445971, "percentage": 47.35, "elapsed_time": "4:44:42", "remaining_time": "5:16:31"} +{"current_steps": 3240, "total_steps": 6840, "loss": 0.5800554752349854, "lr": 1.169361416023769e-05, "epoch": 0.9475069454598626, "percentage": 47.37, "elapsed_time": "4:44:48", "remaining_time": "5:16:26"} +{"current_steps": 3241, "total_steps": 6840, "loss": 0.602012574672699, "lr": 1.1688849097537904e-05, "epoch": 0.9477993858751279, "percentage": 47.38, "elapsed_time": "4:44:53", "remaining_time": "5:16:21"} +{"current_steps": 3242, "total_steps": 6840, "loss": 0.4943910241127014, "lr": 1.1684083640079912e-05, "epoch": 0.9480918262903933, "percentage": 47.4, "elapsed_time": "4:44:57", "remaining_time": "5:16:15"} +{"current_steps": 3243, "total_steps": 6840, "loss": 0.49094298481941223, "lr": 1.1679317788977609e-05, "epoch": 0.9483842667056587, "percentage": 47.41, "elapsed_time": "4:45:01", "remaining_time": "5:16:08"} +{"current_steps": 3244, "total_steps": 6840, "loss": 0.46416157484054565, "lr": 1.1674551545344983e-05, "epoch": 0.9486767071209241, "percentage": 47.43, "elapsed_time": "4:45:07", "remaining_time": "5:16:03"} +{"current_steps": 3245, "total_steps": 6840, "loss": 0.5170255899429321, "lr": 1.1669784910296114e-05, "epoch": 0.9489691475361896, "percentage": 47.44, "elapsed_time": "4:45:12", "remaining_time": "5:15:58"} +{"current_steps": 3246, "total_steps": 6840, "loss": 0.7673200368881226, "lr": 1.1665017884945174e-05, "epoch": 0.9492615879514549, "percentage": 47.46, "elapsed_time": "4:45:18", "remaining_time": "5:15:54"} +{"current_steps": 3247, "total_steps": 6840, "loss": 0.49335333704948425, "lr": 1.1660250470406426e-05, "epoch": 0.9495540283667203, "percentage": 47.47, "elapsed_time": "4:45:25", "remaining_time": "5:15:50"} +{"current_steps": 3248, "total_steps": 6840, "loss": 0.6620640754699707, "lr": 
1.1655482667794228e-05, "epoch": 0.9498464687819856, "percentage": 47.49, "elapsed_time": "4:45:30", "remaining_time": "5:15:44"} +{"current_steps": 3249, "total_steps": 6840, "loss": 0.600047767162323, "lr": 1.1650714478223022e-05, "epoch": 0.9501389091972511, "percentage": 47.5, "elapsed_time": "4:45:35", "remaining_time": "5:15:39"} +{"current_steps": 3250, "total_steps": 6840, "loss": 0.668572187423706, "lr": 1.164594590280734e-05, "epoch": 0.9504313496125164, "percentage": 47.51, "elapsed_time": "4:45:41", "remaining_time": "5:15:34"} +{"current_steps": 3251, "total_steps": 6840, "loss": 0.4460945725440979, "lr": 1.1641176942661812e-05, "epoch": 0.9507237900277818, "percentage": 47.53, "elapsed_time": "4:45:46", "remaining_time": "5:15:29"} +{"current_steps": 3252, "total_steps": 6840, "loss": 0.6650545597076416, "lr": 1.1636407598901154e-05, "epoch": 0.9510162304430473, "percentage": 47.54, "elapsed_time": "4:45:53", "remaining_time": "5:15:25"} +{"current_steps": 3253, "total_steps": 6840, "loss": 0.5631237030029297, "lr": 1.1631637872640166e-05, "epoch": 0.9513086708583126, "percentage": 47.56, "elapsed_time": "4:45:56", "remaining_time": "5:15:18"} +{"current_steps": 3254, "total_steps": 6840, "loss": 0.650580883026123, "lr": 1.162686776499375e-05, "epoch": 0.951601111273578, "percentage": 47.57, "elapsed_time": "4:46:02", "remaining_time": "5:15:13"} +{"current_steps": 3255, "total_steps": 6840, "loss": 0.5606606602668762, "lr": 1.1622097277076883e-05, "epoch": 0.9518935516888434, "percentage": 47.59, "elapsed_time": "4:46:08", "remaining_time": "5:15:08"} +{"current_steps": 3256, "total_steps": 6840, "loss": 0.667366623878479, "lr": 1.1617326410004639e-05, "epoch": 0.9521859921041088, "percentage": 47.6, "elapsed_time": "4:46:13", "remaining_time": "5:15:03"} +{"current_steps": 3257, "total_steps": 6840, "loss": 0.5895084738731384, "lr": 1.1612555164892181e-05, "epoch": 0.9524784325193741, "percentage": 47.62, "elapsed_time": "4:46:19", "remaining_time": 
"5:14:59"} +{"current_steps": 3258, "total_steps": 6840, "loss": 0.6468119025230408, "lr": 1.1607783542854759e-05, "epoch": 0.9527708729346396, "percentage": 47.63, "elapsed_time": "4:46:24", "remaining_time": "5:14:53"} +{"current_steps": 3259, "total_steps": 6840, "loss": 0.7178056240081787, "lr": 1.1603011545007708e-05, "epoch": 0.9530633133499049, "percentage": 47.65, "elapsed_time": "4:46:28", "remaining_time": "5:14:47"} +{"current_steps": 3260, "total_steps": 6840, "loss": 0.42994585633277893, "lr": 1.1598239172466457e-05, "epoch": 0.9533557537651703, "percentage": 47.66, "elapsed_time": "4:46:35", "remaining_time": "5:14:43"} +{"current_steps": 3261, "total_steps": 6840, "loss": 0.4939822554588318, "lr": 1.1593466426346513e-05, "epoch": 0.9536481941804358, "percentage": 47.68, "elapsed_time": "4:46:40", "remaining_time": "5:14:38"} +{"current_steps": 3262, "total_steps": 6840, "loss": 0.4252137839794159, "lr": 1.1588693307763483e-05, "epoch": 0.9539406345957011, "percentage": 47.69, "elapsed_time": "4:46:45", "remaining_time": "5:14:32"} +{"current_steps": 3263, "total_steps": 6840, "loss": 0.5772995948791504, "lr": 1.1583919817833051e-05, "epoch": 0.9542330750109665, "percentage": 47.7, "elapsed_time": "4:46:51", "remaining_time": "5:14:27"} +{"current_steps": 3264, "total_steps": 6840, "loss": 0.6784560680389404, "lr": 1.1579145957670992e-05, "epoch": 0.9545255154262319, "percentage": 47.72, "elapsed_time": "4:46:57", "remaining_time": "5:14:22"} +{"current_steps": 3265, "total_steps": 6840, "loss": 0.5373483896255493, "lr": 1.1574371728393169e-05, "epoch": 0.9548179558414973, "percentage": 47.73, "elapsed_time": "4:47:02", "remaining_time": "5:14:17"} +{"current_steps": 3266, "total_steps": 6840, "loss": 0.7517837285995483, "lr": 1.1569597131115523e-05, "epoch": 0.9551103962567626, "percentage": 47.75, "elapsed_time": "4:47:08", "remaining_time": "5:14:12"} +{"current_steps": 3267, "total_steps": 6840, "loss": 0.6715551614761353, "lr": 
1.1564822166954092e-05, "epoch": 0.9554028366720281, "percentage": 47.76, "elapsed_time": "4:47:14", "remaining_time": "5:14:08"} +{"current_steps": 3268, "total_steps": 6840, "loss": 0.6892265677452087, "lr": 1.1560046837024994e-05, "epoch": 0.9556952770872935, "percentage": 47.78, "elapsed_time": "4:47:20", "remaining_time": "5:14:04"} +{"current_steps": 3269, "total_steps": 6840, "loss": 0.5564894676208496, "lr": 1.1555271142444433e-05, "epoch": 0.9559877175025588, "percentage": 47.79, "elapsed_time": "4:47:26", "remaining_time": "5:13:59"} +{"current_steps": 3270, "total_steps": 6840, "loss": 0.6211465001106262, "lr": 1.15504950843287e-05, "epoch": 0.9562801579178243, "percentage": 47.81, "elapsed_time": "4:47:31", "remaining_time": "5:13:54"} +{"current_steps": 3271, "total_steps": 6840, "loss": 0.6189093589782715, "lr": 1.1545718663794165e-05, "epoch": 0.9565725983330896, "percentage": 47.82, "elapsed_time": "4:47:36", "remaining_time": "5:13:48"} +{"current_steps": 3272, "total_steps": 6840, "loss": 0.6600508689880371, "lr": 1.1540941881957293e-05, "epoch": 0.956865038748355, "percentage": 47.84, "elapsed_time": "4:47:42", "remaining_time": "5:13:43"} +{"current_steps": 3273, "total_steps": 6840, "loss": 0.5891202688217163, "lr": 1.1536164739934626e-05, "epoch": 0.9571574791636204, "percentage": 47.85, "elapsed_time": "4:47:47", "remaining_time": "5:13:38"} +{"current_steps": 3274, "total_steps": 6840, "loss": 0.5996856093406677, "lr": 1.1531387238842788e-05, "epoch": 0.9574499195788858, "percentage": 47.87, "elapsed_time": "4:47:53", "remaining_time": "5:13:34"} +{"current_steps": 3275, "total_steps": 6840, "loss": 0.5645085573196411, "lr": 1.15266093797985e-05, "epoch": 0.9577423599941511, "percentage": 47.88, "elapsed_time": "4:47:59", "remaining_time": "5:13:29"} +{"current_steps": 3276, "total_steps": 6840, "loss": 0.5934250354766846, "lr": 1.1521831163918545e-05, "epoch": 0.9580348004094166, "percentage": 47.89, "elapsed_time": "4:48:05", 
"remaining_time": "5:13:24"} +{"current_steps": 3277, "total_steps": 6840, "loss": 0.6659657955169678, "lr": 1.151705259231981e-05, "epoch": 0.958327240824682, "percentage": 47.91, "elapsed_time": "4:48:10", "remaining_time": "5:13:19"} +{"current_steps": 3278, "total_steps": 6840, "loss": 0.518921434879303, "lr": 1.1512273666119255e-05, "epoch": 0.9586196812399473, "percentage": 47.92, "elapsed_time": "4:48:16", "remaining_time": "5:13:14"} +{"current_steps": 3279, "total_steps": 6840, "loss": 0.6015551686286926, "lr": 1.1507494386433927e-05, "epoch": 0.9589121216552128, "percentage": 47.94, "elapsed_time": "4:48:20", "remaining_time": "5:13:08"} +{"current_steps": 3280, "total_steps": 6840, "loss": 0.5590265393257141, "lr": 1.150271475438095e-05, "epoch": 0.9592045620704781, "percentage": 47.95, "elapsed_time": "4:48:25", "remaining_time": "5:13:02"} +{"current_steps": 3281, "total_steps": 6840, "loss": 0.5820340514183044, "lr": 1.149793477107754e-05, "epoch": 0.9594970024857435, "percentage": 47.97, "elapsed_time": "4:48:30", "remaining_time": "5:12:56"} +{"current_steps": 3282, "total_steps": 6840, "loss": 0.5356709957122803, "lr": 1.1493154437640981e-05, "epoch": 0.9597894429010089, "percentage": 47.98, "elapsed_time": "4:48:35", "remaining_time": "5:12:51"} +{"current_steps": 3283, "total_steps": 6840, "loss": 0.7024146318435669, "lr": 1.1488373755188651e-05, "epoch": 0.9600818833162743, "percentage": 48.0, "elapsed_time": "4:48:40", "remaining_time": "5:12:45"} +{"current_steps": 3284, "total_steps": 6840, "loss": 0.4929785132408142, "lr": 1.1483592724838007e-05, "epoch": 0.9603743237315397, "percentage": 48.01, "elapsed_time": "4:48:45", "remaining_time": "5:12:40"} +{"current_steps": 3285, "total_steps": 6840, "loss": 0.6902902126312256, "lr": 1.147881134770658e-05, "epoch": 0.9606667641468051, "percentage": 48.03, "elapsed_time": "4:48:51", "remaining_time": "5:12:35"} +{"current_steps": 3286, "total_steps": 6840, "loss": 0.5339258313179016, "lr": 
1.1474029624911997e-05, "epoch": 0.9609592045620705, "percentage": 48.04, "elapsed_time": "4:48:56", "remaining_time": "5:12:30"} +{"current_steps": 3287, "total_steps": 6840, "loss": 0.6998730897903442, "lr": 1.146924755757195e-05, "epoch": 0.9612516449773358, "percentage": 48.06, "elapsed_time": "4:49:02", "remaining_time": "5:12:26"} +{"current_steps": 3288, "total_steps": 6840, "loss": 0.6174519062042236, "lr": 1.1464465146804218e-05, "epoch": 0.9615440853926013, "percentage": 48.07, "elapsed_time": "4:49:08", "remaining_time": "5:12:21"} +{"current_steps": 3289, "total_steps": 6840, "loss": 0.5395258665084839, "lr": 1.145968239372666e-05, "epoch": 0.9618365258078666, "percentage": 48.08, "elapsed_time": "4:49:13", "remaining_time": "5:12:15"} +{"current_steps": 3290, "total_steps": 6840, "loss": 0.6355341672897339, "lr": 1.1454899299457221e-05, "epoch": 0.962128966223132, "percentage": 48.1, "elapsed_time": "4:49:19", "remaining_time": "5:12:10"} +{"current_steps": 3291, "total_steps": 6840, "loss": 0.5315179228782654, "lr": 1.1450115865113916e-05, "epoch": 0.9624214066383975, "percentage": 48.11, "elapsed_time": "4:49:23", "remaining_time": "5:12:04"} +{"current_steps": 3292, "total_steps": 6840, "loss": 0.5595142841339111, "lr": 1.1445332091814844e-05, "epoch": 0.9627138470536628, "percentage": 48.13, "elapsed_time": "4:49:28", "remaining_time": "5:11:59"} +{"current_steps": 3293, "total_steps": 6840, "loss": 0.5509291291236877, "lr": 1.1440547980678185e-05, "epoch": 0.9630062874689282, "percentage": 48.14, "elapsed_time": "4:49:34", "remaining_time": "5:11:54"} +{"current_steps": 3294, "total_steps": 6840, "loss": 0.6831322908401489, "lr": 1.1435763532822191e-05, "epoch": 0.9632987278841936, "percentage": 48.16, "elapsed_time": "4:49:39", "remaining_time": "5:11:49"} +{"current_steps": 3295, "total_steps": 6840, "loss": 0.5494598150253296, "lr": 1.1430978749365203e-05, "epoch": 0.963591168299459, "percentage": 48.17, "elapsed_time": "4:49:44", 
"remaining_time": "5:11:43"} +{"current_steps": 3296, "total_steps": 6840, "loss": 0.5613550543785095, "lr": 1.142619363142563e-05, "epoch": 0.9638836087147243, "percentage": 48.19, "elapsed_time": "4:49:48", "remaining_time": "5:11:37"} +{"current_steps": 3297, "total_steps": 6840, "loss": 0.656089186668396, "lr": 1.1421408180121972e-05, "epoch": 0.9641760491299898, "percentage": 48.2, "elapsed_time": "4:49:54", "remaining_time": "5:11:32"} +{"current_steps": 3298, "total_steps": 6840, "loss": 0.5913431644439697, "lr": 1.1416622396572791e-05, "epoch": 0.9644684895452551, "percentage": 48.22, "elapsed_time": "4:50:00", "remaining_time": "5:11:27"} +{"current_steps": 3299, "total_steps": 6840, "loss": 0.6706565022468567, "lr": 1.1411836281896737e-05, "epoch": 0.9647609299605205, "percentage": 48.23, "elapsed_time": "4:50:06", "remaining_time": "5:11:23"} +{"current_steps": 3300, "total_steps": 6840, "loss": 0.6169217824935913, "lr": 1.1407049837212539e-05, "epoch": 0.965053370375786, "percentage": 48.25, "elapsed_time": "4:50:12", "remaining_time": "5:11:19"} +{"current_steps": 3301, "total_steps": 6840, "loss": 0.5516680479049683, "lr": 1.1402263063638994e-05, "epoch": 0.9653458107910513, "percentage": 48.26, "elapsed_time": "4:50:22", "remaining_time": "5:11:18"} +{"current_steps": 3302, "total_steps": 6840, "loss": 0.7105098962783813, "lr": 1.1397475962294986e-05, "epoch": 0.9656382512063167, "percentage": 48.27, "elapsed_time": "4:50:28", "remaining_time": "5:11:13"} +{"current_steps": 3303, "total_steps": 6840, "loss": 0.6183327436447144, "lr": 1.139268853429947e-05, "epoch": 0.9659306916215821, "percentage": 48.29, "elapsed_time": "4:50:33", "remaining_time": "5:11:08"} +{"current_steps": 3304, "total_steps": 6840, "loss": 0.6160033941268921, "lr": 1.1387900780771472e-05, "epoch": 0.9662231320368475, "percentage": 48.3, "elapsed_time": "4:50:37", "remaining_time": "5:11:01"} +{"current_steps": 3305, "total_steps": 6840, "loss": 0.5526994466781616, "lr": 
1.1383112702830108e-05, "epoch": 0.9665155724521128, "percentage": 48.32, "elapsed_time": "4:50:42", "remaining_time": "5:10:56"} +{"current_steps": 3306, "total_steps": 6840, "loss": 0.5476477742195129, "lr": 1.137832430159456e-05, "epoch": 0.9668080128673783, "percentage": 48.33, "elapsed_time": "4:50:48", "remaining_time": "5:10:52"} +{"current_steps": 3307, "total_steps": 6840, "loss": 0.558393657207489, "lr": 1.1373535578184083e-05, "epoch": 0.9671004532826437, "percentage": 48.35, "elapsed_time": "4:50:53", "remaining_time": "5:10:45"} +{"current_steps": 3308, "total_steps": 6840, "loss": 0.6302276849746704, "lr": 1.1368746533718017e-05, "epoch": 0.967392893697909, "percentage": 48.36, "elapsed_time": "4:50:56", "remaining_time": "5:10:39"} +{"current_steps": 3309, "total_steps": 6840, "loss": 0.619697630405426, "lr": 1.1363957169315773e-05, "epoch": 0.9676853341131745, "percentage": 48.38, "elapsed_time": "4:51:02", "remaining_time": "5:10:34"} +{"current_steps": 3310, "total_steps": 6840, "loss": 0.564563512802124, "lr": 1.135916748609683e-05, "epoch": 0.9679777745284398, "percentage": 48.39, "elapsed_time": "4:51:07", "remaining_time": "5:10:28"} +{"current_steps": 3311, "total_steps": 6840, "loss": 0.6238751411437988, "lr": 1.1354377485180756e-05, "epoch": 0.9682702149437052, "percentage": 48.41, "elapsed_time": "4:51:13", "remaining_time": "5:10:23"} +{"current_steps": 3312, "total_steps": 6840, "loss": 0.8079221844673157, "lr": 1.1349587167687177e-05, "epoch": 0.9685626553589706, "percentage": 48.42, "elapsed_time": "4:51:18", "remaining_time": "5:10:18"} +{"current_steps": 3313, "total_steps": 6840, "loss": 0.5547629594802856, "lr": 1.1344796534735805e-05, "epoch": 0.968855095774236, "percentage": 48.44, "elapsed_time": "4:51:23", "remaining_time": "5:10:12"} +{"current_steps": 3314, "total_steps": 6840, "loss": 0.630042552947998, "lr": 1.134000558744642e-05, "epoch": 0.9691475361895013, "percentage": 48.45, "elapsed_time": "4:51:28", "remaining_time": 
"5:10:07"} +{"current_steps": 3315, "total_steps": 6840, "loss": 0.5283412337303162, "lr": 1.1335214326938872e-05, "epoch": 0.9694399766047668, "percentage": 48.46, "elapsed_time": "4:51:34", "remaining_time": "5:10:03"} +{"current_steps": 3316, "total_steps": 6840, "loss": 0.6356452703475952, "lr": 1.1330422754333097e-05, "epoch": 0.9697324170200322, "percentage": 48.48, "elapsed_time": "4:51:38", "remaining_time": "5:09:55"} +{"current_steps": 3317, "total_steps": 6840, "loss": 0.6531886458396912, "lr": 1.132563087074909e-05, "epoch": 0.9700248574352975, "percentage": 48.49, "elapsed_time": "4:51:44", "remaining_time": "5:09:51"} +{"current_steps": 3318, "total_steps": 6840, "loss": 0.5725178718566895, "lr": 1.1320838677306927e-05, "epoch": 0.970317297850563, "percentage": 48.51, "elapsed_time": "4:51:48", "remaining_time": "5:09:44"} +{"current_steps": 3319, "total_steps": 6840, "loss": 0.6341495513916016, "lr": 1.1316046175126758e-05, "epoch": 0.9706097382658283, "percentage": 48.52, "elapsed_time": "4:51:52", "remaining_time": "5:09:38"} +{"current_steps": 3320, "total_steps": 6840, "loss": 0.5792768597602844, "lr": 1.1311253365328794e-05, "epoch": 0.9709021786810937, "percentage": 48.54, "elapsed_time": "4:51:59", "remaining_time": "5:09:34"} +{"current_steps": 3321, "total_steps": 6840, "loss": 0.5495700836181641, "lr": 1.1306460249033326e-05, "epoch": 0.9711946190963591, "percentage": 48.55, "elapsed_time": "4:52:05", "remaining_time": "5:09:29"} +{"current_steps": 3322, "total_steps": 6840, "loss": 0.7092291116714478, "lr": 1.1301666827360721e-05, "epoch": 0.9714870595116245, "percentage": 48.57, "elapsed_time": "4:52:10", "remaining_time": "5:09:24"} +{"current_steps": 3323, "total_steps": 6840, "loss": 0.5368257761001587, "lr": 1.1296873101431409e-05, "epoch": 0.97177949992689, "percentage": 48.58, "elapsed_time": "4:52:17", "remaining_time": "5:09:20"} +{"current_steps": 3324, "total_steps": 6840, "loss": 0.6116393804550171, "lr": 
1.1292079072365898e-05, "epoch": 0.9720719403421553, "percentage": 48.6, "elapsed_time": "4:52:23", "remaining_time": "5:09:16"} +{"current_steps": 3325, "total_steps": 6840, "loss": 0.5654028654098511, "lr": 1.1287284741284757e-05, "epoch": 0.9723643807574207, "percentage": 48.61, "elapsed_time": "4:52:28", "remaining_time": "5:09:11"} +{"current_steps": 3326, "total_steps": 6840, "loss": 0.6436389684677124, "lr": 1.1282490109308633e-05, "epoch": 0.972656821172686, "percentage": 48.63, "elapsed_time": "4:52:33", "remaining_time": "5:09:06"} +{"current_steps": 3327, "total_steps": 6840, "loss": 0.7687330842018127, "lr": 1.1277695177558243e-05, "epoch": 0.9729492615879515, "percentage": 48.64, "elapsed_time": "4:52:38", "remaining_time": "5:09:00"} +{"current_steps": 3328, "total_steps": 6840, "loss": 0.5350443124771118, "lr": 1.1272899947154377e-05, "epoch": 0.9732417020032168, "percentage": 48.65, "elapsed_time": "4:52:44", "remaining_time": "5:08:55"} +{"current_steps": 3329, "total_steps": 6840, "loss": 0.6032785773277283, "lr": 1.1268104419217884e-05, "epoch": 0.9735341424184822, "percentage": 48.67, "elapsed_time": "4:52:48", "remaining_time": "5:08:49"} +{"current_steps": 3330, "total_steps": 6840, "loss": 0.5756093263626099, "lr": 1.1263308594869697e-05, "epoch": 0.9738265828337477, "percentage": 48.68, "elapsed_time": "4:52:53", "remaining_time": "5:08:43"} +{"current_steps": 3331, "total_steps": 6840, "loss": 0.6977418065071106, "lr": 1.1258512475230807e-05, "epoch": 0.974119023249013, "percentage": 48.7, "elapsed_time": "4:52:58", "remaining_time": "5:08:37"} +{"current_steps": 3332, "total_steps": 6840, "loss": 0.5409448146820068, "lr": 1.1253716061422275e-05, "epoch": 0.9744114636642784, "percentage": 48.71, "elapsed_time": "4:53:03", "remaining_time": "5:08:31"} +{"current_steps": 3333, "total_steps": 6840, "loss": 0.5863862037658691, "lr": 1.1248919354565237e-05, "epoch": 0.9747039040795438, "percentage": 48.73, "elapsed_time": "4:53:08", 
"remaining_time": "5:08:26"} +{"current_steps": 3334, "total_steps": 6840, "loss": 0.6039433479309082, "lr": 1.1244122355780895e-05, "epoch": 0.9749963444948092, "percentage": 48.74, "elapsed_time": "4:53:14", "remaining_time": "5:08:22"} +{"current_steps": 3335, "total_steps": 6840, "loss": 0.6696581840515137, "lr": 1.1239325066190513e-05, "epoch": 0.9752887849100745, "percentage": 48.76, "elapsed_time": "4:53:19", "remaining_time": "5:08:16"} +{"current_steps": 3336, "total_steps": 6840, "loss": 0.6308715343475342, "lr": 1.1234527486915439e-05, "epoch": 0.97558122532534, "percentage": 48.77, "elapsed_time": "4:53:23", "remaining_time": "5:08:10"} +{"current_steps": 3337, "total_steps": 6840, "loss": 0.580268383026123, "lr": 1.1229729619077065e-05, "epoch": 0.9758736657406053, "percentage": 48.79, "elapsed_time": "4:53:28", "remaining_time": "5:08:04"} +{"current_steps": 3338, "total_steps": 6840, "loss": 0.8080834746360779, "lr": 1.1224931463796871e-05, "epoch": 0.9761661061558707, "percentage": 48.8, "elapsed_time": "4:53:32", "remaining_time": "5:07:58"} +{"current_steps": 3339, "total_steps": 6840, "loss": 0.4933619499206543, "lr": 1.1220133022196395e-05, "epoch": 0.9764585465711362, "percentage": 48.82, "elapsed_time": "4:53:37", "remaining_time": "5:07:52"} +{"current_steps": 3340, "total_steps": 6840, "loss": 0.5639102458953857, "lr": 1.1215334295397244e-05, "epoch": 0.9767509869864015, "percentage": 48.83, "elapsed_time": "4:53:43", "remaining_time": "5:07:47"} +{"current_steps": 3341, "total_steps": 6840, "loss": 0.6332741975784302, "lr": 1.1210535284521094e-05, "epoch": 0.9770434274016669, "percentage": 48.85, "elapsed_time": "4:53:48", "remaining_time": "5:07:42"} +{"current_steps": 3342, "total_steps": 6840, "loss": 0.5425227880477905, "lr": 1.1205735990689677e-05, "epoch": 0.9773358678169323, "percentage": 48.86, "elapsed_time": "4:53:53", "remaining_time": "5:07:37"} +{"current_steps": 3343, "total_steps": 6840, "loss": 0.48746997117996216, "lr": 
1.1200936415024804e-05, "epoch": 0.9776283082321977, "percentage": 48.87, "elapsed_time": "4:53:59", "remaining_time": "5:07:32"} +{"current_steps": 3344, "total_steps": 6840, "loss": 0.5509577393531799, "lr": 1.1196136558648345e-05, "epoch": 0.977920748647463, "percentage": 48.89, "elapsed_time": "4:54:04", "remaining_time": "5:07:26"} +{"current_steps": 3345, "total_steps": 6840, "loss": 0.5939484238624573, "lr": 1.1191336422682237e-05, "epoch": 0.9782131890627285, "percentage": 48.9, "elapsed_time": "4:54:08", "remaining_time": "5:07:20"} +{"current_steps": 3346, "total_steps": 6840, "loss": 0.6078917384147644, "lr": 1.1186536008248487e-05, "epoch": 0.9785056294779939, "percentage": 48.92, "elapsed_time": "4:54:12", "remaining_time": "5:07:13"} +{"current_steps": 3347, "total_steps": 6840, "loss": 0.5578145980834961, "lr": 1.1181735316469157e-05, "epoch": 0.9787980698932592, "percentage": 48.93, "elapsed_time": "4:54:16", "remaining_time": "5:07:07"} +{"current_steps": 3348, "total_steps": 6840, "loss": 0.6809493899345398, "lr": 1.1176934348466384e-05, "epoch": 0.9790905103085247, "percentage": 48.95, "elapsed_time": "4:54:22", "remaining_time": "5:07:02"} +{"current_steps": 3349, "total_steps": 6840, "loss": 0.6057093143463135, "lr": 1.117213310536236e-05, "epoch": 0.97938295072379, "percentage": 48.96, "elapsed_time": "4:54:28", "remaining_time": "5:06:58"} +{"current_steps": 3350, "total_steps": 6840, "loss": 0.6656113266944885, "lr": 1.1167331588279351e-05, "epoch": 0.9796753911390554, "percentage": 48.98, "elapsed_time": "4:54:33", "remaining_time": "5:06:52"} +{"current_steps": 3351, "total_steps": 6840, "loss": 0.5260547399520874, "lr": 1.1162529798339682e-05, "epoch": 0.9799678315543208, "percentage": 48.99, "elapsed_time": "4:54:39", "remaining_time": "5:06:47"} +{"current_steps": 3352, "total_steps": 6840, "loss": 0.6918379068374634, "lr": 1.115772773666574e-05, "epoch": 0.9802602719695862, "percentage": 49.01, "elapsed_time": "4:54:44", 
"remaining_time": "5:06:41"} +{"current_steps": 3353, "total_steps": 6840, "loss": 0.7128825187683105, "lr": 1.115292540437998e-05, "epoch": 0.9805527123848515, "percentage": 49.02, "elapsed_time": "4:54:49", "remaining_time": "5:06:36"} +{"current_steps": 3354, "total_steps": 6840, "loss": 0.6858257055282593, "lr": 1.1148122802604913e-05, "epoch": 0.980845152800117, "percentage": 49.04, "elapsed_time": "4:54:54", "remaining_time": "5:06:31"} +{"current_steps": 3355, "total_steps": 6840, "loss": 0.540290117263794, "lr": 1.1143319932463124e-05, "epoch": 0.9811375932153824, "percentage": 49.05, "elapsed_time": "4:54:59", "remaining_time": "5:06:25"} +{"current_steps": 3356, "total_steps": 6840, "loss": 0.7293038368225098, "lr": 1.1138516795077251e-05, "epoch": 0.9814300336306477, "percentage": 49.06, "elapsed_time": "4:55:05", "remaining_time": "5:06:20"} +{"current_steps": 3357, "total_steps": 6840, "loss": 0.5981270670890808, "lr": 1.1133713391570003e-05, "epoch": 0.9817224740459132, "percentage": 49.08, "elapsed_time": "4:55:11", "remaining_time": "5:06:16"} +{"current_steps": 3358, "total_steps": 6840, "loss": 0.6175673604011536, "lr": 1.1128909723064138e-05, "epoch": 0.9820149144611785, "percentage": 49.09, "elapsed_time": "4:55:16", "remaining_time": "5:06:11"} +{"current_steps": 3359, "total_steps": 6840, "loss": 0.5385074615478516, "lr": 1.112410579068249e-05, "epoch": 0.9823073548764439, "percentage": 49.11, "elapsed_time": "4:55:22", "remaining_time": "5:06:06"} +{"current_steps": 3360, "total_steps": 6840, "loss": 0.5754122734069824, "lr": 1.1119301595547952e-05, "epoch": 0.9825997952917093, "percentage": 49.12, "elapsed_time": "4:55:27", "remaining_time": "5:06:00"} +{"current_steps": 3361, "total_steps": 6840, "loss": 0.5817348957061768, "lr": 1.1114497138783469e-05, "epoch": 0.9828922357069747, "percentage": 49.14, "elapsed_time": "4:55:31", "remaining_time": "5:05:54"} +{"current_steps": 3362, "total_steps": 6840, "loss": 0.7561115026473999, "lr": 
1.1109692421512058e-05, "epoch": 0.9831846761222401, "percentage": 49.15, "elapsed_time": "4:55:35", "remaining_time": "5:05:47"} +{"current_steps": 3363, "total_steps": 6840, "loss": 0.5972003936767578, "lr": 1.1104887444856786e-05, "epoch": 0.9834771165375055, "percentage": 49.17, "elapsed_time": "4:55:40", "remaining_time": "5:05:41"} +{"current_steps": 3364, "total_steps": 6840, "loss": 0.7569154500961304, "lr": 1.1100082209940795e-05, "epoch": 0.9837695569527709, "percentage": 49.18, "elapsed_time": "4:55:45", "remaining_time": "5:05:36"} +{"current_steps": 3365, "total_steps": 6840, "loss": 0.587831437587738, "lr": 1.1095276717887273e-05, "epoch": 0.9840619973680362, "percentage": 49.2, "elapsed_time": "4:55:50", "remaining_time": "5:05:30"} +{"current_steps": 3366, "total_steps": 6840, "loss": 0.5265868902206421, "lr": 1.109047096981948e-05, "epoch": 0.9843544377833017, "percentage": 49.21, "elapsed_time": "4:55:54", "remaining_time": "5:05:24"} +{"current_steps": 3367, "total_steps": 6840, "loss": 0.6065980792045593, "lr": 1.1085664966860728e-05, "epoch": 0.984646878198567, "percentage": 49.23, "elapsed_time": "4:55:59", "remaining_time": "5:05:18"} +{"current_steps": 3368, "total_steps": 6840, "loss": 0.5859705209732056, "lr": 1.1080858710134392e-05, "epoch": 0.9849393186138324, "percentage": 49.24, "elapsed_time": "4:56:05", "remaining_time": "5:05:14"} +{"current_steps": 3369, "total_steps": 6840, "loss": 0.508766770362854, "lr": 1.1076052200763903e-05, "epoch": 0.9852317590290979, "percentage": 49.25, "elapsed_time": "4:56:11", "remaining_time": "5:05:09"} +{"current_steps": 3370, "total_steps": 6840, "loss": 0.569848358631134, "lr": 1.1071245439872752e-05, "epoch": 0.9855241994443632, "percentage": 49.27, "elapsed_time": "4:56:15", "remaining_time": "5:05:03"} +{"current_steps": 3371, "total_steps": 6840, "loss": 0.6665600538253784, "lr": 1.1066438428584496e-05, "epoch": 0.9858166398596286, "percentage": 49.28, "elapsed_time": "4:56:20", 
"remaining_time": "5:04:57"} +{"current_steps": 3372, "total_steps": 6840, "loss": 0.5942315459251404, "lr": 1.1061631168022742e-05, "epoch": 0.986109080274894, "percentage": 49.3, "elapsed_time": "4:56:25", "remaining_time": "5:04:51"} +{"current_steps": 3373, "total_steps": 6840, "loss": 0.5270178318023682, "lr": 1.1056823659311158e-05, "epoch": 0.9864015206901594, "percentage": 49.31, "elapsed_time": "4:56:31", "remaining_time": "5:04:47"} +{"current_steps": 3374, "total_steps": 6840, "loss": 0.6879183053970337, "lr": 1.1052015903573465e-05, "epoch": 0.9866939611054247, "percentage": 49.33, "elapsed_time": "4:56:37", "remaining_time": "5:04:42"} +{"current_steps": 3375, "total_steps": 6840, "loss": 0.5980993509292603, "lr": 1.1047207901933453e-05, "epoch": 0.9869864015206902, "percentage": 49.34, "elapsed_time": "4:56:42", "remaining_time": "5:04:37"} +{"current_steps": 3376, "total_steps": 6840, "loss": 0.5616245865821838, "lr": 1.1042399655514961e-05, "epoch": 0.9872788419359555, "percentage": 49.36, "elapsed_time": "4:56:47", "remaining_time": "5:04:31"} +{"current_steps": 3377, "total_steps": 6840, "loss": 0.6233900785446167, "lr": 1.1037591165441887e-05, "epoch": 0.9875712823512209, "percentage": 49.37, "elapsed_time": "4:56:51", "remaining_time": "5:04:25"} +{"current_steps": 3378, "total_steps": 6840, "loss": 0.612476110458374, "lr": 1.1032782432838188e-05, "epoch": 0.9878637227664864, "percentage": 49.39, "elapsed_time": "4:56:56", "remaining_time": "5:04:19"} +{"current_steps": 3379, "total_steps": 6840, "loss": 0.7109482288360596, "lr": 1.1027973458827874e-05, "epoch": 0.9881561631817517, "percentage": 49.4, "elapsed_time": "4:57:01", "remaining_time": "5:04:13"} +{"current_steps": 3380, "total_steps": 6840, "loss": 0.7105005383491516, "lr": 1.1023164244535013e-05, "epoch": 0.9884486035970171, "percentage": 49.42, "elapsed_time": "4:57:07", "remaining_time": "5:04:09"} +{"current_steps": 3381, "total_steps": 6840, "loss": 0.5401301383972168, "lr": 
1.1018354791083731e-05, "epoch": 0.9887410440122825, "percentage": 49.43, "elapsed_time": "4:57:13", "remaining_time": "5:04:05"} +{"current_steps": 3382, "total_steps": 6840, "loss": 0.504487156867981, "lr": 1.101354509959821e-05, "epoch": 0.9890334844275479, "percentage": 49.44, "elapsed_time": "4:57:19", "remaining_time": "5:04:00"} +{"current_steps": 3383, "total_steps": 6840, "loss": 0.5634675025939941, "lr": 1.1008735171202685e-05, "epoch": 0.9893259248428132, "percentage": 49.46, "elapsed_time": "4:57:23", "remaining_time": "5:03:54"} +{"current_steps": 3384, "total_steps": 6840, "loss": 0.4828820824623108, "lr": 1.1003925007021444e-05, "epoch": 0.9896183652580787, "percentage": 49.47, "elapsed_time": "4:57:27", "remaining_time": "5:03:46"} +{"current_steps": 3385, "total_steps": 6840, "loss": 0.7154384851455688, "lr": 1.0999114608178837e-05, "epoch": 0.9899108056733441, "percentage": 49.49, "elapsed_time": "4:57:32", "remaining_time": "5:03:41"} +{"current_steps": 3386, "total_steps": 6840, "loss": 0.626085638999939, "lr": 1.0994303975799268e-05, "epoch": 0.9902032460886094, "percentage": 49.5, "elapsed_time": "4:57:37", "remaining_time": "5:03:36"} +{"current_steps": 3387, "total_steps": 6840, "loss": 0.5179756283760071, "lr": 1.0989493111007186e-05, "epoch": 0.9904956865038749, "percentage": 49.52, "elapsed_time": "4:57:42", "remaining_time": "5:03:30"} +{"current_steps": 3388, "total_steps": 6840, "loss": 0.6992131471633911, "lr": 1.0984682014927108e-05, "epoch": 0.9907881269191402, "percentage": 49.53, "elapsed_time": "4:57:47", "remaining_time": "5:03:24"} +{"current_steps": 3389, "total_steps": 6840, "loss": 0.5791709423065186, "lr": 1.0979870688683598e-05, "epoch": 0.9910805673344056, "percentage": 49.55, "elapsed_time": "4:57:52", "remaining_time": "5:03:19"} +{"current_steps": 3390, "total_steps": 6840, "loss": 0.4703817367553711, "lr": 1.097505913340127e-05, "epoch": 0.991373007749671, "percentage": 49.56, "elapsed_time": "4:57:58", 
"remaining_time": "5:03:15"} +{"current_steps": 3391, "total_steps": 6840, "loss": 0.6042051911354065, "lr": 1.0970247350204797e-05, "epoch": 0.9916654481649364, "percentage": 49.58, "elapsed_time": "4:58:02", "remaining_time": "5:03:08"} +{"current_steps": 3392, "total_steps": 6840, "loss": 0.6806557178497314, "lr": 1.0965435340218905e-05, "epoch": 0.9919578885802017, "percentage": 49.59, "elapsed_time": "4:58:07", "remaining_time": "5:03:02"} +{"current_steps": 3393, "total_steps": 6840, "loss": 0.6372751593589783, "lr": 1.0960623104568373e-05, "epoch": 0.9922503289954672, "percentage": 49.61, "elapsed_time": "4:58:13", "remaining_time": "5:02:58"} +{"current_steps": 3394, "total_steps": 6840, "loss": 0.48651185631752014, "lr": 1.0955810644378031e-05, "epoch": 0.9925427694107326, "percentage": 49.62, "elapsed_time": "4:58:18", "remaining_time": "5:02:52"} +{"current_steps": 3395, "total_steps": 6840, "loss": 0.5244222283363342, "lr": 1.0950997960772764e-05, "epoch": 0.9928352098259979, "percentage": 49.63, "elapsed_time": "4:58:23", "remaining_time": "5:02:46"} +{"current_steps": 3396, "total_steps": 6840, "loss": 0.6194322109222412, "lr": 1.0946185054877505e-05, "epoch": 0.9931276502412634, "percentage": 49.65, "elapsed_time": "4:58:28", "remaining_time": "5:02:41"} +{"current_steps": 3397, "total_steps": 6840, "loss": 0.690010666847229, "lr": 1.0941371927817241e-05, "epoch": 0.9934200906565287, "percentage": 49.66, "elapsed_time": "4:58:33", "remaining_time": "5:02:36"} +{"current_steps": 3398, "total_steps": 6840, "loss": 0.7332549095153809, "lr": 1.0936558580717013e-05, "epoch": 0.9937125310717941, "percentage": 49.68, "elapsed_time": "4:58:38", "remaining_time": "5:02:30"} +{"current_steps": 3399, "total_steps": 6840, "loss": 0.5264838337898254, "lr": 1.093174501470191e-05, "epoch": 0.9940049714870595, "percentage": 49.69, "elapsed_time": "4:58:44", "remaining_time": "5:02:26"} +{"current_steps": 3400, "total_steps": 6840, "loss": 0.624382734298706, "lr": 
1.092693123089708e-05, "epoch": 0.9942974119023249, "percentage": 49.71, "elapsed_time": "4:58:50", "remaining_time": "5:02:21"} +{"current_steps": 3401, "total_steps": 6840, "loss": 0.6340548992156982, "lr": 1.0922117230427705e-05, "epoch": 0.9945898523175903, "percentage": 49.72, "elapsed_time": "4:58:59", "remaining_time": "5:02:19"} +{"current_steps": 3402, "total_steps": 6840, "loss": 0.4452754855155945, "lr": 1.0917303014419036e-05, "epoch": 0.9948822927328557, "percentage": 49.74, "elapsed_time": "4:59:04", "remaining_time": "5:02:14"} +{"current_steps": 3403, "total_steps": 6840, "loss": 0.6180763244628906, "lr": 1.0912488583996364e-05, "epoch": 0.9951747331481211, "percentage": 49.75, "elapsed_time": "4:59:11", "remaining_time": "5:02:10"} +{"current_steps": 3404, "total_steps": 6840, "loss": 0.7079293727874756, "lr": 1.0907673940285032e-05, "epoch": 0.9954671735633864, "percentage": 49.77, "elapsed_time": "4:59:14", "remaining_time": "5:02:03"} +{"current_steps": 3405, "total_steps": 6840, "loss": 0.6608254909515381, "lr": 1.090285908441044e-05, "epoch": 0.9957596139786519, "percentage": 49.78, "elapsed_time": "4:59:19", "remaining_time": "5:01:57"} +{"current_steps": 3406, "total_steps": 6840, "loss": 0.6450251340866089, "lr": 1.0898044017498024e-05, "epoch": 0.9960520543939172, "percentage": 49.8, "elapsed_time": "4:59:23", "remaining_time": "5:01:51"} +{"current_steps": 3407, "total_steps": 6840, "loss": 0.6267623901367188, "lr": 1.089322874067328e-05, "epoch": 0.9963444948091826, "percentage": 49.81, "elapsed_time": "4:59:28", "remaining_time": "5:01:45"} +{"current_steps": 3408, "total_steps": 6840, "loss": 0.6756424903869629, "lr": 1.0888413255061747e-05, "epoch": 0.9966369352244481, "percentage": 49.82, "elapsed_time": "4:59:31", "remaining_time": "5:01:38"} +{"current_steps": 3409, "total_steps": 6840, "loss": 0.6578212976455688, "lr": 1.0883597561789017e-05, "epoch": 0.9969293756397134, "percentage": 49.84, "elapsed_time": "4:59:36", 
"remaining_time": "5:01:32"} +{"current_steps": 3410, "total_steps": 6840, "loss": 0.8186248540878296, "lr": 1.087878166198073e-05, "epoch": 0.9972218160549788, "percentage": 49.85, "elapsed_time": "4:59:41", "remaining_time": "5:01:26"} +{"current_steps": 3411, "total_steps": 6840, "loss": 0.6689319610595703, "lr": 1.0873965556762573e-05, "epoch": 0.9975142564702442, "percentage": 49.87, "elapsed_time": "4:59:46", "remaining_time": "5:01:21"} +{"current_steps": 3412, "total_steps": 6840, "loss": 0.5471278429031372, "lr": 1.0869149247260282e-05, "epoch": 0.9978066968855096, "percentage": 49.88, "elapsed_time": "4:59:50", "remaining_time": "5:01:14"} +{"current_steps": 3413, "total_steps": 6840, "loss": 0.4673747420310974, "lr": 1.0864332734599636e-05, "epoch": 0.9980991373007749, "percentage": 49.9, "elapsed_time": "4:59:54", "remaining_time": "5:01:08"} +{"current_steps": 3414, "total_steps": 6840, "loss": 0.5777568221092224, "lr": 1.085951601990647e-05, "epoch": 0.9983915777160404, "percentage": 49.91, "elapsed_time": "5:00:00", "remaining_time": "5:01:03"} +{"current_steps": 3415, "total_steps": 6840, "loss": 0.6758528351783752, "lr": 1.0854699104306661e-05, "epoch": 0.9986840181313057, "percentage": 49.93, "elapsed_time": "5:00:04", "remaining_time": "5:00:57"} +{"current_steps": 3416, "total_steps": 6840, "loss": 0.5759919881820679, "lr": 1.0849881988926132e-05, "epoch": 0.9989764585465711, "percentage": 49.94, "elapsed_time": "5:00:09", "remaining_time": "5:00:51"} +{"current_steps": 3417, "total_steps": 6840, "loss": 0.606694221496582, "lr": 1.0845064674890857e-05, "epoch": 0.9992688989618366, "percentage": 49.96, "elapsed_time": "5:00:14", "remaining_time": "5:00:46"} +{"current_steps": 3418, "total_steps": 6840, "loss": 0.627873957157135, "lr": 1.0840247163326851e-05, "epoch": 0.9995613393771019, "percentage": 49.97, "elapsed_time": "5:00:19", "remaining_time": "5:00:40"} +{"current_steps": 3419, "total_steps": 6840, "loss": 0.5560880303382874, "lr": 
1.083542945536018e-05, "epoch": 0.9998537797923673, "percentage": 49.99, "elapsed_time": "5:00:24", "remaining_time": "5:00:34"} +{"current_steps": 3420, "total_steps": 6840, "loss": 0.5983354449272156, "lr": 1.0830611552116952e-05, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "5:00:25", "remaining_time": "5:00:25"} +{"current_steps": 3421, "total_steps": 6840, "loss": 0.5012353658676147, "lr": 1.0825793454723325e-05, "epoch": 1.0002924404152653, "percentage": 50.01, "elapsed_time": "5:00:31", "remaining_time": "5:00:21"} +{"current_steps": 3422, "total_steps": 6840, "loss": 0.4585106372833252, "lr": 1.0820975164305498e-05, "epoch": 1.0005848808305309, "percentage": 50.03, "elapsed_time": "5:00:35", "remaining_time": "5:00:14"} +{"current_steps": 3423, "total_steps": 6840, "loss": 0.5790318846702576, "lr": 1.0816156681989717e-05, "epoch": 1.0008773212457962, "percentage": 50.04, "elapsed_time": "5:00:40", "remaining_time": "5:00:08"} +{"current_steps": 3424, "total_steps": 6840, "loss": 0.6016381978988647, "lr": 1.0811338008902277e-05, "epoch": 1.0011697616610615, "percentage": 50.06, "elapsed_time": "5:00:45", "remaining_time": "5:00:03"} +{"current_steps": 3425, "total_steps": 6840, "loss": 0.5756744146347046, "lr": 1.0806519146169507e-05, "epoch": 1.0014622020763269, "percentage": 50.07, "elapsed_time": "5:00:50", "remaining_time": "4:59:58"} +{"current_steps": 3426, "total_steps": 6840, "loss": 0.4776861369609833, "lr": 1.0801700094917792e-05, "epoch": 1.0017546424915924, "percentage": 50.09, "elapsed_time": "5:00:57", "remaining_time": "4:59:54"} +{"current_steps": 3427, "total_steps": 6840, "loss": 0.645842969417572, "lr": 1.0796880856273557e-05, "epoch": 1.0020470829068577, "percentage": 50.1, "elapsed_time": "5:01:03", "remaining_time": "4:59:49"} +{"current_steps": 3428, "total_steps": 6840, "loss": 0.5645815134048462, "lr": 1.0792061431363266e-05, "epoch": 1.002339523322123, "percentage": 50.12, "elapsed_time": "5:01:09", "remaining_time": "4:59:45"} 
+{"current_steps": 3429, "total_steps": 6840, "loss": 0.5477975606918335, "lr": 1.0787241821313428e-05, "epoch": 1.0026319637373886, "percentage": 50.13, "elapsed_time": "5:01:14", "remaining_time": "4:59:39"} +{"current_steps": 3430, "total_steps": 6840, "loss": 0.4064188599586487, "lr": 1.0782422027250604e-05, "epoch": 1.002924404152654, "percentage": 50.15, "elapsed_time": "5:01:20", "remaining_time": "4:59:34"} +{"current_steps": 3431, "total_steps": 6840, "loss": 0.5360208749771118, "lr": 1.0777602050301384e-05, "epoch": 1.0032168445679193, "percentage": 50.16, "elapsed_time": "5:01:24", "remaining_time": "4:59:28"} +{"current_steps": 3432, "total_steps": 6840, "loss": 0.6189982891082764, "lr": 1.0772781891592419e-05, "epoch": 1.0035092849831846, "percentage": 50.18, "elapsed_time": "5:01:29", "remaining_time": "4:59:22"} +{"current_steps": 3433, "total_steps": 6840, "loss": 0.4623541533946991, "lr": 1.0767961552250382e-05, "epoch": 1.0038017253984501, "percentage": 50.19, "elapsed_time": "5:01:34", "remaining_time": "4:59:17"} +{"current_steps": 3434, "total_steps": 6840, "loss": 0.6094095706939697, "lr": 1.0763141033402e-05, "epoch": 1.0040941658137155, "percentage": 50.2, "elapsed_time": "5:01:39", "remaining_time": "4:59:12"} +{"current_steps": 3435, "total_steps": 6840, "loss": 0.6997445821762085, "lr": 1.0758320336174042e-05, "epoch": 1.0043866062289808, "percentage": 50.22, "elapsed_time": "5:01:44", "remaining_time": "4:59:06"} +{"current_steps": 3436, "total_steps": 6840, "loss": 0.5447323322296143, "lr": 1.0753499461693316e-05, "epoch": 1.0046790466442463, "percentage": 50.23, "elapsed_time": "5:01:49", "remaining_time": "4:59:00"} +{"current_steps": 3437, "total_steps": 6840, "loss": 0.5851927995681763, "lr": 1.0748678411086672e-05, "epoch": 1.0049714870595117, "percentage": 50.25, "elapsed_time": "5:01:53", "remaining_time": "4:58:54"} +{"current_steps": 3438, "total_steps": 6840, "loss": 0.5897810459136963, "lr": 1.0743857185481006e-05, "epoch": 
1.005263927474777, "percentage": 50.26, "elapsed_time": "5:01:58", "remaining_time": "4:58:48"} +{"current_steps": 3439, "total_steps": 6840, "loss": 0.47671592235565186, "lr": 1.073903578600324e-05, "epoch": 1.0055563678900423, "percentage": 50.28, "elapsed_time": "5:02:03", "remaining_time": "4:58:43"} +{"current_steps": 3440, "total_steps": 6840, "loss": 0.5586696863174438, "lr": 1.0734214213780355e-05, "epoch": 1.0058488083053079, "percentage": 50.29, "elapsed_time": "5:02:07", "remaining_time": "4:58:37"} +{"current_steps": 3441, "total_steps": 6840, "loss": 0.6817598342895508, "lr": 1.0729392469939362e-05, "epoch": 1.0061412487205732, "percentage": 50.31, "elapsed_time": "5:02:13", "remaining_time": "4:58:32"} +{"current_steps": 3442, "total_steps": 6840, "loss": 0.6503750085830688, "lr": 1.0724570555607311e-05, "epoch": 1.0064336891358385, "percentage": 50.32, "elapsed_time": "5:02:18", "remaining_time": "4:58:27"} +{"current_steps": 3443, "total_steps": 6840, "loss": 0.7121564149856567, "lr": 1.07197484719113e-05, "epoch": 1.0067261295511039, "percentage": 50.34, "elapsed_time": "5:02:24", "remaining_time": "4:58:22"} +{"current_steps": 3444, "total_steps": 6840, "loss": 0.5760178565979004, "lr": 1.071492621997846e-05, "epoch": 1.0070185699663694, "percentage": 50.35, "elapsed_time": "5:02:30", "remaining_time": "4:58:17"} +{"current_steps": 3445, "total_steps": 6840, "loss": 0.4555765390396118, "lr": 1.0710103800935965e-05, "epoch": 1.0073110103816347, "percentage": 50.37, "elapsed_time": "5:02:34", "remaining_time": "4:58:11"} +{"current_steps": 3446, "total_steps": 6840, "loss": 0.6098523736000061, "lr": 1.0705281215911021e-05, "epoch": 1.0076034507969, "percentage": 50.38, "elapsed_time": "5:02:39", "remaining_time": "4:58:05"} +{"current_steps": 3447, "total_steps": 6840, "loss": 0.49828749895095825, "lr": 1.070045846603088e-05, "epoch": 1.0078958912121656, "percentage": 50.39, "elapsed_time": "5:02:43", "remaining_time": "4:57:59"} +{"current_steps": 
3448, "total_steps": 6840, "loss": 0.5134999752044678, "lr": 1.0695635552422834e-05, "epoch": 1.008188331627431, "percentage": 50.41, "elapsed_time": "5:02:48", "remaining_time": "4:57:53"} +{"current_steps": 3449, "total_steps": 6840, "loss": 0.53546142578125, "lr": 1.0690812476214209e-05, "epoch": 1.0084807720426963, "percentage": 50.42, "elapsed_time": "5:02:53", "remaining_time": "4:57:47"} +{"current_steps": 3450, "total_steps": 6840, "loss": 0.4955276846885681, "lr": 1.0685989238532364e-05, "epoch": 1.0087732124579616, "percentage": 50.44, "elapsed_time": "5:02:58", "remaining_time": "4:57:42"} +{"current_steps": 3451, "total_steps": 6840, "loss": 0.5693827271461487, "lr": 1.0681165840504708e-05, "epoch": 1.0090656528732271, "percentage": 50.45, "elapsed_time": "5:03:04", "remaining_time": "4:57:37"} +{"current_steps": 3452, "total_steps": 6840, "loss": 0.5023596286773682, "lr": 1.0676342283258676e-05, "epoch": 1.0093580932884925, "percentage": 50.47, "elapsed_time": "5:03:09", "remaining_time": "4:57:32"} +{"current_steps": 3453, "total_steps": 6840, "loss": 0.5601100921630859, "lr": 1.0671518567921748e-05, "epoch": 1.0096505337037578, "percentage": 50.48, "elapsed_time": "5:03:14", "remaining_time": "4:57:26"} +{"current_steps": 3454, "total_steps": 6840, "loss": 0.5744563341140747, "lr": 1.0666694695621438e-05, "epoch": 1.0099429741190233, "percentage": 50.5, "elapsed_time": "5:03:19", "remaining_time": "4:57:21"} +{"current_steps": 3455, "total_steps": 6840, "loss": 0.531909704208374, "lr": 1.0661870667485298e-05, "epoch": 1.0102354145342887, "percentage": 50.51, "elapsed_time": "5:03:26", "remaining_time": "4:57:17"} +{"current_steps": 3456, "total_steps": 6840, "loss": 0.5737274885177612, "lr": 1.0657046484640911e-05, "epoch": 1.010527854949554, "percentage": 50.53, "elapsed_time": "5:03:31", "remaining_time": "4:57:11"} +{"current_steps": 3457, "total_steps": 6840, "loss": 0.5550329089164734, "lr": 1.0652222148215905e-05, "epoch": 1.0108202953648193, 
"percentage": 50.54, "elapsed_time": "5:03:36", "remaining_time": "4:57:06"} +{"current_steps": 3458, "total_steps": 6840, "loss": 0.47795504331588745, "lr": 1.0647397659337936e-05, "epoch": 1.0111127357800849, "percentage": 50.56, "elapsed_time": "5:03:41", "remaining_time": "4:57:01"} +{"current_steps": 3459, "total_steps": 6840, "loss": 0.6817550659179688, "lr": 1.0642573019134703e-05, "epoch": 1.0114051761953502, "percentage": 50.57, "elapsed_time": "5:03:46", "remaining_time": "4:56:55"} +{"current_steps": 3460, "total_steps": 6840, "loss": 0.45271044969558716, "lr": 1.063774822873393e-05, "epoch": 1.0116976166106155, "percentage": 50.58, "elapsed_time": "5:03:52", "remaining_time": "4:56:51"} +{"current_steps": 3461, "total_steps": 6840, "loss": 0.611709475517273, "lr": 1.0632923289263389e-05, "epoch": 1.011990057025881, "percentage": 50.6, "elapsed_time": "5:03:58", "remaining_time": "4:56:45"} +{"current_steps": 3462, "total_steps": 6840, "loss": 0.5101709961891174, "lr": 1.0628098201850876e-05, "epoch": 1.0122824974411464, "percentage": 50.61, "elapsed_time": "5:04:03", "remaining_time": "4:56:41"} +{"current_steps": 3463, "total_steps": 6840, "loss": 0.6550514698028564, "lr": 1.0623272967624227e-05, "epoch": 1.0125749378564117, "percentage": 50.63, "elapsed_time": "5:04:09", "remaining_time": "4:56:36"} +{"current_steps": 3464, "total_steps": 6840, "loss": 0.479978084564209, "lr": 1.0618447587711312e-05, "epoch": 1.012867378271677, "percentage": 50.64, "elapsed_time": "5:04:15", "remaining_time": "4:56:31"} +{"current_steps": 3465, "total_steps": 6840, "loss": 0.5616719722747803, "lr": 1.0613622063240035e-05, "epoch": 1.0131598186869426, "percentage": 50.66, "elapsed_time": "5:04:19", "remaining_time": "4:56:25"} +{"current_steps": 3466, "total_steps": 6840, "loss": 0.5160953998565674, "lr": 1.060879639533833e-05, "epoch": 1.013452259102208, "percentage": 50.67, "elapsed_time": "5:04:24", "remaining_time": "4:56:19"} +{"current_steps": 3467, 
"total_steps": 6840, "loss": 0.6069898009300232, "lr": 1.0603970585134168e-05, "epoch": 1.0137446995174733, "percentage": 50.69, "elapsed_time": "5:04:29", "remaining_time": "4:56:14"} +{"current_steps": 3468, "total_steps": 6840, "loss": 0.5800961256027222, "lr": 1.0599144633755555e-05, "epoch": 1.0140371399327388, "percentage": 50.7, "elapsed_time": "5:04:35", "remaining_time": "4:56:09"} +{"current_steps": 3469, "total_steps": 6840, "loss": 0.5286555290222168, "lr": 1.0594318542330528e-05, "epoch": 1.0143295803480041, "percentage": 50.72, "elapsed_time": "5:04:40", "remaining_time": "4:56:03"} +{"current_steps": 3470, "total_steps": 6840, "loss": 0.44960829615592957, "lr": 1.0589492311987157e-05, "epoch": 1.0146220207632695, "percentage": 50.73, "elapsed_time": "5:04:45", "remaining_time": "4:55:58"} +{"current_steps": 3471, "total_steps": 6840, "loss": 0.5799434781074524, "lr": 1.0584665943853538e-05, "epoch": 1.0149144611785348, "percentage": 50.75, "elapsed_time": "5:04:50", "remaining_time": "4:55:52"} +{"current_steps": 3472, "total_steps": 6840, "loss": 0.5142421126365662, "lr": 1.057983943905781e-05, "epoch": 1.0152069015938003, "percentage": 50.76, "elapsed_time": "5:04:55", "remaining_time": "4:55:47"} +{"current_steps": 3473, "total_steps": 6840, "loss": 0.5184981226921082, "lr": 1.0575012798728141e-05, "epoch": 1.0154993420090657, "percentage": 50.77, "elapsed_time": "5:05:00", "remaining_time": "4:55:41"} +{"current_steps": 3474, "total_steps": 6840, "loss": 0.5747173428535461, "lr": 1.0570186023992724e-05, "epoch": 1.015791782424331, "percentage": 50.79, "elapsed_time": "5:05:05", "remaining_time": "4:55:36"} +{"current_steps": 3475, "total_steps": 6840, "loss": 0.5994119644165039, "lr": 1.0565359115979792e-05, "epoch": 1.0160842228395965, "percentage": 50.8, "elapsed_time": "5:05:10", "remaining_time": "4:55:30"} +{"current_steps": 3476, "total_steps": 6840, "loss": 0.5020599365234375, "lr": 1.0560532075817605e-05, "epoch": 1.0163766632548619, 
"percentage": 50.82, "elapsed_time": "5:05:16", "remaining_time": "4:55:26"} +{"current_steps": 3477, "total_steps": 6840, "loss": 0.5023698806762695, "lr": 1.0555704904634451e-05, "epoch": 1.0166691036701272, "percentage": 50.83, "elapsed_time": "5:05:22", "remaining_time": "4:55:21"} +{"current_steps": 3478, "total_steps": 6840, "loss": 0.3998676538467407, "lr": 1.0550877603558656e-05, "epoch": 1.0169615440853925, "percentage": 50.85, "elapsed_time": "5:05:27", "remaining_time": "4:55:16"} +{"current_steps": 3479, "total_steps": 6840, "loss": 0.5083760619163513, "lr": 1.0546050173718569e-05, "epoch": 1.017253984500658, "percentage": 50.86, "elapsed_time": "5:05:32", "remaining_time": "4:55:10"} +{"current_steps": 3480, "total_steps": 6840, "loss": 0.49840620160102844, "lr": 1.0541222616242575e-05, "epoch": 1.0175464249159234, "percentage": 50.88, "elapsed_time": "5:05:37", "remaining_time": "4:55:05"} +{"current_steps": 3481, "total_steps": 6840, "loss": 0.5302960276603699, "lr": 1.0536394932259085e-05, "epoch": 1.0178388653311887, "percentage": 50.89, "elapsed_time": "5:05:42", "remaining_time": "4:54:59"} +{"current_steps": 3482, "total_steps": 6840, "loss": 0.5694236755371094, "lr": 1.0531567122896543e-05, "epoch": 1.018131305746454, "percentage": 50.91, "elapsed_time": "5:05:48", "remaining_time": "4:54:54"} +{"current_steps": 3483, "total_steps": 6840, "loss": 0.5155326128005981, "lr": 1.0526739189283414e-05, "epoch": 1.0184237461617196, "percentage": 50.92, "elapsed_time": "5:05:52", "remaining_time": "4:54:48"} +{"current_steps": 3484, "total_steps": 6840, "loss": 0.6254806518554688, "lr": 1.0521911132548207e-05, "epoch": 1.018716186576985, "percentage": 50.94, "elapsed_time": "5:05:57", "remaining_time": "4:54:42"} +{"current_steps": 3485, "total_steps": 6840, "loss": 0.5623525977134705, "lr": 1.0517082953819442e-05, "epoch": 1.0190086269922503, "percentage": 50.95, "elapsed_time": "5:06:01", "remaining_time": "4:54:36"} +{"current_steps": 3486, 
"total_steps": 6840, "loss": 0.6289865970611572, "lr": 1.051225465422568e-05, "epoch": 1.0193010674075158, "percentage": 50.96, "elapsed_time": "5:06:07", "remaining_time": "4:54:31"} +{"current_steps": 3487, "total_steps": 6840, "loss": 0.5935345888137817, "lr": 1.050742623489551e-05, "epoch": 1.0195935078227811, "percentage": 50.98, "elapsed_time": "5:06:11", "remaining_time": "4:54:25"} +{"current_steps": 3488, "total_steps": 6840, "loss": 0.5223839282989502, "lr": 1.0502597696957542e-05, "epoch": 1.0198859482380465, "percentage": 50.99, "elapsed_time": "5:06:16", "remaining_time": "4:54:19"} +{"current_steps": 3489, "total_steps": 6840, "loss": 0.6766373515129089, "lr": 1.0497769041540418e-05, "epoch": 1.0201783886533118, "percentage": 51.01, "elapsed_time": "5:06:20", "remaining_time": "4:54:13"} +{"current_steps": 3490, "total_steps": 6840, "loss": 0.4934672713279724, "lr": 1.0492940269772806e-05, "epoch": 1.0204708290685773, "percentage": 51.02, "elapsed_time": "5:06:24", "remaining_time": "4:54:06"} +{"current_steps": 3491, "total_steps": 6840, "loss": 0.5207735300064087, "lr": 1.0488111382783403e-05, "epoch": 1.0207632694838427, "percentage": 51.04, "elapsed_time": "5:06:28", "remaining_time": "4:54:00"} +{"current_steps": 3492, "total_steps": 6840, "loss": 0.6090695261955261, "lr": 1.0483282381700933e-05, "epoch": 1.021055709899108, "percentage": 51.05, "elapsed_time": "5:06:34", "remaining_time": "4:53:55"} +{"current_steps": 3493, "total_steps": 6840, "loss": 0.5777665376663208, "lr": 1.0478453267654147e-05, "epoch": 1.0213481503143735, "percentage": 51.07, "elapsed_time": "5:06:39", "remaining_time": "4:53:50"} +{"current_steps": 3494, "total_steps": 6840, "loss": 0.7241395711898804, "lr": 1.0473624041771814e-05, "epoch": 1.0216405907296389, "percentage": 51.08, "elapsed_time": "5:06:43", "remaining_time": "4:53:44"} +{"current_steps": 3495, "total_steps": 6840, "loss": 0.45545506477355957, "lr": 1.0468794705182742e-05, "epoch": 1.0219330311449042, 
"percentage": 51.1, "elapsed_time": "5:06:48", "remaining_time": "4:53:38"} +{"current_steps": 3496, "total_steps": 6840, "loss": 0.5519885420799255, "lr": 1.0463965259015761e-05, "epoch": 1.0222254715601695, "percentage": 51.11, "elapsed_time": "5:06:54", "remaining_time": "4:53:33"} +{"current_steps": 3497, "total_steps": 6840, "loss": 0.558646559715271, "lr": 1.045913570439972e-05, "epoch": 1.022517911975435, "percentage": 51.13, "elapsed_time": "5:06:59", "remaining_time": "4:53:27"} +{"current_steps": 3498, "total_steps": 6840, "loss": 0.5259999632835388, "lr": 1.0454306042463499e-05, "epoch": 1.0228103523907004, "percentage": 51.14, "elapsed_time": "5:07:04", "remaining_time": "4:53:22"} +{"current_steps": 3499, "total_steps": 6840, "loss": 0.4711627960205078, "lr": 1.0449476274336004e-05, "epoch": 1.0231027928059657, "percentage": 51.15, "elapsed_time": "5:07:10", "remaining_time": "4:53:17"} +{"current_steps": 3500, "total_steps": 6840, "loss": 0.5893874168395996, "lr": 1.0444646401146161e-05, "epoch": 1.0233952332212313, "percentage": 51.17, "elapsed_time": "5:07:15", "remaining_time": "4:53:12"} +{"current_steps": 3501, "total_steps": 6840, "loss": 0.5596123933792114, "lr": 1.0439816424022926e-05, "epoch": 1.0236876736364966, "percentage": 51.18, "elapsed_time": "5:07:25", "remaining_time": "4:53:12"} +{"current_steps": 3502, "total_steps": 6840, "loss": 0.5228658318519592, "lr": 1.0434986344095276e-05, "epoch": 1.023980114051762, "percentage": 51.2, "elapsed_time": "5:07:31", "remaining_time": "4:53:07"} +{"current_steps": 3503, "total_steps": 6840, "loss": 0.5520567297935486, "lr": 1.0430156162492216e-05, "epoch": 1.0242725544670273, "percentage": 51.21, "elapsed_time": "5:07:38", "remaining_time": "4:53:03"} +{"current_steps": 3504, "total_steps": 6840, "loss": 0.531911313533783, "lr": 1.0425325880342762e-05, "epoch": 1.0245649948822928, "percentage": 51.23, "elapsed_time": "5:07:44", "remaining_time": "4:52:59"} +{"current_steps": 3505, "total_steps": 
6840, "loss": 0.58717942237854, "lr": 1.0420495498775974e-05, "epoch": 1.0248574352975581, "percentage": 51.24, "elapsed_time": "5:07:49", "remaining_time": "4:52:53"} +{"current_steps": 3506, "total_steps": 6840, "loss": 0.4972108006477356, "lr": 1.0415665018920919e-05, "epoch": 1.0251498757128235, "percentage": 51.26, "elapsed_time": "5:07:55", "remaining_time": "4:52:48"} +{"current_steps": 3507, "total_steps": 6840, "loss": 0.567977249622345, "lr": 1.0410834441906692e-05, "epoch": 1.025442316128089, "percentage": 51.27, "elapsed_time": "5:08:01", "remaining_time": "4:52:44"} +{"current_steps": 3508, "total_steps": 6840, "loss": 0.568755567073822, "lr": 1.0406003768862416e-05, "epoch": 1.0257347565433543, "percentage": 51.29, "elapsed_time": "5:08:05", "remaining_time": "4:52:38"} +{"current_steps": 3509, "total_steps": 6840, "loss": 0.5668960809707642, "lr": 1.0401173000917224e-05, "epoch": 1.0260271969586197, "percentage": 51.3, "elapsed_time": "5:08:10", "remaining_time": "4:52:32"} +{"current_steps": 3510, "total_steps": 6840, "loss": 0.5956743955612183, "lr": 1.0396342139200282e-05, "epoch": 1.026319637373885, "percentage": 51.32, "elapsed_time": "5:08:15", "remaining_time": "4:52:27"} +{"current_steps": 3511, "total_steps": 6840, "loss": 0.5258834362030029, "lr": 1.0391511184840775e-05, "epoch": 1.0266120777891505, "percentage": 51.33, "elapsed_time": "5:08:21", "remaining_time": "4:52:22"} +{"current_steps": 3512, "total_steps": 6840, "loss": 0.7358168363571167, "lr": 1.038668013896791e-05, "epoch": 1.0269045182044159, "percentage": 51.35, "elapsed_time": "5:08:26", "remaining_time": "4:52:16"} +{"current_steps": 3513, "total_steps": 6840, "loss": 0.5845209956169128, "lr": 1.0381849002710914e-05, "epoch": 1.0271969586196812, "percentage": 51.36, "elapsed_time": "5:08:31", "remaining_time": "4:52:11"} +{"current_steps": 3514, "total_steps": 6840, "loss": 0.4475495219230652, "lr": 1.0377017777199034e-05, "epoch": 1.0274893990349467, "percentage": 51.37, 
"elapsed_time": "5:08:36", "remaining_time": "4:52:06"} +{"current_steps": 3515, "total_steps": 6840, "loss": 0.5555804371833801, "lr": 1.0372186463561542e-05, "epoch": 1.027781839450212, "percentage": 51.39, "elapsed_time": "5:08:41", "remaining_time": "4:52:00"} +{"current_steps": 3516, "total_steps": 6840, "loss": 0.5927316546440125, "lr": 1.0367355062927726e-05, "epoch": 1.0280742798654774, "percentage": 51.4, "elapsed_time": "5:08:46", "remaining_time": "4:51:54"} +{"current_steps": 3517, "total_steps": 6840, "loss": 0.47281715273857117, "lr": 1.0362523576426897e-05, "epoch": 1.0283667202807427, "percentage": 51.42, "elapsed_time": "5:08:51", "remaining_time": "4:51:49"} +{"current_steps": 3518, "total_steps": 6840, "loss": 0.5275483727455139, "lr": 1.0357692005188387e-05, "epoch": 1.0286591606960083, "percentage": 51.43, "elapsed_time": "5:08:56", "remaining_time": "4:51:44"} +{"current_steps": 3519, "total_steps": 6840, "loss": 0.5740839242935181, "lr": 1.0352860350341547e-05, "epoch": 1.0289516011112736, "percentage": 51.45, "elapsed_time": "5:09:00", "remaining_time": "4:51:37"} +{"current_steps": 3520, "total_steps": 6840, "loss": 0.6030054688453674, "lr": 1.0348028613015747e-05, "epoch": 1.029244041526539, "percentage": 51.46, "elapsed_time": "5:09:07", "remaining_time": "4:51:33"} +{"current_steps": 3521, "total_steps": 6840, "loss": 0.5415347814559937, "lr": 1.034319679434037e-05, "epoch": 1.0295364819418042, "percentage": 51.48, "elapsed_time": "5:09:13", "remaining_time": "4:51:29"} +{"current_steps": 3522, "total_steps": 6840, "loss": 0.5850083231925964, "lr": 1.033836489544483e-05, "epoch": 1.0298289223570698, "percentage": 51.49, "elapsed_time": "5:09:18", "remaining_time": "4:51:23"} +{"current_steps": 3523, "total_steps": 6840, "loss": 0.47614163160324097, "lr": 1.0333532917458556e-05, "epoch": 1.0301213627723351, "percentage": 51.51, "elapsed_time": "5:09:24", "remaining_time": "4:51:18"} +{"current_steps": 3524, "total_steps": 6840, "loss": 
0.5645745992660522, "lr": 1.0328700861510987e-05, "epoch": 1.0304138031876005, "percentage": 51.52, "elapsed_time": "5:09:29", "remaining_time": "4:51:12"} +{"current_steps": 3525, "total_steps": 6840, "loss": 0.5729008913040161, "lr": 1.0323868728731591e-05, "epoch": 1.030706243602866, "percentage": 51.54, "elapsed_time": "5:09:35", "remaining_time": "4:51:08"} +{"current_steps": 3526, "total_steps": 6840, "loss": 0.5177778005599976, "lr": 1.031903652024985e-05, "epoch": 1.0309986840181313, "percentage": 51.55, "elapsed_time": "5:09:40", "remaining_time": "4:51:03"} +{"current_steps": 3527, "total_steps": 6840, "loss": 0.49413079023361206, "lr": 1.0314204237195263e-05, "epoch": 1.0312911244333967, "percentage": 51.56, "elapsed_time": "5:09:46", "remaining_time": "4:50:58"} +{"current_steps": 3528, "total_steps": 6840, "loss": 0.5074756145477295, "lr": 1.0309371880697342e-05, "epoch": 1.031583564848662, "percentage": 51.58, "elapsed_time": "5:09:50", "remaining_time": "4:50:52"} +{"current_steps": 3529, "total_steps": 6840, "loss": 0.5601285696029663, "lr": 1.0304539451885629e-05, "epoch": 1.0318760052639275, "percentage": 51.59, "elapsed_time": "5:09:56", "remaining_time": "4:50:47"} +{"current_steps": 3530, "total_steps": 6840, "loss": 0.48358121514320374, "lr": 1.029970695188967e-05, "epoch": 1.0321684456791929, "percentage": 51.61, "elapsed_time": "5:10:01", "remaining_time": "4:50:42"} +{"current_steps": 3531, "total_steps": 6840, "loss": 0.4472161829471588, "lr": 1.0294874381839033e-05, "epoch": 1.0324608860944582, "percentage": 51.62, "elapsed_time": "5:10:07", "remaining_time": "4:50:37"} +{"current_steps": 3532, "total_steps": 6840, "loss": 0.6011627912521362, "lr": 1.02900417428633e-05, "epoch": 1.0327533265097237, "percentage": 51.64, "elapsed_time": "5:10:14", "remaining_time": "4:50:33"} +{"current_steps": 3533, "total_steps": 6840, "loss": 0.5212395191192627, "lr": 1.0285209036092076e-05, "epoch": 1.033045766924989, "percentage": 51.65, 
"elapsed_time": "5:10:18", "remaining_time": "4:50:27"} +{"current_steps": 3534, "total_steps": 6840, "loss": 0.5433810949325562, "lr": 1.0280376262654971e-05, "epoch": 1.0333382073402544, "percentage": 51.67, "elapsed_time": "5:10:23", "remaining_time": "4:50:21"} +{"current_steps": 3535, "total_steps": 6840, "loss": 0.5215464234352112, "lr": 1.0275543423681622e-05, "epoch": 1.0336306477555197, "percentage": 51.68, "elapsed_time": "5:10:29", "remaining_time": "4:50:16"} +{"current_steps": 3536, "total_steps": 6840, "loss": 0.511099100112915, "lr": 1.0270710520301672e-05, "epoch": 1.0339230881707853, "percentage": 51.7, "elapsed_time": "5:10:35", "remaining_time": "4:50:12"} +{"current_steps": 3537, "total_steps": 6840, "loss": 0.4954407811164856, "lr": 1.0265877553644783e-05, "epoch": 1.0342155285860506, "percentage": 51.71, "elapsed_time": "5:10:40", "remaining_time": "4:50:07"} +{"current_steps": 3538, "total_steps": 6840, "loss": 0.5491081476211548, "lr": 1.0261044524840633e-05, "epoch": 1.034507969001316, "percentage": 51.73, "elapsed_time": "5:10:45", "remaining_time": "4:50:02"} +{"current_steps": 3539, "total_steps": 6840, "loss": 0.43202829360961914, "lr": 1.0256211435018912e-05, "epoch": 1.0348004094165815, "percentage": 51.74, "elapsed_time": "5:10:51", "remaining_time": "4:49:57"} +{"current_steps": 3540, "total_steps": 6840, "loss": 0.4721212089061737, "lr": 1.0251378285309326e-05, "epoch": 1.0350928498318468, "percentage": 51.75, "elapsed_time": "5:10:57", "remaining_time": "4:49:52"} +{"current_steps": 3541, "total_steps": 6840, "loss": 0.5621099472045898, "lr": 1.0246545076841596e-05, "epoch": 1.0353852902471121, "percentage": 51.77, "elapsed_time": "5:11:02", "remaining_time": "4:49:46"} +{"current_steps": 3542, "total_steps": 6840, "loss": 0.5572346448898315, "lr": 1.0241711810745452e-05, "epoch": 1.0356777306623774, "percentage": 51.78, "elapsed_time": "5:11:06", "remaining_time": "4:49:40"} +{"current_steps": 3543, "total_steps": 6840, "loss": 
0.40916550159454346, "lr": 1.023687848815064e-05, "epoch": 1.035970171077643, "percentage": 51.8, "elapsed_time": "5:11:10", "remaining_time": "4:49:34"} +{"current_steps": 3544, "total_steps": 6840, "loss": 0.5370572805404663, "lr": 1.0232045110186926e-05, "epoch": 1.0362626114929083, "percentage": 51.81, "elapsed_time": "5:11:17", "remaining_time": "4:49:30"} +{"current_steps": 3545, "total_steps": 6840, "loss": 0.5381634831428528, "lr": 1.0227211677984074e-05, "epoch": 1.0365550519081737, "percentage": 51.83, "elapsed_time": "5:11:21", "remaining_time": "4:49:24"} +{"current_steps": 3546, "total_steps": 6840, "loss": 0.4807749092578888, "lr": 1.0222378192671878e-05, "epoch": 1.0368474923234392, "percentage": 51.84, "elapsed_time": "5:11:27", "remaining_time": "4:49:19"} +{"current_steps": 3547, "total_steps": 6840, "loss": 0.5673447847366333, "lr": 1.0217544655380129e-05, "epoch": 1.0371399327387045, "percentage": 51.86, "elapsed_time": "5:11:34", "remaining_time": "4:49:15"} +{"current_steps": 3548, "total_steps": 6840, "loss": 0.5259549021720886, "lr": 1.0212711067238639e-05, "epoch": 1.0374323731539699, "percentage": 51.87, "elapsed_time": "5:11:40", "remaining_time": "4:49:11"} +{"current_steps": 3549, "total_steps": 6840, "loss": 0.48267534375190735, "lr": 1.0207877429377232e-05, "epoch": 1.0377248135692352, "percentage": 51.89, "elapsed_time": "5:11:46", "remaining_time": "4:49:06"} +{"current_steps": 3550, "total_steps": 6840, "loss": 0.44843387603759766, "lr": 1.0203043742925738e-05, "epoch": 1.0380172539845007, "percentage": 51.9, "elapsed_time": "5:11:52", "remaining_time": "4:49:01"} +{"current_steps": 3551, "total_steps": 6840, "loss": 0.8050575256347656, "lr": 1.0198210009014005e-05, "epoch": 1.038309694399766, "percentage": 51.92, "elapsed_time": "5:11:57", "remaining_time": "4:48:56"} +{"current_steps": 3552, "total_steps": 6840, "loss": 0.590203046798706, "lr": 1.0193376228771887e-05, "epoch": 1.0386021348150314, "percentage": 51.93, 
"elapsed_time": "5:12:02", "remaining_time": "4:48:51"} +{"current_steps": 3553, "total_steps": 6840, "loss": 0.5974458456039429, "lr": 1.0188542403329252e-05, "epoch": 1.0388945752302967, "percentage": 51.94, "elapsed_time": "5:12:07", "remaining_time": "4:48:45"} +{"current_steps": 3554, "total_steps": 6840, "loss": 0.4628743827342987, "lr": 1.0183708533815975e-05, "epoch": 1.0391870156455623, "percentage": 51.96, "elapsed_time": "5:12:11", "remaining_time": "4:48:39"} +{"current_steps": 3555, "total_steps": 6840, "loss": 0.6738137006759644, "lr": 1.0178874621361944e-05, "epoch": 1.0394794560608276, "percentage": 51.97, "elapsed_time": "5:12:16", "remaining_time": "4:48:33"} +{"current_steps": 3556, "total_steps": 6840, "loss": 0.48062413930892944, "lr": 1.0174040667097061e-05, "epoch": 1.039771896476093, "percentage": 51.99, "elapsed_time": "5:12:21", "remaining_time": "4:48:27"} +{"current_steps": 3557, "total_steps": 6840, "loss": 0.564401388168335, "lr": 1.016920667215123e-05, "epoch": 1.0400643368913585, "percentage": 52.0, "elapsed_time": "5:12:27", "remaining_time": "4:48:22"} +{"current_steps": 3558, "total_steps": 6840, "loss": 0.4035246968269348, "lr": 1.0164372637654367e-05, "epoch": 1.0403567773066238, "percentage": 52.02, "elapsed_time": "5:12:32", "remaining_time": "4:48:17"} +{"current_steps": 3559, "total_steps": 6840, "loss": 0.4484536051750183, "lr": 1.0159538564736399e-05, "epoch": 1.0406492177218891, "percentage": 52.03, "elapsed_time": "5:12:37", "remaining_time": "4:48:12"} +{"current_steps": 3560, "total_steps": 6840, "loss": 0.6257200837135315, "lr": 1.0154704454527265e-05, "epoch": 1.0409416581371547, "percentage": 52.05, "elapsed_time": "5:12:43", "remaining_time": "4:48:07"} +{"current_steps": 3561, "total_steps": 6840, "loss": 0.5541477799415588, "lr": 1.0149870308156899e-05, "epoch": 1.04123409855242, "percentage": 52.06, "elapsed_time": "5:12:47", "remaining_time": "4:48:01"} +{"current_steps": 3562, "total_steps": 6840, "loss": 
0.6248821020126343, "lr": 1.0145036126755264e-05, "epoch": 1.0415265389676853, "percentage": 52.08, "elapsed_time": "5:12:51", "remaining_time": "4:47:54"} +{"current_steps": 3563, "total_steps": 6840, "loss": 0.574689507484436, "lr": 1.0140201911452318e-05, "epoch": 1.0418189793829506, "percentage": 52.09, "elapsed_time": "5:12:57", "remaining_time": "4:47:49"} +{"current_steps": 3564, "total_steps": 6840, "loss": 0.5873313546180725, "lr": 1.0135367663378025e-05, "epoch": 1.0421114197982162, "percentage": 52.11, "elapsed_time": "5:13:03", "remaining_time": "4:47:45"} +{"current_steps": 3565, "total_steps": 6840, "loss": 0.6662088632583618, "lr": 1.0130533383662361e-05, "epoch": 1.0424038602134815, "percentage": 52.12, "elapsed_time": "5:13:07", "remaining_time": "4:47:39"} +{"current_steps": 3566, "total_steps": 6840, "loss": 0.6517773866653442, "lr": 1.0125699073435316e-05, "epoch": 1.0426963006287469, "percentage": 52.13, "elapsed_time": "5:13:13", "remaining_time": "4:47:34"} +{"current_steps": 3567, "total_steps": 6840, "loss": 0.6311444640159607, "lr": 1.0120864733826877e-05, "epoch": 1.0429887410440122, "percentage": 52.15, "elapsed_time": "5:13:17", "remaining_time": "4:47:28"} +{"current_steps": 3568, "total_steps": 6840, "loss": 0.49060457944869995, "lr": 1.0116030365967037e-05, "epoch": 1.0432811814592777, "percentage": 52.16, "elapsed_time": "5:13:23", "remaining_time": "4:47:23"} +{"current_steps": 3569, "total_steps": 6840, "loss": 0.5405893921852112, "lr": 1.0111195970985813e-05, "epoch": 1.043573621874543, "percentage": 52.18, "elapsed_time": "5:13:28", "remaining_time": "4:47:17"} +{"current_steps": 3570, "total_steps": 6840, "loss": 0.482162743806839, "lr": 1.01063615500132e-05, "epoch": 1.0438660622898084, "percentage": 52.19, "elapsed_time": "5:13:33", "remaining_time": "4:47:12"} +{"current_steps": 3571, "total_steps": 6840, "loss": 0.4542362093925476, "lr": 1.0101527104179224e-05, "epoch": 1.044158502705074, "percentage": 52.21, 
"elapsed_time": "5:13:38", "remaining_time": "4:47:06"} +{"current_steps": 3572, "total_steps": 6840, "loss": 0.6157265305519104, "lr": 1.00966926346139e-05, "epoch": 1.0444509431203393, "percentage": 52.22, "elapsed_time": "5:13:43", "remaining_time": "4:47:01"} +{"current_steps": 3573, "total_steps": 6840, "loss": 0.6591875553131104, "lr": 1.0091858142447266e-05, "epoch": 1.0447433835356046, "percentage": 52.24, "elapsed_time": "5:13:49", "remaining_time": "4:46:56"} +{"current_steps": 3574, "total_steps": 6840, "loss": 0.5686256885528564, "lr": 1.0087023628809347e-05, "epoch": 1.04503582395087, "percentage": 52.25, "elapsed_time": "5:13:54", "remaining_time": "4:46:51"} +{"current_steps": 3575, "total_steps": 6840, "loss": 0.45131799578666687, "lr": 1.0082189094830183e-05, "epoch": 1.0453282643661355, "percentage": 52.27, "elapsed_time": "5:14:00", "remaining_time": "4:46:46"} +{"current_steps": 3576, "total_steps": 6840, "loss": 0.5787829160690308, "lr": 1.0077354541639821e-05, "epoch": 1.0456207047814008, "percentage": 52.28, "elapsed_time": "5:14:06", "remaining_time": "4:46:42"} +{"current_steps": 3577, "total_steps": 6840, "loss": 0.5755574107170105, "lr": 1.0072519970368303e-05, "epoch": 1.0459131451966661, "percentage": 52.3, "elapsed_time": "5:14:10", "remaining_time": "4:46:36"} +{"current_steps": 3578, "total_steps": 6840, "loss": 0.5017693638801575, "lr": 1.0067685382145683e-05, "epoch": 1.0462055856119317, "percentage": 52.31, "elapsed_time": "5:14:16", "remaining_time": "4:46:31"} +{"current_steps": 3579, "total_steps": 6840, "loss": 0.5096016526222229, "lr": 1.0062850778102017e-05, "epoch": 1.046498026027197, "percentage": 52.32, "elapsed_time": "5:14:22", "remaining_time": "4:46:26"} +{"current_steps": 3580, "total_steps": 6840, "loss": 0.4988967180252075, "lr": 1.0058016159367365e-05, "epoch": 1.0467904664424623, "percentage": 52.34, "elapsed_time": "5:14:27", "remaining_time": "4:46:21"} +{"current_steps": 3581, "total_steps": 6840, "loss": 
0.5410172939300537, "lr": 1.0053181527071786e-05, "epoch": 1.0470829068577276, "percentage": 52.35, "elapsed_time": "5:14:32", "remaining_time": "4:46:15"} +{"current_steps": 3582, "total_steps": 6840, "loss": 0.5980710983276367, "lr": 1.004834688234535e-05, "epoch": 1.0473753472729932, "percentage": 52.37, "elapsed_time": "5:14:37", "remaining_time": "4:46:10"} +{"current_steps": 3583, "total_steps": 6840, "loss": 0.4737449586391449, "lr": 1.0043512226318124e-05, "epoch": 1.0476677876882585, "percentage": 52.38, "elapsed_time": "5:14:42", "remaining_time": "4:46:04"} +{"current_steps": 3584, "total_steps": 6840, "loss": 0.6106469631195068, "lr": 1.003867756012018e-05, "epoch": 1.0479602281035238, "percentage": 52.4, "elapsed_time": "5:14:47", "remaining_time": "4:45:59"} +{"current_steps": 3585, "total_steps": 6840, "loss": 0.48002901673316956, "lr": 1.0033842884881593e-05, "epoch": 1.0482526685187894, "percentage": 52.41, "elapsed_time": "5:14:52", "remaining_time": "4:45:53"} +{"current_steps": 3586, "total_steps": 6840, "loss": 0.5101731419563293, "lr": 1.0029008201732433e-05, "epoch": 1.0485451089340547, "percentage": 52.43, "elapsed_time": "5:14:59", "remaining_time": "4:45:49"} +{"current_steps": 3587, "total_steps": 6840, "loss": 0.6350706219673157, "lr": 1.0024173511802786e-05, "epoch": 1.04883754934932, "percentage": 52.44, "elapsed_time": "5:15:03", "remaining_time": "4:45:43"} +{"current_steps": 3588, "total_steps": 6840, "loss": 0.5268979072570801, "lr": 1.0019338816222725e-05, "epoch": 1.0491299897645854, "percentage": 52.46, "elapsed_time": "5:15:08", "remaining_time": "4:45:37"} +{"current_steps": 3589, "total_steps": 6840, "loss": 0.5670457482337952, "lr": 1.0014504116122335e-05, "epoch": 1.049422430179851, "percentage": 52.47, "elapsed_time": "5:15:13", "remaining_time": "4:45:31"} +{"current_steps": 3590, "total_steps": 6840, "loss": 0.6200711727142334, "lr": 1.0009669412631697e-05, "epoch": 1.0497148705951163, "percentage": 52.49, 
"elapsed_time": "5:15:17", "remaining_time": "4:45:25"} +{"current_steps": 3591, "total_steps": 6840, "loss": 0.44014686346054077, "lr": 1.0004834706880891e-05, "epoch": 1.0500073110103816, "percentage": 52.5, "elapsed_time": "5:15:22", "remaining_time": "4:45:20"} +{"current_steps": 3592, "total_steps": 6840, "loss": 0.4690900146961212, "lr": 1e-05, "epoch": 1.050299751425647, "percentage": 52.51, "elapsed_time": "5:15:27", "remaining_time": "4:45:14"} +{"current_steps": 3593, "total_steps": 6840, "loss": 0.5791969299316406, "lr": 9.995165293119112e-06, "epoch": 1.0505921918409125, "percentage": 52.53, "elapsed_time": "5:15:33", "remaining_time": "4:45:10"} +{"current_steps": 3594, "total_steps": 6840, "loss": 0.5566244125366211, "lr": 9.990330587368306e-06, "epoch": 1.0508846322561778, "percentage": 52.54, "elapsed_time": "5:15:40", "remaining_time": "4:45:06"} +{"current_steps": 3595, "total_steps": 6840, "loss": 0.5201646685600281, "lr": 9.985495883877668e-06, "epoch": 1.0511770726714431, "percentage": 52.56, "elapsed_time": "5:15:45", "remaining_time": "4:45:01"} +{"current_steps": 3596, "total_steps": 6840, "loss": 0.44774526357650757, "lr": 9.980661183777277e-06, "epoch": 1.0514695130867087, "percentage": 52.57, "elapsed_time": "5:15:51", "remaining_time": "4:44:56"} +{"current_steps": 3597, "total_steps": 6840, "loss": 0.5346901416778564, "lr": 9.975826488197217e-06, "epoch": 1.051761953501974, "percentage": 52.59, "elapsed_time": "5:15:57", "remaining_time": "4:44:51"} +{"current_steps": 3598, "total_steps": 6840, "loss": 0.4639764428138733, "lr": 9.970991798267568e-06, "epoch": 1.0520543939172393, "percentage": 52.6, "elapsed_time": "5:16:02", "remaining_time": "4:44:46"} +{"current_steps": 3599, "total_steps": 6840, "loss": 0.5505763292312622, "lr": 9.966157115118412e-06, "epoch": 1.0523468343325049, "percentage": 52.62, "elapsed_time": "5:16:08", "remaining_time": "4:44:41"} +{"current_steps": 3600, "total_steps": 6840, "loss": 0.5187631845474243, "lr": 
9.961322439879821e-06, "epoch": 1.0526392747477702, "percentage": 52.63, "elapsed_time": "5:16:14", "remaining_time": "4:44:37"} +{"current_steps": 3601, "total_steps": 6840, "loss": 0.5990081429481506, "lr": 9.95648777368188e-06, "epoch": 1.0529317151630355, "percentage": 52.65, "elapsed_time": "5:16:25", "remaining_time": "4:44:36"} +{"current_steps": 3602, "total_steps": 6840, "loss": 0.5926306843757629, "lr": 9.951653117654653e-06, "epoch": 1.0532241555783008, "percentage": 52.66, "elapsed_time": "5:16:29", "remaining_time": "4:44:30"} +{"current_steps": 3603, "total_steps": 6840, "loss": 0.5294582843780518, "lr": 9.946818472928215e-06, "epoch": 1.0535165959935664, "percentage": 52.68, "elapsed_time": "5:16:35", "remaining_time": "4:44:25"} +{"current_steps": 3604, "total_steps": 6840, "loss": 0.5442140102386475, "lr": 9.941983840632637e-06, "epoch": 1.0538090364088317, "percentage": 52.69, "elapsed_time": "5:16:40", "remaining_time": "4:44:20"} +{"current_steps": 3605, "total_steps": 6840, "loss": 0.5888028740882874, "lr": 9.937149221897984e-06, "epoch": 1.054101476824097, "percentage": 52.7, "elapsed_time": "5:16:46", "remaining_time": "4:44:15"} +{"current_steps": 3606, "total_steps": 6840, "loss": 0.7545796632766724, "lr": 9.93231461785432e-06, "epoch": 1.0543939172393624, "percentage": 52.72, "elapsed_time": "5:16:50", "remaining_time": "4:44:09"} +{"current_steps": 3607, "total_steps": 6840, "loss": 0.4850383996963501, "lr": 9.9274800296317e-06, "epoch": 1.054686357654628, "percentage": 52.73, "elapsed_time": "5:16:55", "remaining_time": "4:44:04"} +{"current_steps": 3608, "total_steps": 6840, "loss": 0.5658243894577026, "lr": 9.922645458360182e-06, "epoch": 1.0549787980698933, "percentage": 52.75, "elapsed_time": "5:17:00", "remaining_time": "4:43:58"} +{"current_steps": 3609, "total_steps": 6840, "loss": 0.6526712775230408, "lr": 9.917810905169818e-06, "epoch": 1.0552712384851586, "percentage": 52.76, "elapsed_time": "5:17:05", "remaining_time": 
"4:43:53"} +{"current_steps": 3610, "total_steps": 6840, "loss": 0.6125987768173218, "lr": 9.912976371190657e-06, "epoch": 1.0555636789004241, "percentage": 52.78, "elapsed_time": "5:17:09", "remaining_time": "4:43:46"} +{"current_steps": 3611, "total_steps": 6840, "loss": 0.40159785747528076, "lr": 9.908141857552737e-06, "epoch": 1.0558561193156895, "percentage": 52.79, "elapsed_time": "5:17:14", "remaining_time": "4:43:41"} +{"current_steps": 3612, "total_steps": 6840, "loss": 0.6628924608230591, "lr": 9.903307365386103e-06, "epoch": 1.0561485597309548, "percentage": 52.81, "elapsed_time": "5:17:18", "remaining_time": "4:43:34"} +{"current_steps": 3613, "total_steps": 6840, "loss": 0.6083816289901733, "lr": 9.898472895820783e-06, "epoch": 1.05644100014622, "percentage": 52.82, "elapsed_time": "5:17:23", "remaining_time": "4:43:29"} +{"current_steps": 3614, "total_steps": 6840, "loss": 0.5349488854408264, "lr": 9.893638449986806e-06, "epoch": 1.0567334405614857, "percentage": 52.84, "elapsed_time": "5:17:29", "remaining_time": "4:43:24"} +{"current_steps": 3615, "total_steps": 6840, "loss": 0.6119222044944763, "lr": 9.888804029014194e-06, "epoch": 1.057025880976751, "percentage": 52.85, "elapsed_time": "5:17:36", "remaining_time": "4:43:20"} +{"current_steps": 3616, "total_steps": 6840, "loss": 0.531359851360321, "lr": 9.883969634032964e-06, "epoch": 1.0573183213920163, "percentage": 52.87, "elapsed_time": "5:17:42", "remaining_time": "4:43:15"} +{"current_steps": 3617, "total_steps": 6840, "loss": 0.6604791879653931, "lr": 9.879135266173127e-06, "epoch": 1.0576107618072819, "percentage": 52.88, "elapsed_time": "5:17:48", "remaining_time": "4:43:11"} +{"current_steps": 3618, "total_steps": 6840, "loss": 0.4691445231437683, "lr": 9.874300926564689e-06, "epoch": 1.0579032022225472, "percentage": 52.89, "elapsed_time": "5:17:54", "remaining_time": "4:43:06"} +{"current_steps": 3619, "total_steps": 6840, "loss": 0.5690087080001831, "lr": 9.869466616337642e-06, 
"epoch": 1.0581956426378125, "percentage": 52.91, "elapsed_time": "5:18:00", "remaining_time": "4:43:02"} +{"current_steps": 3620, "total_steps": 6840, "loss": 0.5426729917526245, "lr": 9.86463233662198e-06, "epoch": 1.0584880830530778, "percentage": 52.92, "elapsed_time": "5:18:06", "remaining_time": "4:42:57"} +{"current_steps": 3621, "total_steps": 6840, "loss": 0.5640411376953125, "lr": 9.859798088547687e-06, "epoch": 1.0587805234683434, "percentage": 52.94, "elapsed_time": "5:18:10", "remaining_time": "4:42:50"} +{"current_steps": 3622, "total_steps": 6840, "loss": 0.6724091172218323, "lr": 9.854963873244738e-06, "epoch": 1.0590729638836087, "percentage": 52.95, "elapsed_time": "5:18:15", "remaining_time": "4:42:45"} +{"current_steps": 3623, "total_steps": 6840, "loss": 0.5448887348175049, "lr": 9.850129691843105e-06, "epoch": 1.059365404298874, "percentage": 52.97, "elapsed_time": "5:18:20", "remaining_time": "4:42:39"} +{"current_steps": 3624, "total_steps": 6840, "loss": 0.5555344820022583, "lr": 9.845295545472742e-06, "epoch": 1.0596578447141396, "percentage": 52.98, "elapsed_time": "5:18:25", "remaining_time": "4:42:34"} +{"current_steps": 3625, "total_steps": 6840, "loss": 0.5053969621658325, "lr": 9.840461435263604e-06, "epoch": 1.059950285129405, "percentage": 53.0, "elapsed_time": "5:18:29", "remaining_time": "4:42:28"} +{"current_steps": 3626, "total_steps": 6840, "loss": 0.5866390466690063, "lr": 9.835627362345636e-06, "epoch": 1.0602427255446703, "percentage": 53.01, "elapsed_time": "5:18:35", "remaining_time": "4:42:23"} +{"current_steps": 3627, "total_steps": 6840, "loss": 0.5936717987060547, "lr": 9.830793327848773e-06, "epoch": 1.0605351659599356, "percentage": 53.03, "elapsed_time": "5:18:39", "remaining_time": "4:42:17"} +{"current_steps": 3628, "total_steps": 6840, "loss": 0.6009070873260498, "lr": 9.82595933290294e-06, "epoch": 1.0608276063752011, "percentage": 53.04, "elapsed_time": "5:18:44", "remaining_time": "4:42:11"} 
+{"current_steps": 3629, "total_steps": 6840, "loss": 0.5361435413360596, "lr": 9.821125378638059e-06, "epoch": 1.0611200467904665, "percentage": 53.06, "elapsed_time": "5:18:49", "remaining_time": "4:42:05"} +{"current_steps": 3630, "total_steps": 6840, "loss": 0.5763939619064331, "lr": 9.816291466184025e-06, "epoch": 1.0614124872057318, "percentage": 53.07, "elapsed_time": "5:18:55", "remaining_time": "4:42:01"} +{"current_steps": 3631, "total_steps": 6840, "loss": 0.57512366771698, "lr": 9.81145759667075e-06, "epoch": 1.061704927620997, "percentage": 53.08, "elapsed_time": "5:18:59", "remaining_time": "4:41:55"} +{"current_steps": 3632, "total_steps": 6840, "loss": 0.6144367456436157, "lr": 9.806623771228115e-06, "epoch": 1.0619973680362627, "percentage": 53.1, "elapsed_time": "5:19:04", "remaining_time": "4:41:49"} +{"current_steps": 3633, "total_steps": 6840, "loss": 0.5715698003768921, "lr": 9.801789990985997e-06, "epoch": 1.062289808451528, "percentage": 53.11, "elapsed_time": "5:19:09", "remaining_time": "4:41:43"} +{"current_steps": 3634, "total_steps": 6840, "loss": 0.632681131362915, "lr": 9.796956257074263e-06, "epoch": 1.0625822488667933, "percentage": 53.13, "elapsed_time": "5:19:15", "remaining_time": "4:41:39"} +{"current_steps": 3635, "total_steps": 6840, "loss": 0.5362547636032104, "lr": 9.79212257062277e-06, "epoch": 1.0628746892820589, "percentage": 53.14, "elapsed_time": "5:19:22", "remaining_time": "4:41:35"} +{"current_steps": 3636, "total_steps": 6840, "loss": 0.553846538066864, "lr": 9.787288932761361e-06, "epoch": 1.0631671296973242, "percentage": 53.16, "elapsed_time": "5:19:27", "remaining_time": "4:41:29"} +{"current_steps": 3637, "total_steps": 6840, "loss": 0.7200362682342529, "lr": 9.782455344619871e-06, "epoch": 1.0634595701125895, "percentage": 53.17, "elapsed_time": "5:19:31", "remaining_time": "4:41:23"} +{"current_steps": 3638, "total_steps": 6840, "loss": 0.5544596910476685, "lr": 9.777621807328126e-06, "epoch": 
1.063752010527855, "percentage": 53.19, "elapsed_time": "5:19:37", "remaining_time": "4:41:18"} +{"current_steps": 3639, "total_steps": 6840, "loss": 0.687321126461029, "lr": 9.772788322015926e-06, "epoch": 1.0640444509431204, "percentage": 53.2, "elapsed_time": "5:19:41", "remaining_time": "4:41:13"} +{"current_steps": 3640, "total_steps": 6840, "loss": 0.4986167550086975, "lr": 9.767954889813076e-06, "epoch": 1.0643368913583857, "percentage": 53.22, "elapsed_time": "5:19:47", "remaining_time": "4:41:08"} +{"current_steps": 3641, "total_steps": 6840, "loss": 0.5021307468414307, "lr": 9.763121511849358e-06, "epoch": 1.064629331773651, "percentage": 53.23, "elapsed_time": "5:19:52", "remaining_time": "4:41:02"} +{"current_steps": 3642, "total_steps": 6840, "loss": 0.5542711019515991, "lr": 9.758288189254548e-06, "epoch": 1.0649217721889166, "percentage": 53.25, "elapsed_time": "5:19:56", "remaining_time": "4:40:56"} +{"current_steps": 3643, "total_steps": 6840, "loss": 0.5161126852035522, "lr": 9.753454923158407e-06, "epoch": 1.065214212604182, "percentage": 53.26, "elapsed_time": "5:20:02", "remaining_time": "4:40:51"} +{"current_steps": 3644, "total_steps": 6840, "loss": 0.6041361093521118, "lr": 9.748621714690674e-06, "epoch": 1.0655066530194472, "percentage": 53.27, "elapsed_time": "5:20:08", "remaining_time": "4:40:46"} +{"current_steps": 3645, "total_steps": 6840, "loss": 0.5252672433853149, "lr": 9.74378856498109e-06, "epoch": 1.0657990934347126, "percentage": 53.29, "elapsed_time": "5:20:12", "remaining_time": "4:40:40"} +{"current_steps": 3646, "total_steps": 6840, "loss": 0.5198208093643188, "lr": 9.738955475159369e-06, "epoch": 1.0660915338499781, "percentage": 53.3, "elapsed_time": "5:20:18", "remaining_time": "4:40:36"} +{"current_steps": 3647, "total_steps": 6840, "loss": 0.5547968149185181, "lr": 9.734122446355219e-06, "epoch": 1.0663839742652435, "percentage": 53.32, "elapsed_time": "5:20:24", "remaining_time": "4:40:31"} +{"current_steps": 3648, 
"total_steps": 6840, "loss": 0.5854370594024658, "lr": 9.72928947969833e-06, "epoch": 1.0666764146805088, "percentage": 53.33, "elapsed_time": "5:20:31", "remaining_time": "4:40:27"} +{"current_steps": 3649, "total_steps": 6840, "loss": 0.5199173092842102, "lr": 9.724456576318383e-06, "epoch": 1.0669688550957743, "percentage": 53.35, "elapsed_time": "5:20:35", "remaining_time": "4:40:21"} +{"current_steps": 3650, "total_steps": 6840, "loss": 0.49684566259384155, "lr": 9.71962373734503e-06, "epoch": 1.0672612955110397, "percentage": 53.36, "elapsed_time": "5:20:41", "remaining_time": "4:40:16"} +{"current_steps": 3651, "total_steps": 6840, "loss": 0.593805193901062, "lr": 9.714790963907927e-06, "epoch": 1.067553735926305, "percentage": 53.38, "elapsed_time": "5:20:46", "remaining_time": "4:40:10"} +{"current_steps": 3652, "total_steps": 6840, "loss": 0.5524622201919556, "lr": 9.7099582571367e-06, "epoch": 1.0678461763415703, "percentage": 53.39, "elapsed_time": "5:20:51", "remaining_time": "4:40:05"} +{"current_steps": 3653, "total_steps": 6840, "loss": 0.5796955227851868, "lr": 9.70512561816097e-06, "epoch": 1.0681386167568359, "percentage": 53.41, "elapsed_time": "5:20:55", "remaining_time": "4:39:59"} +{"current_steps": 3654, "total_steps": 6840, "loss": 0.5470535159111023, "lr": 9.700293048110335e-06, "epoch": 1.0684310571721012, "percentage": 53.42, "elapsed_time": "5:21:01", "remaining_time": "4:39:54"} +{"current_steps": 3655, "total_steps": 6840, "loss": 0.5438790321350098, "lr": 9.695460548114374e-06, "epoch": 1.0687234975873665, "percentage": 53.44, "elapsed_time": "5:21:08", "remaining_time": "4:39:50"} +{"current_steps": 3656, "total_steps": 6840, "loss": 0.6324823498725891, "lr": 9.69062811930266e-06, "epoch": 1.069015938002632, "percentage": 53.45, "elapsed_time": "5:21:13", "remaining_time": "4:39:45"} +{"current_steps": 3657, "total_steps": 6840, "loss": 0.5261266231536865, "lr": 9.68579576280474e-06, "epoch": 1.0693083784178974, "percentage": 53.46, 
"elapsed_time": "5:21:17", "remaining_time": "4:39:38"} +{"current_steps": 3658, "total_steps": 6840, "loss": 0.49827292561531067, "lr": 9.680963479750152e-06, "epoch": 1.0696008188331627, "percentage": 53.48, "elapsed_time": "5:21:22", "remaining_time": "4:39:33"} +{"current_steps": 3659, "total_steps": 6840, "loss": 0.5273935794830322, "lr": 9.67613127126841e-06, "epoch": 1.069893259248428, "percentage": 53.49, "elapsed_time": "5:21:27", "remaining_time": "4:39:28"} +{"current_steps": 3660, "total_steps": 6840, "loss": 0.5816709995269775, "lr": 9.671299138489017e-06, "epoch": 1.0701856996636936, "percentage": 53.51, "elapsed_time": "5:21:33", "remaining_time": "4:39:23"} +{"current_steps": 3661, "total_steps": 6840, "loss": 0.5591616630554199, "lr": 9.66646708254145e-06, "epoch": 1.070478140078959, "percentage": 53.52, "elapsed_time": "5:21:38", "remaining_time": "4:39:17"} +{"current_steps": 3662, "total_steps": 6840, "loss": 0.581566572189331, "lr": 9.661635104555172e-06, "epoch": 1.0707705804942242, "percentage": 53.54, "elapsed_time": "5:21:43", "remaining_time": "4:39:11"} +{"current_steps": 3663, "total_steps": 6840, "loss": 0.5339047312736511, "lr": 9.656803205659632e-06, "epoch": 1.0710630209094898, "percentage": 53.55, "elapsed_time": "5:21:48", "remaining_time": "4:39:06"} +{"current_steps": 3664, "total_steps": 6840, "loss": 0.5200103521347046, "lr": 9.651971386984258e-06, "epoch": 1.0713554613247551, "percentage": 53.57, "elapsed_time": "5:21:53", "remaining_time": "4:39:01"} +{"current_steps": 3665, "total_steps": 6840, "loss": 0.7201805114746094, "lr": 9.647139649658454e-06, "epoch": 1.0716479017400204, "percentage": 53.58, "elapsed_time": "5:21:58", "remaining_time": "4:38:55"} +{"current_steps": 3666, "total_steps": 6840, "loss": 0.4801551103591919, "lr": 9.642307994811614e-06, "epoch": 1.0719403421552858, "percentage": 53.6, "elapsed_time": "5:22:03", "remaining_time": "4:38:50"} +{"current_steps": 3667, "total_steps": 6840, "loss": 
0.5809728503227234, "lr": 9.637476423573106e-06, "epoch": 1.0722327825705513, "percentage": 53.61, "elapsed_time": "5:22:08", "remaining_time": "4:38:44"} +{"current_steps": 3668, "total_steps": 6840, "loss": 0.6493573188781738, "lr": 9.632644937072277e-06, "epoch": 1.0725252229858167, "percentage": 53.63, "elapsed_time": "5:22:14", "remaining_time": "4:38:39"} +{"current_steps": 3669, "total_steps": 6840, "loss": 0.5858349800109863, "lr": 9.627813536438461e-06, "epoch": 1.072817663401082, "percentage": 53.64, "elapsed_time": "5:22:19", "remaining_time": "4:38:34"} +{"current_steps": 3670, "total_steps": 6840, "loss": 0.604835033416748, "lr": 9.622982222800968e-06, "epoch": 1.0731101038163473, "percentage": 53.65, "elapsed_time": "5:22:26", "remaining_time": "4:38:30"} +{"current_steps": 3671, "total_steps": 6840, "loss": 0.6168441772460938, "lr": 9.618150997289091e-06, "epoch": 1.0734025442316129, "percentage": 53.67, "elapsed_time": "5:22:30", "remaining_time": "4:38:24"} +{"current_steps": 3672, "total_steps": 6840, "loss": 0.5297094583511353, "lr": 9.613319861032093e-06, "epoch": 1.0736949846468782, "percentage": 53.68, "elapsed_time": "5:22:36", "remaining_time": "4:38:19"} +{"current_steps": 3673, "total_steps": 6840, "loss": 0.513571560382843, "lr": 9.608488815159226e-06, "epoch": 1.0739874250621435, "percentage": 53.7, "elapsed_time": "5:22:41", "remaining_time": "4:38:13"} +{"current_steps": 3674, "total_steps": 6840, "loss": 0.4383837580680847, "lr": 9.603657860799721e-06, "epoch": 1.074279865477409, "percentage": 53.71, "elapsed_time": "5:22:45", "remaining_time": "4:38:07"} +{"current_steps": 3675, "total_steps": 6840, "loss": 0.5428420305252075, "lr": 9.59882699908278e-06, "epoch": 1.0745723058926744, "percentage": 53.73, "elapsed_time": "5:22:50", "remaining_time": "4:38:02"} +{"current_steps": 3676, "total_steps": 6840, "loss": 0.5193662047386169, "lr": 9.593996231137587e-06, "epoch": 1.0748647463079397, "percentage": 53.74, "elapsed_time": 
"5:22:56", "remaining_time": "4:37:57"} +{"current_steps": 3677, "total_steps": 6840, "loss": 0.47949904203414917, "lr": 9.589165558093311e-06, "epoch": 1.0751571867232053, "percentage": 53.76, "elapsed_time": "5:23:01", "remaining_time": "4:37:52"} +{"current_steps": 3678, "total_steps": 6840, "loss": 0.5092326402664185, "lr": 9.584334981079085e-06, "epoch": 1.0754496271384706, "percentage": 53.77, "elapsed_time": "5:23:07", "remaining_time": "4:37:47"} +{"current_steps": 3679, "total_steps": 6840, "loss": 0.6627280712127686, "lr": 9.579504501224028e-06, "epoch": 1.075742067553736, "percentage": 53.79, "elapsed_time": "5:23:12", "remaining_time": "4:37:41"} +{"current_steps": 3680, "total_steps": 6840, "loss": 0.45087775588035583, "lr": 9.57467411965724e-06, "epoch": 1.0760345079690012, "percentage": 53.8, "elapsed_time": "5:23:17", "remaining_time": "4:37:36"} +{"current_steps": 3681, "total_steps": 6840, "loss": 0.5745380520820618, "lr": 9.569843837507788e-06, "epoch": 1.0763269483842668, "percentage": 53.82, "elapsed_time": "5:23:22", "remaining_time": "4:37:31"} +{"current_steps": 3682, "total_steps": 6840, "loss": 0.4410436749458313, "lr": 9.565013655904728e-06, "epoch": 1.0766193887995321, "percentage": 53.83, "elapsed_time": "5:23:27", "remaining_time": "4:37:25"} +{"current_steps": 3683, "total_steps": 6840, "loss": 0.4991244375705719, "lr": 9.560183575977079e-06, "epoch": 1.0769118292147974, "percentage": 53.85, "elapsed_time": "5:23:34", "remaining_time": "4:37:21"} +{"current_steps": 3684, "total_steps": 6840, "loss": 0.6316145658493042, "lr": 9.555353598853842e-06, "epoch": 1.0772042696300628, "percentage": 53.86, "elapsed_time": "5:23:38", "remaining_time": "4:37:15"} +{"current_steps": 3685, "total_steps": 6840, "loss": 0.5593908429145813, "lr": 9.550523725664e-06, "epoch": 1.0774967100453283, "percentage": 53.87, "elapsed_time": "5:23:44", "remaining_time": "4:37:10"} +{"current_steps": 3686, "total_steps": 6840, "loss": 0.5491319894790649, "lr": 
9.545693957536503e-06, "epoch": 1.0777891504605936, "percentage": 53.89, "elapsed_time": "5:23:49", "remaining_time": "4:37:05"} +{"current_steps": 3687, "total_steps": 6840, "loss": 0.6299821138381958, "lr": 9.540864295600282e-06, "epoch": 1.078081590875859, "percentage": 53.9, "elapsed_time": "5:23:54", "remaining_time": "4:36:59"} +{"current_steps": 3688, "total_steps": 6840, "loss": 0.5673841238021851, "lr": 9.536034740984244e-06, "epoch": 1.0783740312911245, "percentage": 53.92, "elapsed_time": "5:23:59", "remaining_time": "4:36:54"} +{"current_steps": 3689, "total_steps": 6840, "loss": 0.45966464281082153, "lr": 9.53120529481726e-06, "epoch": 1.0786664717063899, "percentage": 53.93, "elapsed_time": "5:24:05", "remaining_time": "4:36:49"} +{"current_steps": 3690, "total_steps": 6840, "loss": 0.5831631422042847, "lr": 9.526375958228191e-06, "epoch": 1.0789589121216552, "percentage": 53.95, "elapsed_time": "5:24:11", "remaining_time": "4:36:44"} +{"current_steps": 3691, "total_steps": 6840, "loss": 0.5456256866455078, "lr": 9.52154673234586e-06, "epoch": 1.0792513525369205, "percentage": 53.96, "elapsed_time": "5:24:15", "remaining_time": "4:36:38"} +{"current_steps": 3692, "total_steps": 6840, "loss": 0.46428292989730835, "lr": 9.516717618299069e-06, "epoch": 1.079543792952186, "percentage": 53.98, "elapsed_time": "5:24:20", "remaining_time": "4:36:33"} +{"current_steps": 3693, "total_steps": 6840, "loss": 0.47320839762687683, "lr": 9.511888617216602e-06, "epoch": 1.0798362333674514, "percentage": 53.99, "elapsed_time": "5:24:25", "remaining_time": "4:36:27"} +{"current_steps": 3694, "total_steps": 6840, "loss": 0.5205492973327637, "lr": 9.507059730227199e-06, "epoch": 1.0801286737827167, "percentage": 54.01, "elapsed_time": "5:24:31", "remaining_time": "4:36:23"} +{"current_steps": 3695, "total_steps": 6840, "loss": 0.42696553468704224, "lr": 9.502230958459587e-06, "epoch": 1.0804211141979823, "percentage": 54.02, "elapsed_time": "5:24:35", "remaining_time": 
"4:36:16"} +{"current_steps": 3696, "total_steps": 6840, "loss": 0.5147116780281067, "lr": 9.497402303042463e-06, "epoch": 1.0807135546132476, "percentage": 54.04, "elapsed_time": "5:24:40", "remaining_time": "4:36:11"} +{"current_steps": 3697, "total_steps": 6840, "loss": 0.5080294609069824, "lr": 9.492573765104494e-06, "epoch": 1.081005995028513, "percentage": 54.05, "elapsed_time": "5:24:47", "remaining_time": "4:36:07"} +{"current_steps": 3698, "total_steps": 6840, "loss": 0.6228866577148438, "lr": 9.487745345774323e-06, "epoch": 1.0812984354437782, "percentage": 54.06, "elapsed_time": "5:24:52", "remaining_time": "4:36:01"} +{"current_steps": 3699, "total_steps": 6840, "loss": 0.5560915470123291, "lr": 9.482917046180563e-06, "epoch": 1.0815908758590438, "percentage": 54.08, "elapsed_time": "5:24:57", "remaining_time": "4:35:56"} +{"current_steps": 3700, "total_steps": 6840, "loss": 0.5245859622955322, "lr": 9.4780888674518e-06, "epoch": 1.0818833162743091, "percentage": 54.09, "elapsed_time": "5:25:03", "remaining_time": "4:35:51"} +{"current_steps": 3701, "total_steps": 6840, "loss": 0.6462790966033936, "lr": 9.47326081071659e-06, "epoch": 1.0821757566895744, "percentage": 54.11, "elapsed_time": "5:25:13", "remaining_time": "4:35:50"} +{"current_steps": 3702, "total_steps": 6840, "loss": 0.5196692943572998, "lr": 9.468432877103462e-06, "epoch": 1.08246819710484, "percentage": 54.12, "elapsed_time": "5:25:18", "remaining_time": "4:35:45"} +{"current_steps": 3703, "total_steps": 6840, "loss": 0.5487779974937439, "lr": 9.463605067740917e-06, "epoch": 1.0827606375201053, "percentage": 54.14, "elapsed_time": "5:25:22", "remaining_time": "4:35:38"} +{"current_steps": 3704, "total_steps": 6840, "loss": 0.5471592545509338, "lr": 9.458777383757428e-06, "epoch": 1.0830530779353706, "percentage": 54.15, "elapsed_time": "5:25:27", "remaining_time": "4:35:32"} +{"current_steps": 3705, "total_steps": 6840, "loss": 0.6927378177642822, "lr": 9.453949826281436e-06, "epoch": 
1.083345518350636, "percentage": 54.17, "elapsed_time": "5:25:31", "remaining_time": "4:35:26"} +{"current_steps": 3706, "total_steps": 6840, "loss": 0.569003164768219, "lr": 9.449122396441344e-06, "epoch": 1.0836379587659015, "percentage": 54.18, "elapsed_time": "5:25:37", "remaining_time": "4:35:21"} +{"current_steps": 3707, "total_steps": 6840, "loss": 0.5655964612960815, "lr": 9.444295095365549e-06, "epoch": 1.0839303991811668, "percentage": 54.2, "elapsed_time": "5:25:42", "remaining_time": "4:35:16"} +{"current_steps": 3708, "total_steps": 6840, "loss": 0.6223032474517822, "lr": 9.439467924182397e-06, "epoch": 1.0842228395964322, "percentage": 54.21, "elapsed_time": "5:25:46", "remaining_time": "4:35:10"} +{"current_steps": 3709, "total_steps": 6840, "loss": 0.6553555727005005, "lr": 9.43464088402021e-06, "epoch": 1.0845152800116975, "percentage": 54.23, "elapsed_time": "5:25:50", "remaining_time": "4:35:03"} +{"current_steps": 3710, "total_steps": 6840, "loss": 0.534509539604187, "lr": 9.429813976007277e-06, "epoch": 1.084807720426963, "percentage": 54.24, "elapsed_time": "5:25:56", "remaining_time": "4:34:58"} +{"current_steps": 3711, "total_steps": 6840, "loss": 0.5801417827606201, "lr": 9.42498720127186e-06, "epoch": 1.0851001608422284, "percentage": 54.25, "elapsed_time": "5:26:01", "remaining_time": "4:34:53"} +{"current_steps": 3712, "total_steps": 6840, "loss": 0.47260361909866333, "lr": 9.42016056094219e-06, "epoch": 1.0853926012574937, "percentage": 54.27, "elapsed_time": "5:26:06", "remaining_time": "4:34:48"} +{"current_steps": 3713, "total_steps": 6840, "loss": 0.5924841165542603, "lr": 9.415334056146464e-06, "epoch": 1.0856850416727593, "percentage": 54.28, "elapsed_time": "5:26:12", "remaining_time": "4:34:43"} +{"current_steps": 3714, "total_steps": 6840, "loss": 0.6029725074768066, "lr": 9.410507688012847e-06, "epoch": 1.0859774820880246, "percentage": 54.3, "elapsed_time": "5:26:17", "remaining_time": "4:34:37"} +{"current_steps": 3715, 
"total_steps": 6840, "loss": 0.5838413834571838, "lr": 9.405681457669472e-06, "epoch": 1.08626992250329, "percentage": 54.31, "elapsed_time": "5:26:21", "remaining_time": "4:34:31"} +{"current_steps": 3716, "total_steps": 6840, "loss": 0.4739546775817871, "lr": 9.400855366244445e-06, "epoch": 1.0865623629185555, "percentage": 54.33, "elapsed_time": "5:26:27", "remaining_time": "4:34:26"} +{"current_steps": 3717, "total_steps": 6840, "loss": 0.4870055913925171, "lr": 9.396029414865832e-06, "epoch": 1.0868548033338208, "percentage": 54.34, "elapsed_time": "5:26:32", "remaining_time": "4:34:21"} +{"current_steps": 3718, "total_steps": 6840, "loss": 0.5572132468223572, "lr": 9.39120360466167e-06, "epoch": 1.0871472437490861, "percentage": 54.36, "elapsed_time": "5:26:38", "remaining_time": "4:34:16"} +{"current_steps": 3719, "total_steps": 6840, "loss": 0.5601439476013184, "lr": 9.386377936759966e-06, "epoch": 1.0874396841643514, "percentage": 54.37, "elapsed_time": "5:26:42", "remaining_time": "4:34:10"} +{"current_steps": 3720, "total_steps": 6840, "loss": 0.4551504850387573, "lr": 9.38155241228869e-06, "epoch": 1.087732124579617, "percentage": 54.39, "elapsed_time": "5:26:47", "remaining_time": "4:34:05"} +{"current_steps": 3721, "total_steps": 6840, "loss": 0.5656375885009766, "lr": 9.376727032375773e-06, "epoch": 1.0880245649948823, "percentage": 54.4, "elapsed_time": "5:26:52", "remaining_time": "4:33:59"} +{"current_steps": 3722, "total_steps": 6840, "loss": 0.5597153902053833, "lr": 9.371901798149124e-06, "epoch": 1.0883170054101476, "percentage": 54.42, "elapsed_time": "5:26:58", "remaining_time": "4:33:54"} +{"current_steps": 3723, "total_steps": 6840, "loss": 0.5946288108825684, "lr": 9.367076710736613e-06, "epoch": 1.088609445825413, "percentage": 54.43, "elapsed_time": "5:27:04", "remaining_time": "4:33:50"} +{"current_steps": 3724, "total_steps": 6840, "loss": 0.5951449871063232, "lr": 9.36225177126607e-06, "epoch": 1.0889018862406785, "percentage": 
54.44, "elapsed_time": "5:27:09", "remaining_time": "4:33:44"} +{"current_steps": 3725, "total_steps": 6840, "loss": 0.5755487680435181, "lr": 9.3574269808653e-06, "epoch": 1.0891943266559438, "percentage": 54.46, "elapsed_time": "5:27:14", "remaining_time": "4:33:39"} +{"current_steps": 3726, "total_steps": 6840, "loss": 0.5118892788887024, "lr": 9.352602340662065e-06, "epoch": 1.0894867670712092, "percentage": 54.47, "elapsed_time": "5:27:19", "remaining_time": "4:33:33"} +{"current_steps": 3727, "total_steps": 6840, "loss": 0.5652351975440979, "lr": 9.347777851784097e-06, "epoch": 1.0897792074864747, "percentage": 54.49, "elapsed_time": "5:27:24", "remaining_time": "4:33:28"} +{"current_steps": 3728, "total_steps": 6840, "loss": 0.624887228012085, "lr": 9.34295351535909e-06, "epoch": 1.09007164790174, "percentage": 54.5, "elapsed_time": "5:27:29", "remaining_time": "4:33:22"} +{"current_steps": 3729, "total_steps": 6840, "loss": 0.534363329410553, "lr": 9.338129332514705e-06, "epoch": 1.0903640883170054, "percentage": 54.52, "elapsed_time": "5:27:33", "remaining_time": "4:33:16"} +{"current_steps": 3730, "total_steps": 6840, "loss": 0.6203521490097046, "lr": 9.333305304378565e-06, "epoch": 1.0906565287322707, "percentage": 54.53, "elapsed_time": "5:27:38", "remaining_time": "4:33:10"} +{"current_steps": 3731, "total_steps": 6840, "loss": 0.64560866355896, "lr": 9.328481432078254e-06, "epoch": 1.0909489691475363, "percentage": 54.55, "elapsed_time": "5:27:43", "remaining_time": "4:33:05"} +{"current_steps": 3732, "total_steps": 6840, "loss": 0.5389514565467834, "lr": 9.323657716741327e-06, "epoch": 1.0912414095628016, "percentage": 54.56, "elapsed_time": "5:27:49", "remaining_time": "4:33:00"} +{"current_steps": 3733, "total_steps": 6840, "loss": 0.5245277881622314, "lr": 9.318834159495295e-06, "epoch": 1.091533849978067, "percentage": 54.58, "elapsed_time": "5:27:54", "remaining_time": "4:32:55"} +{"current_steps": 3734, "total_steps": 6840, "loss": 
0.603967010974884, "lr": 9.314010761467637e-06, "epoch": 1.0918262903933325, "percentage": 54.59, "elapsed_time": "5:27:59", "remaining_time": "4:32:49"} +{"current_steps": 3735, "total_steps": 6840, "loss": 0.5426995754241943, "lr": 9.309187523785794e-06, "epoch": 1.0921187308085978, "percentage": 54.61, "elapsed_time": "5:28:03", "remaining_time": "4:32:43"} +{"current_steps": 3736, "total_steps": 6840, "loss": 0.5400352478027344, "lr": 9.30436444757717e-06, "epoch": 1.092411171223863, "percentage": 54.62, "elapsed_time": "5:28:07", "remaining_time": "4:32:37"} +{"current_steps": 3737, "total_steps": 6840, "loss": 0.5016524195671082, "lr": 9.299541533969121e-06, "epoch": 1.0927036116391284, "percentage": 54.63, "elapsed_time": "5:28:13", "remaining_time": "4:32:32"} +{"current_steps": 3738, "total_steps": 6840, "loss": 0.526217520236969, "lr": 9.294718784088982e-06, "epoch": 1.092996052054394, "percentage": 54.65, "elapsed_time": "5:28:18", "remaining_time": "4:32:26"} +{"current_steps": 3739, "total_steps": 6840, "loss": 0.525063157081604, "lr": 9.289896199064038e-06, "epoch": 1.0932884924696593, "percentage": 54.66, "elapsed_time": "5:28:23", "remaining_time": "4:32:21"} +{"current_steps": 3740, "total_steps": 6840, "loss": 0.3792048692703247, "lr": 9.285073780021541e-06, "epoch": 1.0935809328849246, "percentage": 54.68, "elapsed_time": "5:28:30", "remaining_time": "4:32:17"} +{"current_steps": 3741, "total_steps": 6840, "loss": 0.5326308012008667, "lr": 9.280251528088702e-06, "epoch": 1.0938733733001902, "percentage": 54.69, "elapsed_time": "5:28:34", "remaining_time": "4:32:11"} +{"current_steps": 3742, "total_steps": 6840, "loss": 0.5675199627876282, "lr": 9.275429444392692e-06, "epoch": 1.0941658137154555, "percentage": 54.71, "elapsed_time": "5:28:40", "remaining_time": "4:32:06"} +{"current_steps": 3743, "total_steps": 6840, "loss": 0.6525516510009766, "lr": 9.270607530060643e-06, "epoch": 1.0944582541307208, "percentage": 54.72, "elapsed_time": 
"5:28:45", "remaining_time": "4:32:01"} +{"current_steps": 3744, "total_steps": 6840, "loss": 0.6376343369483948, "lr": 9.265785786219647e-06, "epoch": 1.0947506945459862, "percentage": 54.74, "elapsed_time": "5:28:51", "remaining_time": "4:31:56"} +{"current_steps": 3745, "total_steps": 6840, "loss": 0.6440377235412598, "lr": 9.260964213996763e-06, "epoch": 1.0950431349612517, "percentage": 54.75, "elapsed_time": "5:28:57", "remaining_time": "4:31:52"} +{"current_steps": 3746, "total_steps": 6840, "loss": 0.5971434116363525, "lr": 9.256142814518997e-06, "epoch": 1.095335575376517, "percentage": 54.77, "elapsed_time": "5:29:02", "remaining_time": "4:31:46"} +{"current_steps": 3747, "total_steps": 6840, "loss": 0.5096890330314636, "lr": 9.251321588913331e-06, "epoch": 1.0956280157917824, "percentage": 54.78, "elapsed_time": "5:29:07", "remaining_time": "4:31:40"} +{"current_steps": 3748, "total_steps": 6840, "loss": 0.4303498864173889, "lr": 9.246500538306686e-06, "epoch": 1.0959204562070477, "percentage": 54.8, "elapsed_time": "5:29:11", "remaining_time": "4:31:34"} +{"current_steps": 3749, "total_steps": 6840, "loss": 0.5484192371368408, "lr": 9.241679663825961e-06, "epoch": 1.0962128966223132, "percentage": 54.81, "elapsed_time": "5:29:17", "remaining_time": "4:31:29"} +{"current_steps": 3750, "total_steps": 6840, "loss": 0.6057884693145752, "lr": 9.236858966598004e-06, "epoch": 1.0965053370375786, "percentage": 54.82, "elapsed_time": "5:29:23", "remaining_time": "4:31:24"} +{"current_steps": 3751, "total_steps": 6840, "loss": 0.5261536836624146, "lr": 9.232038447749623e-06, "epoch": 1.096797777452844, "percentage": 54.84, "elapsed_time": "5:29:27", "remaining_time": "4:31:18"} +{"current_steps": 3752, "total_steps": 6840, "loss": 0.470365047454834, "lr": 9.227218108407586e-06, "epoch": 1.0970902178681095, "percentage": 54.85, "elapsed_time": "5:29:31", "remaining_time": "4:31:12"} +{"current_steps": 3753, "total_steps": 6840, "loss": 0.6158323287963867, "lr": 
9.222397949698618e-06, "epoch": 1.0973826582833748, "percentage": 54.87, "elapsed_time": "5:29:38", "remaining_time": "4:31:08"} +{"current_steps": 3754, "total_steps": 6840, "loss": 0.582190990447998, "lr": 9.217577972749401e-06, "epoch": 1.09767509869864, "percentage": 54.88, "elapsed_time": "5:29:43", "remaining_time": "4:31:02"} +{"current_steps": 3755, "total_steps": 6840, "loss": 0.4939305782318115, "lr": 9.212758178686575e-06, "epoch": 1.0979675391139057, "percentage": 54.9, "elapsed_time": "5:29:48", "remaining_time": "4:30:57"} +{"current_steps": 3756, "total_steps": 6840, "loss": 0.576829731464386, "lr": 9.207938568636739e-06, "epoch": 1.098259979529171, "percentage": 54.91, "elapsed_time": "5:29:52", "remaining_time": "4:30:51"} +{"current_steps": 3757, "total_steps": 6840, "loss": 0.581257164478302, "lr": 9.203119143726445e-06, "epoch": 1.0985524199444363, "percentage": 54.93, "elapsed_time": "5:29:59", "remaining_time": "4:30:47"} +{"current_steps": 3758, "total_steps": 6840, "loss": 0.6105127334594727, "lr": 9.19829990508221e-06, "epoch": 1.0988448603597016, "percentage": 54.94, "elapsed_time": "5:30:04", "remaining_time": "4:30:42"} +{"current_steps": 3759, "total_steps": 6840, "loss": 0.5311432480812073, "lr": 9.193480853830495e-06, "epoch": 1.0991373007749672, "percentage": 54.96, "elapsed_time": "5:30:10", "remaining_time": "4:30:37"} +{"current_steps": 3760, "total_steps": 6840, "loss": 0.44334596395492554, "lr": 9.188661991097726e-06, "epoch": 1.0994297411902325, "percentage": 54.97, "elapsed_time": "5:30:15", "remaining_time": "4:30:31"} +{"current_steps": 3761, "total_steps": 6840, "loss": 0.5795773267745972, "lr": 9.183843318010285e-06, "epoch": 1.0997221816054978, "percentage": 54.99, "elapsed_time": "5:30:18", "remaining_time": "4:30:24"} +{"current_steps": 3762, "total_steps": 6840, "loss": 0.619825541973114, "lr": 9.179024835694504e-06, "epoch": 1.1000146220207632, "percentage": 55.0, "elapsed_time": "5:30:24", "remaining_time": 
"4:30:20"} +{"current_steps": 3763, "total_steps": 6840, "loss": 0.633934497833252, "lr": 9.174206545276678e-06, "epoch": 1.1003070624360287, "percentage": 55.01, "elapsed_time": "5:30:29", "remaining_time": "4:30:14"} +{"current_steps": 3764, "total_steps": 6840, "loss": 0.48922473192214966, "lr": 9.169388447883053e-06, "epoch": 1.100599502851294, "percentage": 55.03, "elapsed_time": "5:30:34", "remaining_time": "4:30:09"} +{"current_steps": 3765, "total_steps": 6840, "loss": 0.6125025153160095, "lr": 9.164570544639825e-06, "epoch": 1.1008919432665594, "percentage": 55.04, "elapsed_time": "5:30:38", "remaining_time": "4:30:02"} +{"current_steps": 3766, "total_steps": 6840, "loss": 0.5428078174591064, "lr": 9.159752836673154e-06, "epoch": 1.101184383681825, "percentage": 55.06, "elapsed_time": "5:30:44", "remaining_time": "4:29:58"} +{"current_steps": 3767, "total_steps": 6840, "loss": 0.5848157405853271, "lr": 9.154935325109148e-06, "epoch": 1.1014768240970902, "percentage": 55.07, "elapsed_time": "5:30:50", "remaining_time": "4:29:53"} +{"current_steps": 3768, "total_steps": 6840, "loss": 0.5150102376937866, "lr": 9.150118011073872e-06, "epoch": 1.1017692645123556, "percentage": 55.09, "elapsed_time": "5:30:55", "remaining_time": "4:29:48"} +{"current_steps": 3769, "total_steps": 6840, "loss": 0.6106699705123901, "lr": 9.145300895693344e-06, "epoch": 1.102061704927621, "percentage": 55.1, "elapsed_time": "5:31:01", "remaining_time": "4:29:43"} +{"current_steps": 3770, "total_steps": 6840, "loss": 0.5819482803344727, "lr": 9.140483980093534e-06, "epoch": 1.1023541453428864, "percentage": 55.12, "elapsed_time": "5:31:07", "remaining_time": "4:29:38"} +{"current_steps": 3771, "total_steps": 6840, "loss": 0.6499812602996826, "lr": 9.135667265400369e-06, "epoch": 1.1026465857581518, "percentage": 55.13, "elapsed_time": "5:31:12", "remaining_time": "4:29:32"} +{"current_steps": 3772, "total_steps": 6840, "loss": 0.5375189781188965, "lr": 9.130850752739724e-06, "epoch": 
1.102939026173417, "percentage": 55.15, "elapsed_time": "5:31:16", "remaining_time": "4:29:26"} +{"current_steps": 3773, "total_steps": 6840, "loss": 0.5582318902015686, "lr": 9.12603444323743e-06, "epoch": 1.1032314665886827, "percentage": 55.16, "elapsed_time": "5:31:21", "remaining_time": "4:29:21"} +{"current_steps": 3774, "total_steps": 6840, "loss": 0.5549799203872681, "lr": 9.121218338019273e-06, "epoch": 1.103523907003948, "percentage": 55.18, "elapsed_time": "5:31:26", "remaining_time": "4:29:16"} +{"current_steps": 3775, "total_steps": 6840, "loss": 0.4942197799682617, "lr": 9.116402438210988e-06, "epoch": 1.1038163474192133, "percentage": 55.19, "elapsed_time": "5:31:32", "remaining_time": "4:29:11"} +{"current_steps": 3776, "total_steps": 6840, "loss": 0.5039837956428528, "lr": 9.11158674493826e-06, "epoch": 1.1041087878344786, "percentage": 55.2, "elapsed_time": "5:31:37", "remaining_time": "4:29:05"} +{"current_steps": 3777, "total_steps": 6840, "loss": 0.49781280755996704, "lr": 9.106771259326726e-06, "epoch": 1.1044012282497442, "percentage": 55.22, "elapsed_time": "5:31:43", "remaining_time": "4:29:00"} +{"current_steps": 3778, "total_steps": 6840, "loss": 0.41755813360214233, "lr": 9.101955982501981e-06, "epoch": 1.1046936686650095, "percentage": 55.23, "elapsed_time": "5:31:48", "remaining_time": "4:28:55"} +{"current_steps": 3779, "total_steps": 6840, "loss": 0.5605067014694214, "lr": 9.097140915589564e-06, "epoch": 1.1049861090802748, "percentage": 55.25, "elapsed_time": "5:31:53", "remaining_time": "4:28:50"} +{"current_steps": 3780, "total_steps": 6840, "loss": 0.6291122436523438, "lr": 9.092326059714971e-06, "epoch": 1.1052785494955404, "percentage": 55.26, "elapsed_time": "5:31:58", "remaining_time": "4:28:44"} +{"current_steps": 3781, "total_steps": 6840, "loss": 0.5164260864257812, "lr": 9.087511416003636e-06, "epoch": 1.1055709899108057, "percentage": 55.28, "elapsed_time": "5:32:04", "remaining_time": "4:28:39"} +{"current_steps": 3782, 
"total_steps": 6840, "loss": 0.5002986192703247, "lr": 9.082696985580964e-06, "epoch": 1.105863430326071, "percentage": 55.29, "elapsed_time": "5:32:09", "remaining_time": "4:28:34"} +{"current_steps": 3783, "total_steps": 6840, "loss": 0.5149055123329163, "lr": 9.077882769572295e-06, "epoch": 1.1061558707413364, "percentage": 55.31, "elapsed_time": "5:32:13", "remaining_time": "4:28:28"} +{"current_steps": 3784, "total_steps": 6840, "loss": 0.5375808477401733, "lr": 9.073068769102925e-06, "epoch": 1.106448311156602, "percentage": 55.32, "elapsed_time": "5:32:19", "remaining_time": "4:28:23"} +{"current_steps": 3785, "total_steps": 6840, "loss": 0.5574408173561096, "lr": 9.06825498529809e-06, "epoch": 1.1067407515718672, "percentage": 55.34, "elapsed_time": "5:32:24", "remaining_time": "4:28:17"} +{"current_steps": 3786, "total_steps": 6840, "loss": 0.7410034537315369, "lr": 9.063441419282989e-06, "epoch": 1.1070331919871326, "percentage": 55.35, "elapsed_time": "5:32:29", "remaining_time": "4:28:12"} +{"current_steps": 3787, "total_steps": 6840, "loss": 0.4890757203102112, "lr": 9.058628072182759e-06, "epoch": 1.107325632402398, "percentage": 55.37, "elapsed_time": "5:32:36", "remaining_time": "4:28:08"} +{"current_steps": 3788, "total_steps": 6840, "loss": 0.5012304782867432, "lr": 9.053814945122496e-06, "epoch": 1.1076180728176634, "percentage": 55.38, "elapsed_time": "5:32:40", "remaining_time": "4:28:02"} +{"current_steps": 3789, "total_steps": 6840, "loss": 0.5235648155212402, "lr": 9.049002039227239e-06, "epoch": 1.1079105132329288, "percentage": 55.39, "elapsed_time": "5:32:44", "remaining_time": "4:27:56"} +{"current_steps": 3790, "total_steps": 6840, "loss": 0.44732457399368286, "lr": 9.044189355621969e-06, "epoch": 1.108202953648194, "percentage": 55.41, "elapsed_time": "5:32:48", "remaining_time": "4:27:49"} +{"current_steps": 3791, "total_steps": 6840, "loss": 0.5771712064743042, "lr": 9.039376895431627e-06, "epoch": 1.1084953940634596, "percentage": 
55.42, "elapsed_time": "5:32:54", "remaining_time": "4:27:44"} +{"current_steps": 3792, "total_steps": 6840, "loss": 0.5361784100532532, "lr": 9.034564659781096e-06, "epoch": 1.108787834478725, "percentage": 55.44, "elapsed_time": "5:33:00", "remaining_time": "4:27:40"} +{"current_steps": 3793, "total_steps": 6840, "loss": 0.5305893421173096, "lr": 9.029752649795203e-06, "epoch": 1.1090802748939903, "percentage": 55.45, "elapsed_time": "5:33:05", "remaining_time": "4:27:34"} +{"current_steps": 3794, "total_steps": 6840, "loss": 0.5094715356826782, "lr": 9.02494086659873e-06, "epoch": 1.1093727153092559, "percentage": 55.47, "elapsed_time": "5:33:11", "remaining_time": "4:27:30"} +{"current_steps": 3795, "total_steps": 6840, "loss": 0.5406676530838013, "lr": 9.020129311316405e-06, "epoch": 1.1096651557245212, "percentage": 55.48, "elapsed_time": "5:33:17", "remaining_time": "4:27:25"} +{"current_steps": 3796, "total_steps": 6840, "loss": 0.5170687437057495, "lr": 9.015317985072893e-06, "epoch": 1.1099575961397865, "percentage": 55.5, "elapsed_time": "5:33:23", "remaining_time": "4:27:20"} +{"current_steps": 3797, "total_steps": 6840, "loss": 0.4632429778575897, "lr": 9.010506888992814e-06, "epoch": 1.1102500365550518, "percentage": 55.51, "elapsed_time": "5:33:28", "remaining_time": "4:27:15"} +{"current_steps": 3798, "total_steps": 6840, "loss": 0.5614180564880371, "lr": 9.005696024200734e-06, "epoch": 1.1105424769703174, "percentage": 55.53, "elapsed_time": "5:33:34", "remaining_time": "4:27:10"} +{"current_steps": 3799, "total_steps": 6840, "loss": 0.5660920143127441, "lr": 9.000885391821164e-06, "epoch": 1.1108349173855827, "percentage": 55.54, "elapsed_time": "5:33:39", "remaining_time": "4:27:04"} +{"current_steps": 3800, "total_steps": 6840, "loss": 0.6346436142921448, "lr": 8.996074992978558e-06, "epoch": 1.111127357800848, "percentage": 55.56, "elapsed_time": "5:33:45", "remaining_time": "4:27:00"} +{"current_steps": 3801, "total_steps": 6840, "loss": 
0.4295850396156311, "lr": 8.991264828797319e-06, "epoch": 1.1114197982161134, "percentage": 55.57, "elapsed_time": "5:33:56", "remaining_time": "4:26:59"} +{"current_steps": 3802, "total_steps": 6840, "loss": 0.4797070622444153, "lr": 8.986454900401791e-06, "epoch": 1.111712238631379, "percentage": 55.58, "elapsed_time": "5:34:01", "remaining_time": "4:26:54"} +{"current_steps": 3803, "total_steps": 6840, "loss": 0.4912114143371582, "lr": 8.98164520891627e-06, "epoch": 1.1120046790466442, "percentage": 55.6, "elapsed_time": "5:34:06", "remaining_time": "4:26:49"} +{"current_steps": 3804, "total_steps": 6840, "loss": 0.4156647026538849, "lr": 8.976835755464988e-06, "epoch": 1.1122971194619096, "percentage": 55.61, "elapsed_time": "5:34:12", "remaining_time": "4:26:44"} +{"current_steps": 3805, "total_steps": 6840, "loss": 0.4527992010116577, "lr": 8.97202654117213e-06, "epoch": 1.1125895598771751, "percentage": 55.63, "elapsed_time": "5:34:18", "remaining_time": "4:26:39"} +{"current_steps": 3806, "total_steps": 6840, "loss": 0.5969425439834595, "lr": 8.967217567161817e-06, "epoch": 1.1128820002924404, "percentage": 55.64, "elapsed_time": "5:34:23", "remaining_time": "4:26:34"} +{"current_steps": 3807, "total_steps": 6840, "loss": 0.5867633819580078, "lr": 8.962408834558116e-06, "epoch": 1.1131744407077058, "percentage": 55.66, "elapsed_time": "5:34:28", "remaining_time": "4:26:28"} +{"current_steps": 3808, "total_steps": 6840, "loss": 0.549109697341919, "lr": 8.957600344485042e-06, "epoch": 1.113466881122971, "percentage": 55.67, "elapsed_time": "5:34:33", "remaining_time": "4:26:22"} +{"current_steps": 3809, "total_steps": 6840, "loss": 0.6336593627929688, "lr": 8.952792098066549e-06, "epoch": 1.1137593215382366, "percentage": 55.69, "elapsed_time": "5:34:38", "remaining_time": "4:26:17"} +{"current_steps": 3810, "total_steps": 6840, "loss": 0.5403220653533936, "lr": 8.947984096426537e-06, "epoch": 1.114051761953502, "percentage": 55.7, "elapsed_time": "5:34:44", 
"remaining_time": "4:26:12"} +{"current_steps": 3811, "total_steps": 6840, "loss": 0.37941914796829224, "lr": 8.943176340688846e-06, "epoch": 1.1143442023687673, "percentage": 55.72, "elapsed_time": "5:34:50", "remaining_time": "4:26:07"} +{"current_steps": 3812, "total_steps": 6840, "loss": 0.5509335994720459, "lr": 8.938368831977262e-06, "epoch": 1.1146366427840328, "percentage": 55.73, "elapsed_time": "5:34:56", "remaining_time": "4:26:03"} +{"current_steps": 3813, "total_steps": 6840, "loss": 0.5798860788345337, "lr": 8.933561571415506e-06, "epoch": 1.1149290831992982, "percentage": 55.75, "elapsed_time": "5:35:01", "remaining_time": "4:25:57"} +{"current_steps": 3814, "total_steps": 6840, "loss": 0.5549412965774536, "lr": 8.92875456012725e-06, "epoch": 1.1152215236145635, "percentage": 55.76, "elapsed_time": "5:35:06", "remaining_time": "4:25:52"} +{"current_steps": 3815, "total_steps": 6840, "loss": 0.4707058072090149, "lr": 8.9239477992361e-06, "epoch": 1.1155139640298288, "percentage": 55.77, "elapsed_time": "5:35:11", "remaining_time": "4:25:46"} +{"current_steps": 3816, "total_steps": 6840, "loss": 0.4717002511024475, "lr": 8.919141289865611e-06, "epoch": 1.1158064044450944, "percentage": 55.79, "elapsed_time": "5:35:17", "remaining_time": "4:25:42"} +{"current_steps": 3817, "total_steps": 6840, "loss": 0.48403650522232056, "lr": 8.914335033139274e-06, "epoch": 1.1160988448603597, "percentage": 55.8, "elapsed_time": "5:35:23", "remaining_time": "4:25:37"} +{"current_steps": 3818, "total_steps": 6840, "loss": 0.48592090606689453, "lr": 8.909529030180522e-06, "epoch": 1.116391285275625, "percentage": 55.82, "elapsed_time": "5:35:29", "remaining_time": "4:25:32"} +{"current_steps": 3819, "total_steps": 6840, "loss": 0.5052220225334167, "lr": 8.904723282112728e-06, "epoch": 1.1166837256908906, "percentage": 55.83, "elapsed_time": "5:35:34", "remaining_time": "4:25:27"} +{"current_steps": 3820, "total_steps": 6840, "loss": 0.7858535051345825, "lr": 
8.899917790059208e-06, "epoch": 1.116976166106156, "percentage": 55.85, "elapsed_time": "5:35:39", "remaining_time": "4:25:21"} +{"current_steps": 3821, "total_steps": 6840, "loss": 0.6768159866333008, "lr": 8.895112555143217e-06, "epoch": 1.1172686065214212, "percentage": 55.86, "elapsed_time": "5:35:44", "remaining_time": "4:25:16"} +{"current_steps": 3822, "total_steps": 6840, "loss": 0.5661243200302124, "lr": 8.890307578487947e-06, "epoch": 1.1175610469366866, "percentage": 55.88, "elapsed_time": "5:35:49", "remaining_time": "4:25:10"} +{"current_steps": 3823, "total_steps": 6840, "loss": 0.5129438638687134, "lr": 8.885502861216535e-06, "epoch": 1.1178534873519521, "percentage": 55.89, "elapsed_time": "5:35:54", "remaining_time": "4:25:05"} +{"current_steps": 3824, "total_steps": 6840, "loss": 0.4813467264175415, "lr": 8.880698404452051e-06, "epoch": 1.1181459277672174, "percentage": 55.91, "elapsed_time": "5:35:58", "remaining_time": "4:24:59"} +{"current_steps": 3825, "total_steps": 6840, "loss": 0.5165577530860901, "lr": 8.87589420931751e-06, "epoch": 1.1184383681824828, "percentage": 55.92, "elapsed_time": "5:36:03", "remaining_time": "4:24:53"} +{"current_steps": 3826, "total_steps": 6840, "loss": 0.47335073351860046, "lr": 8.871090276935863e-06, "epoch": 1.118730808597748, "percentage": 55.94, "elapsed_time": "5:36:08", "remaining_time": "4:24:48"} +{"current_steps": 3827, "total_steps": 6840, "loss": 0.4902348518371582, "lr": 8.86628660843e-06, "epoch": 1.1190232490130136, "percentage": 55.95, "elapsed_time": "5:36:13", "remaining_time": "4:24:42"} +{"current_steps": 3828, "total_steps": 6840, "loss": 0.5933388471603394, "lr": 8.861483204922752e-06, "epoch": 1.119315689428279, "percentage": 55.96, "elapsed_time": "5:36:18", "remaining_time": "4:24:37"} +{"current_steps": 3829, "total_steps": 6840, "loss": 0.4898201823234558, "lr": 8.85668006753688e-06, "epoch": 1.1196081298435443, "percentage": 55.98, "elapsed_time": "5:36:22", "remaining_time": 
"4:24:31"} +{"current_steps": 3830, "total_steps": 6840, "loss": 0.4745003879070282, "lr": 8.851877197395088e-06, "epoch": 1.1199005702588098, "percentage": 55.99, "elapsed_time": "5:36:29", "remaining_time": "4:24:26"} +{"current_steps": 3831, "total_steps": 6840, "loss": 0.5246972441673279, "lr": 8.847074595620024e-06, "epoch": 1.1201930106740752, "percentage": 56.01, "elapsed_time": "5:36:33", "remaining_time": "4:24:20"} +{"current_steps": 3832, "total_steps": 6840, "loss": 0.5196787714958191, "lr": 8.842272263334263e-06, "epoch": 1.1204854510893405, "percentage": 56.02, "elapsed_time": "5:36:39", "remaining_time": "4:24:15"} +{"current_steps": 3833, "total_steps": 6840, "loss": 0.6721034049987793, "lr": 8.83747020166032e-06, "epoch": 1.120777891504606, "percentage": 56.04, "elapsed_time": "5:36:43", "remaining_time": "4:24:10"} +{"current_steps": 3834, "total_steps": 6840, "loss": 0.5654234886169434, "lr": 8.832668411720652e-06, "epoch": 1.1210703319198714, "percentage": 56.05, "elapsed_time": "5:36:49", "remaining_time": "4:24:04"} +{"current_steps": 3835, "total_steps": 6840, "loss": 0.7520767450332642, "lr": 8.827866894637642e-06, "epoch": 1.1213627723351367, "percentage": 56.07, "elapsed_time": "5:36:53", "remaining_time": "4:23:58"} +{"current_steps": 3836, "total_steps": 6840, "loss": 0.43645960092544556, "lr": 8.82306565153362e-06, "epoch": 1.121655212750402, "percentage": 56.08, "elapsed_time": "5:36:58", "remaining_time": "4:23:53"} +{"current_steps": 3837, "total_steps": 6840, "loss": 0.5802274942398071, "lr": 8.818264683530845e-06, "epoch": 1.1219476531656676, "percentage": 56.1, "elapsed_time": "5:37:04", "remaining_time": "4:23:48"} +{"current_steps": 3838, "total_steps": 6840, "loss": 0.5593410134315491, "lr": 8.813463991751516e-06, "epoch": 1.122240093580933, "percentage": 56.11, "elapsed_time": "5:37:10", "remaining_time": "4:23:43"} +{"current_steps": 3839, "total_steps": 6840, "loss": 0.6126681566238403, "lr": 8.808663577317765e-06, "epoch": 
1.1225325339961982, "percentage": 56.13, "elapsed_time": "5:37:14", "remaining_time": "4:23:37"} +{"current_steps": 3840, "total_steps": 6840, "loss": 0.6245180368423462, "lr": 8.80386344135166e-06, "epoch": 1.1228249744114636, "percentage": 56.14, "elapsed_time": "5:37:18", "remaining_time": "4:23:31"} +{"current_steps": 3841, "total_steps": 6840, "loss": 0.6611473560333252, "lr": 8.799063584975201e-06, "epoch": 1.123117414826729, "percentage": 56.15, "elapsed_time": "5:37:25", "remaining_time": "4:23:27"} +{"current_steps": 3842, "total_steps": 6840, "loss": 0.40020978450775146, "lr": 8.79426400931033e-06, "epoch": 1.1234098552419944, "percentage": 56.17, "elapsed_time": "5:37:30", "remaining_time": "4:23:22"} +{"current_steps": 3843, "total_steps": 6840, "loss": 0.4965318441390991, "lr": 8.789464715478913e-06, "epoch": 1.1237022956572598, "percentage": 56.18, "elapsed_time": "5:37:37", "remaining_time": "4:23:17"} +{"current_steps": 3844, "total_steps": 6840, "loss": 0.4838374853134155, "lr": 8.784665704602758e-06, "epoch": 1.1239947360725253, "percentage": 56.2, "elapsed_time": "5:37:43", "remaining_time": "4:23:13"} +{"current_steps": 3845, "total_steps": 6840, "loss": 0.5756508708000183, "lr": 8.77986697780361e-06, "epoch": 1.1242871764877906, "percentage": 56.21, "elapsed_time": "5:37:48", "remaining_time": "4:23:07"} +{"current_steps": 3846, "total_steps": 6840, "loss": 0.5341511964797974, "lr": 8.775068536203132e-06, "epoch": 1.124579616903056, "percentage": 56.23, "elapsed_time": "5:37:54", "remaining_time": "4:23:02"} +{"current_steps": 3847, "total_steps": 6840, "loss": 0.6239134073257446, "lr": 8.77027038092294e-06, "epoch": 1.1248720573183213, "percentage": 56.24, "elapsed_time": "5:37:58", "remaining_time": "4:22:56"} +{"current_steps": 3848, "total_steps": 6840, "loss": 0.5642406940460205, "lr": 8.765472513084566e-06, "epoch": 1.1251644977335868, "percentage": 56.26, "elapsed_time": "5:38:02", "remaining_time": "4:22:50"} +{"current_steps": 3849, 
"total_steps": 6840, "loss": 0.5242771506309509, "lr": 8.760674933809488e-06, "epoch": 1.1254569381488522, "percentage": 56.27, "elapsed_time": "5:38:07", "remaining_time": "4:22:45"} +{"current_steps": 3850, "total_steps": 6840, "loss": 0.5205737352371216, "lr": 8.755877644219108e-06, "epoch": 1.1257493785641175, "percentage": 56.29, "elapsed_time": "5:38:11", "remaining_time": "4:22:39"} +{"current_steps": 3851, "total_steps": 6840, "loss": 0.5005168318748474, "lr": 8.751080645434768e-06, "epoch": 1.126041818979383, "percentage": 56.3, "elapsed_time": "5:38:15", "remaining_time": "4:22:32"} +{"current_steps": 3852, "total_steps": 6840, "loss": 0.44978275895118713, "lr": 8.74628393857773e-06, "epoch": 1.1263342593946484, "percentage": 56.32, "elapsed_time": "5:38:20", "remaining_time": "4:22:26"} +{"current_steps": 3853, "total_steps": 6840, "loss": 0.43631571531295776, "lr": 8.741487524769198e-06, "epoch": 1.1266266998099137, "percentage": 56.33, "elapsed_time": "5:38:25", "remaining_time": "4:22:21"} +{"current_steps": 3854, "total_steps": 6840, "loss": 0.4196016788482666, "lr": 8.736691405130306e-06, "epoch": 1.126919140225179, "percentage": 56.35, "elapsed_time": "5:38:31", "remaining_time": "4:22:16"} +{"current_steps": 3855, "total_steps": 6840, "loss": 0.6389856338500977, "lr": 8.731895580782118e-06, "epoch": 1.1272115806404446, "percentage": 56.36, "elapsed_time": "5:38:36", "remaining_time": "4:22:11"} +{"current_steps": 3856, "total_steps": 6840, "loss": 0.5465584993362427, "lr": 8.72710005284563e-06, "epoch": 1.12750402105571, "percentage": 56.37, "elapsed_time": "5:38:40", "remaining_time": "4:22:04"} +{"current_steps": 3857, "total_steps": 6840, "loss": 0.5513765811920166, "lr": 8.722304822441757e-06, "epoch": 1.1277964614709752, "percentage": 56.39, "elapsed_time": "5:38:46", "remaining_time": "4:22:00"} +{"current_steps": 3858, "total_steps": 6840, "loss": 0.6984349489212036, "lr": 8.717509890691369e-06, "epoch": 1.1280889018862408, "percentage": 
56.4, "elapsed_time": "5:38:52", "remaining_time": "4:21:55"} +{"current_steps": 3859, "total_steps": 6840, "loss": 0.5311027765274048, "lr": 8.712715258715248e-06, "epoch": 1.128381342301506, "percentage": 56.42, "elapsed_time": "5:38:57", "remaining_time": "4:21:50"} +{"current_steps": 3860, "total_steps": 6840, "loss": 0.4598672091960907, "lr": 8.707920927634105e-06, "epoch": 1.1286737827167714, "percentage": 56.43, "elapsed_time": "5:39:03", "remaining_time": "4:21:45"} +{"current_steps": 3861, "total_steps": 6840, "loss": 0.6177612543106079, "lr": 8.703126898568591e-06, "epoch": 1.1289662231320368, "percentage": 56.45, "elapsed_time": "5:39:08", "remaining_time": "4:21:39"} +{"current_steps": 3862, "total_steps": 6840, "loss": 0.6442389488220215, "lr": 8.69833317263928e-06, "epoch": 1.1292586635473023, "percentage": 56.46, "elapsed_time": "5:39:13", "remaining_time": "4:21:34"} +{"current_steps": 3863, "total_steps": 6840, "loss": 0.5925737023353577, "lr": 8.693539750966672e-06, "epoch": 1.1295511039625676, "percentage": 56.48, "elapsed_time": "5:39:18", "remaining_time": "4:21:29"} +{"current_steps": 3864, "total_steps": 6840, "loss": 0.46009114384651184, "lr": 8.688746634671207e-06, "epoch": 1.129843544377833, "percentage": 56.49, "elapsed_time": "5:39:23", "remaining_time": "4:21:23"} +{"current_steps": 3865, "total_steps": 6840, "loss": 0.5438460111618042, "lr": 8.683953824873246e-06, "epoch": 1.1301359847930983, "percentage": 56.51, "elapsed_time": "5:39:28", "remaining_time": "4:21:18"} +{"current_steps": 3866, "total_steps": 6840, "loss": 0.5355101823806763, "lr": 8.679161322693073e-06, "epoch": 1.1304284252083638, "percentage": 56.52, "elapsed_time": "5:39:34", "remaining_time": "4:21:13"} +{"current_steps": 3867, "total_steps": 6840, "loss": 0.4494459629058838, "lr": 8.67436912925091e-06, "epoch": 1.1307208656236292, "percentage": 56.54, "elapsed_time": "5:39:38", "remaining_time": "4:21:07"} +{"current_steps": 3868, "total_steps": 6840, "loss": 
0.5828550457954407, "lr": 8.669577245666905e-06, "epoch": 1.1310133060388945, "percentage": 56.55, "elapsed_time": "5:39:43", "remaining_time": "4:21:02"} +{"current_steps": 3869, "total_steps": 6840, "loss": 0.4956590235233307, "lr": 8.664785673061127e-06, "epoch": 1.13130574645416, "percentage": 56.56, "elapsed_time": "5:39:49", "remaining_time": "4:20:57"} +{"current_steps": 3870, "total_steps": 6840, "loss": 0.5447779893875122, "lr": 8.659994412553582e-06, "epoch": 1.1315981868694254, "percentage": 56.58, "elapsed_time": "5:39:55", "remaining_time": "4:20:52"} +{"current_steps": 3871, "total_steps": 6840, "loss": 0.6275361776351929, "lr": 8.655203465264196e-06, "epoch": 1.1318906272846907, "percentage": 56.59, "elapsed_time": "5:39:59", "remaining_time": "4:20:46"} +{"current_steps": 3872, "total_steps": 6840, "loss": 0.47899991273880005, "lr": 8.650412832312823e-06, "epoch": 1.1321830676999562, "percentage": 56.61, "elapsed_time": "5:40:04", "remaining_time": "4:20:40"} +{"current_steps": 3873, "total_steps": 6840, "loss": 0.3356127142906189, "lr": 8.645622514819243e-06, "epoch": 1.1324755081152216, "percentage": 56.62, "elapsed_time": "5:40:10", "remaining_time": "4:20:35"} +{"current_steps": 3874, "total_steps": 6840, "loss": 0.48855727910995483, "lr": 8.640832513903168e-06, "epoch": 1.132767948530487, "percentage": 56.64, "elapsed_time": "5:40:15", "remaining_time": "4:20:30"} +{"current_steps": 3875, "total_steps": 6840, "loss": 0.46642380952835083, "lr": 8.636042830684227e-06, "epoch": 1.1330603889457522, "percentage": 56.65, "elapsed_time": "5:40:21", "remaining_time": "4:20:25"} +{"current_steps": 3876, "total_steps": 6840, "loss": 0.6179598569869995, "lr": 8.631253466281984e-06, "epoch": 1.1333528293610178, "percentage": 56.67, "elapsed_time": "5:40:26", "remaining_time": "4:20:20"} +{"current_steps": 3877, "total_steps": 6840, "loss": 0.6361704468727112, "lr": 8.626464421815919e-06, "epoch": 1.133645269776283, "percentage": 56.68, "elapsed_time": 
"5:40:31", "remaining_time": "4:20:14"} +{"current_steps": 3878, "total_steps": 6840, "loss": 0.6243701577186584, "lr": 8.621675698405446e-06, "epoch": 1.1339377101915484, "percentage": 56.7, "elapsed_time": "5:40:37", "remaining_time": "4:20:09"} +{"current_steps": 3879, "total_steps": 6840, "loss": 0.5402215123176575, "lr": 8.616887297169895e-06, "epoch": 1.1342301506068138, "percentage": 56.71, "elapsed_time": "5:40:42", "remaining_time": "4:20:04"} +{"current_steps": 3880, "total_steps": 6840, "loss": 0.6050009727478027, "lr": 8.61209921922853e-06, "epoch": 1.1345225910220793, "percentage": 56.73, "elapsed_time": "5:40:47", "remaining_time": "4:19:59"} +{"current_steps": 3881, "total_steps": 6840, "loss": 0.5705801248550415, "lr": 8.607311465700534e-06, "epoch": 1.1348150314373446, "percentage": 56.74, "elapsed_time": "5:40:53", "remaining_time": "4:19:54"} +{"current_steps": 3882, "total_steps": 6840, "loss": 0.5467248558998108, "lr": 8.602524037705018e-06, "epoch": 1.13510747185261, "percentage": 56.75, "elapsed_time": "5:40:59", "remaining_time": "4:19:49"} +{"current_steps": 3883, "total_steps": 6840, "loss": 0.5903012752532959, "lr": 8.597736936361007e-06, "epoch": 1.1353999122678755, "percentage": 56.77, "elapsed_time": "5:41:04", "remaining_time": "4:19:44"} +{"current_steps": 3884, "total_steps": 6840, "loss": 0.6034090518951416, "lr": 8.592950162787463e-06, "epoch": 1.1356923526831408, "percentage": 56.78, "elapsed_time": "5:41:09", "remaining_time": "4:19:38"} +{"current_steps": 3885, "total_steps": 6840, "loss": 0.4282987117767334, "lr": 8.588163718103264e-06, "epoch": 1.1359847930984062, "percentage": 56.8, "elapsed_time": "5:41:15", "remaining_time": "4:19:34"} +{"current_steps": 3886, "total_steps": 6840, "loss": 0.47374194860458374, "lr": 8.583377603427212e-06, "epoch": 1.1362772335136717, "percentage": 56.81, "elapsed_time": "5:41:20", "remaining_time": "4:19:28"} +{"current_steps": 3887, "total_steps": 6840, "loss": 0.43954724073410034, "lr": 
8.578591819878033e-06, "epoch": 1.136569673928937, "percentage": 56.83, "elapsed_time": "5:41:26", "remaining_time": "4:19:23"} +{"current_steps": 3888, "total_steps": 6840, "loss": 0.4731065034866333, "lr": 8.573806368574372e-06, "epoch": 1.1368621143442024, "percentage": 56.84, "elapsed_time": "5:41:32", "remaining_time": "4:19:19"} +{"current_steps": 3889, "total_steps": 6840, "loss": 0.5241256356239319, "lr": 8.5690212506348e-06, "epoch": 1.1371545547594677, "percentage": 56.86, "elapsed_time": "5:41:38", "remaining_time": "4:19:14"} +{"current_steps": 3890, "total_steps": 6840, "loss": 0.5823307037353516, "lr": 8.56423646717781e-06, "epoch": 1.1374469951747332, "percentage": 56.87, "elapsed_time": "5:41:43", "remaining_time": "4:19:09"} +{"current_steps": 3891, "total_steps": 6840, "loss": 0.5360631346702576, "lr": 8.55945201932182e-06, "epoch": 1.1377394355899986, "percentage": 56.89, "elapsed_time": "5:41:48", "remaining_time": "4:19:03"} +{"current_steps": 3892, "total_steps": 6840, "loss": 0.5227797627449036, "lr": 8.554667908185158e-06, "epoch": 1.138031876005264, "percentage": 56.9, "elapsed_time": "5:41:52", "remaining_time": "4:18:57"} +{"current_steps": 3893, "total_steps": 6840, "loss": 0.6232806444168091, "lr": 8.549884134886089e-06, "epoch": 1.1383243164205292, "percentage": 56.92, "elapsed_time": "5:41:58", "remaining_time": "4:18:52"} +{"current_steps": 3894, "total_steps": 6840, "loss": 0.6697877049446106, "lr": 8.545100700542782e-06, "epoch": 1.1386167568357948, "percentage": 56.93, "elapsed_time": "5:42:04", "remaining_time": "4:18:47"} +{"current_steps": 3895, "total_steps": 6840, "loss": 0.6348206400871277, "lr": 8.540317606273343e-06, "epoch": 1.13890919725106, "percentage": 56.94, "elapsed_time": "5:42:09", "remaining_time": "4:18:42"} +{"current_steps": 3896, "total_steps": 6840, "loss": 0.5578476190567017, "lr": 8.535534853195786e-06, "epoch": 1.1392016376663254, "percentage": 56.96, "elapsed_time": "5:42:14", "remaining_time": 
"4:18:37"} +{"current_steps": 3897, "total_steps": 6840, "loss": 0.6439946889877319, "lr": 8.530752442428055e-06, "epoch": 1.139494078081591, "percentage": 56.97, "elapsed_time": "5:42:19", "remaining_time": "4:18:31"} +{"current_steps": 3898, "total_steps": 6840, "loss": 0.5292261242866516, "lr": 8.525970375088006e-06, "epoch": 1.1397865184968563, "percentage": 56.99, "elapsed_time": "5:42:24", "remaining_time": "4:18:25"} +{"current_steps": 3899, "total_steps": 6840, "loss": 0.5836480855941772, "lr": 8.521188652293421e-06, "epoch": 1.1400789589121216, "percentage": 57.0, "elapsed_time": "5:42:29", "remaining_time": "4:18:20"} +{"current_steps": 3900, "total_steps": 6840, "loss": 0.5166354775428772, "lr": 8.516407275161998e-06, "epoch": 1.140371399327387, "percentage": 57.02, "elapsed_time": "5:42:35", "remaining_time": "4:18:15"} +{"current_steps": 3901, "total_steps": 6840, "loss": 0.5236127972602844, "lr": 8.511626244811352e-06, "epoch": 1.1406638397426525, "percentage": 57.03, "elapsed_time": "5:42:45", "remaining_time": "4:18:13"} +{"current_steps": 3902, "total_steps": 6840, "loss": 0.4900703430175781, "lr": 8.506845562359022e-06, "epoch": 1.1409562801579178, "percentage": 57.05, "elapsed_time": "5:42:51", "remaining_time": "4:18:09"} +{"current_steps": 3903, "total_steps": 6840, "loss": 0.5200212001800537, "lr": 8.502065228922464e-06, "epoch": 1.1412487205731832, "percentage": 57.06, "elapsed_time": "5:42:55", "remaining_time": "4:18:03"} +{"current_steps": 3904, "total_steps": 6840, "loss": 0.5553300976753235, "lr": 8.497285245619053e-06, "epoch": 1.1415411609884485, "percentage": 57.08, "elapsed_time": "5:42:59", "remaining_time": "4:17:57"} +{"current_steps": 3905, "total_steps": 6840, "loss": 0.5650131702423096, "lr": 8.492505613566075e-06, "epoch": 1.141833601403714, "percentage": 57.09, "elapsed_time": "5:43:06", "remaining_time": "4:17:52"} +{"current_steps": 3906, "total_steps": 6840, "loss": 0.4732077121734619, "lr": 8.487726333880746e-06, "epoch": 
1.1421260418189794, "percentage": 57.11, "elapsed_time": "5:43:11", "remaining_time": "4:17:47"} +{"current_steps": 3907, "total_steps": 6840, "loss": 0.46741920709609985, "lr": 8.482947407680193e-06, "epoch": 1.1424184822342447, "percentage": 57.12, "elapsed_time": "5:43:16", "remaining_time": "4:17:41"} +{"current_steps": 3908, "total_steps": 6840, "loss": 0.606191873550415, "lr": 8.478168836081457e-06, "epoch": 1.1427109226495102, "percentage": 57.13, "elapsed_time": "5:43:21", "remaining_time": "4:17:36"} +{"current_steps": 3909, "total_steps": 6840, "loss": 0.4373897314071655, "lr": 8.473390620201505e-06, "epoch": 1.1430033630647756, "percentage": 57.15, "elapsed_time": "5:43:27", "remaining_time": "4:17:31"} +{"current_steps": 3910, "total_steps": 6840, "loss": 0.5460623502731323, "lr": 8.468612761157215e-06, "epoch": 1.143295803480041, "percentage": 57.16, "elapsed_time": "5:43:33", "remaining_time": "4:17:27"} +{"current_steps": 3911, "total_steps": 6840, "loss": 0.4939531087875366, "lr": 8.463835260065379e-06, "epoch": 1.1435882438953064, "percentage": 57.18, "elapsed_time": "5:43:38", "remaining_time": "4:17:21"} +{"current_steps": 3912, "total_steps": 6840, "loss": 0.544964611530304, "lr": 8.459058118042708e-06, "epoch": 1.1438806843105718, "percentage": 57.19, "elapsed_time": "5:43:44", "remaining_time": "4:17:16"} +{"current_steps": 3913, "total_steps": 6840, "loss": 0.6118921041488647, "lr": 8.454281336205836e-06, "epoch": 1.144173124725837, "percentage": 57.21, "elapsed_time": "5:43:50", "remaining_time": "4:17:12"} +{"current_steps": 3914, "total_steps": 6840, "loss": 0.561060905456543, "lr": 8.449504915671304e-06, "epoch": 1.1444655651411024, "percentage": 57.22, "elapsed_time": "5:43:56", "remaining_time": "4:17:07"} +{"current_steps": 3915, "total_steps": 6840, "loss": 0.430827796459198, "lr": 8.444728857555572e-06, "epoch": 1.144758005556368, "percentage": 57.24, "elapsed_time": "5:44:02", "remaining_time": "4:17:02"} +{"current_steps": 3916, 
"total_steps": 6840, "loss": 0.5482884645462036, "lr": 8.439953162975011e-06, "epoch": 1.1450504459716333, "percentage": 57.25, "elapsed_time": "5:44:06", "remaining_time": "4:16:56"} +{"current_steps": 3917, "total_steps": 6840, "loss": 0.6614879965782166, "lr": 8.435177833045911e-06, "epoch": 1.1453428863868986, "percentage": 57.27, "elapsed_time": "5:44:12", "remaining_time": "4:16:51"} +{"current_steps": 3918, "total_steps": 6840, "loss": 0.6290509104728699, "lr": 8.430402868884482e-06, "epoch": 1.145635326802164, "percentage": 57.28, "elapsed_time": "5:44:16", "remaining_time": "4:16:45"} +{"current_steps": 3919, "total_steps": 6840, "loss": 0.404970645904541, "lr": 8.425628271606836e-06, "epoch": 1.1459277672174295, "percentage": 57.3, "elapsed_time": "5:44:21", "remaining_time": "4:16:40"} +{"current_steps": 3920, "total_steps": 6840, "loss": 0.4902762174606323, "lr": 8.420854042329011e-06, "epoch": 1.1462202076326948, "percentage": 57.31, "elapsed_time": "5:44:27", "remaining_time": "4:16:35"} +{"current_steps": 3921, "total_steps": 6840, "loss": 0.5757346153259277, "lr": 8.416080182166955e-06, "epoch": 1.1465126480479602, "percentage": 57.32, "elapsed_time": "5:44:33", "remaining_time": "4:16:30"} +{"current_steps": 3922, "total_steps": 6840, "loss": 0.5453485250473022, "lr": 8.41130669223652e-06, "epoch": 1.1468050884632257, "percentage": 57.34, "elapsed_time": "5:44:38", "remaining_time": "4:16:24"} +{"current_steps": 3923, "total_steps": 6840, "loss": 0.5660290122032166, "lr": 8.40653357365349e-06, "epoch": 1.147097528878491, "percentage": 57.35, "elapsed_time": "5:44:43", "remaining_time": "4:16:19"} +{"current_steps": 3924, "total_steps": 6840, "loss": 0.46013498306274414, "lr": 8.40176082753355e-06, "epoch": 1.1473899692937564, "percentage": 57.37, "elapsed_time": "5:44:47", "remaining_time": "4:16:13"} +{"current_steps": 3925, "total_steps": 6840, "loss": 0.5183000564575195, "lr": 8.396988454992296e-06, "epoch": 1.147682409709022, "percentage": 
57.38, "elapsed_time": "5:44:54", "remaining_time": "4:16:08"} +{"current_steps": 3926, "total_steps": 6840, "loss": 0.5407284498214722, "lr": 8.392216457145246e-06, "epoch": 1.1479748501242872, "percentage": 57.4, "elapsed_time": "5:44:58", "remaining_time": "4:16:03"} +{"current_steps": 3927, "total_steps": 6840, "loss": 0.5960655808448792, "lr": 8.387444835107824e-06, "epoch": 1.1482672905395526, "percentage": 57.41, "elapsed_time": "5:45:03", "remaining_time": "4:15:57"} +{"current_steps": 3928, "total_steps": 6840, "loss": 0.4363316297531128, "lr": 8.382673589995365e-06, "epoch": 1.148559730954818, "percentage": 57.43, "elapsed_time": "5:45:10", "remaining_time": "4:15:53"} +{"current_steps": 3929, "total_steps": 6840, "loss": 0.5143908262252808, "lr": 8.377902722923122e-06, "epoch": 1.1488521713700834, "percentage": 57.44, "elapsed_time": "5:45:16", "remaining_time": "4:15:49"} +{"current_steps": 3930, "total_steps": 6840, "loss": 0.6016460657119751, "lr": 8.373132235006254e-06, "epoch": 1.1491446117853488, "percentage": 57.46, "elapsed_time": "5:45:21", "remaining_time": "4:15:43"} +{"current_steps": 3931, "total_steps": 6840, "loss": 0.5120511651039124, "lr": 8.368362127359835e-06, "epoch": 1.149437052200614, "percentage": 57.47, "elapsed_time": "5:45:27", "remaining_time": "4:15:38"} +{"current_steps": 3932, "total_steps": 6840, "loss": 0.49658435583114624, "lr": 8.363592401098853e-06, "epoch": 1.1497294926158794, "percentage": 57.49, "elapsed_time": "5:45:32", "remaining_time": "4:15:33"} +{"current_steps": 3933, "total_steps": 6840, "loss": 0.584032416343689, "lr": 8.358823057338188e-06, "epoch": 1.150021933031145, "percentage": 57.5, "elapsed_time": "5:45:36", "remaining_time": "4:15:26"} +{"current_steps": 3934, "total_steps": 6840, "loss": 0.4673706293106079, "lr": 8.35405409719266e-06, "epoch": 1.1503143734464103, "percentage": 57.51, "elapsed_time": "5:45:42", "remaining_time": "4:15:21"} +{"current_steps": 3935, "total_steps": 6840, "loss": 
0.633565366268158, "lr": 8.349285521776982e-06, "epoch": 1.1506068138616756, "percentage": 57.53, "elapsed_time": "5:45:46", "remaining_time": "4:15:15"} +{"current_steps": 3936, "total_steps": 6840, "loss": 0.6029015779495239, "lr": 8.344517332205774e-06, "epoch": 1.1508992542769412, "percentage": 57.54, "elapsed_time": "5:45:50", "remaining_time": "4:15:10"} +{"current_steps": 3937, "total_steps": 6840, "loss": 0.45594489574432373, "lr": 8.339749529593574e-06, "epoch": 1.1511916946922065, "percentage": 57.56, "elapsed_time": "5:45:55", "remaining_time": "4:15:04"} +{"current_steps": 3938, "total_steps": 6840, "loss": 0.4413541257381439, "lr": 8.334982115054828e-06, "epoch": 1.1514841351074718, "percentage": 57.57, "elapsed_time": "5:46:00", "remaining_time": "4:14:58"} +{"current_steps": 3939, "total_steps": 6840, "loss": 0.5674389004707336, "lr": 8.330215089703887e-06, "epoch": 1.1517765755227372, "percentage": 57.59, "elapsed_time": "5:46:05", "remaining_time": "4:14:53"} +{"current_steps": 3940, "total_steps": 6840, "loss": 0.43449294567108154, "lr": 8.325448454655019e-06, "epoch": 1.1520690159380027, "percentage": 57.6, "elapsed_time": "5:46:09", "remaining_time": "4:14:47"} +{"current_steps": 3941, "total_steps": 6840, "loss": 0.5190714597702026, "lr": 8.320682211022393e-06, "epoch": 1.152361456353268, "percentage": 57.62, "elapsed_time": "5:46:14", "remaining_time": "4:14:41"} +{"current_steps": 3942, "total_steps": 6840, "loss": 0.56162428855896, "lr": 8.31591635992009e-06, "epoch": 1.1526538967685334, "percentage": 57.63, "elapsed_time": "5:46:20", "remaining_time": "4:14:36"} +{"current_steps": 3943, "total_steps": 6840, "loss": 0.5588958263397217, "lr": 8.311150902462096e-06, "epoch": 1.1529463371837987, "percentage": 57.65, "elapsed_time": "5:46:25", "remaining_time": "4:14:31"} +{"current_steps": 3944, "total_steps": 6840, "loss": 0.5438264608383179, "lr": 8.306385839762312e-06, "epoch": 1.1532387775990642, "percentage": 57.66, "elapsed_time": 
"5:46:30", "remaining_time": "4:14:26"} +{"current_steps": 3945, "total_steps": 6840, "loss": 0.5860258340835571, "lr": 8.30162117293454e-06, "epoch": 1.1535312180143296, "percentage": 57.68, "elapsed_time": "5:46:36", "remaining_time": "4:14:21"} +{"current_steps": 3946, "total_steps": 6840, "loss": 0.4742947220802307, "lr": 8.296856903092494e-06, "epoch": 1.153823658429595, "percentage": 57.69, "elapsed_time": "5:46:41", "remaining_time": "4:14:15"} +{"current_steps": 3947, "total_steps": 6840, "loss": 0.47963109612464905, "lr": 8.292093031349791e-06, "epoch": 1.1541160988448604, "percentage": 57.7, "elapsed_time": "5:46:47", "remaining_time": "4:14:11"} +{"current_steps": 3948, "total_steps": 6840, "loss": 0.5404704213142395, "lr": 8.287329558819957e-06, "epoch": 1.1544085392601258, "percentage": 57.72, "elapsed_time": "5:46:54", "remaining_time": "4:14:06"} +{"current_steps": 3949, "total_steps": 6840, "loss": 0.6559766530990601, "lr": 8.282566486616425e-06, "epoch": 1.154700979675391, "percentage": 57.73, "elapsed_time": "5:46:59", "remaining_time": "4:14:01"} +{"current_steps": 3950, "total_steps": 6840, "loss": 0.4462929368019104, "lr": 8.277803815852535e-06, "epoch": 1.1549934200906566, "percentage": 57.75, "elapsed_time": "5:47:04", "remaining_time": "4:13:56"} +{"current_steps": 3951, "total_steps": 6840, "loss": 0.5672504901885986, "lr": 8.273041547641531e-06, "epoch": 1.155285860505922, "percentage": 57.76, "elapsed_time": "5:47:09", "remaining_time": "4:13:50"} +{"current_steps": 3952, "total_steps": 6840, "loss": 0.4040188193321228, "lr": 8.268279683096567e-06, "epoch": 1.1555783009211873, "percentage": 57.78, "elapsed_time": "5:47:14", "remaining_time": "4:13:44"} +{"current_steps": 3953, "total_steps": 6840, "loss": 0.4639814794063568, "lr": 8.263518223330698e-06, "epoch": 1.1558707413364526, "percentage": 57.79, "elapsed_time": "5:47:20", "remaining_time": "4:13:40"} +{"current_steps": 3954, "total_steps": 6840, "loss": 0.384866327047348, "lr": 
8.258757169456885e-06, "epoch": 1.1561631817517182, "percentage": 57.81, "elapsed_time": "5:47:26", "remaining_time": "4:13:35"} +{"current_steps": 3955, "total_steps": 6840, "loss": 0.452106773853302, "lr": 8.253996522587997e-06, "epoch": 1.1564556221669835, "percentage": 57.82, "elapsed_time": "5:47:32", "remaining_time": "4:13:30"} +{"current_steps": 3956, "total_steps": 6840, "loss": 0.487504780292511, "lr": 8.249236283836806e-06, "epoch": 1.1567480625822488, "percentage": 57.84, "elapsed_time": "5:47:38", "remaining_time": "4:13:26"} +{"current_steps": 3957, "total_steps": 6840, "loss": 0.6225916147232056, "lr": 8.244476454315989e-06, "epoch": 1.1570405029975142, "percentage": 57.85, "elapsed_time": "5:47:42", "remaining_time": "4:13:20"} +{"current_steps": 3958, "total_steps": 6840, "loss": 0.5254271030426025, "lr": 8.239717035138128e-06, "epoch": 1.1573329434127797, "percentage": 57.87, "elapsed_time": "5:47:48", "remaining_time": "4:13:15"} +{"current_steps": 3959, "total_steps": 6840, "loss": 0.5759135484695435, "lr": 8.234958027415707e-06, "epoch": 1.157625383828045, "percentage": 57.88, "elapsed_time": "5:47:54", "remaining_time": "4:13:10"} +{"current_steps": 3960, "total_steps": 6840, "loss": 0.5720966458320618, "lr": 8.230199432261115e-06, "epoch": 1.1579178242433104, "percentage": 57.89, "elapsed_time": "5:47:59", "remaining_time": "4:13:05"} +{"current_steps": 3961, "total_steps": 6840, "loss": 0.4807323217391968, "lr": 8.225441250786643e-06, "epoch": 1.158210264658576, "percentage": 57.91, "elapsed_time": "5:48:06", "remaining_time": "4:13:01"} +{"current_steps": 3962, "total_steps": 6840, "loss": 0.5049746036529541, "lr": 8.22068348410449e-06, "epoch": 1.1585027050738412, "percentage": 57.92, "elapsed_time": "5:48:10", "remaining_time": "4:12:54"} +{"current_steps": 3963, "total_steps": 6840, "loss": 0.5321973562240601, "lr": 8.215926133326758e-06, "epoch": 1.1587951454891066, "percentage": 57.94, "elapsed_time": "5:48:15", "remaining_time": 
"4:12:49"} +{"current_steps": 3964, "total_steps": 6840, "loss": 0.5176634788513184, "lr": 8.211169199565444e-06, "epoch": 1.159087585904372, "percentage": 57.95, "elapsed_time": "5:48:21", "remaining_time": "4:12:44"} +{"current_steps": 3965, "total_steps": 6840, "loss": 0.5345112681388855, "lr": 8.20641268393245e-06, "epoch": 1.1593800263196374, "percentage": 57.97, "elapsed_time": "5:48:26", "remaining_time": "4:12:39"} +{"current_steps": 3966, "total_steps": 6840, "loss": 0.47578325867652893, "lr": 8.201656587539589e-06, "epoch": 1.1596724667349028, "percentage": 57.98, "elapsed_time": "5:48:32", "remaining_time": "4:12:34"} +{"current_steps": 3967, "total_steps": 6840, "loss": 0.5018264651298523, "lr": 8.196900911498563e-06, "epoch": 1.159964907150168, "percentage": 58.0, "elapsed_time": "5:48:37", "remaining_time": "4:12:28"} +{"current_steps": 3968, "total_steps": 6840, "loss": 0.4643394351005554, "lr": 8.192145656920989e-06, "epoch": 1.1602573475654336, "percentage": 58.01, "elapsed_time": "5:48:42", "remaining_time": "4:12:23"} +{"current_steps": 3969, "total_steps": 6840, "loss": 0.5391045808792114, "lr": 8.187390824918375e-06, "epoch": 1.160549787980699, "percentage": 58.03, "elapsed_time": "5:48:48", "remaining_time": "4:12:19"} +{"current_steps": 3970, "total_steps": 6840, "loss": 0.5168124437332153, "lr": 8.182636416602136e-06, "epoch": 1.1608422283959643, "percentage": 58.04, "elapsed_time": "5:48:53", "remaining_time": "4:12:13"} +{"current_steps": 3971, "total_steps": 6840, "loss": 0.5821055173873901, "lr": 8.177882433083583e-06, "epoch": 1.1611346688112296, "percentage": 58.06, "elapsed_time": "5:48:58", "remaining_time": "4:12:08"} +{"current_steps": 3972, "total_steps": 6840, "loss": 0.6031824946403503, "lr": 8.173128875473933e-06, "epoch": 1.1614271092264952, "percentage": 58.07, "elapsed_time": "5:49:03", "remaining_time": "4:12:02"} +{"current_steps": 3973, "total_steps": 6840, "loss": 0.5085259675979614, "lr": 8.1683757448843e-06, "epoch": 
1.1617195496417605, "percentage": 58.08, "elapsed_time": "5:49:09", "remaining_time": "4:11:57"} +{"current_steps": 3974, "total_steps": 6840, "loss": 0.5654903650283813, "lr": 8.163623042425702e-06, "epoch": 1.1620119900570258, "percentage": 58.1, "elapsed_time": "5:49:14", "remaining_time": "4:11:51"} +{"current_steps": 3975, "total_steps": 6840, "loss": 0.3920902609825134, "lr": 8.158870769209051e-06, "epoch": 1.1623044304722914, "percentage": 58.11, "elapsed_time": "5:49:19", "remaining_time": "4:11:46"} +{"current_steps": 3976, "total_steps": 6840, "loss": 0.5334979891777039, "lr": 8.154118926345165e-06, "epoch": 1.1625968708875567, "percentage": 58.13, "elapsed_time": "5:49:25", "remaining_time": "4:11:41"} +{"current_steps": 3977, "total_steps": 6840, "loss": 0.6212184429168701, "lr": 8.149367514944754e-06, "epoch": 1.162889311302822, "percentage": 58.14, "elapsed_time": "5:49:30", "remaining_time": "4:11:36"} +{"current_steps": 3978, "total_steps": 6840, "loss": 0.71863853931427, "lr": 8.144616536118437e-06, "epoch": 1.1631817517180874, "percentage": 58.16, "elapsed_time": "5:49:35", "remaining_time": "4:11:31"} +{"current_steps": 3979, "total_steps": 6840, "loss": 0.5263794660568237, "lr": 8.139865990976722e-06, "epoch": 1.163474192133353, "percentage": 58.17, "elapsed_time": "5:49:42", "remaining_time": "4:11:26"} +{"current_steps": 3980, "total_steps": 6840, "loss": 0.5035576224327087, "lr": 8.135115880630025e-06, "epoch": 1.1637666325486182, "percentage": 58.19, "elapsed_time": "5:49:46", "remaining_time": "4:11:20"} +{"current_steps": 3981, "total_steps": 6840, "loss": 0.5695084929466248, "lr": 8.130366206188651e-06, "epoch": 1.1640590729638836, "percentage": 58.2, "elapsed_time": "5:49:53", "remaining_time": "4:11:16"} +{"current_steps": 3982, "total_steps": 6840, "loss": 0.5826396942138672, "lr": 8.125616968762806e-06, "epoch": 1.1643515133791489, "percentage": 58.22, "elapsed_time": "5:49:59", "remaining_time": "4:11:11"} +{"current_steps": 3983, 
"total_steps": 6840, "loss": 0.5132841467857361, "lr": 8.1208681694626e-06, "epoch": 1.1646439537944144, "percentage": 58.23, "elapsed_time": "5:50:03", "remaining_time": "4:11:06"} +{"current_steps": 3984, "total_steps": 6840, "loss": 0.6572669744491577, "lr": 8.116119809398034e-06, "epoch": 1.1649363942096798, "percentage": 58.25, "elapsed_time": "5:50:10", "remaining_time": "4:11:01"} +{"current_steps": 3985, "total_steps": 6840, "loss": 0.5365801453590393, "lr": 8.111371889679007e-06, "epoch": 1.165228834624945, "percentage": 58.26, "elapsed_time": "5:50:14", "remaining_time": "4:10:55"} +{"current_steps": 3986, "total_steps": 6840, "loss": 0.44511687755584717, "lr": 8.10662441141532e-06, "epoch": 1.1655212750402106, "percentage": 58.27, "elapsed_time": "5:50:20", "remaining_time": "4:10:51"} +{"current_steps": 3987, "total_steps": 6840, "loss": 0.47212404012680054, "lr": 8.101877375716666e-06, "epoch": 1.165813715455476, "percentage": 58.29, "elapsed_time": "5:50:24", "remaining_time": "4:10:44"} +{"current_steps": 3988, "total_steps": 6840, "loss": 0.5942205786705017, "lr": 8.097130783692631e-06, "epoch": 1.1661061558707413, "percentage": 58.3, "elapsed_time": "5:50:31", "remaining_time": "4:10:40"} +{"current_steps": 3989, "total_steps": 6840, "loss": 0.49162304401397705, "lr": 8.092384636452708e-06, "epoch": 1.1663985962860068, "percentage": 58.32, "elapsed_time": "5:50:36", "remaining_time": "4:10:34"} +{"current_steps": 3990, "total_steps": 6840, "loss": 0.6544803380966187, "lr": 8.087638935106277e-06, "epoch": 1.1666910367012722, "percentage": 58.33, "elapsed_time": "5:50:41", "remaining_time": "4:10:29"} +{"current_steps": 3991, "total_steps": 6840, "loss": 0.5572186708450317, "lr": 8.082893680762619e-06, "epoch": 1.1669834771165375, "percentage": 58.35, "elapsed_time": "5:50:46", "remaining_time": "4:10:23"} +{"current_steps": 3992, "total_steps": 6840, "loss": 0.5836775898933411, "lr": 8.078148874530906e-06, "epoch": 1.1672759175318028, "percentage": 
58.36, "elapsed_time": "5:50:51", "remaining_time": "4:10:19"} +{"current_steps": 3993, "total_steps": 6840, "loss": 0.5507068634033203, "lr": 8.073404517520208e-06, "epoch": 1.1675683579470684, "percentage": 58.38, "elapsed_time": "5:50:58", "remaining_time": "4:10:14"} +{"current_steps": 3994, "total_steps": 6840, "loss": 0.5312684178352356, "lr": 8.068660610839489e-06, "epoch": 1.1678607983623337, "percentage": 58.39, "elapsed_time": "5:51:02", "remaining_time": "4:10:08"} +{"current_steps": 3995, "total_steps": 6840, "loss": 0.5555688142776489, "lr": 8.06391715559761e-06, "epoch": 1.168153238777599, "percentage": 58.41, "elapsed_time": "5:51:07", "remaining_time": "4:10:03"} +{"current_steps": 3996, "total_steps": 6840, "loss": 0.5724596977233887, "lr": 8.059174152903324e-06, "epoch": 1.1684456791928644, "percentage": 58.42, "elapsed_time": "5:51:12", "remaining_time": "4:09:57"} +{"current_steps": 3997, "total_steps": 6840, "loss": 0.6212218999862671, "lr": 8.054431603865282e-06, "epoch": 1.16873811960813, "percentage": 58.44, "elapsed_time": "5:51:17", "remaining_time": "4:09:52"} +{"current_steps": 3998, "total_steps": 6840, "loss": 0.5061509609222412, "lr": 8.049689509592023e-06, "epoch": 1.1690305600233952, "percentage": 58.45, "elapsed_time": "5:51:23", "remaining_time": "4:09:47"} +{"current_steps": 3999, "total_steps": 6840, "loss": 0.6143001914024353, "lr": 8.044947871191982e-06, "epoch": 1.1693230004386606, "percentage": 58.46, "elapsed_time": "5:51:28", "remaining_time": "4:09:41"} +{"current_steps": 4000, "total_steps": 6840, "loss": 0.5079911351203918, "lr": 8.040206689773487e-06, "epoch": 1.169615440853926, "percentage": 58.48, "elapsed_time": "5:51:33", "remaining_time": "4:09:36"} +{"current_steps": 4001, "total_steps": 6840, "loss": 0.6104908585548401, "lr": 8.035465966444764e-06, "epoch": 1.1699078812691914, "percentage": 58.49, "elapsed_time": "5:51:44", "remaining_time": "4:09:35"} +{"current_steps": 4002, "total_steps": 6840, "loss": 
0.5953013896942139, "lr": 8.03072570231393e-06, "epoch": 1.1702003216844568, "percentage": 58.51, "elapsed_time": "5:51:48", "remaining_time": "4:09:28"} +{"current_steps": 4003, "total_steps": 6840, "loss": 0.4541323781013489, "lr": 8.025985898488986e-06, "epoch": 1.1704927620997223, "percentage": 58.52, "elapsed_time": "5:51:54", "remaining_time": "4:09:23"} +{"current_steps": 4004, "total_steps": 6840, "loss": 0.5708850026130676, "lr": 8.021246556077838e-06, "epoch": 1.1707852025149876, "percentage": 58.54, "elapsed_time": "5:51:59", "remaining_time": "4:09:18"} +{"current_steps": 4005, "total_steps": 6840, "loss": 0.5430601835250854, "lr": 8.016507676188275e-06, "epoch": 1.171077642930253, "percentage": 58.55, "elapsed_time": "5:52:03", "remaining_time": "4:09:12"} +{"current_steps": 4006, "total_steps": 6840, "loss": 0.5621174573898315, "lr": 8.011769259927981e-06, "epoch": 1.1713700833455183, "percentage": 58.57, "elapsed_time": "5:52:08", "remaining_time": "4:09:07"} +{"current_steps": 4007, "total_steps": 6840, "loss": 0.48092782497406006, "lr": 8.007031308404536e-06, "epoch": 1.1716625237607838, "percentage": 58.58, "elapsed_time": "5:52:14", "remaining_time": "4:09:02"} +{"current_steps": 4008, "total_steps": 6840, "loss": 0.5770663022994995, "lr": 8.002293822725404e-06, "epoch": 1.1719549641760492, "percentage": 58.6, "elapsed_time": "5:52:18", "remaining_time": "4:08:56"} +{"current_steps": 4009, "total_steps": 6840, "loss": 0.5692728757858276, "lr": 7.997556803997945e-06, "epoch": 1.1722474045913145, "percentage": 58.61, "elapsed_time": "5:52:24", "remaining_time": "4:08:51"} +{"current_steps": 4010, "total_steps": 6840, "loss": 0.6256895065307617, "lr": 7.99282025332941e-06, "epoch": 1.1725398450065798, "percentage": 58.63, "elapsed_time": "5:52:31", "remaining_time": "4:08:47"} +{"current_steps": 4011, "total_steps": 6840, "loss": 0.4272884130477905, "lr": 7.988084171826937e-06, "epoch": 1.1728322854218454, "percentage": 58.64, "elapsed_time": 
"5:52:36", "remaining_time": "4:08:41"} +{"current_steps": 4012, "total_steps": 6840, "loss": 0.5113184452056885, "lr": 7.983348560597557e-06, "epoch": 1.1731247258371107, "percentage": 58.65, "elapsed_time": "5:52:41", "remaining_time": "4:08:36"} +{"current_steps": 4013, "total_steps": 6840, "loss": 0.45635539293289185, "lr": 7.978613420748186e-06, "epoch": 1.173417166252376, "percentage": 58.67, "elapsed_time": "5:52:46", "remaining_time": "4:08:31"} +{"current_steps": 4014, "total_steps": 6840, "loss": 0.5539636611938477, "lr": 7.973878753385638e-06, "epoch": 1.1737096066676416, "percentage": 58.68, "elapsed_time": "5:52:52", "remaining_time": "4:08:26"} +{"current_steps": 4015, "total_steps": 6840, "loss": 0.6083431243896484, "lr": 7.969144559616615e-06, "epoch": 1.174002047082907, "percentage": 58.7, "elapsed_time": "5:52:57", "remaining_time": "4:08:20"} +{"current_steps": 4016, "total_steps": 6840, "loss": 0.5268326997756958, "lr": 7.9644108405477e-06, "epoch": 1.1742944874981722, "percentage": 58.71, "elapsed_time": "5:53:02", "remaining_time": "4:08:15"} +{"current_steps": 4017, "total_steps": 6840, "loss": 0.5680301189422607, "lr": 7.95967759728538e-06, "epoch": 1.1745869279134376, "percentage": 58.73, "elapsed_time": "5:53:07", "remaining_time": "4:08:10"} +{"current_steps": 4018, "total_steps": 6840, "loss": 0.5457121133804321, "lr": 7.954944830936012e-06, "epoch": 1.174879368328703, "percentage": 58.74, "elapsed_time": "5:53:12", "remaining_time": "4:08:04"} +{"current_steps": 4019, "total_steps": 6840, "loss": 0.5358338356018066, "lr": 7.950212542605857e-06, "epoch": 1.1751718087439684, "percentage": 58.76, "elapsed_time": "5:53:17", "remaining_time": "4:07:58"} +{"current_steps": 4020, "total_steps": 6840, "loss": 0.6094579696655273, "lr": 7.945480733401056e-06, "epoch": 1.1754642491592338, "percentage": 58.77, "elapsed_time": "5:53:22", "remaining_time": "4:07:53"} +{"current_steps": 4021, "total_steps": 6840, "loss": 0.5108463764190674, "lr": 
7.940749404427642e-06, "epoch": 1.175756689574499, "percentage": 58.79, "elapsed_time": "5:53:26", "remaining_time": "4:07:47"} +{"current_steps": 4022, "total_steps": 6840, "loss": 0.4946494698524475, "lr": 7.936018556791537e-06, "epoch": 1.1760491299897646, "percentage": 58.8, "elapsed_time": "5:53:32", "remaining_time": "4:07:42"} +{"current_steps": 4023, "total_steps": 6840, "loss": 0.5056017637252808, "lr": 7.931288191598543e-06, "epoch": 1.17634157040503, "percentage": 58.82, "elapsed_time": "5:53:37", "remaining_time": "4:07:37"} +{"current_steps": 4024, "total_steps": 6840, "loss": 0.5242294073104858, "lr": 7.926558309954354e-06, "epoch": 1.1766340108202953, "percentage": 58.83, "elapsed_time": "5:53:43", "remaining_time": "4:07:32"} +{"current_steps": 4025, "total_steps": 6840, "loss": 0.5667276382446289, "lr": 7.921828912964556e-06, "epoch": 1.1769264512355608, "percentage": 58.85, "elapsed_time": "5:53:47", "remaining_time": "4:07:26"} +{"current_steps": 4026, "total_steps": 6840, "loss": 0.5282422304153442, "lr": 7.917100001734614e-06, "epoch": 1.1772188916508262, "percentage": 58.86, "elapsed_time": "5:53:53", "remaining_time": "4:07:20"} +{"current_steps": 4027, "total_steps": 6840, "loss": 0.4887520670890808, "lr": 7.912371577369881e-06, "epoch": 1.1775113320660915, "percentage": 58.87, "elapsed_time": "5:53:57", "remaining_time": "4:07:15"} +{"current_steps": 4028, "total_steps": 6840, "loss": 0.5082155466079712, "lr": 7.907643640975603e-06, "epoch": 1.177803772481357, "percentage": 58.89, "elapsed_time": "5:54:02", "remaining_time": "4:07:09"} +{"current_steps": 4029, "total_steps": 6840, "loss": 0.5432984828948975, "lr": 7.902916193656898e-06, "epoch": 1.1780962128966224, "percentage": 58.9, "elapsed_time": "5:54:08", "remaining_time": "4:07:04"} +{"current_steps": 4030, "total_steps": 6840, "loss": 0.4313681721687317, "lr": 7.898189236518783e-06, "epoch": 1.1783886533118877, "percentage": 58.92, "elapsed_time": "5:54:12", "remaining_time": 
"4:06:58"} +{"current_steps": 4031, "total_steps": 6840, "loss": 0.6051831245422363, "lr": 7.893462770666155e-06, "epoch": 1.178681093727153, "percentage": 58.93, "elapsed_time": "5:54:17", "remaining_time": "4:06:53"} +{"current_steps": 4032, "total_steps": 6840, "loss": 0.45805442333221436, "lr": 7.888736797203796e-06, "epoch": 1.1789735341424186, "percentage": 58.95, "elapsed_time": "5:54:23", "remaining_time": "4:06:48"} +{"current_steps": 4033, "total_steps": 6840, "loss": 0.4998340606689453, "lr": 7.884011317236376e-06, "epoch": 1.179265974557684, "percentage": 58.96, "elapsed_time": "5:54:27", "remaining_time": "4:06:42"} +{"current_steps": 4034, "total_steps": 6840, "loss": 0.5298212170600891, "lr": 7.879286331868443e-06, "epoch": 1.1795584149729492, "percentage": 58.98, "elapsed_time": "5:54:32", "remaining_time": "4:06:37"} +{"current_steps": 4035, "total_steps": 6840, "loss": 0.5104682445526123, "lr": 7.874561842204437e-06, "epoch": 1.1798508553882145, "percentage": 58.99, "elapsed_time": "5:54:37", "remaining_time": "4:06:31"} +{"current_steps": 4036, "total_steps": 6840, "loss": 0.5793051719665527, "lr": 7.869837849348676e-06, "epoch": 1.18014329580348, "percentage": 59.01, "elapsed_time": "5:54:42", "remaining_time": "4:06:26"} +{"current_steps": 4037, "total_steps": 6840, "loss": 0.42913323640823364, "lr": 7.865114354405367e-06, "epoch": 1.1804357362187454, "percentage": 59.02, "elapsed_time": "5:54:49", "remaining_time": "4:06:21"} +{"current_steps": 4038, "total_steps": 6840, "loss": 0.5183675289154053, "lr": 7.860391358478596e-06, "epoch": 1.1807281766340108, "percentage": 59.04, "elapsed_time": "5:54:53", "remaining_time": "4:06:15"} +{"current_steps": 4039, "total_steps": 6840, "loss": 0.444034218788147, "lr": 7.855668862672339e-06, "epoch": 1.1810206170492763, "percentage": 59.05, "elapsed_time": "5:54:58", "remaining_time": "4:06:10"} +{"current_steps": 4040, "total_steps": 6840, "loss": 0.4357207417488098, "lr": 7.850946868090446e-06, 
"epoch": 1.1813130574645416, "percentage": 59.06, "elapsed_time": "5:55:03", "remaining_time": "4:06:04"} +{"current_steps": 4041, "total_steps": 6840, "loss": 0.4517707824707031, "lr": 7.846225375836657e-06, "epoch": 1.181605497879807, "percentage": 59.08, "elapsed_time": "5:55:07", "remaining_time": "4:05:58"} +{"current_steps": 4042, "total_steps": 6840, "loss": 0.4437381625175476, "lr": 7.841504387014589e-06, "epoch": 1.1818979382950725, "percentage": 59.09, "elapsed_time": "5:55:12", "remaining_time": "4:05:53"} +{"current_steps": 4043, "total_steps": 6840, "loss": 0.5364828109741211, "lr": 7.836783902727746e-06, "epoch": 1.1821903787103378, "percentage": 59.11, "elapsed_time": "5:55:18", "remaining_time": "4:05:48"} +{"current_steps": 4044, "total_steps": 6840, "loss": 0.4814251661300659, "lr": 7.832063924079516e-06, "epoch": 1.1824828191256032, "percentage": 59.12, "elapsed_time": "5:55:23", "remaining_time": "4:05:43"} +{"current_steps": 4045, "total_steps": 6840, "loss": 0.5376232862472534, "lr": 7.827344452173163e-06, "epoch": 1.1827752595408685, "percentage": 59.14, "elapsed_time": "5:55:28", "remaining_time": "4:05:37"} +{"current_steps": 4046, "total_steps": 6840, "loss": 0.6005147695541382, "lr": 7.822625488111833e-06, "epoch": 1.183067699956134, "percentage": 59.15, "elapsed_time": "5:55:34", "remaining_time": "4:05:32"} +{"current_steps": 4047, "total_steps": 6840, "loss": 0.5276827216148376, "lr": 7.817907032998556e-06, "epoch": 1.1833601403713994, "percentage": 59.17, "elapsed_time": "5:55:39", "remaining_time": "4:05:27"} +{"current_steps": 4048, "total_steps": 6840, "loss": 0.6425626277923584, "lr": 7.813189087936243e-06, "epoch": 1.1836525807866647, "percentage": 59.18, "elapsed_time": "5:55:44", "remaining_time": "4:05:21"} +{"current_steps": 4049, "total_steps": 6840, "loss": 0.44388407468795776, "lr": 7.808471654027685e-06, "epoch": 1.18394502120193, "percentage": 59.2, "elapsed_time": "5:55:49", "remaining_time": "4:05:16"} 
+{"current_steps": 4050, "total_steps": 6840, "loss": 0.5044336318969727, "lr": 7.803754732375554e-06, "epoch": 1.1842374616171956, "percentage": 59.21, "elapsed_time": "5:55:53", "remaining_time": "4:05:10"} +{"current_steps": 4051, "total_steps": 6840, "loss": 0.6964906454086304, "lr": 7.7990383240824e-06, "epoch": 1.184529902032461, "percentage": 59.23, "elapsed_time": "5:55:58", "remaining_time": "4:05:04"} +{"current_steps": 4052, "total_steps": 6840, "loss": 0.6093637943267822, "lr": 7.794322430250654e-06, "epoch": 1.1848223424477262, "percentage": 59.24, "elapsed_time": "5:56:02", "remaining_time": "4:04:58"} +{"current_steps": 4053, "total_steps": 6840, "loss": 0.5264803171157837, "lr": 7.78960705198263e-06, "epoch": 1.1851147828629918, "percentage": 59.25, "elapsed_time": "5:56:07", "remaining_time": "4:04:53"} +{"current_steps": 4054, "total_steps": 6840, "loss": 0.5336456298828125, "lr": 7.78489219038052e-06, "epoch": 1.185407223278257, "percentage": 59.27, "elapsed_time": "5:56:14", "remaining_time": "4:04:49"} +{"current_steps": 4055, "total_steps": 6840, "loss": 0.5266311168670654, "lr": 7.78017784654639e-06, "epoch": 1.1856996636935224, "percentage": 59.28, "elapsed_time": "5:56:19", "remaining_time": "4:04:43"} +{"current_steps": 4056, "total_steps": 6840, "loss": 0.6281685829162598, "lr": 7.775464021582195e-06, "epoch": 1.1859921041087877, "percentage": 59.3, "elapsed_time": "5:56:24", "remaining_time": "4:04:38"} +{"current_steps": 4057, "total_steps": 6840, "loss": 0.560591995716095, "lr": 7.770750716589758e-06, "epoch": 1.1862845445240533, "percentage": 59.31, "elapsed_time": "5:56:28", "remaining_time": "4:04:31"} +{"current_steps": 4058, "total_steps": 6840, "loss": 0.5751859545707703, "lr": 7.766037932670786e-06, "epoch": 1.1865769849393186, "percentage": 59.33, "elapsed_time": "5:56:32", "remaining_time": "4:04:26"} +{"current_steps": 4059, "total_steps": 6840, "loss": 0.5404624938964844, "lr": 7.761325670926864e-06, "epoch": 
1.186869425354584, "percentage": 59.34, "elapsed_time": "5:56:37", "remaining_time": "4:04:20"} +{"current_steps": 4060, "total_steps": 6840, "loss": 0.4714626669883728, "lr": 7.756613932459456e-06, "epoch": 1.1871618657698493, "percentage": 59.36, "elapsed_time": "5:56:42", "remaining_time": "4:04:15"} +{"current_steps": 4061, "total_steps": 6840, "loss": 0.5449519157409668, "lr": 7.751902718369903e-06, "epoch": 1.1874543061851148, "percentage": 59.37, "elapsed_time": "5:56:49", "remaining_time": "4:04:10"} +{"current_steps": 4062, "total_steps": 6840, "loss": 0.6518754959106445, "lr": 7.747192029759419e-06, "epoch": 1.1877467466003802, "percentage": 59.39, "elapsed_time": "5:56:54", "remaining_time": "4:04:05"} +{"current_steps": 4063, "total_steps": 6840, "loss": 0.47224369645118713, "lr": 7.7424818677291e-06, "epoch": 1.1880391870156455, "percentage": 59.4, "elapsed_time": "5:57:00", "remaining_time": "4:04:00"} +{"current_steps": 4064, "total_steps": 6840, "loss": 0.5482417345046997, "lr": 7.737772233379919e-06, "epoch": 1.188331627430911, "percentage": 59.42, "elapsed_time": "5:57:04", "remaining_time": "4:03:54"} +{"current_steps": 4065, "total_steps": 6840, "loss": 0.5401996374130249, "lr": 7.733063127812724e-06, "epoch": 1.1886240678461764, "percentage": 59.43, "elapsed_time": "5:57:11", "remaining_time": "4:03:50"} +{"current_steps": 4066, "total_steps": 6840, "loss": 0.4678424596786499, "lr": 7.72835455212824e-06, "epoch": 1.1889165082614417, "percentage": 59.44, "elapsed_time": "5:57:16", "remaining_time": "4:03:44"} +{"current_steps": 4067, "total_steps": 6840, "loss": 0.5191294550895691, "lr": 7.72364650742707e-06, "epoch": 1.1892089486767072, "percentage": 59.46, "elapsed_time": "5:57:22", "remaining_time": "4:03:40"} +{"current_steps": 4068, "total_steps": 6840, "loss": 0.44018834829330444, "lr": 7.718938994809685e-06, "epoch": 1.1895013890919726, "percentage": 59.47, "elapsed_time": "5:57:27", "remaining_time": "4:03:34"} +{"current_steps": 4069, 
"total_steps": 6840, "loss": 0.47852614521980286, "lr": 7.714232015376442e-06, "epoch": 1.1897938295072379, "percentage": 59.49, "elapsed_time": "5:57:34", "remaining_time": "4:03:30"} +{"current_steps": 4070, "total_steps": 6840, "loss": 0.5748994946479797, "lr": 7.709525570227567e-06, "epoch": 1.1900862699225032, "percentage": 59.5, "elapsed_time": "5:57:39", "remaining_time": "4:03:25"} +{"current_steps": 4071, "total_steps": 6840, "loss": 0.5015645027160645, "lr": 7.704819660463164e-06, "epoch": 1.1903787103377688, "percentage": 59.52, "elapsed_time": "5:57:44", "remaining_time": "4:03:19"} +{"current_steps": 4072, "total_steps": 6840, "loss": 0.6200511455535889, "lr": 7.70011428718321e-06, "epoch": 1.190671150753034, "percentage": 59.53, "elapsed_time": "5:57:49", "remaining_time": "4:03:14"} +{"current_steps": 4073, "total_steps": 6840, "loss": 0.6311289668083191, "lr": 7.69540945148756e-06, "epoch": 1.1909635911682994, "percentage": 59.55, "elapsed_time": "5:57:54", "remaining_time": "4:03:08"} +{"current_steps": 4074, "total_steps": 6840, "loss": 0.5707247257232666, "lr": 7.690705154475937e-06, "epoch": 1.1912560315835647, "percentage": 59.56, "elapsed_time": "5:57:58", "remaining_time": "4:03:02"} +{"current_steps": 4075, "total_steps": 6840, "loss": 0.5616360902786255, "lr": 7.686001397247944e-06, "epoch": 1.1915484719988303, "percentage": 59.58, "elapsed_time": "5:58:02", "remaining_time": "4:02:56"} +{"current_steps": 4076, "total_steps": 6840, "loss": 0.5955555438995361, "lr": 7.681298180903054e-06, "epoch": 1.1918409124140956, "percentage": 59.59, "elapsed_time": "5:58:07", "remaining_time": "4:02:51"} +{"current_steps": 4077, "total_steps": 6840, "loss": 0.5057257413864136, "lr": 7.676595506540615e-06, "epoch": 1.192133352829361, "percentage": 59.61, "elapsed_time": "5:58:13", "remaining_time": "4:02:45"} +{"current_steps": 4078, "total_steps": 6840, "loss": 0.5795278549194336, "lr": 7.671893375259854e-06, "epoch": 1.1924257932446265, "percentage": 
59.62, "elapsed_time": "5:58:18", "remaining_time": "4:02:40"} +{"current_steps": 4079, "total_steps": 6840, "loss": 0.5213087797164917, "lr": 7.66719178815986e-06, "epoch": 1.1927182336598918, "percentage": 59.63, "elapsed_time": "5:58:21", "remaining_time": "4:02:34"} +{"current_steps": 4080, "total_steps": 6840, "loss": 0.5333693027496338, "lr": 7.662490746339601e-06, "epoch": 1.1930106740751572, "percentage": 59.65, "elapsed_time": "5:58:27", "remaining_time": "4:02:28"} +{"current_steps": 4081, "total_steps": 6840, "loss": 0.4705297648906708, "lr": 7.657790250897916e-06, "epoch": 1.1933031144904227, "percentage": 59.66, "elapsed_time": "5:58:33", "remaining_time": "4:02:24"} +{"current_steps": 4082, "total_steps": 6840, "loss": 0.5376054644584656, "lr": 7.65309030293352e-06, "epoch": 1.193595554905688, "percentage": 59.68, "elapsed_time": "5:58:38", "remaining_time": "4:02:19"} +{"current_steps": 4083, "total_steps": 6840, "loss": 0.47457355260849, "lr": 7.648390903544997e-06, "epoch": 1.1938879953209534, "percentage": 59.69, "elapsed_time": "5:58:44", "remaining_time": "4:02:14"} +{"current_steps": 4084, "total_steps": 6840, "loss": 0.48752763867378235, "lr": 7.6436920538308e-06, "epoch": 1.1941804357362187, "percentage": 59.71, "elapsed_time": "5:58:49", "remaining_time": "4:02:08"} +{"current_steps": 4085, "total_steps": 6840, "loss": 0.48227858543395996, "lr": 7.63899375488926e-06, "epoch": 1.1944728761514842, "percentage": 59.72, "elapsed_time": "5:58:55", "remaining_time": "4:02:03"} +{"current_steps": 4086, "total_steps": 6840, "loss": 0.4294116497039795, "lr": 7.634296007818576e-06, "epoch": 1.1947653165667496, "percentage": 59.74, "elapsed_time": "5:59:01", "remaining_time": "4:01:58"} +{"current_steps": 4087, "total_steps": 6840, "loss": 0.5562552809715271, "lr": 7.629598813716817e-06, "epoch": 1.1950577569820149, "percentage": 59.75, "elapsed_time": "5:59:07", "remaining_time": "4:01:54"} +{"current_steps": 4088, "total_steps": 6840, "loss": 
0.6466431617736816, "lr": 7.624902173681923e-06, "epoch": 1.1953501973972802, "percentage": 59.77, "elapsed_time": "5:59:13", "remaining_time": "4:01:49"} +{"current_steps": 4089, "total_steps": 6840, "loss": 0.7183903455734253, "lr": 7.620206088811704e-06, "epoch": 1.1956426378125458, "percentage": 59.78, "elapsed_time": "5:59:18", "remaining_time": "4:01:44"} +{"current_steps": 4090, "total_steps": 6840, "loss": 0.5667496919631958, "lr": 7.615510560203841e-06, "epoch": 1.195935078227811, "percentage": 59.8, "elapsed_time": "5:59:23", "remaining_time": "4:01:38"} +{"current_steps": 4091, "total_steps": 6840, "loss": 0.5603050589561462, "lr": 7.610815588955888e-06, "epoch": 1.1962275186430764, "percentage": 59.81, "elapsed_time": "5:59:28", "remaining_time": "4:01:33"} +{"current_steps": 4092, "total_steps": 6840, "loss": 0.5305474996566772, "lr": 7.606121176165267e-06, "epoch": 1.196519959058342, "percentage": 59.82, "elapsed_time": "5:59:32", "remaining_time": "4:01:27"} +{"current_steps": 4093, "total_steps": 6840, "loss": 0.7321374416351318, "lr": 7.6014273229292625e-06, "epoch": 1.1968123994736073, "percentage": 59.84, "elapsed_time": "5:59:36", "remaining_time": "4:01:21"} +{"current_steps": 4094, "total_steps": 6840, "loss": 0.44885972142219543, "lr": 7.5967340303450385e-06, "epoch": 1.1971048398888726, "percentage": 59.85, "elapsed_time": "5:59:41", "remaining_time": "4:01:15"} +{"current_steps": 4095, "total_steps": 6840, "loss": 0.593859076499939, "lr": 7.592041299509624e-06, "epoch": 1.197397280304138, "percentage": 59.87, "elapsed_time": "5:59:46", "remaining_time": "4:01:10"} +{"current_steps": 4096, "total_steps": 6840, "loss": 0.6701182723045349, "lr": 7.587349131519913e-06, "epoch": 1.1976897207194035, "percentage": 59.88, "elapsed_time": "5:59:51", "remaining_time": "4:01:04"} +{"current_steps": 4097, "total_steps": 6840, "loss": 0.6456711292266846, "lr": 7.582657527472674e-06, "epoch": 1.1979821611346688, "percentage": 59.9, "elapsed_time": 
"5:59:56", "remaining_time": "4:00:59"} +{"current_steps": 4098, "total_steps": 6840, "loss": 0.5933864116668701, "lr": 7.577966488464543e-06, "epoch": 1.1982746015499341, "percentage": 59.91, "elapsed_time": "6:00:01", "remaining_time": "4:00:53"} +{"current_steps": 4099, "total_steps": 6840, "loss": 0.4609876275062561, "lr": 7.5732760155920175e-06, "epoch": 1.1985670419651995, "percentage": 59.93, "elapsed_time": "6:00:08", "remaining_time": "4:00:49"} +{"current_steps": 4100, "total_steps": 6840, "loss": 0.540961503982544, "lr": 7.568586109951468e-06, "epoch": 1.198859482380465, "percentage": 59.94, "elapsed_time": "6:00:14", "remaining_time": "4:00:44"} +{"current_steps": 4101, "total_steps": 6840, "loss": 0.5522942543029785, "lr": 7.563896772639132e-06, "epoch": 1.1991519227957304, "percentage": 59.96, "elapsed_time": "6:00:23", "remaining_time": "4:00:41"} +{"current_steps": 4102, "total_steps": 6840, "loss": 0.483737587928772, "lr": 7.559208004751114e-06, "epoch": 1.1994443632109957, "percentage": 59.97, "elapsed_time": "6:00:27", "remaining_time": "4:00:36"} +{"current_steps": 4103, "total_steps": 6840, "loss": 0.3760339915752411, "lr": 7.554519807383384e-06, "epoch": 1.1997368036262612, "percentage": 59.99, "elapsed_time": "6:00:34", "remaining_time": "4:00:31"} +{"current_steps": 4104, "total_steps": 6840, "loss": 0.5034801959991455, "lr": 7.549832181631782e-06, "epoch": 1.2000292440415266, "percentage": 60.0, "elapsed_time": "6:00:39", "remaining_time": "4:00:26"} +{"current_steps": 4105, "total_steps": 6840, "loss": 0.5605261325836182, "lr": 7.545145128592009e-06, "epoch": 1.2003216844567919, "percentage": 60.01, "elapsed_time": "6:00:45", "remaining_time": "4:00:21"} +{"current_steps": 4106, "total_steps": 6840, "loss": 0.4724245071411133, "lr": 7.540458649359637e-06, "epoch": 1.2006141248720574, "percentage": 60.03, "elapsed_time": "6:00:50", "remaining_time": "4:00:16"} +{"current_steps": 4107, "total_steps": 6840, "loss": 0.564873218536377, "lr": 
7.535772745030101e-06, "epoch": 1.2009065652873228, "percentage": 60.04, "elapsed_time": "6:00:54", "remaining_time": "4:00:10"} +{"current_steps": 4108, "total_steps": 6840, "loss": 0.699596643447876, "lr": 7.531087416698702e-06, "epoch": 1.201199005702588, "percentage": 60.06, "elapsed_time": "6:00:59", "remaining_time": "4:00:04"} +{"current_steps": 4109, "total_steps": 6840, "loss": 0.47448351979255676, "lr": 7.526402665460612e-06, "epoch": 1.2014914461178534, "percentage": 60.07, "elapsed_time": "6:01:04", "remaining_time": "3:59:59"} +{"current_steps": 4110, "total_steps": 6840, "loss": 0.4681323766708374, "lr": 7.521718492410855e-06, "epoch": 1.201783886533119, "percentage": 60.09, "elapsed_time": "6:01:10", "remaining_time": "3:59:54"} +{"current_steps": 4111, "total_steps": 6840, "loss": 0.6361842155456543, "lr": 7.517034898644333e-06, "epoch": 1.2020763269483843, "percentage": 60.1, "elapsed_time": "6:01:14", "remaining_time": "3:59:48"} +{"current_steps": 4112, "total_steps": 6840, "loss": 0.4732646942138672, "lr": 7.5123518852558075e-06, "epoch": 1.2023687673636496, "percentage": 60.12, "elapsed_time": "6:01:18", "remaining_time": "3:59:42"} +{"current_steps": 4113, "total_steps": 6840, "loss": 0.57124263048172, "lr": 7.507669453339903e-06, "epoch": 1.202661207778915, "percentage": 60.13, "elapsed_time": "6:01:22", "remaining_time": "3:59:35"} +{"current_steps": 4114, "total_steps": 6840, "loss": 0.5228173732757568, "lr": 7.502987603991111e-06, "epoch": 1.2029536481941805, "percentage": 60.15, "elapsed_time": "6:01:26", "remaining_time": "3:59:30"} +{"current_steps": 4115, "total_steps": 6840, "loss": 0.5501765012741089, "lr": 7.4983063383037864e-06, "epoch": 1.2032460886094458, "percentage": 60.16, "elapsed_time": "6:01:32", "remaining_time": "3:59:25"} +{"current_steps": 4116, "total_steps": 6840, "loss": 0.5062840580940247, "lr": 7.493625657372141e-06, "epoch": 1.2035385290247111, "percentage": 60.18, "elapsed_time": "6:01:38", "remaining_time": 
"3:59:20"} +{"current_steps": 4117, "total_steps": 6840, "loss": 0.7060763835906982, "lr": 7.4889455622902616e-06, "epoch": 1.2038309694399767, "percentage": 60.19, "elapsed_time": "6:01:42", "remaining_time": "3:59:14"} +{"current_steps": 4118, "total_steps": 6840, "loss": 0.42127668857574463, "lr": 7.484266054152088e-06, "epoch": 1.204123409855242, "percentage": 60.2, "elapsed_time": "6:01:49", "remaining_time": "3:59:09"} +{"current_steps": 4119, "total_steps": 6840, "loss": 0.490860253572464, "lr": 7.479587134051429e-06, "epoch": 1.2044158502705073, "percentage": 60.22, "elapsed_time": "6:01:54", "remaining_time": "3:59:04"} +{"current_steps": 4120, "total_steps": 6840, "loss": 0.45786625146865845, "lr": 7.474908803081955e-06, "epoch": 1.204708290685773, "percentage": 60.23, "elapsed_time": "6:01:58", "remaining_time": "3:58:58"} +{"current_steps": 4121, "total_steps": 6840, "loss": 0.5267277359962463, "lr": 7.470231062337192e-06, "epoch": 1.2050007311010382, "percentage": 60.25, "elapsed_time": "6:02:02", "remaining_time": "3:58:52"} +{"current_steps": 4122, "total_steps": 6840, "loss": 0.47834646701812744, "lr": 7.465553912910539e-06, "epoch": 1.2052931715163036, "percentage": 60.26, "elapsed_time": "6:02:07", "remaining_time": "3:58:47"} +{"current_steps": 4123, "total_steps": 6840, "loss": 0.5348576903343201, "lr": 7.460877355895249e-06, "epoch": 1.2055856119315689, "percentage": 60.28, "elapsed_time": "6:02:14", "remaining_time": "3:58:42"} +{"current_steps": 4124, "total_steps": 6840, "loss": 0.47992441058158875, "lr": 7.456201392384437e-06, "epoch": 1.2058780523468344, "percentage": 60.29, "elapsed_time": "6:02:19", "remaining_time": "3:58:37"} +{"current_steps": 4125, "total_steps": 6840, "loss": 0.5693913698196411, "lr": 7.451526023471085e-06, "epoch": 1.2061704927620998, "percentage": 60.31, "elapsed_time": "6:02:25", "remaining_time": "3:58:32"} +{"current_steps": 4126, "total_steps": 6840, "loss": 0.5165153741836548, "lr": 7.4468512502480305e-06, 
"epoch": 1.206462933177365, "percentage": 60.32, "elapsed_time": "6:02:29", "remaining_time": "3:58:26"} +{"current_steps": 4127, "total_steps": 6840, "loss": 0.522534966468811, "lr": 7.442177073807973e-06, "epoch": 1.2067553735926304, "percentage": 60.34, "elapsed_time": "6:02:34", "remaining_time": "3:58:20"} +{"current_steps": 4128, "total_steps": 6840, "loss": 0.6298432946205139, "lr": 7.43750349524347e-06, "epoch": 1.207047814007896, "percentage": 60.35, "elapsed_time": "6:02:39", "remaining_time": "3:58:15"} +{"current_steps": 4129, "total_steps": 6840, "loss": 0.5077394247055054, "lr": 7.432830515646947e-06, "epoch": 1.2073402544231613, "percentage": 60.37, "elapsed_time": "6:02:45", "remaining_time": "3:58:10"} +{"current_steps": 4130, "total_steps": 6840, "loss": 0.6492841839790344, "lr": 7.428158136110681e-06, "epoch": 1.2076326948384266, "percentage": 60.38, "elapsed_time": "6:02:49", "remaining_time": "3:58:04"} +{"current_steps": 4131, "total_steps": 6840, "loss": 0.5204535126686096, "lr": 7.423486357726813e-06, "epoch": 1.2079251352536922, "percentage": 60.39, "elapsed_time": "6:02:53", "remaining_time": "3:57:58"} +{"current_steps": 4132, "total_steps": 6840, "loss": 0.56598961353302, "lr": 7.418815181587347e-06, "epoch": 1.2082175756689575, "percentage": 60.41, "elapsed_time": "6:02:58", "remaining_time": "3:57:53"} +{"current_steps": 4133, "total_steps": 6840, "loss": 0.486950159072876, "lr": 7.4141446087841364e-06, "epoch": 1.2085100160842228, "percentage": 60.42, "elapsed_time": "6:03:03", "remaining_time": "3:57:47"} +{"current_steps": 4134, "total_steps": 6840, "loss": 0.7218466997146606, "lr": 7.4094746404089e-06, "epoch": 1.2088024564994881, "percentage": 60.44, "elapsed_time": "6:03:08", "remaining_time": "3:57:41"} +{"current_steps": 4135, "total_steps": 6840, "loss": 0.6486172676086426, "lr": 7.404805277553218e-06, "epoch": 1.2090948969147537, "percentage": 60.45, "elapsed_time": "6:03:14", "remaining_time": "3:57:37"} +{"current_steps": 
4136, "total_steps": 6840, "loss": 0.6160574555397034, "lr": 7.400136521308521e-06, "epoch": 1.209387337330019, "percentage": 60.47, "elapsed_time": "6:03:19", "remaining_time": "3:57:32"} +{"current_steps": 4137, "total_steps": 6840, "loss": 0.6184699535369873, "lr": 7.395468372766107e-06, "epoch": 1.2096797777452843, "percentage": 60.48, "elapsed_time": "6:03:24", "remaining_time": "3:57:26"} +{"current_steps": 4138, "total_steps": 6840, "loss": 0.5795263051986694, "lr": 7.390800833017124e-06, "epoch": 1.2099722181605497, "percentage": 60.5, "elapsed_time": "6:03:29", "remaining_time": "3:57:21"} +{"current_steps": 4139, "total_steps": 6840, "loss": 0.5409367084503174, "lr": 7.386133903152581e-06, "epoch": 1.2102646585758152, "percentage": 60.51, "elapsed_time": "6:03:34", "remaining_time": "3:57:15"} +{"current_steps": 4140, "total_steps": 6840, "loss": 0.47924935817718506, "lr": 7.3814675842633465e-06, "epoch": 1.2105570989910805, "percentage": 60.53, "elapsed_time": "6:03:39", "remaining_time": "3:57:10"} +{"current_steps": 4141, "total_steps": 6840, "loss": 0.5737412571907043, "lr": 7.376801877440143e-06, "epoch": 1.2108495394063459, "percentage": 60.54, "elapsed_time": "6:03:44", "remaining_time": "3:57:04"} +{"current_steps": 4142, "total_steps": 6840, "loss": 0.538013219833374, "lr": 7.372136783773551e-06, "epoch": 1.2111419798216114, "percentage": 60.56, "elapsed_time": "6:03:50", "remaining_time": "3:56:59"} +{"current_steps": 4143, "total_steps": 6840, "loss": 0.4523904323577881, "lr": 7.367472304354011e-06, "epoch": 1.2114344202368768, "percentage": 60.57, "elapsed_time": "6:03:55", "remaining_time": "3:56:54"} +{"current_steps": 4144, "total_steps": 6840, "loss": 0.5057293176651001, "lr": 7.362808440271811e-06, "epoch": 1.211726860652142, "percentage": 60.58, "elapsed_time": "6:04:01", "remaining_time": "3:56:49"} +{"current_steps": 4145, "total_steps": 6840, "loss": 0.4653171896934509, "lr": 7.358145192617103e-06, "epoch": 1.2120193010674076, 
"percentage": 60.6, "elapsed_time": "6:04:06", "remaining_time": "3:56:44"} +{"current_steps": 4146, "total_steps": 6840, "loss": 0.607070803642273, "lr": 7.353482562479896e-06, "epoch": 1.212311741482673, "percentage": 60.61, "elapsed_time": "6:04:11", "remaining_time": "3:56:38"} +{"current_steps": 4147, "total_steps": 6840, "loss": 0.4721861481666565, "lr": 7.348820550950047e-06, "epoch": 1.2126041818979383, "percentage": 60.63, "elapsed_time": "6:04:16", "remaining_time": "3:56:33"} +{"current_steps": 4148, "total_steps": 6840, "loss": 0.6656746864318848, "lr": 7.3441591591172765e-06, "epoch": 1.2128966223132036, "percentage": 60.64, "elapsed_time": "6:04:21", "remaining_time": "3:56:28"} +{"current_steps": 4149, "total_steps": 6840, "loss": 0.5231848359107971, "lr": 7.339498388071154e-06, "epoch": 1.2131890627284692, "percentage": 60.66, "elapsed_time": "6:04:27", "remaining_time": "3:56:23"} +{"current_steps": 4150, "total_steps": 6840, "loss": 0.42241257429122925, "lr": 7.334838238901106e-06, "epoch": 1.2134815031437345, "percentage": 60.67, "elapsed_time": "6:04:33", "remaining_time": "3:56:18"} +{"current_steps": 4151, "total_steps": 6840, "loss": 0.427111953496933, "lr": 7.3301787126964165e-06, "epoch": 1.2137739435589998, "percentage": 60.69, "elapsed_time": "6:04:38", "remaining_time": "3:56:12"} +{"current_steps": 4152, "total_steps": 6840, "loss": 0.6208339929580688, "lr": 7.325519810546219e-06, "epoch": 1.2140663839742651, "percentage": 60.7, "elapsed_time": "6:04:42", "remaining_time": "3:56:06"} +{"current_steps": 4153, "total_steps": 6840, "loss": 0.5031273365020752, "lr": 7.320861533539505e-06, "epoch": 1.2143588243895307, "percentage": 60.72, "elapsed_time": "6:04:48", "remaining_time": "3:56:01"} +{"current_steps": 4154, "total_steps": 6840, "loss": 0.5617444515228271, "lr": 7.3162038827651205e-06, "epoch": 1.214651264804796, "percentage": 60.73, "elapsed_time": "6:04:54", "remaining_time": "3:55:56"} +{"current_steps": 4155, "total_steps": 
6840, "loss": 0.4616255462169647, "lr": 7.311546859311758e-06, "epoch": 1.2149437052200613, "percentage": 60.75, "elapsed_time": "6:04:59", "remaining_time": "3:55:51"} +{"current_steps": 4156, "total_steps": 6840, "loss": 0.5799977779388428, "lr": 7.306890464267972e-06, "epoch": 1.215236145635327, "percentage": 60.76, "elapsed_time": "6:05:04", "remaining_time": "3:55:46"} +{"current_steps": 4157, "total_steps": 6840, "loss": 0.5669786930084229, "lr": 7.302234698722165e-06, "epoch": 1.2155285860505922, "percentage": 60.77, "elapsed_time": "6:05:10", "remaining_time": "3:55:41"} +{"current_steps": 4158, "total_steps": 6840, "loss": 0.5622642040252686, "lr": 7.297579563762595e-06, "epoch": 1.2158210264658575, "percentage": 60.79, "elapsed_time": "6:05:14", "remaining_time": "3:55:35"} +{"current_steps": 4159, "total_steps": 6840, "loss": 0.6896791458129883, "lr": 7.292925060477367e-06, "epoch": 1.216113466881123, "percentage": 60.8, "elapsed_time": "6:05:19", "remaining_time": "3:55:29"} +{"current_steps": 4160, "total_steps": 6840, "loss": 0.6704437136650085, "lr": 7.288271189954451e-06, "epoch": 1.2164059072963884, "percentage": 60.82, "elapsed_time": "6:05:25", "remaining_time": "3:55:24"} +{"current_steps": 4161, "total_steps": 6840, "loss": 0.6340646743774414, "lr": 7.2836179532816565e-06, "epoch": 1.2166983477116537, "percentage": 60.83, "elapsed_time": "6:05:29", "remaining_time": "3:55:19"} +{"current_steps": 4162, "total_steps": 6840, "loss": 0.528992772102356, "lr": 7.278965351546648e-06, "epoch": 1.216990788126919, "percentage": 60.85, "elapsed_time": "6:05:35", "remaining_time": "3:55:14"} +{"current_steps": 4163, "total_steps": 6840, "loss": 0.45160621404647827, "lr": 7.274313385836949e-06, "epoch": 1.2172832285421846, "percentage": 60.86, "elapsed_time": "6:05:41", "remaining_time": "3:55:09"} +{"current_steps": 4164, "total_steps": 6840, "loss": 0.5398670434951782, "lr": 7.269662057239919e-06, "epoch": 1.21757566895745, "percentage": 60.88, 
"elapsed_time": "6:05:46", "remaining_time": "3:55:03"} +{"current_steps": 4165, "total_steps": 6840, "loss": 0.5174476504325867, "lr": 7.265011366842785e-06, "epoch": 1.2178681093727153, "percentage": 60.89, "elapsed_time": "6:05:50", "remaining_time": "3:54:57"} +{"current_steps": 4166, "total_steps": 6840, "loss": 0.4830206632614136, "lr": 7.260361315732613e-06, "epoch": 1.2181605497879806, "percentage": 60.91, "elapsed_time": "6:05:55", "remaining_time": "3:54:52"} +{"current_steps": 4167, "total_steps": 6840, "loss": 0.42422181367874146, "lr": 7.2557119049963266e-06, "epoch": 1.2184529902032462, "percentage": 60.92, "elapsed_time": "6:06:00", "remaining_time": "3:54:47"} +{"current_steps": 4168, "total_steps": 6840, "loss": 0.43544018268585205, "lr": 7.251063135720699e-06, "epoch": 1.2187454306185115, "percentage": 60.94, "elapsed_time": "6:06:07", "remaining_time": "3:54:42"} +{"current_steps": 4169, "total_steps": 6840, "loss": 0.5352005362510681, "lr": 7.2464150089923465e-06, "epoch": 1.2190378710337768, "percentage": 60.95, "elapsed_time": "6:06:13", "remaining_time": "3:54:37"} +{"current_steps": 4170, "total_steps": 6840, "loss": 0.4718678891658783, "lr": 7.241767525897746e-06, "epoch": 1.2193303114490424, "percentage": 60.96, "elapsed_time": "6:06:18", "remaining_time": "3:54:32"} +{"current_steps": 4171, "total_steps": 6840, "loss": 0.618084192276001, "lr": 7.237120687523214e-06, "epoch": 1.2196227518643077, "percentage": 60.98, "elapsed_time": "6:06:23", "remaining_time": "3:54:27"} +{"current_steps": 4172, "total_steps": 6840, "loss": 0.625995397567749, "lr": 7.232474494954924e-06, "epoch": 1.219915192279573, "percentage": 60.99, "elapsed_time": "6:06:27", "remaining_time": "3:54:21"} +{"current_steps": 4173, "total_steps": 6840, "loss": 0.5382465124130249, "lr": 7.227828949278894e-06, "epoch": 1.2202076326948383, "percentage": 61.01, "elapsed_time": "6:06:32", "remaining_time": "3:54:15"} +{"current_steps": 4174, "total_steps": 6840, "loss": 
0.5299465656280518, "lr": 7.223184051580992e-06, "epoch": 1.220500073110104, "percentage": 61.02, "elapsed_time": "6:06:38", "remaining_time": "3:54:10"} +{"current_steps": 4175, "total_steps": 6840, "loss": 0.5899940729141235, "lr": 7.218539802946934e-06, "epoch": 1.2207925135253692, "percentage": 61.04, "elapsed_time": "6:06:43", "remaining_time": "3:54:05"} +{"current_steps": 4176, "total_steps": 6840, "loss": 0.6126594543457031, "lr": 7.213896204462286e-06, "epoch": 1.2210849539406345, "percentage": 61.05, "elapsed_time": "6:06:48", "remaining_time": "3:53:59"} +{"current_steps": 4177, "total_steps": 6840, "loss": 0.5576338768005371, "lr": 7.20925325721246e-06, "epoch": 1.2213773943558999, "percentage": 61.07, "elapsed_time": "6:06:55", "remaining_time": "3:53:55"} +{"current_steps": 4178, "total_steps": 6840, "loss": 0.540515661239624, "lr": 7.204610962282717e-06, "epoch": 1.2216698347711654, "percentage": 61.08, "elapsed_time": "6:07:00", "remaining_time": "3:53:50"} +{"current_steps": 4179, "total_steps": 6840, "loss": 0.5306440591812134, "lr": 7.1999693207581675e-06, "epoch": 1.2219622751864307, "percentage": 61.1, "elapsed_time": "6:07:04", "remaining_time": "3:53:44"} +{"current_steps": 4180, "total_steps": 6840, "loss": 0.6274853944778442, "lr": 7.195328333723763e-06, "epoch": 1.222254715601696, "percentage": 61.11, "elapsed_time": "6:07:09", "remaining_time": "3:53:38"} +{"current_steps": 4181, "total_steps": 6840, "loss": 0.5626333951950073, "lr": 7.190688002264308e-06, "epoch": 1.2225471560169616, "percentage": 61.13, "elapsed_time": "6:07:13", "remaining_time": "3:53:32"} +{"current_steps": 4182, "total_steps": 6840, "loss": 0.5938719511032104, "lr": 7.18604832746445e-06, "epoch": 1.222839596432227, "percentage": 61.14, "elapsed_time": "6:07:17", "remaining_time": "3:53:26"} +{"current_steps": 4183, "total_steps": 6840, "loss": 0.4599727988243103, "lr": 7.181409310408688e-06, "epoch": 1.2231320368474923, "percentage": 61.15, "elapsed_time": 
"6:07:23", "remaining_time": "3:53:21"} +{"current_steps": 4184, "total_steps": 6840, "loss": 0.5912302732467651, "lr": 7.176770952181363e-06, "epoch": 1.2234244772627578, "percentage": 61.17, "elapsed_time": "6:07:29", "remaining_time": "3:53:16"} +{"current_steps": 4185, "total_steps": 6840, "loss": 0.534631073474884, "lr": 7.172133253866662e-06, "epoch": 1.2237169176780232, "percentage": 61.18, "elapsed_time": "6:07:35", "remaining_time": "3:53:12"} +{"current_steps": 4186, "total_steps": 6840, "loss": 0.5084418058395386, "lr": 7.167496216548618e-06, "epoch": 1.2240093580932885, "percentage": 61.2, "elapsed_time": "6:07:43", "remaining_time": "3:53:08"} +{"current_steps": 4187, "total_steps": 6840, "loss": 0.6906956434249878, "lr": 7.162859841311112e-06, "epoch": 1.2243017985085538, "percentage": 61.21, "elapsed_time": "6:07:46", "remaining_time": "3:53:02"} +{"current_steps": 4188, "total_steps": 6840, "loss": 0.5578658580780029, "lr": 7.158224129237867e-06, "epoch": 1.2245942389238194, "percentage": 61.23, "elapsed_time": "6:07:52", "remaining_time": "3:52:57"} +{"current_steps": 4189, "total_steps": 6840, "loss": 0.4438907206058502, "lr": 7.153589081412455e-06, "epoch": 1.2248866793390847, "percentage": 61.24, "elapsed_time": "6:07:59", "remaining_time": "3:52:52"} +{"current_steps": 4190, "total_steps": 6840, "loss": 0.6366580724716187, "lr": 7.148954698918289e-06, "epoch": 1.22517911975435, "percentage": 61.26, "elapsed_time": "6:08:03", "remaining_time": "3:52:46"} +{"current_steps": 4191, "total_steps": 6840, "loss": 0.5532524585723877, "lr": 7.144320982838628e-06, "epoch": 1.2254715601696153, "percentage": 61.27, "elapsed_time": "6:08:08", "remaining_time": "3:52:41"} +{"current_steps": 4192, "total_steps": 6840, "loss": 0.4847594201564789, "lr": 7.139687934256574e-06, "epoch": 1.2257640005848809, "percentage": 61.29, "elapsed_time": "6:08:12", "remaining_time": "3:52:35"} +{"current_steps": 4193, "total_steps": 6840, "loss": 0.6273454427719116, "lr": 
7.135055554255073e-06, "epoch": 1.2260564410001462, "percentage": 61.3, "elapsed_time": "6:08:18", "remaining_time": "3:52:30"} +{"current_steps": 4194, "total_steps": 6840, "loss": 0.6320512294769287, "lr": 7.130423843916917e-06, "epoch": 1.2263488814154115, "percentage": 61.32, "elapsed_time": "6:08:23", "remaining_time": "3:52:25"} +{"current_steps": 4195, "total_steps": 6840, "loss": 0.5499723553657532, "lr": 7.125792804324741e-06, "epoch": 1.226641321830677, "percentage": 61.33, "elapsed_time": "6:08:29", "remaining_time": "3:52:20"} +{"current_steps": 4196, "total_steps": 6840, "loss": 0.5855484008789062, "lr": 7.121162436561023e-06, "epoch": 1.2269337622459424, "percentage": 61.35, "elapsed_time": "6:08:34", "remaining_time": "3:52:14"} +{"current_steps": 4197, "total_steps": 6840, "loss": 0.5998305678367615, "lr": 7.11653274170808e-06, "epoch": 1.2272262026612077, "percentage": 61.36, "elapsed_time": "6:08:38", "remaining_time": "3:52:09"} +{"current_steps": 4198, "total_steps": 6840, "loss": 0.6963703632354736, "lr": 7.111903720848077e-06, "epoch": 1.2275186430764733, "percentage": 61.37, "elapsed_time": "6:08:43", "remaining_time": "3:52:03"} +{"current_steps": 4199, "total_steps": 6840, "loss": 0.5664974451065063, "lr": 7.10727537506302e-06, "epoch": 1.2278110834917386, "percentage": 61.39, "elapsed_time": "6:08:48", "remaining_time": "3:51:57"} +{"current_steps": 4200, "total_steps": 6840, "loss": 0.6502630710601807, "lr": 7.102647705434755e-06, "epoch": 1.228103523907004, "percentage": 61.4, "elapsed_time": "6:08:52", "remaining_time": "3:51:51"} +{"current_steps": 4201, "total_steps": 6840, "loss": 0.5727233290672302, "lr": 7.098020713044973e-06, "epoch": 1.2283959643222693, "percentage": 61.42, "elapsed_time": "6:09:01", "remaining_time": "3:51:49"} +{"current_steps": 4202, "total_steps": 6840, "loss": 0.47885602712631226, "lr": 7.093394398975206e-06, "epoch": 1.2286884047375348, "percentage": 61.43, "elapsed_time": "6:09:07", "remaining_time": 
"3:51:43"} +{"current_steps": 4203, "total_steps": 6840, "loss": 0.46089547872543335, "lr": 7.088768764306826e-06, "epoch": 1.2289808451528001, "percentage": 61.45, "elapsed_time": "6:09:11", "remaining_time": "3:51:38"} +{"current_steps": 4204, "total_steps": 6840, "loss": 0.48920977115631104, "lr": 7.084143810121044e-06, "epoch": 1.2292732855680655, "percentage": 61.46, "elapsed_time": "6:09:18", "remaining_time": "3:51:33"} +{"current_steps": 4205, "total_steps": 6840, "loss": 0.5320104956626892, "lr": 7.07951953749892e-06, "epoch": 1.2295657259833308, "percentage": 61.48, "elapsed_time": "6:09:24", "remaining_time": "3:51:28"} +{"current_steps": 4206, "total_steps": 6840, "loss": 0.6403206586837769, "lr": 7.074895947521347e-06, "epoch": 1.2298581663985964, "percentage": 61.49, "elapsed_time": "6:09:28", "remaining_time": "3:51:22"} +{"current_steps": 4207, "total_steps": 6840, "loss": 0.5522217750549316, "lr": 7.070273041269062e-06, "epoch": 1.2301506068138617, "percentage": 61.51, "elapsed_time": "6:09:34", "remaining_time": "3:51:18"} +{"current_steps": 4208, "total_steps": 6840, "loss": 0.5235073566436768, "lr": 7.0656508198226405e-06, "epoch": 1.230443047229127, "percentage": 61.52, "elapsed_time": "6:09:39", "remaining_time": "3:51:12"} +{"current_steps": 4209, "total_steps": 6840, "loss": 0.5972521305084229, "lr": 7.061029284262497e-06, "epoch": 1.2307354876443926, "percentage": 61.54, "elapsed_time": "6:09:43", "remaining_time": "3:51:06"} +{"current_steps": 4210, "total_steps": 6840, "loss": 0.5989280343055725, "lr": 7.0564084356688885e-06, "epoch": 1.2310279280596579, "percentage": 61.55, "elapsed_time": "6:09:47", "remaining_time": "3:51:00"} +{"current_steps": 4211, "total_steps": 6840, "loss": 0.5714213848114014, "lr": 7.051788275121913e-06, "epoch": 1.2313203684749232, "percentage": 61.56, "elapsed_time": "6:09:53", "remaining_time": "3:50:55"} +{"current_steps": 4212, "total_steps": 6840, "loss": 0.5588504076004028, "lr": 7.047168803701502e-06, 
"epoch": 1.2316128088901885, "percentage": 61.58, "elapsed_time": "6:09:57", "remaining_time": "3:50:49"} +{"current_steps": 4213, "total_steps": 6840, "loss": 0.47527533769607544, "lr": 7.042550022487431e-06, "epoch": 1.231905249305454, "percentage": 61.59, "elapsed_time": "6:10:04", "remaining_time": "3:50:45"} +{"current_steps": 4214, "total_steps": 6840, "loss": 0.5281137228012085, "lr": 7.03793193255931e-06, "epoch": 1.2321976897207194, "percentage": 61.61, "elapsed_time": "6:10:09", "remaining_time": "3:50:39"} +{"current_steps": 4215, "total_steps": 6840, "loss": 0.5509631037712097, "lr": 7.033314534996589e-06, "epoch": 1.2324901301359847, "percentage": 61.62, "elapsed_time": "6:10:14", "remaining_time": "3:50:34"} +{"current_steps": 4216, "total_steps": 6840, "loss": 0.5291438698768616, "lr": 7.028697830878557e-06, "epoch": 1.23278257055125, "percentage": 61.64, "elapsed_time": "6:10:19", "remaining_time": "3:50:29"} +{"current_steps": 4217, "total_steps": 6840, "loss": 0.5931780934333801, "lr": 7.024081821284343e-06, "epoch": 1.2330750109665156, "percentage": 61.65, "elapsed_time": "6:10:23", "remaining_time": "3:50:23"} +{"current_steps": 4218, "total_steps": 6840, "loss": 0.4883537292480469, "lr": 7.019466507292908e-06, "epoch": 1.233367451381781, "percentage": 61.67, "elapsed_time": "6:10:28", "remaining_time": "3:50:17"} +{"current_steps": 4219, "total_steps": 6840, "loss": 0.45155030488967896, "lr": 7.014851889983058e-06, "epoch": 1.2336598917970463, "percentage": 61.68, "elapsed_time": "6:10:35", "remaining_time": "3:50:13"} +{"current_steps": 4220, "total_steps": 6840, "loss": 0.6107507944107056, "lr": 7.010237970433426e-06, "epoch": 1.2339523322123118, "percentage": 61.7, "elapsed_time": "6:10:41", "remaining_time": "3:50:08"} +{"current_steps": 4221, "total_steps": 6840, "loss": 0.41764840483665466, "lr": 7.0056247497224905e-06, "epoch": 1.2342447726275771, "percentage": 61.71, "elapsed_time": "6:10:46", "remaining_time": "3:50:03"} 
+{"current_steps": 4222, "total_steps": 6840, "loss": 0.6786199808120728, "lr": 7.0010122289285635e-06, "epoch": 1.2345372130428425, "percentage": 61.73, "elapsed_time": "6:10:51", "remaining_time": "3:49:57"} +{"current_steps": 4223, "total_steps": 6840, "loss": 0.5378292798995972, "lr": 6.996400409129793e-06, "epoch": 1.234829653458108, "percentage": 61.74, "elapsed_time": "6:10:56", "remaining_time": "3:49:52"} +{"current_steps": 4224, "total_steps": 6840, "loss": 0.47646570205688477, "lr": 6.9917892914041685e-06, "epoch": 1.2351220938733734, "percentage": 61.75, "elapsed_time": "6:11:02", "remaining_time": "3:49:47"} +{"current_steps": 4225, "total_steps": 6840, "loss": 0.554225504398346, "lr": 6.987178876829503e-06, "epoch": 1.2354145342886387, "percentage": 61.77, "elapsed_time": "6:11:07", "remaining_time": "3:49:42"} +{"current_steps": 4226, "total_steps": 6840, "loss": 0.42614030838012695, "lr": 6.982569166483459e-06, "epoch": 1.235706974703904, "percentage": 61.78, "elapsed_time": "6:11:13", "remaining_time": "3:49:37"} +{"current_steps": 4227, "total_steps": 6840, "loss": 0.5043676495552063, "lr": 6.977960161443524e-06, "epoch": 1.2359994151191696, "percentage": 61.8, "elapsed_time": "6:11:19", "remaining_time": "3:49:32"} +{"current_steps": 4228, "total_steps": 6840, "loss": 0.4905642569065094, "lr": 6.973351862787029e-06, "epoch": 1.2362918555344349, "percentage": 61.81, "elapsed_time": "6:11:25", "remaining_time": "3:49:27"} +{"current_steps": 4229, "total_steps": 6840, "loss": 0.5860332250595093, "lr": 6.9687442715911325e-06, "epoch": 1.2365842959497002, "percentage": 61.83, "elapsed_time": "6:11:29", "remaining_time": "3:49:21"} +{"current_steps": 4230, "total_steps": 6840, "loss": 0.4900137782096863, "lr": 6.9641373889328345e-06, "epoch": 1.2368767363649655, "percentage": 61.84, "elapsed_time": "6:11:35", "remaining_time": "3:49:16"} +{"current_steps": 4231, "total_steps": 6840, "loss": 0.5736855268478394, "lr": 6.959531215888961e-06, "epoch": 
1.237169176780231, "percentage": 61.86, "elapsed_time": "6:11:40", "remaining_time": "3:49:11"} +{"current_steps": 4232, "total_steps": 6840, "loss": 0.6390400528907776, "lr": 6.95492575353618e-06, "epoch": 1.2374616171954964, "percentage": 61.87, "elapsed_time": "6:11:45", "remaining_time": "3:49:05"} +{"current_steps": 4233, "total_steps": 6840, "loss": 0.6553822159767151, "lr": 6.95032100295099e-06, "epoch": 1.2377540576107617, "percentage": 61.89, "elapsed_time": "6:11:51", "remaining_time": "3:49:01"} +{"current_steps": 4234, "total_steps": 6840, "loss": 0.6685863733291626, "lr": 6.945716965209723e-06, "epoch": 1.2380464980260273, "percentage": 61.9, "elapsed_time": "6:11:56", "remaining_time": "3:48:55"} +{"current_steps": 4235, "total_steps": 6840, "loss": 0.5172277688980103, "lr": 6.941113641388542e-06, "epoch": 1.2383389384412926, "percentage": 61.92, "elapsed_time": "6:12:02", "remaining_time": "3:48:50"} +{"current_steps": 4236, "total_steps": 6840, "loss": 0.6578007936477661, "lr": 6.936511032563451e-06, "epoch": 1.238631378856558, "percentage": 61.93, "elapsed_time": "6:12:06", "remaining_time": "3:48:44"} +{"current_steps": 4237, "total_steps": 6840, "loss": 0.5679500699043274, "lr": 6.931909139810283e-06, "epoch": 1.2389238192718235, "percentage": 61.94, "elapsed_time": "6:12:10", "remaining_time": "3:48:38"} +{"current_steps": 4238, "total_steps": 6840, "loss": 0.49142318964004517, "lr": 6.927307964204695e-06, "epoch": 1.2392162596870888, "percentage": 61.96, "elapsed_time": "6:12:16", "remaining_time": "3:48:33"} +{"current_steps": 4239, "total_steps": 6840, "loss": 0.5339487195014954, "lr": 6.9227075068221926e-06, "epoch": 1.2395087001023541, "percentage": 61.97, "elapsed_time": "6:12:21", "remaining_time": "3:48:28"} +{"current_steps": 4240, "total_steps": 6840, "loss": 0.5845860242843628, "lr": 6.918107768738097e-06, "epoch": 1.2398011405176195, "percentage": 61.99, "elapsed_time": "6:12:26", "remaining_time": "3:48:23"} +{"current_steps": 4241, 
"total_steps": 6840, "loss": 0.6767281889915466, "lr": 6.9135087510275735e-06, "epoch": 1.240093580932885, "percentage": 62.0, "elapsed_time": "6:12:30", "remaining_time": "3:48:16"} +{"current_steps": 4242, "total_steps": 6840, "loss": 0.6119472980499268, "lr": 6.908910454765612e-06, "epoch": 1.2403860213481503, "percentage": 62.02, "elapsed_time": "6:12:36", "remaining_time": "3:48:12"} +{"current_steps": 4243, "total_steps": 6840, "loss": 0.6375409364700317, "lr": 6.904312881027038e-06, "epoch": 1.2406784617634157, "percentage": 62.03, "elapsed_time": "6:12:41", "remaining_time": "3:48:06"} +{"current_steps": 4244, "total_steps": 6840, "loss": 0.7059881687164307, "lr": 6.899716030886508e-06, "epoch": 1.240970902178681, "percentage": 62.05, "elapsed_time": "6:12:45", "remaining_time": "3:48:00"} +{"current_steps": 4245, "total_steps": 6840, "loss": 0.6463328003883362, "lr": 6.895119905418504e-06, "epoch": 1.2412633425939466, "percentage": 62.06, "elapsed_time": "6:12:49", "remaining_time": "3:47:54"} +{"current_steps": 4246, "total_steps": 6840, "loss": 0.5374869108200073, "lr": 6.890524505697345e-06, "epoch": 1.2415557830092119, "percentage": 62.08, "elapsed_time": "6:12:55", "remaining_time": "3:47:50"} +{"current_steps": 4247, "total_steps": 6840, "loss": 0.5219276547431946, "lr": 6.885929832797176e-06, "epoch": 1.2418482234244772, "percentage": 62.09, "elapsed_time": "6:13:00", "remaining_time": "3:47:44"} +{"current_steps": 4248, "total_steps": 6840, "loss": 0.4815624952316284, "lr": 6.881335887791973e-06, "epoch": 1.2421406638397428, "percentage": 62.11, "elapsed_time": "6:13:04", "remaining_time": "3:47:38"} +{"current_steps": 4249, "total_steps": 6840, "loss": 0.5111992955207825, "lr": 6.8767426717555475e-06, "epoch": 1.242433104255008, "percentage": 62.12, "elapsed_time": "6:13:10", "remaining_time": "3:47:33"} +{"current_steps": 4250, "total_steps": 6840, "loss": 0.5331606268882751, "lr": 6.872150185761533e-06, "epoch": 1.2427255446702734, "percentage": 
62.13, "elapsed_time": "6:13:15", "remaining_time": "3:47:27"} +{"current_steps": 4251, "total_steps": 6840, "loss": 0.5375202894210815, "lr": 6.867558430883393e-06, "epoch": 1.2430179850855387, "percentage": 62.15, "elapsed_time": "6:13:20", "remaining_time": "3:47:22"} +{"current_steps": 4252, "total_steps": 6840, "loss": 0.5667152404785156, "lr": 6.862967408194425e-06, "epoch": 1.2433104255008043, "percentage": 62.16, "elapsed_time": "6:13:25", "remaining_time": "3:47:17"} +{"current_steps": 4253, "total_steps": 6840, "loss": 0.5679255723953247, "lr": 6.858377118767752e-06, "epoch": 1.2436028659160696, "percentage": 62.18, "elapsed_time": "6:13:30", "remaining_time": "3:47:11"} +{"current_steps": 4254, "total_steps": 6840, "loss": 0.6097947359085083, "lr": 6.853787563676324e-06, "epoch": 1.243895306331335, "percentage": 62.19, "elapsed_time": "6:13:34", "remaining_time": "3:47:05"} +{"current_steps": 4255, "total_steps": 6840, "loss": 0.41869044303894043, "lr": 6.849198743992927e-06, "epoch": 1.2441877467466003, "percentage": 62.21, "elapsed_time": "6:13:39", "remaining_time": "3:47:00"} +{"current_steps": 4256, "total_steps": 6840, "loss": 0.6414821147918701, "lr": 6.8446106607901655e-06, "epoch": 1.2444801871618658, "percentage": 62.22, "elapsed_time": "6:13:43", "remaining_time": "3:46:54"} +{"current_steps": 4257, "total_steps": 6840, "loss": 0.5985021591186523, "lr": 6.840023315140476e-06, "epoch": 1.2447726275771311, "percentage": 62.24, "elapsed_time": "6:13:49", "remaining_time": "3:46:49"} +{"current_steps": 4258, "total_steps": 6840, "loss": 0.4718092381954193, "lr": 6.8354367081161235e-06, "epoch": 1.2450650679923965, "percentage": 62.25, "elapsed_time": "6:13:53", "remaining_time": "3:46:43"} +{"current_steps": 4259, "total_steps": 6840, "loss": 0.46431800723075867, "lr": 6.8308508407892e-06, "epoch": 1.245357508407662, "percentage": 62.27, "elapsed_time": "6:13:59", "remaining_time": "3:46:38"} +{"current_steps": 4260, "total_steps": 6840, "loss": 
0.5499997735023499, "lr": 6.826265714231624e-06, "epoch": 1.2456499488229273, "percentage": 62.28, "elapsed_time": "6:14:04", "remaining_time": "3:46:33"} +{"current_steps": 4261, "total_steps": 6840, "loss": 0.6078206300735474, "lr": 6.8216813295151415e-06, "epoch": 1.2459423892381927, "percentage": 62.3, "elapsed_time": "6:14:09", "remaining_time": "3:46:27"} +{"current_steps": 4262, "total_steps": 6840, "loss": 0.5706520080566406, "lr": 6.817097687711322e-06, "epoch": 1.2462348296534582, "percentage": 62.31, "elapsed_time": "6:14:14", "remaining_time": "3:46:22"} +{"current_steps": 4263, "total_steps": 6840, "loss": 0.5210137367248535, "lr": 6.812514789891566e-06, "epoch": 1.2465272700687235, "percentage": 62.32, "elapsed_time": "6:14:18", "remaining_time": "3:46:16"} +{"current_steps": 4264, "total_steps": 6840, "loss": 0.42632028460502625, "lr": 6.807932637127097e-06, "epoch": 1.2468197104839889, "percentage": 62.34, "elapsed_time": "6:14:25", "remaining_time": "3:46:11"} +{"current_steps": 4265, "total_steps": 6840, "loss": 0.49990004301071167, "lr": 6.803351230488967e-06, "epoch": 1.2471121508992542, "percentage": 62.35, "elapsed_time": "6:14:31", "remaining_time": "3:46:07"} +{"current_steps": 4266, "total_steps": 6840, "loss": 0.557829737663269, "lr": 6.798770571048052e-06, "epoch": 1.2474045913145198, "percentage": 62.37, "elapsed_time": "6:14:36", "remaining_time": "3:46:01"} +{"current_steps": 4267, "total_steps": 6840, "loss": 0.4784187078475952, "lr": 6.794190659875052e-06, "epoch": 1.247697031729785, "percentage": 62.38, "elapsed_time": "6:14:41", "remaining_time": "3:45:56"} +{"current_steps": 4268, "total_steps": 6840, "loss": 0.4795057773590088, "lr": 6.789611498040492e-06, "epoch": 1.2479894721450504, "percentage": 62.4, "elapsed_time": "6:14:48", "remaining_time": "3:45:51"} +{"current_steps": 4269, "total_steps": 6840, "loss": 0.415715754032135, "lr": 6.785033086614725e-06, "epoch": 1.2482819125603157, "percentage": 62.41, "elapsed_time": 
"6:14:54", "remaining_time": "3:45:46"} +{"current_steps": 4270, "total_steps": 6840, "loss": 0.49056607484817505, "lr": 6.7804554266679266e-06, "epoch": 1.2485743529755813, "percentage": 62.43, "elapsed_time": "6:14:58", "remaining_time": "3:45:41"} +{"current_steps": 4271, "total_steps": 6840, "loss": 0.5268200039863586, "lr": 6.775878519270098e-06, "epoch": 1.2488667933908466, "percentage": 62.44, "elapsed_time": "6:15:03", "remaining_time": "3:45:35"} +{"current_steps": 4272, "total_steps": 6840, "loss": 0.6250356435775757, "lr": 6.771302365491064e-06, "epoch": 1.249159233806112, "percentage": 62.46, "elapsed_time": "6:15:09", "remaining_time": "3:45:31"} +{"current_steps": 4273, "total_steps": 6840, "loss": 0.5403029918670654, "lr": 6.76672696640047e-06, "epoch": 1.2494516742213775, "percentage": 62.47, "elapsed_time": "6:15:14", "remaining_time": "3:45:25"} +{"current_steps": 4274, "total_steps": 6840, "loss": 0.47006577253341675, "lr": 6.762152323067787e-06, "epoch": 1.2497441146366428, "percentage": 62.49, "elapsed_time": "6:15:19", "remaining_time": "3:45:20"} +{"current_steps": 4275, "total_steps": 6840, "loss": 0.5088232755661011, "lr": 6.7575784365623134e-06, "epoch": 1.2500365550519081, "percentage": 62.5, "elapsed_time": "6:15:24", "remaining_time": "3:45:14"} +{"current_steps": 4276, "total_steps": 6840, "loss": 0.5438642501831055, "lr": 6.7530053079531664e-06, "epoch": 1.2503289954671737, "percentage": 62.51, "elapsed_time": "6:15:29", "remaining_time": "3:45:09"} +{"current_steps": 4277, "total_steps": 6840, "loss": 0.45436567068099976, "lr": 6.748432938309286e-06, "epoch": 1.250621435882439, "percentage": 62.53, "elapsed_time": "6:15:34", "remaining_time": "3:45:04"} +{"current_steps": 4278, "total_steps": 6840, "loss": 0.5298944115638733, "lr": 6.743861328699438e-06, "epoch": 1.2509138762977043, "percentage": 62.54, "elapsed_time": "6:15:40", "remaining_time": "3:44:58"} +{"current_steps": 4279, "total_steps": 6840, "loss": 0.49393707513809204, 
"lr": 6.7392904801922055e-06, "epoch": 1.2512063167129697, "percentage": 62.56, "elapsed_time": "6:15:46", "remaining_time": "3:44:54"} +{"current_steps": 4280, "total_steps": 6840, "loss": 0.5540947318077087, "lr": 6.734720393855998e-06, "epoch": 1.251498757128235, "percentage": 62.57, "elapsed_time": "6:15:50", "remaining_time": "3:44:48"} +{"current_steps": 4281, "total_steps": 6840, "loss": 0.47406166791915894, "lr": 6.730151070759043e-06, "epoch": 1.2517911975435005, "percentage": 62.59, "elapsed_time": "6:15:57", "remaining_time": "3:44:43"} +{"current_steps": 4282, "total_steps": 6840, "loss": 0.46885907649993896, "lr": 6.725582511969397e-06, "epoch": 1.2520836379587659, "percentage": 62.6, "elapsed_time": "6:16:02", "remaining_time": "3:44:38"} +{"current_steps": 4283, "total_steps": 6840, "loss": 0.537517786026001, "lr": 6.721014718554931e-06, "epoch": 1.2523760783740312, "percentage": 62.62, "elapsed_time": "6:16:08", "remaining_time": "3:44:33"} +{"current_steps": 4284, "total_steps": 6840, "loss": 0.514340341091156, "lr": 6.716447691583336e-06, "epoch": 1.2526685187892967, "percentage": 62.63, "elapsed_time": "6:16:14", "remaining_time": "3:44:28"} +{"current_steps": 4285, "total_steps": 6840, "loss": 0.5696117281913757, "lr": 6.711881432122129e-06, "epoch": 1.252960959204562, "percentage": 62.65, "elapsed_time": "6:16:19", "remaining_time": "3:44:23"} +{"current_steps": 4286, "total_steps": 6840, "loss": 0.5620799660682678, "lr": 6.707315941238645e-06, "epoch": 1.2532533996198274, "percentage": 62.66, "elapsed_time": "6:16:23", "remaining_time": "3:44:17"} +{"current_steps": 4287, "total_steps": 6840, "loss": 0.4832923412322998, "lr": 6.702751220000039e-06, "epoch": 1.253545840035093, "percentage": 62.68, "elapsed_time": "6:16:28", "remaining_time": "3:44:11"} +{"current_steps": 4288, "total_steps": 6840, "loss": 0.6608176231384277, "lr": 6.698187269473289e-06, "epoch": 1.2538382804503583, "percentage": 62.69, "elapsed_time": "6:16:32", 
"remaining_time": "3:44:06"} +{"current_steps": 4289, "total_steps": 6840, "loss": 0.6002779006958008, "lr": 6.69362409072519e-06, "epoch": 1.2541307208656236, "percentage": 62.7, "elapsed_time": "6:16:36", "remaining_time": "3:44:00"} +{"current_steps": 4290, "total_steps": 6840, "loss": 0.49898988008499146, "lr": 6.689061684822357e-06, "epoch": 1.2544231612808892, "percentage": 62.72, "elapsed_time": "6:16:41", "remaining_time": "3:43:54"} +{"current_steps": 4291, "total_steps": 6840, "loss": 0.5887055397033691, "lr": 6.684500052831222e-06, "epoch": 1.2547156016961545, "percentage": 62.73, "elapsed_time": "6:16:45", "remaining_time": "3:43:48"} +{"current_steps": 4292, "total_steps": 6840, "loss": 0.6494714617729187, "lr": 6.679939195818043e-06, "epoch": 1.2550080421114198, "percentage": 62.75, "elapsed_time": "6:16:51", "remaining_time": "3:43:43"} +{"current_steps": 4293, "total_steps": 6840, "loss": 0.5708397626876831, "lr": 6.67537911484889e-06, "epoch": 1.2553004825266851, "percentage": 62.76, "elapsed_time": "6:16:55", "remaining_time": "3:43:37"} +{"current_steps": 4294, "total_steps": 6840, "loss": 0.40412014722824097, "lr": 6.670819810989656e-06, "epoch": 1.2555929229419505, "percentage": 62.78, "elapsed_time": "6:17:01", "remaining_time": "3:43:32"} +{"current_steps": 4295, "total_steps": 6840, "loss": 0.5141078233718872, "lr": 6.666261285306048e-06, "epoch": 1.255885363357216, "percentage": 62.79, "elapsed_time": "6:17:06", "remaining_time": "3:43:27"} +{"current_steps": 4296, "total_steps": 6840, "loss": 0.6463406085968018, "lr": 6.661703538863595e-06, "epoch": 1.2561778037724813, "percentage": 62.81, "elapsed_time": "6:17:11", "remaining_time": "3:43:22"} +{"current_steps": 4297, "total_steps": 6840, "loss": 0.5809177160263062, "lr": 6.657146572727643e-06, "epoch": 1.2564702441877467, "percentage": 62.82, "elapsed_time": "6:17:17", "remaining_time": "3:43:16"} +{"current_steps": 4298, "total_steps": 6840, "loss": 0.5124412775039673, "lr": 
6.652590387963354e-06, "epoch": 1.2567626846030122, "percentage": 62.84, "elapsed_time": "6:17:22", "remaining_time": "3:43:11"} +{"current_steps": 4299, "total_steps": 6840, "loss": 0.5399736762046814, "lr": 6.64803498563571e-06, "epoch": 1.2570551250182775, "percentage": 62.85, "elapsed_time": "6:17:27", "remaining_time": "3:43:06"} +{"current_steps": 4300, "total_steps": 6840, "loss": 0.548133373260498, "lr": 6.6434803668095095e-06, "epoch": 1.2573475654335429, "percentage": 62.87, "elapsed_time": "6:17:33", "remaining_time": "3:43:01"} +{"current_steps": 4301, "total_steps": 6840, "loss": 0.45056310296058655, "lr": 6.638926532549364e-06, "epoch": 1.2576400058488084, "percentage": 62.88, "elapsed_time": "6:17:42", "remaining_time": "3:42:58"} +{"current_steps": 4302, "total_steps": 6840, "loss": 0.5191814303398132, "lr": 6.634373483919705e-06, "epoch": 1.2579324462640737, "percentage": 62.89, "elapsed_time": "6:17:46", "remaining_time": "3:42:52"} +{"current_steps": 4303, "total_steps": 6840, "loss": 0.41939109563827515, "lr": 6.62982122198478e-06, "epoch": 1.258224886679339, "percentage": 62.91, "elapsed_time": "6:17:51", "remaining_time": "3:42:46"} +{"current_steps": 4304, "total_steps": 6840, "loss": 0.6535190939903259, "lr": 6.625269747808655e-06, "epoch": 1.2585173270946044, "percentage": 62.92, "elapsed_time": "6:17:57", "remaining_time": "3:42:41"} +{"current_steps": 4305, "total_steps": 6840, "loss": 0.6282539367675781, "lr": 6.620719062455207e-06, "epoch": 1.2588097675098697, "percentage": 62.94, "elapsed_time": "6:18:02", "remaining_time": "3:42:36"} +{"current_steps": 4306, "total_steps": 6840, "loss": 0.5378686189651489, "lr": 6.616169166988133e-06, "epoch": 1.2591022079251353, "percentage": 62.95, "elapsed_time": "6:18:08", "remaining_time": "3:42:31"} +{"current_steps": 4307, "total_steps": 6840, "loss": 0.5278643369674683, "lr": 6.611620062470942e-06, "epoch": 1.2593946483404006, "percentage": 62.97, "elapsed_time": "6:18:13", "remaining_time": 
"3:42:26"} +{"current_steps": 4308, "total_steps": 6840, "loss": 0.5578285455703735, "lr": 6.607071749966958e-06, "epoch": 1.259687088755666, "percentage": 62.98, "elapsed_time": "6:18:20", "remaining_time": "3:42:21"} +{"current_steps": 4309, "total_steps": 6840, "loss": 0.6452580094337463, "lr": 6.602524230539324e-06, "epoch": 1.2599795291709315, "percentage": 63.0, "elapsed_time": "6:18:24", "remaining_time": "3:42:15"} +{"current_steps": 4310, "total_steps": 6840, "loss": 0.6133028268814087, "lr": 6.597977505250992e-06, "epoch": 1.2602719695861968, "percentage": 63.01, "elapsed_time": "6:18:28", "remaining_time": "3:42:09"} +{"current_steps": 4311, "total_steps": 6840, "loss": 0.4930221140384674, "lr": 6.5934315751647345e-06, "epoch": 1.2605644100014621, "percentage": 63.03, "elapsed_time": "6:18:33", "remaining_time": "3:42:04"} +{"current_steps": 4312, "total_steps": 6840, "loss": 0.48653531074523926, "lr": 6.588886441343136e-06, "epoch": 1.2608568504167277, "percentage": 63.04, "elapsed_time": "6:18:38", "remaining_time": "3:41:59"} +{"current_steps": 4313, "total_steps": 6840, "loss": 0.6594399213790894, "lr": 6.5843421048485915e-06, "epoch": 1.261149290831993, "percentage": 63.06, "elapsed_time": "6:18:44", "remaining_time": "3:41:54"} +{"current_steps": 4314, "total_steps": 6840, "loss": 0.5164401531219482, "lr": 6.579798566743314e-06, "epoch": 1.2614417312472583, "percentage": 63.07, "elapsed_time": "6:18:49", "remaining_time": "3:41:49"} +{"current_steps": 4315, "total_steps": 6840, "loss": 0.6338971853256226, "lr": 6.5752558280893245e-06, "epoch": 1.2617341716625239, "percentage": 63.08, "elapsed_time": "6:18:54", "remaining_time": "3:41:43"} +{"current_steps": 4316, "total_steps": 6840, "loss": 0.5301859974861145, "lr": 6.570713889948461e-06, "epoch": 1.2620266120777892, "percentage": 63.1, "elapsed_time": "6:18:59", "remaining_time": "3:41:37"} +{"current_steps": 4317, "total_steps": 6840, "loss": 0.4572887420654297, "lr": 6.566172753382376e-06, 
"epoch": 1.2623190524930545, "percentage": 63.11, "elapsed_time": "6:19:04", "remaining_time": "3:41:32"} +{"current_steps": 4318, "total_steps": 6840, "loss": 0.5235984325408936, "lr": 6.561632419452532e-06, "epoch": 1.2626114929083199, "percentage": 63.13, "elapsed_time": "6:19:09", "remaining_time": "3:41:27"} +{"current_steps": 4319, "total_steps": 6840, "loss": 0.586036205291748, "lr": 6.557092889220206e-06, "epoch": 1.2629039333235852, "percentage": 63.14, "elapsed_time": "6:19:14", "remaining_time": "3:41:21"} +{"current_steps": 4320, "total_steps": 6840, "loss": 0.4728356599807739, "lr": 6.5525541637464855e-06, "epoch": 1.2631963737388507, "percentage": 63.16, "elapsed_time": "6:19:19", "remaining_time": "3:41:16"} +{"current_steps": 4321, "total_steps": 6840, "loss": 0.4932190179824829, "lr": 6.548016244092265e-06, "epoch": 1.263488814154116, "percentage": 63.17, "elapsed_time": "6:19:24", "remaining_time": "3:41:11"} +{"current_steps": 4322, "total_steps": 6840, "loss": 0.525676429271698, "lr": 6.543479131318259e-06, "epoch": 1.2637812545693814, "percentage": 63.19, "elapsed_time": "6:19:30", "remaining_time": "3:41:06"} +{"current_steps": 4323, "total_steps": 6840, "loss": 0.5462610721588135, "lr": 6.538942826484991e-06, "epoch": 1.264073694984647, "percentage": 63.2, "elapsed_time": "6:19:36", "remaining_time": "3:41:01"} +{"current_steps": 4324, "total_steps": 6840, "loss": 0.5391229391098022, "lr": 6.534407330652792e-06, "epoch": 1.2643661353999123, "percentage": 63.22, "elapsed_time": "6:19:41", "remaining_time": "3:40:56"} +{"current_steps": 4325, "total_steps": 6840, "loss": 0.5361309051513672, "lr": 6.529872644881811e-06, "epoch": 1.2646585758151776, "percentage": 63.23, "elapsed_time": "6:19:46", "remaining_time": "3:40:50"} +{"current_steps": 4326, "total_steps": 6840, "loss": 0.5692390203475952, "lr": 6.525338770232001e-06, "epoch": 1.2649510162304431, "percentage": 63.25, "elapsed_time": "6:19:52", "remaining_time": "3:40:45"} 
+{"current_steps": 4327, "total_steps": 6840, "loss": 0.5337555408477783, "lr": 6.520805707763125e-06, "epoch": 1.2652434566457085, "percentage": 63.26, "elapsed_time": "6:19:57", "remaining_time": "3:40:40"} +{"current_steps": 4328, "total_steps": 6840, "loss": 0.604168176651001, "lr": 6.5162734585347605e-06, "epoch": 1.2655358970609738, "percentage": 63.27, "elapsed_time": "6:20:03", "remaining_time": "3:40:35"} +{"current_steps": 4329, "total_steps": 6840, "loss": 0.5404821038246155, "lr": 6.5117420236062955e-06, "epoch": 1.2658283374762394, "percentage": 63.29, "elapsed_time": "6:20:08", "remaining_time": "3:40:29"} +{"current_steps": 4330, "total_steps": 6840, "loss": 0.6097038388252258, "lr": 6.507211404036922e-06, "epoch": 1.2661207778915047, "percentage": 63.3, "elapsed_time": "6:20:14", "remaining_time": "3:40:24"} +{"current_steps": 4331, "total_steps": 6840, "loss": 0.44309180974960327, "lr": 6.50268160088565e-06, "epoch": 1.26641321830677, "percentage": 63.32, "elapsed_time": "6:20:20", "remaining_time": "3:40:20"} +{"current_steps": 4332, "total_steps": 6840, "loss": 0.5703015923500061, "lr": 6.498152615211286e-06, "epoch": 1.2667056587220353, "percentage": 63.33, "elapsed_time": "6:20:26", "remaining_time": "3:40:15"} +{"current_steps": 4333, "total_steps": 6840, "loss": 0.5745347738265991, "lr": 6.4936244480724575e-06, "epoch": 1.2669980991373007, "percentage": 63.35, "elapsed_time": "6:20:32", "remaining_time": "3:40:10"} +{"current_steps": 4334, "total_steps": 6840, "loss": 0.6611922979354858, "lr": 6.489097100527595e-06, "epoch": 1.2672905395525662, "percentage": 63.36, "elapsed_time": "6:20:38", "remaining_time": "3:40:05"} +{"current_steps": 4335, "total_steps": 6840, "loss": 0.4560534358024597, "lr": 6.484570573634939e-06, "epoch": 1.2675829799678315, "percentage": 63.38, "elapsed_time": "6:20:44", "remaining_time": "3:40:00"} +{"current_steps": 4336, "total_steps": 6840, "loss": 0.3765673041343689, "lr": 6.480044868452535e-06, "epoch": 
1.2678754203830969, "percentage": 63.39, "elapsed_time": "6:20:49", "remaining_time": "3:39:55"} +{"current_steps": 4337, "total_steps": 6840, "loss": 0.6471004486083984, "lr": 6.475519986038246e-06, "epoch": 1.2681678607983624, "percentage": 63.41, "elapsed_time": "6:20:53", "remaining_time": "3:39:49"} +{"current_steps": 4338, "total_steps": 6840, "loss": 0.5639084577560425, "lr": 6.4709959274497284e-06, "epoch": 1.2684603012136277, "percentage": 63.42, "elapsed_time": "6:20:58", "remaining_time": "3:39:44"} +{"current_steps": 4339, "total_steps": 6840, "loss": 0.6367507576942444, "lr": 6.4664726937444545e-06, "epoch": 1.268752741628893, "percentage": 63.44, "elapsed_time": "6:21:04", "remaining_time": "3:39:39"} +{"current_steps": 4340, "total_steps": 6840, "loss": 0.6803586483001709, "lr": 6.4619502859797055e-06, "epoch": 1.2690451820441586, "percentage": 63.45, "elapsed_time": "6:21:09", "remaining_time": "3:39:33"} +{"current_steps": 4341, "total_steps": 6840, "loss": 0.49068397283554077, "lr": 6.457428705212565e-06, "epoch": 1.269337622459424, "percentage": 63.46, "elapsed_time": "6:21:16", "remaining_time": "3:39:29"} +{"current_steps": 4342, "total_steps": 6840, "loss": 0.616880476474762, "lr": 6.4529079524999296e-06, "epoch": 1.2696300628746893, "percentage": 63.48, "elapsed_time": "6:21:20", "remaining_time": "3:39:23"} +{"current_steps": 4343, "total_steps": 6840, "loss": 0.45614945888519287, "lr": 6.448388028898489e-06, "epoch": 1.2699225032899546, "percentage": 63.49, "elapsed_time": "6:21:27", "remaining_time": "3:39:18"} +{"current_steps": 4344, "total_steps": 6840, "loss": 0.49267178773880005, "lr": 6.443868935464754e-06, "epoch": 1.27021494370522, "percentage": 63.51, "elapsed_time": "6:21:31", "remaining_time": "3:39:13"} +{"current_steps": 4345, "total_steps": 6840, "loss": 0.5169225335121155, "lr": 6.439350673255033e-06, "epoch": 1.2705073841204855, "percentage": 63.52, "elapsed_time": "6:21:36", "remaining_time": "3:39:07"} +{"current_steps": 
4346, "total_steps": 6840, "loss": 0.4999169111251831, "lr": 6.434833243325442e-06, "epoch": 1.2707998245357508, "percentage": 63.54, "elapsed_time": "6:21:43", "remaining_time": "3:39:03"} +{"current_steps": 4347, "total_steps": 6840, "loss": 0.6282567977905273, "lr": 6.430316646731906e-06, "epoch": 1.2710922649510161, "percentage": 63.55, "elapsed_time": "6:21:47", "remaining_time": "3:38:57"} +{"current_steps": 4348, "total_steps": 6840, "loss": 0.5007494688034058, "lr": 6.425800884530151e-06, "epoch": 1.2713847053662817, "percentage": 63.57, "elapsed_time": "6:21:54", "remaining_time": "3:38:53"} +{"current_steps": 4349, "total_steps": 6840, "loss": 0.5178118944168091, "lr": 6.421285957775705e-06, "epoch": 1.271677145781547, "percentage": 63.58, "elapsed_time": "6:21:58", "remaining_time": "3:38:47"} +{"current_steps": 4350, "total_steps": 6840, "loss": 0.5473636388778687, "lr": 6.4167718675239075e-06, "epoch": 1.2719695861968123, "percentage": 63.6, "elapsed_time": "6:22:03", "remaining_time": "3:38:41"} +{"current_steps": 4351, "total_steps": 6840, "loss": 0.5863620042800903, "lr": 6.4122586148299004e-06, "epoch": 1.2722620266120779, "percentage": 63.61, "elapsed_time": "6:22:07", "remaining_time": "3:38:35"} +{"current_steps": 4352, "total_steps": 6840, "loss": 0.5301654934883118, "lr": 6.407746200748628e-06, "epoch": 1.2725544670273432, "percentage": 63.63, "elapsed_time": "6:22:13", "remaining_time": "3:38:30"} +{"current_steps": 4353, "total_steps": 6840, "loss": 0.5856075286865234, "lr": 6.403234626334842e-06, "epoch": 1.2728469074426085, "percentage": 63.64, "elapsed_time": "6:22:18", "remaining_time": "3:38:25"} +{"current_steps": 4354, "total_steps": 6840, "loss": 0.49686455726623535, "lr": 6.39872389264309e-06, "epoch": 1.273139347857874, "percentage": 63.65, "elapsed_time": "6:22:24", "remaining_time": "3:38:20"} +{"current_steps": 4355, "total_steps": 6840, "loss": 0.5032684803009033, "lr": 6.394214000727734e-06, "epoch": 1.2734317882731394, 
"percentage": 63.67, "elapsed_time": "6:22:29", "remaining_time": "3:38:14"} +{"current_steps": 4356, "total_steps": 6840, "loss": 0.6855330467224121, "lr": 6.389704951642931e-06, "epoch": 1.2737242286884047, "percentage": 63.68, "elapsed_time": "6:22:33", "remaining_time": "3:38:09"} +{"current_steps": 4357, "total_steps": 6840, "loss": 0.5333864688873291, "lr": 6.385196746442644e-06, "epoch": 1.27401666910367, "percentage": 63.7, "elapsed_time": "6:22:39", "remaining_time": "3:38:04"} +{"current_steps": 4358, "total_steps": 6840, "loss": 0.5597629547119141, "lr": 6.380689386180641e-06, "epoch": 1.2743091095189354, "percentage": 63.71, "elapsed_time": "6:22:44", "remaining_time": "3:37:59"} +{"current_steps": 4359, "total_steps": 6840, "loss": 0.4576488137245178, "lr": 6.376182871910488e-06, "epoch": 1.274601549934201, "percentage": 63.73, "elapsed_time": "6:22:48", "remaining_time": "3:37:52"} +{"current_steps": 4360, "total_steps": 6840, "loss": 0.45165061950683594, "lr": 6.371677204685555e-06, "epoch": 1.2748939903494663, "percentage": 63.74, "elapsed_time": "6:22:52", "remaining_time": "3:37:47"} +{"current_steps": 4361, "total_steps": 6840, "loss": 0.5451514720916748, "lr": 6.367172385559014e-06, "epoch": 1.2751864307647316, "percentage": 63.76, "elapsed_time": "6:22:58", "remaining_time": "3:37:42"} +{"current_steps": 4362, "total_steps": 6840, "loss": 0.6141163110733032, "lr": 6.362668415583841e-06, "epoch": 1.2754788711799971, "percentage": 63.77, "elapsed_time": "6:23:03", "remaining_time": "3:37:36"} +{"current_steps": 4363, "total_steps": 6840, "loss": 0.5156669020652771, "lr": 6.358165295812809e-06, "epoch": 1.2757713115952625, "percentage": 63.79, "elapsed_time": "6:23:09", "remaining_time": "3:37:31"} +{"current_steps": 4364, "total_steps": 6840, "loss": 0.41485118865966797, "lr": 6.3536630272984974e-06, "epoch": 1.2760637520105278, "percentage": 63.8, "elapsed_time": "6:23:13", "remaining_time": "3:37:25"} +{"current_steps": 4365, "total_steps": 
6840, "loss": 0.386514276266098, "lr": 6.3491616110932845e-06, "epoch": 1.2763561924257933, "percentage": 63.82, "elapsed_time": "6:23:19", "remaining_time": "3:37:21"} +{"current_steps": 4366, "total_steps": 6840, "loss": 0.5620483160018921, "lr": 6.344661048249345e-06, "epoch": 1.2766486328410587, "percentage": 63.83, "elapsed_time": "6:23:25", "remaining_time": "3:37:15"} +{"current_steps": 4367, "total_steps": 6840, "loss": 0.4910007119178772, "lr": 6.340161339818662e-06, "epoch": 1.276941073256324, "percentage": 63.85, "elapsed_time": "6:23:28", "remaining_time": "3:37:09"} +{"current_steps": 4368, "total_steps": 6840, "loss": 0.4628123939037323, "lr": 6.335662486853014e-06, "epoch": 1.2772335136715895, "percentage": 63.86, "elapsed_time": "6:23:33", "remaining_time": "3:37:04"} +{"current_steps": 4369, "total_steps": 6840, "loss": 0.5129125118255615, "lr": 6.331164490403978e-06, "epoch": 1.2775259540868549, "percentage": 63.87, "elapsed_time": "6:23:38", "remaining_time": "3:36:58"} +{"current_steps": 4370, "total_steps": 6840, "loss": 0.45091521739959717, "lr": 6.326667351522939e-06, "epoch": 1.2778183945021202, "percentage": 63.89, "elapsed_time": "6:23:43", "remaining_time": "3:36:53"} +{"current_steps": 4371, "total_steps": 6840, "loss": 0.4914324879646301, "lr": 6.322171071261071e-06, "epoch": 1.2781108349173855, "percentage": 63.9, "elapsed_time": "6:23:48", "remaining_time": "3:36:48"} +{"current_steps": 4372, "total_steps": 6840, "loss": 0.6361461877822876, "lr": 6.317675650669353e-06, "epoch": 1.2784032753326509, "percentage": 63.92, "elapsed_time": "6:23:52", "remaining_time": "3:36:42"} +{"current_steps": 4373, "total_steps": 6840, "loss": 0.4251636564731598, "lr": 6.313181090798561e-06, "epoch": 1.2786957157479164, "percentage": 63.93, "elapsed_time": "6:23:58", "remaining_time": "3:36:37"} +{"current_steps": 4374, "total_steps": 6840, "loss": 0.5605714321136475, "lr": 6.308687392699275e-06, "epoch": 1.2789881561631817, "percentage": 63.95, 
"elapsed_time": "6:24:03", "remaining_time": "3:36:31"} +{"current_steps": 4375, "total_steps": 6840, "loss": 0.5366392731666565, "lr": 6.304194557421867e-06, "epoch": 1.279280596578447, "percentage": 63.96, "elapsed_time": "6:24:09", "remaining_time": "3:36:26"} +{"current_steps": 4376, "total_steps": 6840, "loss": 0.5501587986946106, "lr": 6.299702586016512e-06, "epoch": 1.2795730369937126, "percentage": 63.98, "elapsed_time": "6:24:12", "remaining_time": "3:36:20"} +{"current_steps": 4377, "total_steps": 6840, "loss": 0.6145694851875305, "lr": 6.295211479533177e-06, "epoch": 1.279865477408978, "percentage": 63.99, "elapsed_time": "6:24:16", "remaining_time": "3:36:14"} +{"current_steps": 4378, "total_steps": 6840, "loss": 0.5921984910964966, "lr": 6.2907212390216335e-06, "epoch": 1.2801579178242433, "percentage": 64.01, "elapsed_time": "6:24:22", "remaining_time": "3:36:09"} +{"current_steps": 4379, "total_steps": 6840, "loss": 0.4376833140850067, "lr": 6.286231865531447e-06, "epoch": 1.2804503582395088, "percentage": 64.02, "elapsed_time": "6:24:27", "remaining_time": "3:36:03"} +{"current_steps": 4380, "total_steps": 6840, "loss": 0.5141662955284119, "lr": 6.281743360111983e-06, "epoch": 1.2807427986547741, "percentage": 64.04, "elapsed_time": "6:24:31", "remaining_time": "3:35:58"} +{"current_steps": 4381, "total_steps": 6840, "loss": 0.7065848112106323, "lr": 6.2772557238124025e-06, "epoch": 1.2810352390700395, "percentage": 64.05, "elapsed_time": "6:24:38", "remaining_time": "3:35:53"} +{"current_steps": 4382, "total_steps": 6840, "loss": 0.5662813186645508, "lr": 6.272768957681659e-06, "epoch": 1.2813276794853048, "percentage": 64.06, "elapsed_time": "6:24:43", "remaining_time": "3:35:48"} +{"current_steps": 4383, "total_steps": 6840, "loss": 0.46340662240982056, "lr": 6.268283062768512e-06, "epoch": 1.2816201199005701, "percentage": 64.08, "elapsed_time": "6:24:49", "remaining_time": "3:35:43"} +{"current_steps": 4384, "total_steps": 6840, "loss": 
0.5258422493934631, "lr": 6.263798040121508e-06, "epoch": 1.2819125603158357, "percentage": 64.09, "elapsed_time": "6:24:55", "remaining_time": "3:35:38"} +{"current_steps": 4385, "total_steps": 6840, "loss": 0.5586943030357361, "lr": 6.2593138907889965e-06, "epoch": 1.282205000731101, "percentage": 64.11, "elapsed_time": "6:25:00", "remaining_time": "3:35:32"} +{"current_steps": 4386, "total_steps": 6840, "loss": 0.5224723815917969, "lr": 6.254830615819116e-06, "epoch": 1.2824974411463663, "percentage": 64.12, "elapsed_time": "6:25:04", "remaining_time": "3:35:27"} +{"current_steps": 4387, "total_steps": 6840, "loss": 0.6092125177383423, "lr": 6.250348216259812e-06, "epoch": 1.2827898815616319, "percentage": 64.14, "elapsed_time": "6:25:10", "remaining_time": "3:35:22"} +{"current_steps": 4388, "total_steps": 6840, "loss": 0.5582839250564575, "lr": 6.245866693158813e-06, "epoch": 1.2830823219768972, "percentage": 64.15, "elapsed_time": "6:25:15", "remaining_time": "3:35:16"} +{"current_steps": 4389, "total_steps": 6840, "loss": 0.6074620485305786, "lr": 6.241386047563649e-06, "epoch": 1.2833747623921625, "percentage": 64.17, "elapsed_time": "6:25:20", "remaining_time": "3:35:11"} +{"current_steps": 4390, "total_steps": 6840, "loss": 0.6247550845146179, "lr": 6.236906280521646e-06, "epoch": 1.283667202807428, "percentage": 64.18, "elapsed_time": "6:25:25", "remaining_time": "3:35:06"} +{"current_steps": 4391, "total_steps": 6840, "loss": 0.5325940847396851, "lr": 6.232427393079919e-06, "epoch": 1.2839596432226934, "percentage": 64.2, "elapsed_time": "6:25:31", "remaining_time": "3:35:00"} +{"current_steps": 4392, "total_steps": 6840, "loss": 0.5082288980484009, "lr": 6.227949386285379e-06, "epoch": 1.2842520836379587, "percentage": 64.21, "elapsed_time": "6:25:35", "remaining_time": "3:34:55"} +{"current_steps": 4393, "total_steps": 6840, "loss": 0.5704036355018616, "lr": 6.223472261184738e-06, "epoch": 1.2845445240532243, "percentage": 64.23, "elapsed_time": 
"6:25:42", "remaining_time": "3:34:51"} +{"current_steps": 4394, "total_steps": 6840, "loss": 0.5301543474197388, "lr": 6.218996018824492e-06, "epoch": 1.2848369644684896, "percentage": 64.24, "elapsed_time": "6:25:47", "remaining_time": "3:34:45"} +{"current_steps": 4395, "total_steps": 6840, "loss": 0.48660725355148315, "lr": 6.21452066025094e-06, "epoch": 1.285129404883755, "percentage": 64.25, "elapsed_time": "6:25:52", "remaining_time": "3:34:40"} +{"current_steps": 4396, "total_steps": 6840, "loss": 0.5744560956954956, "lr": 6.210046186510168e-06, "epoch": 1.2854218452990203, "percentage": 64.27, "elapsed_time": "6:25:58", "remaining_time": "3:34:35"} +{"current_steps": 4397, "total_steps": 6840, "loss": 0.5714898109436035, "lr": 6.205572598648055e-06, "epoch": 1.2857142857142856, "percentage": 64.28, "elapsed_time": "6:26:03", "remaining_time": "3:34:30"} +{"current_steps": 4398, "total_steps": 6840, "loss": 0.6616571545600891, "lr": 6.201099897710277e-06, "epoch": 1.2860067261295511, "percentage": 64.3, "elapsed_time": "6:26:08", "remaining_time": "3:34:24"} +{"current_steps": 4399, "total_steps": 6840, "loss": 0.5552959442138672, "lr": 6.1966280847423e-06, "epoch": 1.2862991665448165, "percentage": 64.31, "elapsed_time": "6:26:13", "remaining_time": "3:34:18"} +{"current_steps": 4400, "total_steps": 6840, "loss": 0.5544919967651367, "lr": 6.192157160789382e-06, "epoch": 1.2865916069600818, "percentage": 64.33, "elapsed_time": "6:26:19", "remaining_time": "3:34:14"} +{"current_steps": 4401, "total_steps": 6840, "loss": 0.5914726853370667, "lr": 6.18768712689658e-06, "epoch": 1.2868840473753473, "percentage": 64.34, "elapsed_time": "6:26:28", "remaining_time": "3:34:10"} +{"current_steps": 4402, "total_steps": 6840, "loss": 0.47191259264945984, "lr": 6.183217984108729e-06, "epoch": 1.2871764877906127, "percentage": 64.36, "elapsed_time": "6:26:32", "remaining_time": "3:34:04"} +{"current_steps": 4403, "total_steps": 6840, "loss": 0.6479181051254272, "lr": 
6.178749733470468e-06, "epoch": 1.287468928205878, "percentage": 64.37, "elapsed_time": "6:26:37", "remaining_time": "3:33:59"} +{"current_steps": 4404, "total_steps": 6840, "loss": 0.42491137981414795, "lr": 6.174282376026225e-06, "epoch": 1.2877613686211435, "percentage": 64.39, "elapsed_time": "6:26:42", "remaining_time": "3:33:53"} +{"current_steps": 4405, "total_steps": 6840, "loss": 0.6037728786468506, "lr": 6.169815912820214e-06, "epoch": 1.2880538090364089, "percentage": 64.4, "elapsed_time": "6:26:47", "remaining_time": "3:33:48"} +{"current_steps": 4406, "total_steps": 6840, "loss": 0.4979787766933441, "lr": 6.165350344896446e-06, "epoch": 1.2883462494516742, "percentage": 64.42, "elapsed_time": "6:26:50", "remaining_time": "3:33:42"} +{"current_steps": 4407, "total_steps": 6840, "loss": 0.5863564014434814, "lr": 6.160885673298722e-06, "epoch": 1.2886386898669397, "percentage": 64.43, "elapsed_time": "6:26:56", "remaining_time": "3:33:37"} +{"current_steps": 4408, "total_steps": 6840, "loss": 0.6516878008842468, "lr": 6.156421899070628e-06, "epoch": 1.288931130282205, "percentage": 64.44, "elapsed_time": "6:27:01", "remaining_time": "3:33:31"} +{"current_steps": 4409, "total_steps": 6840, "loss": 0.45655903220176697, "lr": 6.151959023255545e-06, "epoch": 1.2892235706974704, "percentage": 64.46, "elapsed_time": "6:27:06", "remaining_time": "3:33:26"} +{"current_steps": 4410, "total_steps": 6840, "loss": 0.4751289486885071, "lr": 6.147497046896644e-06, "epoch": 1.2895160111127357, "percentage": 64.47, "elapsed_time": "6:27:11", "remaining_time": "3:33:21"} +{"current_steps": 4411, "total_steps": 6840, "loss": 0.48472684621810913, "lr": 6.1430359710368845e-06, "epoch": 1.289808451528001, "percentage": 64.49, "elapsed_time": "6:27:16", "remaining_time": "3:33:15"} +{"current_steps": 4412, "total_steps": 6840, "loss": 0.5014214515686035, "lr": 6.138575796719017e-06, "epoch": 1.2901008919432666, "percentage": 64.5, "elapsed_time": "6:27:21", "remaining_time": 
"3:33:10"} +{"current_steps": 4413, "total_steps": 6840, "loss": 0.5979991555213928, "lr": 6.134116524985581e-06, "epoch": 1.290393332358532, "percentage": 64.52, "elapsed_time": "6:27:26", "remaining_time": "3:33:04"} +{"current_steps": 4414, "total_steps": 6840, "loss": 0.4651130437850952, "lr": 6.129658156878899e-06, "epoch": 1.2906857727737973, "percentage": 64.53, "elapsed_time": "6:27:31", "remaining_time": "3:32:59"} +{"current_steps": 4415, "total_steps": 6840, "loss": 0.5938215255737305, "lr": 6.125200693441092e-06, "epoch": 1.2909782131890628, "percentage": 64.55, "elapsed_time": "6:27:36", "remaining_time": "3:32:54"} +{"current_steps": 4416, "total_steps": 6840, "loss": 0.4893927574157715, "lr": 6.1207441357140626e-06, "epoch": 1.2912706536043281, "percentage": 64.56, "elapsed_time": "6:27:41", "remaining_time": "3:32:48"} +{"current_steps": 4417, "total_steps": 6840, "loss": 0.5546435713768005, "lr": 6.116288484739507e-06, "epoch": 1.2915630940195935, "percentage": 64.58, "elapsed_time": "6:27:46", "remaining_time": "3:32:43"} +{"current_steps": 4418, "total_steps": 6840, "loss": 0.545367419719696, "lr": 6.111833741558905e-06, "epoch": 1.291855534434859, "percentage": 64.59, "elapsed_time": "6:27:51", "remaining_time": "3:32:37"} +{"current_steps": 4419, "total_steps": 6840, "loss": 0.47479283809661865, "lr": 6.1073799072135245e-06, "epoch": 1.2921479748501243, "percentage": 64.61, "elapsed_time": "6:27:56", "remaining_time": "3:32:32"} +{"current_steps": 4420, "total_steps": 6840, "loss": 0.5109270215034485, "lr": 6.102926982744423e-06, "epoch": 1.2924404152653897, "percentage": 64.62, "elapsed_time": "6:28:00", "remaining_time": "3:32:26"} +{"current_steps": 4421, "total_steps": 6840, "loss": 0.5862404108047485, "lr": 6.098474969192445e-06, "epoch": 1.292732855680655, "percentage": 64.63, "elapsed_time": "6:28:04", "remaining_time": "3:32:20"} +{"current_steps": 4422, "total_steps": 6840, "loss": 0.5031660795211792, "lr": 6.09402386759822e-06, 
"epoch": 1.2930252960959203, "percentage": 64.65, "elapsed_time": "6:28:09", "remaining_time": "3:32:15"} +{"current_steps": 4423, "total_steps": 6840, "loss": 0.47179776430130005, "lr": 6.089573679002168e-06, "epoch": 1.2933177365111859, "percentage": 64.66, "elapsed_time": "6:28:13", "remaining_time": "3:32:09"} +{"current_steps": 4424, "total_steps": 6840, "loss": 0.45889902114868164, "lr": 6.085124404444495e-06, "epoch": 1.2936101769264512, "percentage": 64.68, "elapsed_time": "6:28:18", "remaining_time": "3:32:03"} +{"current_steps": 4425, "total_steps": 6840, "loss": 0.49759042263031006, "lr": 6.080676044965188e-06, "epoch": 1.2939026173417165, "percentage": 64.69, "elapsed_time": "6:28:23", "remaining_time": "3:31:58"} +{"current_steps": 4426, "total_steps": 6840, "loss": 0.5980732440948486, "lr": 6.076228601604024e-06, "epoch": 1.294195057756982, "percentage": 64.71, "elapsed_time": "6:28:27", "remaining_time": "3:31:52"} +{"current_steps": 4427, "total_steps": 6840, "loss": 0.6167548894882202, "lr": 6.07178207540057e-06, "epoch": 1.2944874981722474, "percentage": 64.72, "elapsed_time": "6:28:32", "remaining_time": "3:31:46"} +{"current_steps": 4428, "total_steps": 6840, "loss": 0.5632568597793579, "lr": 6.067336467394169e-06, "epoch": 1.2947799385875127, "percentage": 64.74, "elapsed_time": "6:28:37", "remaining_time": "3:31:41"} +{"current_steps": 4429, "total_steps": 6840, "loss": 0.5521456003189087, "lr": 6.062891778623961e-06, "epoch": 1.2950723790027783, "percentage": 64.75, "elapsed_time": "6:28:42", "remaining_time": "3:31:35"} +{"current_steps": 4430, "total_steps": 6840, "loss": 0.5916576385498047, "lr": 6.058448010128861e-06, "epoch": 1.2953648194180436, "percentage": 64.77, "elapsed_time": "6:28:49", "remaining_time": "3:31:31"} +{"current_steps": 4431, "total_steps": 6840, "loss": 0.546825647354126, "lr": 6.054005162947571e-06, "epoch": 1.295657259833309, "percentage": 64.78, "elapsed_time": "6:28:55", "remaining_time": "3:31:26"} 
+{"current_steps": 4432, "total_steps": 6840, "loss": 0.5704302787780762, "lr": 6.049563238118584e-06, "epoch": 1.2959497002485745, "percentage": 64.8, "elapsed_time": "6:29:00", "remaining_time": "3:31:21"} +{"current_steps": 4433, "total_steps": 6840, "loss": 0.5791710615158081, "lr": 6.0451222366801706e-06, "epoch": 1.2962421406638398, "percentage": 64.81, "elapsed_time": "6:29:05", "remaining_time": "3:31:15"} +{"current_steps": 4434, "total_steps": 6840, "loss": 0.41179752349853516, "lr": 6.040682159670389e-06, "epoch": 1.2965345810791051, "percentage": 64.82, "elapsed_time": "6:29:09", "remaining_time": "3:31:10"} +{"current_steps": 4435, "total_steps": 6840, "loss": 0.5213680267333984, "lr": 6.03624300812708e-06, "epoch": 1.2968270214943705, "percentage": 64.84, "elapsed_time": "6:29:14", "remaining_time": "3:31:04"} +{"current_steps": 4436, "total_steps": 6840, "loss": 0.4917318522930145, "lr": 6.0318047830878675e-06, "epoch": 1.2971194619096358, "percentage": 64.85, "elapsed_time": "6:29:19", "remaining_time": "3:30:59"} +{"current_steps": 4437, "total_steps": 6840, "loss": 0.6347956657409668, "lr": 6.027367485590159e-06, "epoch": 1.2974119023249013, "percentage": 64.87, "elapsed_time": "6:29:23", "remaining_time": "3:30:53"} +{"current_steps": 4438, "total_steps": 6840, "loss": 0.5263427495956421, "lr": 6.022931116671147e-06, "epoch": 1.2977043427401667, "percentage": 64.88, "elapsed_time": "6:29:29", "remaining_time": "3:30:48"} +{"current_steps": 4439, "total_steps": 6840, "loss": 0.5686784982681274, "lr": 6.018495677367806e-06, "epoch": 1.297996783155432, "percentage": 64.9, "elapsed_time": "6:29:35", "remaining_time": "3:30:43"} +{"current_steps": 4440, "total_steps": 6840, "loss": 0.576974630355835, "lr": 6.0140611687168934e-06, "epoch": 1.2982892235706975, "percentage": 64.91, "elapsed_time": "6:29:40", "remaining_time": "3:30:37"} +{"current_steps": 4441, "total_steps": 6840, "loss": 0.5375877618789673, "lr": 6.009627591754946e-06, "epoch": 
1.2985816639859629, "percentage": 64.93, "elapsed_time": "6:29:45", "remaining_time": "3:30:32"} +{"current_steps": 4442, "total_steps": 6840, "loss": 0.6106576919555664, "lr": 6.005194947518287e-06, "epoch": 1.2988741044012282, "percentage": 64.94, "elapsed_time": "6:29:51", "remaining_time": "3:30:28"} +{"current_steps": 4443, "total_steps": 6840, "loss": 0.475483238697052, "lr": 6.000763237043021e-06, "epoch": 1.2991665448164937, "percentage": 64.96, "elapsed_time": "6:29:57", "remaining_time": "3:30:22"} +{"current_steps": 4444, "total_steps": 6840, "loss": 0.5819226503372192, "lr": 5.9963324613650335e-06, "epoch": 1.299458985231759, "percentage": 64.97, "elapsed_time": "6:30:03", "remaining_time": "3:30:17"} +{"current_steps": 4445, "total_steps": 6840, "loss": 0.6394410133361816, "lr": 5.991902621519988e-06, "epoch": 1.2997514256470244, "percentage": 64.99, "elapsed_time": "6:30:08", "remaining_time": "3:30:12"} +{"current_steps": 4446, "total_steps": 6840, "loss": 0.48502016067504883, "lr": 5.987473718543338e-06, "epoch": 1.30004386606229, "percentage": 65.0, "elapsed_time": "6:30:15", "remaining_time": "3:30:08"} +{"current_steps": 4447, "total_steps": 6840, "loss": 0.5782333612442017, "lr": 5.983045753470308e-06, "epoch": 1.3003363064775553, "percentage": 65.01, "elapsed_time": "6:30:20", "remaining_time": "3:30:02"} +{"current_steps": 4448, "total_steps": 6840, "loss": 0.5498893857002258, "lr": 5.97861872733591e-06, "epoch": 1.3006287468928206, "percentage": 65.03, "elapsed_time": "6:30:24", "remaining_time": "3:29:57"} +{"current_steps": 4449, "total_steps": 6840, "loss": 0.47757571935653687, "lr": 5.974192641174934e-06, "epoch": 1.300921187308086, "percentage": 65.04, "elapsed_time": "6:30:30", "remaining_time": "3:29:52"} +{"current_steps": 4450, "total_steps": 6840, "loss": 0.5401994585990906, "lr": 5.96976749602195e-06, "epoch": 1.3012136277233513, "percentage": 65.06, "elapsed_time": "6:30:36", "remaining_time": "3:29:47"} +{"current_steps": 4451, 
"total_steps": 6840, "loss": 0.5818814635276794, "lr": 5.965343292911309e-06, "epoch": 1.3015060681386168, "percentage": 65.07, "elapsed_time": "6:30:40", "remaining_time": "3:29:41"} +{"current_steps": 4452, "total_steps": 6840, "loss": 0.524645984172821, "lr": 5.9609200328771465e-06, "epoch": 1.3017985085538821, "percentage": 65.09, "elapsed_time": "6:30:47", "remaining_time": "3:29:36"} +{"current_steps": 4453, "total_steps": 6840, "loss": 0.46523183584213257, "lr": 5.956497716953365e-06, "epoch": 1.3020909489691475, "percentage": 65.1, "elapsed_time": "6:30:52", "remaining_time": "3:29:31"} +{"current_steps": 4454, "total_steps": 6840, "loss": 0.6066159009933472, "lr": 5.952076346173657e-06, "epoch": 1.302383389384413, "percentage": 65.12, "elapsed_time": "6:30:57", "remaining_time": "3:29:26"} +{"current_steps": 4455, "total_steps": 6840, "loss": 0.48635774850845337, "lr": 5.947655921571491e-06, "epoch": 1.3026758297996783, "percentage": 65.13, "elapsed_time": "6:31:02", "remaining_time": "3:29:20"} +{"current_steps": 4456, "total_steps": 6840, "loss": 0.5159435868263245, "lr": 5.943236444180116e-06, "epoch": 1.3029682702149437, "percentage": 65.15, "elapsed_time": "6:31:07", "remaining_time": "3:29:15"} +{"current_steps": 4457, "total_steps": 6840, "loss": 0.5566878914833069, "lr": 5.938817915032558e-06, "epoch": 1.3032607106302092, "percentage": 65.16, "elapsed_time": "6:31:13", "remaining_time": "3:29:10"} +{"current_steps": 4458, "total_steps": 6840, "loss": 0.46998029947280884, "lr": 5.934400335161618e-06, "epoch": 1.3035531510454745, "percentage": 65.18, "elapsed_time": "6:31:18", "remaining_time": "3:29:04"} +{"current_steps": 4459, "total_steps": 6840, "loss": 0.5554553270339966, "lr": 5.92998370559988e-06, "epoch": 1.3038455914607399, "percentage": 65.19, "elapsed_time": "6:31:22", "remaining_time": "3:28:59"} +{"current_steps": 4460, "total_steps": 6840, "loss": 0.5659651756286621, "lr": 5.925568027379704e-06, "epoch": 1.3041380318760052, 
"percentage": 65.2, "elapsed_time": "6:31:27", "remaining_time": "3:28:53"} +{"current_steps": 4461, "total_steps": 6840, "loss": 0.5105445981025696, "lr": 5.921153301533229e-06, "epoch": 1.3044304722912705, "percentage": 65.22, "elapsed_time": "6:31:33", "remaining_time": "3:28:48"} +{"current_steps": 4462, "total_steps": 6840, "loss": 0.5255740284919739, "lr": 5.91673952909237e-06, "epoch": 1.304722912706536, "percentage": 65.23, "elapsed_time": "6:31:38", "remaining_time": "3:28:43"} +{"current_steps": 4463, "total_steps": 6840, "loss": 0.5691270232200623, "lr": 5.912326711088821e-06, "epoch": 1.3050153531218014, "percentage": 65.25, "elapsed_time": "6:31:44", "remaining_time": "3:28:38"} +{"current_steps": 4464, "total_steps": 6840, "loss": 0.5783474445343018, "lr": 5.907914848554048e-06, "epoch": 1.3053077935370667, "percentage": 65.26, "elapsed_time": "6:31:50", "remaining_time": "3:28:33"} +{"current_steps": 4465, "total_steps": 6840, "loss": 0.6305002570152283, "lr": 5.903503942519299e-06, "epoch": 1.3056002339523323, "percentage": 65.28, "elapsed_time": "6:31:55", "remaining_time": "3:28:27"} +{"current_steps": 4466, "total_steps": 6840, "loss": 0.6465631723403931, "lr": 5.8990939940156e-06, "epoch": 1.3058926743675976, "percentage": 65.29, "elapsed_time": "6:32:00", "remaining_time": "3:28:22"} +{"current_steps": 4467, "total_steps": 6840, "loss": 0.4883456230163574, "lr": 5.8946850040737434e-06, "epoch": 1.306185114782863, "percentage": 65.31, "elapsed_time": "6:32:04", "remaining_time": "3:28:17"} +{"current_steps": 4468, "total_steps": 6840, "loss": 0.4896056056022644, "lr": 5.890276973724305e-06, "epoch": 1.3064775551981285, "percentage": 65.32, "elapsed_time": "6:32:09", "remaining_time": "3:28:11"} +{"current_steps": 4469, "total_steps": 6840, "loss": 0.603757917881012, "lr": 5.885869903997638e-06, "epoch": 1.3067699956133938, "percentage": 65.34, "elapsed_time": "6:32:15", "remaining_time": "3:28:06"} +{"current_steps": 4470, "total_steps": 6840, 
"loss": 0.5412129163742065, "lr": 5.881463795923866e-06, "epoch": 1.3070624360286591, "percentage": 65.35, "elapsed_time": "6:32:20", "remaining_time": "3:28:01"} +{"current_steps": 4471, "total_steps": 6840, "loss": 0.5255335569381714, "lr": 5.877058650532891e-06, "epoch": 1.3073548764439247, "percentage": 65.37, "elapsed_time": "6:32:24", "remaining_time": "3:27:55"} +{"current_steps": 4472, "total_steps": 6840, "loss": 0.5855039358139038, "lr": 5.87265446885439e-06, "epoch": 1.30764731685919, "percentage": 65.38, "elapsed_time": "6:32:29", "remaining_time": "3:27:50"} +{"current_steps": 4473, "total_steps": 6840, "loss": 0.5763603448867798, "lr": 5.868251251917811e-06, "epoch": 1.3079397572744553, "percentage": 65.39, "elapsed_time": "6:32:33", "remaining_time": "3:27:44"} +{"current_steps": 4474, "total_steps": 6840, "loss": 0.5148910880088806, "lr": 5.86384900075238e-06, "epoch": 1.3082321976897207, "percentage": 65.41, "elapsed_time": "6:32:39", "remaining_time": "3:27:39"} +{"current_steps": 4475, "total_steps": 6840, "loss": 0.6387143135070801, "lr": 5.859447716387097e-06, "epoch": 1.308524638104986, "percentage": 65.42, "elapsed_time": "6:32:45", "remaining_time": "3:27:34"} +{"current_steps": 4476, "total_steps": 6840, "loss": 0.5492211580276489, "lr": 5.855047399850735e-06, "epoch": 1.3088170785202515, "percentage": 65.44, "elapsed_time": "6:32:50", "remaining_time": "3:27:28"} +{"current_steps": 4477, "total_steps": 6840, "loss": 0.5715115070343018, "lr": 5.850648052171843e-06, "epoch": 1.3091095189355169, "percentage": 65.45, "elapsed_time": "6:32:55", "remaining_time": "3:27:23"} +{"current_steps": 4478, "total_steps": 6840, "loss": 0.6295989155769348, "lr": 5.8462496743787385e-06, "epoch": 1.3094019593507822, "percentage": 65.47, "elapsed_time": "6:32:59", "remaining_time": "3:27:17"} +{"current_steps": 4479, "total_steps": 6840, "loss": 0.5843105316162109, "lr": 5.841852267499518e-06, "epoch": 1.3096943997660477, "percentage": 65.48, "elapsed_time": 
"6:33:05", "remaining_time": "3:27:12"} +{"current_steps": 4480, "total_steps": 6840, "loss": 0.43283605575561523, "lr": 5.837455832562049e-06, "epoch": 1.309986840181313, "percentage": 65.5, "elapsed_time": "6:33:09", "remaining_time": "3:27:06"} +{"current_steps": 4481, "total_steps": 6840, "loss": 0.6115404367446899, "lr": 5.8330603705939684e-06, "epoch": 1.3102792805965784, "percentage": 65.51, "elapsed_time": "6:33:14", "remaining_time": "3:27:01"} +{"current_steps": 4482, "total_steps": 6840, "loss": 0.4274179935455322, "lr": 5.828665882622692e-06, "epoch": 1.310571721011844, "percentage": 65.53, "elapsed_time": "6:33:21", "remaining_time": "3:26:56"} +{"current_steps": 4483, "total_steps": 6840, "loss": 0.4385778307914734, "lr": 5.824272369675403e-06, "epoch": 1.3108641614271093, "percentage": 65.54, "elapsed_time": "6:33:26", "remaining_time": "3:26:51"} +{"current_steps": 4484, "total_steps": 6840, "loss": 0.6310205459594727, "lr": 5.819879832779058e-06, "epoch": 1.3111566018423746, "percentage": 65.56, "elapsed_time": "6:33:32", "remaining_time": "3:26:46"} +{"current_steps": 4485, "total_steps": 6840, "loss": 0.6309192180633545, "lr": 5.815488272960388e-06, "epoch": 1.3114490422576401, "percentage": 65.57, "elapsed_time": "6:33:37", "remaining_time": "3:26:41"} +{"current_steps": 4486, "total_steps": 6840, "loss": 0.4751497507095337, "lr": 5.811097691245895e-06, "epoch": 1.3117414826729055, "percentage": 65.58, "elapsed_time": "6:33:41", "remaining_time": "3:26:35"} +{"current_steps": 4487, "total_steps": 6840, "loss": 0.5540175437927246, "lr": 5.806708088661846e-06, "epoch": 1.3120339230881708, "percentage": 65.6, "elapsed_time": "6:33:46", "remaining_time": "3:26:29"} +{"current_steps": 4488, "total_steps": 6840, "loss": 0.5533273816108704, "lr": 5.802319466234283e-06, "epoch": 1.3123263635034361, "percentage": 65.61, "elapsed_time": "6:33:51", "remaining_time": "3:26:24"} +{"current_steps": 4489, "total_steps": 6840, "loss": 0.463643878698349, "lr": 
5.797931824989023e-06, "epoch": 1.3126188039187014, "percentage": 65.63, "elapsed_time": "6:33:57", "remaining_time": "3:26:19"} +{"current_steps": 4490, "total_steps": 6840, "loss": 0.5990232229232788, "lr": 5.79354516595165e-06, "epoch": 1.312911244333967, "percentage": 65.64, "elapsed_time": "6:34:01", "remaining_time": "3:26:13"} +{"current_steps": 4491, "total_steps": 6840, "loss": 0.5569760799407959, "lr": 5.789159490147518e-06, "epoch": 1.3132036847492323, "percentage": 65.66, "elapsed_time": "6:34:06", "remaining_time": "3:26:07"} +{"current_steps": 4492, "total_steps": 6840, "loss": 0.5016749501228333, "lr": 5.784774798601755e-06, "epoch": 1.3134961251644977, "percentage": 65.67, "elapsed_time": "6:34:10", "remaining_time": "3:26:02"} +{"current_steps": 4493, "total_steps": 6840, "loss": 0.5624934434890747, "lr": 5.780391092339253e-06, "epoch": 1.3137885655797632, "percentage": 65.69, "elapsed_time": "6:34:15", "remaining_time": "3:25:57"} +{"current_steps": 4494, "total_steps": 6840, "loss": 0.7445797920227051, "lr": 5.776008372384676e-06, "epoch": 1.3140810059950285, "percentage": 65.7, "elapsed_time": "6:34:22", "remaining_time": "3:25:52"} +{"current_steps": 4495, "total_steps": 6840, "loss": 0.5849495530128479, "lr": 5.771626639762461e-06, "epoch": 1.3143734464102939, "percentage": 65.72, "elapsed_time": "6:34:26", "remaining_time": "3:25:46"} +{"current_steps": 4496, "total_steps": 6840, "loss": 0.5672163367271423, "lr": 5.767245895496809e-06, "epoch": 1.3146658868255594, "percentage": 65.73, "elapsed_time": "6:34:31", "remaining_time": "3:25:41"} +{"current_steps": 4497, "total_steps": 6840, "loss": 0.5278276801109314, "lr": 5.762866140611698e-06, "epoch": 1.3149583272408247, "percentage": 65.75, "elapsed_time": "6:34:36", "remaining_time": "3:25:36"} +{"current_steps": 4498, "total_steps": 6840, "loss": 0.54908686876297, "lr": 5.7584873761308615e-06, "epoch": 1.31525076765609, "percentage": 65.76, "elapsed_time": "6:34:41", "remaining_time": 
"3:25:30"} +{"current_steps": 4499, "total_steps": 6840, "loss": 0.5257589817047119, "lr": 5.754109603077811e-06, "epoch": 1.3155432080713554, "percentage": 65.77, "elapsed_time": "6:34:45", "remaining_time": "3:25:24"} +{"current_steps": 4500, "total_steps": 6840, "loss": 0.5744988918304443, "lr": 5.749732822475825e-06, "epoch": 1.3158356484866207, "percentage": 65.79, "elapsed_time": "6:34:50", "remaining_time": "3:25:19"} +{"current_steps": 4501, "total_steps": 6840, "loss": 0.5186365246772766, "lr": 5.74535703534795e-06, "epoch": 1.3161280889018863, "percentage": 65.8, "elapsed_time": "6:35:01", "remaining_time": "3:25:16"} +{"current_steps": 4502, "total_steps": 6840, "loss": 0.53574538230896, "lr": 5.740982242716999e-06, "epoch": 1.3164205293171516, "percentage": 65.82, "elapsed_time": "6:35:06", "remaining_time": "3:25:11"} +{"current_steps": 4503, "total_steps": 6840, "loss": 0.6087717413902283, "lr": 5.736608445605555e-06, "epoch": 1.316712969732417, "percentage": 65.83, "elapsed_time": "6:35:12", "remaining_time": "3:25:06"} +{"current_steps": 4504, "total_steps": 6840, "loss": 0.5132769346237183, "lr": 5.732235645035964e-06, "epoch": 1.3170054101476825, "percentage": 65.85, "elapsed_time": "6:35:17", "remaining_time": "3:25:01"} +{"current_steps": 4505, "total_steps": 6840, "loss": 0.588458776473999, "lr": 5.727863842030342e-06, "epoch": 1.3172978505629478, "percentage": 65.86, "elapsed_time": "6:35:21", "remaining_time": "3:24:55"} +{"current_steps": 4506, "total_steps": 6840, "loss": 0.5154894590377808, "lr": 5.723493037610572e-06, "epoch": 1.3175902909782131, "percentage": 65.88, "elapsed_time": "6:35:26", "remaining_time": "3:24:49"} +{"current_steps": 4507, "total_steps": 6840, "loss": 0.586688220500946, "lr": 5.719123232798304e-06, "epoch": 1.3178827313934787, "percentage": 65.89, "elapsed_time": "6:35:33", "remaining_time": "3:24:45"} +{"current_steps": 4508, "total_steps": 6840, "loss": 0.4948856830596924, "lr": 5.714754428614956e-06, "epoch": 
1.318175171808744, "percentage": 65.91, "elapsed_time": "6:35:37", "remaining_time": "3:24:39"} +{"current_steps": 4509, "total_steps": 6840, "loss": 0.6179821491241455, "lr": 5.7103866260817005e-06, "epoch": 1.3184676122240093, "percentage": 65.92, "elapsed_time": "6:35:43", "remaining_time": "3:24:34"} +{"current_steps": 4510, "total_steps": 6840, "loss": 0.5865011811256409, "lr": 5.7060198262194914e-06, "epoch": 1.3187600526392749, "percentage": 65.94, "elapsed_time": "6:35:48", "remaining_time": "3:24:29"} +{"current_steps": 4511, "total_steps": 6840, "loss": 0.519783079624176, "lr": 5.701654030049038e-06, "epoch": 1.3190524930545402, "percentage": 65.95, "elapsed_time": "6:35:52", "remaining_time": "3:24:23"} +{"current_steps": 4512, "total_steps": 6840, "loss": 0.4238147437572479, "lr": 5.697289238590822e-06, "epoch": 1.3193449334698055, "percentage": 65.96, "elapsed_time": "6:35:57", "remaining_time": "3:24:17"} +{"current_steps": 4513, "total_steps": 6840, "loss": 0.5931107997894287, "lr": 5.6929254528650855e-06, "epoch": 1.3196373738850709, "percentage": 65.98, "elapsed_time": "6:36:02", "remaining_time": "3:24:12"} +{"current_steps": 4514, "total_steps": 6840, "loss": 0.7454524040222168, "lr": 5.688562673891837e-06, "epoch": 1.3199298143003362, "percentage": 65.99, "elapsed_time": "6:36:07", "remaining_time": "3:24:06"} +{"current_steps": 4515, "total_steps": 6840, "loss": 0.5909554362297058, "lr": 5.684200902690848e-06, "epoch": 1.3202222547156017, "percentage": 66.01, "elapsed_time": "6:36:11", "remaining_time": "3:24:01"} +{"current_steps": 4516, "total_steps": 6840, "loss": 0.5059943199157715, "lr": 5.67984014028166e-06, "epoch": 1.320514695130867, "percentage": 66.02, "elapsed_time": "6:36:16", "remaining_time": "3:23:55"} +{"current_steps": 4517, "total_steps": 6840, "loss": 0.4387373924255371, "lr": 5.675480387683572e-06, "epoch": 1.3208071355461324, "percentage": 66.04, "elapsed_time": "6:36:21", "remaining_time": "3:23:50"} +{"current_steps": 
4518, "total_steps": 6840, "loss": 0.6452310681343079, "lr": 5.671121645915648e-06, "epoch": 1.321099575961398, "percentage": 66.05, "elapsed_time": "6:36:27", "remaining_time": "3:23:45"} +{"current_steps": 4519, "total_steps": 6840, "loss": 0.5629088282585144, "lr": 5.666763915996725e-06, "epoch": 1.3213920163766633, "percentage": 66.07, "elapsed_time": "6:36:33", "remaining_time": "3:23:40"} +{"current_steps": 4520, "total_steps": 6840, "loss": 0.6442849636077881, "lr": 5.662407198945386e-06, "epoch": 1.3216844567919286, "percentage": 66.08, "elapsed_time": "6:36:38", "remaining_time": "3:23:35"} +{"current_steps": 4521, "total_steps": 6840, "loss": 0.5330031514167786, "lr": 5.6580514957799894e-06, "epoch": 1.3219768972071941, "percentage": 66.1, "elapsed_time": "6:36:43", "remaining_time": "3:23:29"} +{"current_steps": 4522, "total_steps": 6840, "loss": 0.471035361289978, "lr": 5.6536968075186575e-06, "epoch": 1.3222693376224595, "percentage": 66.11, "elapsed_time": "6:36:49", "remaining_time": "3:23:24"} +{"current_steps": 4523, "total_steps": 6840, "loss": 0.5675650835037231, "lr": 5.649343135179271e-06, "epoch": 1.3225617780377248, "percentage": 66.13, "elapsed_time": "6:36:54", "remaining_time": "3:23:19"} +{"current_steps": 4524, "total_steps": 6840, "loss": 0.5458093881607056, "lr": 5.644990479779473e-06, "epoch": 1.3228542184529903, "percentage": 66.14, "elapsed_time": "6:36:59", "remaining_time": "3:23:14"} +{"current_steps": 4525, "total_steps": 6840, "loss": 0.5625189542770386, "lr": 5.640638842336672e-06, "epoch": 1.3231466588682557, "percentage": 66.15, "elapsed_time": "6:37:05", "remaining_time": "3:23:08"} +{"current_steps": 4526, "total_steps": 6840, "loss": 0.5868214964866638, "lr": 5.636288223868038e-06, "epoch": 1.323439099283521, "percentage": 66.17, "elapsed_time": "6:37:09", "remaining_time": "3:23:03"} +{"current_steps": 4527, "total_steps": 6840, "loss": 0.5340765714645386, "lr": 5.631938625390498e-06, "epoch": 1.3237315396987863, 
"percentage": 66.18, "elapsed_time": "6:37:15", "remaining_time": "3:22:58"} +{"current_steps": 4528, "total_steps": 6840, "loss": 0.4487069845199585, "lr": 5.627590047920747e-06, "epoch": 1.3240239801140516, "percentage": 66.2, "elapsed_time": "6:37:20", "remaining_time": "3:22:52"} +{"current_steps": 4529, "total_steps": 6840, "loss": 0.4246913194656372, "lr": 5.623242492475237e-06, "epoch": 1.3243164205293172, "percentage": 66.21, "elapsed_time": "6:37:25", "remaining_time": "3:22:47"} +{"current_steps": 4530, "total_steps": 6840, "loss": 0.49904564023017883, "lr": 5.618895960070188e-06, "epoch": 1.3246088609445825, "percentage": 66.23, "elapsed_time": "6:37:31", "remaining_time": "3:22:42"} +{"current_steps": 4531, "total_steps": 6840, "loss": 0.5506085157394409, "lr": 5.614550451721566e-06, "epoch": 1.3249013013598478, "percentage": 66.24, "elapsed_time": "6:37:37", "remaining_time": "3:22:37"} +{"current_steps": 4532, "total_steps": 6840, "loss": 0.4861884117126465, "lr": 5.610205968445111e-06, "epoch": 1.3251937417751134, "percentage": 66.26, "elapsed_time": "6:37:42", "remaining_time": "3:22:32"} +{"current_steps": 4533, "total_steps": 6840, "loss": 0.5639146566390991, "lr": 5.605862511256322e-06, "epoch": 1.3254861821903787, "percentage": 66.27, "elapsed_time": "6:37:47", "remaining_time": "3:22:27"} +{"current_steps": 4534, "total_steps": 6840, "loss": 0.43305879831314087, "lr": 5.601520081170455e-06, "epoch": 1.325778622605644, "percentage": 66.29, "elapsed_time": "6:37:53", "remaining_time": "3:22:22"} +{"current_steps": 4535, "total_steps": 6840, "loss": 0.4820408821105957, "lr": 5.597178679202524e-06, "epoch": 1.3260710630209096, "percentage": 66.3, "elapsed_time": "6:37:59", "remaining_time": "3:22:17"} +{"current_steps": 4536, "total_steps": 6840, "loss": 0.5601707100868225, "lr": 5.592838306367307e-06, "epoch": 1.326363503436175, "percentage": 66.32, "elapsed_time": "6:38:03", "remaining_time": "3:22:11"} +{"current_steps": 4537, "total_steps": 
6840, "loss": 0.5655055046081543, "lr": 5.588498963679339e-06, "epoch": 1.3266559438514403, "percentage": 66.33, "elapsed_time": "6:38:09", "remaining_time": "3:22:06"} +{"current_steps": 4538, "total_steps": 6840, "loss": 0.5425975322723389, "lr": 5.584160652152917e-06, "epoch": 1.3269483842667056, "percentage": 66.35, "elapsed_time": "6:38:13", "remaining_time": "3:22:00"} +{"current_steps": 4539, "total_steps": 6840, "loss": 0.607103168964386, "lr": 5.579823372802098e-06, "epoch": 1.327240824681971, "percentage": 66.36, "elapsed_time": "6:38:18", "remaining_time": "3:21:55"} +{"current_steps": 4540, "total_steps": 6840, "loss": 0.6011538505554199, "lr": 5.575487126640686e-06, "epoch": 1.3275332650972365, "percentage": 66.37, "elapsed_time": "6:38:24", "remaining_time": "3:21:50"} +{"current_steps": 4541, "total_steps": 6840, "loss": 0.5333601236343384, "lr": 5.571151914682258e-06, "epoch": 1.3278257055125018, "percentage": 66.39, "elapsed_time": "6:38:30", "remaining_time": "3:21:45"} +{"current_steps": 4542, "total_steps": 6840, "loss": 0.576410174369812, "lr": 5.566817737940142e-06, "epoch": 1.3281181459277671, "percentage": 66.4, "elapsed_time": "6:38:35", "remaining_time": "3:21:39"} +{"current_steps": 4543, "total_steps": 6840, "loss": 0.506458044052124, "lr": 5.562484597427425e-06, "epoch": 1.3284105863430327, "percentage": 66.42, "elapsed_time": "6:38:39", "remaining_time": "3:21:33"} +{"current_steps": 4544, "total_steps": 6840, "loss": 0.5893718004226685, "lr": 5.558152494156955e-06, "epoch": 1.328703026758298, "percentage": 66.43, "elapsed_time": "6:38:44", "remaining_time": "3:21:28"} +{"current_steps": 4545, "total_steps": 6840, "loss": 0.508120059967041, "lr": 5.55382142914133e-06, "epoch": 1.3289954671735633, "percentage": 66.45, "elapsed_time": "6:38:48", "remaining_time": "3:21:22"} +{"current_steps": 4546, "total_steps": 6840, "loss": 0.6103616952896118, "lr": 5.5494914033929126e-06, "epoch": 1.3292879075888289, "percentage": 66.46, 
"elapsed_time": "6:38:54", "remaining_time": "3:21:17"} +{"current_steps": 4547, "total_steps": 6840, "loss": 0.5290235280990601, "lr": 5.545162417923822e-06, "epoch": 1.3295803480040942, "percentage": 66.48, "elapsed_time": "6:38:59", "remaining_time": "3:21:12"} +{"current_steps": 4548, "total_steps": 6840, "loss": 0.5729631185531616, "lr": 5.540834473745929e-06, "epoch": 1.3298727884193595, "percentage": 66.49, "elapsed_time": "6:39:03", "remaining_time": "3:21:06"} +{"current_steps": 4549, "total_steps": 6840, "loss": 0.48720547556877136, "lr": 5.536507571870866e-06, "epoch": 1.330165228834625, "percentage": 66.51, "elapsed_time": "6:39:09", "remaining_time": "3:21:01"} +{"current_steps": 4550, "total_steps": 6840, "loss": 0.4987955689430237, "lr": 5.532181713310023e-06, "epoch": 1.3304576692498904, "percentage": 66.52, "elapsed_time": "6:39:14", "remaining_time": "3:20:56"} +{"current_steps": 4551, "total_steps": 6840, "loss": 0.4002467393875122, "lr": 5.527856899074536e-06, "epoch": 1.3307501096651557, "percentage": 66.54, "elapsed_time": "6:39:20", "remaining_time": "3:20:51"} +{"current_steps": 4552, "total_steps": 6840, "loss": 0.7435724139213562, "lr": 5.523533130175308e-06, "epoch": 1.331042550080421, "percentage": 66.55, "elapsed_time": "6:39:25", "remaining_time": "3:20:46"} +{"current_steps": 4553, "total_steps": 6840, "loss": 0.34711340069770813, "lr": 5.519210407622993e-06, "epoch": 1.3313349904956864, "percentage": 66.56, "elapsed_time": "6:39:31", "remaining_time": "3:20:41"} +{"current_steps": 4554, "total_steps": 6840, "loss": 0.4749720096588135, "lr": 5.514888732428003e-06, "epoch": 1.331627430910952, "percentage": 66.58, "elapsed_time": "6:39:37", "remaining_time": "3:20:36"} +{"current_steps": 4555, "total_steps": 6840, "loss": 0.5818741321563721, "lr": 5.5105681056005e-06, "epoch": 1.3319198713262173, "percentage": 66.59, "elapsed_time": "6:39:42", "remaining_time": "3:20:30"} +{"current_steps": 4556, "total_steps": 6840, "loss": 
0.5715004801750183, "lr": 5.506248528150407e-06, "epoch": 1.3322123117414826, "percentage": 66.61, "elapsed_time": "6:39:47", "remaining_time": "3:20:25"} +{"current_steps": 4557, "total_steps": 6840, "loss": 0.5465661287307739, "lr": 5.501930001087399e-06, "epoch": 1.3325047521567481, "percentage": 66.62, "elapsed_time": "6:39:52", "remaining_time": "3:20:19"} +{"current_steps": 4558, "total_steps": 6840, "loss": 0.6324847936630249, "lr": 5.4976125254209035e-06, "epoch": 1.3327971925720135, "percentage": 66.64, "elapsed_time": "6:39:56", "remaining_time": "3:20:13"} +{"current_steps": 4559, "total_steps": 6840, "loss": 0.4616294503211975, "lr": 5.493296102160105e-06, "epoch": 1.3330896329872788, "percentage": 66.65, "elapsed_time": "6:40:01", "remaining_time": "3:20:08"} +{"current_steps": 4560, "total_steps": 6840, "loss": 0.5187079310417175, "lr": 5.488980732313942e-06, "epoch": 1.3333820734025443, "percentage": 66.67, "elapsed_time": "6:40:05", "remaining_time": "3:20:02"} +{"current_steps": 4561, "total_steps": 6840, "loss": 0.6120654344558716, "lr": 5.484666416891109e-06, "epoch": 1.3336745138178097, "percentage": 66.68, "elapsed_time": "6:40:09", "remaining_time": "3:19:56"} +{"current_steps": 4562, "total_steps": 6840, "loss": 0.6171379685401917, "lr": 5.480353156900044e-06, "epoch": 1.333966954233075, "percentage": 66.7, "elapsed_time": "6:40:14", "remaining_time": "3:19:51"} +{"current_steps": 4563, "total_steps": 6840, "loss": 0.4690072536468506, "lr": 5.4760409533489475e-06, "epoch": 1.3342593946483405, "percentage": 66.71, "elapsed_time": "6:40:19", "remaining_time": "3:19:46"} +{"current_steps": 4564, "total_steps": 6840, "loss": 0.511309802532196, "lr": 5.471729807245773e-06, "epoch": 1.3345518350636059, "percentage": 66.73, "elapsed_time": "6:40:26", "remaining_time": "3:19:41"} +{"current_steps": 4565, "total_steps": 6840, "loss": 0.5657862424850464, "lr": 5.467419719598223e-06, "epoch": 1.3348442754788712, "percentage": 66.74, "elapsed_time": 
"6:40:33", "remaining_time": "3:19:37"} +{"current_steps": 4566, "total_steps": 6840, "loss": 0.4263400733470917, "lr": 5.4631106914137555e-06, "epoch": 1.3351367158941365, "percentage": 66.75, "elapsed_time": "6:40:37", "remaining_time": "3:19:31"} +{"current_steps": 4567, "total_steps": 6840, "loss": 0.6275177001953125, "lr": 5.458802723699579e-06, "epoch": 1.3354291563094018, "percentage": 66.77, "elapsed_time": "6:40:43", "remaining_time": "3:19:26"} +{"current_steps": 4568, "total_steps": 6840, "loss": 0.3857421278953552, "lr": 5.454495817462655e-06, "epoch": 1.3357215967246674, "percentage": 66.78, "elapsed_time": "6:40:47", "remaining_time": "3:19:20"} +{"current_steps": 4569, "total_steps": 6840, "loss": 0.5834560394287109, "lr": 5.450189973709697e-06, "epoch": 1.3360140371399327, "percentage": 66.8, "elapsed_time": "6:40:54", "remaining_time": "3:19:16"} +{"current_steps": 4570, "total_steps": 6840, "loss": 0.6165010929107666, "lr": 5.445885193447169e-06, "epoch": 1.336306477555198, "percentage": 66.81, "elapsed_time": "6:40:59", "remaining_time": "3:19:10"} +{"current_steps": 4571, "total_steps": 6840, "loss": 0.6034595966339111, "lr": 5.441581477681288e-06, "epoch": 1.3365989179704636, "percentage": 66.83, "elapsed_time": "6:41:05", "remaining_time": "3:19:05"} +{"current_steps": 4572, "total_steps": 6840, "loss": 0.570164144039154, "lr": 5.43727882741802e-06, "epoch": 1.336891358385729, "percentage": 66.84, "elapsed_time": "6:41:09", "remaining_time": "3:19:00"} +{"current_steps": 4573, "total_steps": 6840, "loss": 0.5369169116020203, "lr": 5.432977243663089e-06, "epoch": 1.3371837988009942, "percentage": 66.86, "elapsed_time": "6:41:15", "remaining_time": "3:18:54"} +{"current_steps": 4574, "total_steps": 6840, "loss": 0.5624364614486694, "lr": 5.428676727421954e-06, "epoch": 1.3374762392162598, "percentage": 66.87, "elapsed_time": "6:41:19", "remaining_time": "3:18:49"} +{"current_steps": 4575, "total_steps": 6840, "loss": 0.5002127885818481, "lr": 
5.424377279699842e-06, "epoch": 1.3377686796315251, "percentage": 66.89, "elapsed_time": "6:41:24", "remaining_time": "3:18:43"} +{"current_steps": 4576, "total_steps": 6840, "loss": 0.5998499393463135, "lr": 5.42007890150172e-06, "epoch": 1.3380611200467905, "percentage": 66.9, "elapsed_time": "6:41:28", "remaining_time": "3:18:37"} +{"current_steps": 4577, "total_steps": 6840, "loss": 0.5988572835922241, "lr": 5.415781593832307e-06, "epoch": 1.3383535604620558, "percentage": 66.92, "elapsed_time": "6:41:33", "remaining_time": "3:18:32"} +{"current_steps": 4578, "total_steps": 6840, "loss": 0.5202064514160156, "lr": 5.411485357696075e-06, "epoch": 1.338646000877321, "percentage": 66.93, "elapsed_time": "6:41:37", "remaining_time": "3:18:26"} +{"current_steps": 4579, "total_steps": 6840, "loss": 0.5246714949607849, "lr": 5.407190194097241e-06, "epoch": 1.3389384412925867, "percentage": 66.94, "elapsed_time": "6:41:43", "remaining_time": "3:18:21"} +{"current_steps": 4580, "total_steps": 6840, "loss": 0.5998588800430298, "lr": 5.4028961040397765e-06, "epoch": 1.339230881707852, "percentage": 66.96, "elapsed_time": "6:41:49", "remaining_time": "3:18:16"} +{"current_steps": 4581, "total_steps": 6840, "loss": 0.5971418023109436, "lr": 5.3986030885273945e-06, "epoch": 1.3395233221231173, "percentage": 66.97, "elapsed_time": "6:41:54", "remaining_time": "3:18:11"} +{"current_steps": 4582, "total_steps": 6840, "loss": 0.4638952910900116, "lr": 5.3943111485635644e-06, "epoch": 1.3398157625383829, "percentage": 66.99, "elapsed_time": "6:42:00", "remaining_time": "3:18:06"} +{"current_steps": 4583, "total_steps": 6840, "loss": 0.5007182955741882, "lr": 5.390020285151502e-06, "epoch": 1.3401082029536482, "percentage": 67.0, "elapsed_time": "6:42:06", "remaining_time": "3:18:01"} +{"current_steps": 4584, "total_steps": 6840, "loss": 0.5013964772224426, "lr": 5.385730499294171e-06, "epoch": 1.3404006433689135, "percentage": 67.02, "elapsed_time": "6:42:11", "remaining_time": 
"3:17:56"} +{"current_steps": 4585, "total_steps": 6840, "loss": 0.5699980854988098, "lr": 5.381441791994276e-06, "epoch": 1.340693083784179, "percentage": 67.03, "elapsed_time": "6:42:16", "remaining_time": "3:17:50"} +{"current_steps": 4586, "total_steps": 6840, "loss": 0.5326210260391235, "lr": 5.377154164254283e-06, "epoch": 1.3409855241994444, "percentage": 67.05, "elapsed_time": "6:42:21", "remaining_time": "3:17:45"} +{"current_steps": 4587, "total_steps": 6840, "loss": 0.6065158843994141, "lr": 5.372867617076395e-06, "epoch": 1.3412779646147097, "percentage": 67.06, "elapsed_time": "6:42:27", "remaining_time": "3:17:40"} +{"current_steps": 4588, "total_steps": 6840, "loss": 0.48427143692970276, "lr": 5.368582151462569e-06, "epoch": 1.3415704050299753, "percentage": 67.08, "elapsed_time": "6:42:34", "remaining_time": "3:17:36"} +{"current_steps": 4589, "total_steps": 6840, "loss": 0.5755994915962219, "lr": 5.364297768414505e-06, "epoch": 1.3418628454452406, "percentage": 67.09, "elapsed_time": "6:42:40", "remaining_time": "3:17:31"} +{"current_steps": 4590, "total_steps": 6840, "loss": 0.4959644377231598, "lr": 5.360014468933652e-06, "epoch": 1.342155285860506, "percentage": 67.11, "elapsed_time": "6:42:46", "remaining_time": "3:17:26"} +{"current_steps": 4591, "total_steps": 6840, "loss": 0.5374274253845215, "lr": 5.355732254021205e-06, "epoch": 1.3424477262757712, "percentage": 67.12, "elapsed_time": "6:42:52", "remaining_time": "3:17:21"} +{"current_steps": 4592, "total_steps": 6840, "loss": 0.5875111818313599, "lr": 5.351451124678106e-06, "epoch": 1.3427401666910366, "percentage": 67.13, "elapsed_time": "6:42:57", "remaining_time": "3:17:15"} +{"current_steps": 4593, "total_steps": 6840, "loss": 0.5230692028999329, "lr": 5.347171081905045e-06, "epoch": 1.3430326071063021, "percentage": 67.15, "elapsed_time": "6:43:02", "remaining_time": "3:17:10"} +{"current_steps": 4594, "total_steps": 6840, "loss": 0.4624518156051636, "lr": 5.342892126702453e-06, 
"epoch": 1.3433250475215675, "percentage": 67.16, "elapsed_time": "6:43:07", "remaining_time": "3:17:05"} +{"current_steps": 4595, "total_steps": 6840, "loss": 0.5141074061393738, "lr": 5.3386142600705134e-06, "epoch": 1.3436174879368328, "percentage": 67.18, "elapsed_time": "6:43:11", "remaining_time": "3:16:59"} +{"current_steps": 4596, "total_steps": 6840, "loss": 0.4655565023422241, "lr": 5.334337483009147e-06, "epoch": 1.3439099283520983, "percentage": 67.19, "elapsed_time": "6:43:16", "remaining_time": "3:16:54"} +{"current_steps": 4597, "total_steps": 6840, "loss": 0.6135094165802002, "lr": 5.330061796518025e-06, "epoch": 1.3442023687673637, "percentage": 67.21, "elapsed_time": "6:43:21", "remaining_time": "3:16:48"} +{"current_steps": 4598, "total_steps": 6840, "loss": 0.5865254402160645, "lr": 5.325787201596563e-06, "epoch": 1.344494809182629, "percentage": 67.22, "elapsed_time": "6:43:27", "remaining_time": "3:16:43"} +{"current_steps": 4599, "total_steps": 6840, "loss": 0.5290840268135071, "lr": 5.321513699243924e-06, "epoch": 1.3447872495978945, "percentage": 67.24, "elapsed_time": "6:43:32", "remaining_time": "3:16:38"} +{"current_steps": 4600, "total_steps": 6840, "loss": 0.554675817489624, "lr": 5.317241290459012e-06, "epoch": 1.3450796900131599, "percentage": 67.25, "elapsed_time": "6:43:38", "remaining_time": "3:16:33"} +{"current_steps": 4601, "total_steps": 6840, "loss": 0.5033853650093079, "lr": 5.312969976240479e-06, "epoch": 1.3453721304284252, "percentage": 67.27, "elapsed_time": "6:43:48", "remaining_time": "3:16:30"} +{"current_steps": 4602, "total_steps": 6840, "loss": 0.44666093587875366, "lr": 5.308699757586713e-06, "epoch": 1.3456645708436907, "percentage": 67.28, "elapsed_time": "6:43:54", "remaining_time": "3:16:25"} +{"current_steps": 4603, "total_steps": 6840, "loss": 0.5447900891304016, "lr": 5.304430635495856e-06, "epoch": 1.345957011258956, "percentage": 67.3, "elapsed_time": "6:43:58", "remaining_time": "3:16:19"} 
+{"current_steps": 4604, "total_steps": 6840, "loss": 0.4425917863845825, "lr": 5.30016261096579e-06, "epoch": 1.3462494516742214, "percentage": 67.31, "elapsed_time": "6:44:01", "remaining_time": "3:16:13"} +{"current_steps": 4605, "total_steps": 6840, "loss": 0.4411497712135315, "lr": 5.295895684994137e-06, "epoch": 1.3465418920894867, "percentage": 67.32, "elapsed_time": "6:44:06", "remaining_time": "3:16:07"} +{"current_steps": 4606, "total_steps": 6840, "loss": 0.5577414631843567, "lr": 5.291629858578271e-06, "epoch": 1.346834332504752, "percentage": 67.34, "elapsed_time": "6:44:12", "remaining_time": "3:16:02"} +{"current_steps": 4607, "total_steps": 6840, "loss": 0.4754186272621155, "lr": 5.287365132715293e-06, "epoch": 1.3471267729200176, "percentage": 67.35, "elapsed_time": "6:44:17", "remaining_time": "3:15:57"} +{"current_steps": 4608, "total_steps": 6840, "loss": 0.5582431554794312, "lr": 5.283101508402063e-06, "epoch": 1.347419213335283, "percentage": 67.37, "elapsed_time": "6:44:22", "remaining_time": "3:15:51"} +{"current_steps": 4609, "total_steps": 6840, "loss": 0.5552654266357422, "lr": 5.2788389866351755e-06, "epoch": 1.3477116537505482, "percentage": 67.38, "elapsed_time": "6:44:27", "remaining_time": "3:15:46"} +{"current_steps": 4610, "total_steps": 6840, "loss": 0.5776556730270386, "lr": 5.2745775684109705e-06, "epoch": 1.3480040941658138, "percentage": 67.4, "elapsed_time": "6:44:33", "remaining_time": "3:15:41"} +{"current_steps": 4611, "total_steps": 6840, "loss": 0.5859286785125732, "lr": 5.270317254725528e-06, "epoch": 1.3482965345810791, "percentage": 67.41, "elapsed_time": "6:44:38", "remaining_time": "3:15:36"} +{"current_steps": 4612, "total_steps": 6840, "loss": 0.5914887189865112, "lr": 5.2660580465746694e-06, "epoch": 1.3485889749963444, "percentage": 67.43, "elapsed_time": "6:44:43", "remaining_time": "3:15:31"} +{"current_steps": 4613, "total_steps": 6840, "loss": 0.43669426441192627, "lr": 5.261799944953956e-06, "epoch": 
1.34888141541161, "percentage": 67.44, "elapsed_time": "6:44:48", "remaining_time": "3:15:25"} +{"current_steps": 4614, "total_steps": 6840, "loss": 0.473773717880249, "lr": 5.2575429508587e-06, "epoch": 1.3491738558268753, "percentage": 67.46, "elapsed_time": "6:44:54", "remaining_time": "3:15:20"} +{"current_steps": 4615, "total_steps": 6840, "loss": 0.5011228919029236, "lr": 5.253287065283949e-06, "epoch": 1.3494662962421407, "percentage": 67.47, "elapsed_time": "6:45:00", "remaining_time": "3:15:15"} +{"current_steps": 4616, "total_steps": 6840, "loss": 0.5839254856109619, "lr": 5.249032289224483e-06, "epoch": 1.349758736657406, "percentage": 67.49, "elapsed_time": "6:45:05", "remaining_time": "3:15:10"} +{"current_steps": 4617, "total_steps": 6840, "loss": 0.5375077128410339, "lr": 5.244778623674831e-06, "epoch": 1.3500511770726713, "percentage": 67.5, "elapsed_time": "6:45:10", "remaining_time": "3:15:04"} +{"current_steps": 4618, "total_steps": 6840, "loss": 0.49445679783821106, "lr": 5.240526069629265e-06, "epoch": 1.3503436174879369, "percentage": 67.51, "elapsed_time": "6:45:15", "remaining_time": "3:14:59"} +{"current_steps": 4619, "total_steps": 6840, "loss": 0.5369694828987122, "lr": 5.236274628081792e-06, "epoch": 1.3506360579032022, "percentage": 67.53, "elapsed_time": "6:45:20", "remaining_time": "3:14:54"} +{"current_steps": 4620, "total_steps": 6840, "loss": 0.6017554402351379, "lr": 5.23202430002616e-06, "epoch": 1.3509284983184675, "percentage": 67.54, "elapsed_time": "6:45:26", "remaining_time": "3:14:49"} +{"current_steps": 4621, "total_steps": 6840, "loss": 0.5380403995513916, "lr": 5.227775086455859e-06, "epoch": 1.351220938733733, "percentage": 67.56, "elapsed_time": "6:45:31", "remaining_time": "3:14:44"} +{"current_steps": 4622, "total_steps": 6840, "loss": 0.5650593042373657, "lr": 5.223526988364116e-06, "epoch": 1.3515133791489984, "percentage": 67.57, "elapsed_time": "6:45:36", "remaining_time": "3:14:38"} +{"current_steps": 4623, 
"total_steps": 6840, "loss": 0.5572884678840637, "lr": 5.219280006743897e-06, "epoch": 1.3518058195642637, "percentage": 67.59, "elapsed_time": "6:45:42", "remaining_time": "3:14:33"} +{"current_steps": 4624, "total_steps": 6840, "loss": 0.5304458141326904, "lr": 5.21503414258791e-06, "epoch": 1.3520982599795293, "percentage": 67.6, "elapsed_time": "6:45:47", "remaining_time": "3:14:28"} +{"current_steps": 4625, "total_steps": 6840, "loss": 0.6702588796615601, "lr": 5.2107893968886005e-06, "epoch": 1.3523907003947946, "percentage": 67.62, "elapsed_time": "6:45:51", "remaining_time": "3:14:22"} +{"current_steps": 4626, "total_steps": 6840, "loss": 0.4607279300689697, "lr": 5.206545770638152e-06, "epoch": 1.35268314081006, "percentage": 67.63, "elapsed_time": "6:45:57", "remaining_time": "3:14:17"} +{"current_steps": 4627, "total_steps": 6840, "loss": 0.5759040713310242, "lr": 5.202303264828482e-06, "epoch": 1.3529755812253255, "percentage": 67.65, "elapsed_time": "6:46:03", "remaining_time": "3:14:12"} +{"current_steps": 4628, "total_steps": 6840, "loss": 0.446469783782959, "lr": 5.198061880451253e-06, "epoch": 1.3532680216405908, "percentage": 67.66, "elapsed_time": "6:46:08", "remaining_time": "3:14:07"} +{"current_steps": 4629, "total_steps": 6840, "loss": 0.4869040846824646, "lr": 5.193821618497864e-06, "epoch": 1.3535604620558561, "percentage": 67.68, "elapsed_time": "6:46:13", "remaining_time": "3:14:01"} +{"current_steps": 4630, "total_steps": 6840, "loss": 0.5153477191925049, "lr": 5.189582479959449e-06, "epoch": 1.3538529024711214, "percentage": 67.69, "elapsed_time": "6:46:17", "remaining_time": "3:13:55"} +{"current_steps": 4631, "total_steps": 6840, "loss": 0.4958652853965759, "lr": 5.185344465826883e-06, "epoch": 1.3541453428863868, "percentage": 67.7, "elapsed_time": "6:46:21", "remaining_time": "3:13:50"} +{"current_steps": 4632, "total_steps": 6840, "loss": 0.5314347743988037, "lr": 5.1811075770907715e-06, "epoch": 1.3544377833016523, "percentage": 
67.72, "elapsed_time": "6:46:28", "remaining_time": "3:13:45"} +{"current_steps": 4633, "total_steps": 6840, "loss": 0.5366088151931763, "lr": 5.176871814741466e-06, "epoch": 1.3547302237169176, "percentage": 67.73, "elapsed_time": "6:46:34", "remaining_time": "3:13:40"} +{"current_steps": 4634, "total_steps": 6840, "loss": 0.6239185929298401, "lr": 5.172637179769049e-06, "epoch": 1.355022664132183, "percentage": 67.75, "elapsed_time": "6:46:40", "remaining_time": "3:13:35"} +{"current_steps": 4635, "total_steps": 6840, "loss": 0.5516507625579834, "lr": 5.168403673163341e-06, "epoch": 1.3553151045474485, "percentage": 67.76, "elapsed_time": "6:46:44", "remaining_time": "3:13:30"} +{"current_steps": 4636, "total_steps": 6840, "loss": 0.5859683156013489, "lr": 5.164171295913898e-06, "epoch": 1.3556075449627139, "percentage": 67.78, "elapsed_time": "6:46:50", "remaining_time": "3:13:25"} +{"current_steps": 4637, "total_steps": 6840, "loss": 0.5913225412368774, "lr": 5.159940049010015e-06, "epoch": 1.3558999853779792, "percentage": 67.79, "elapsed_time": "6:46:55", "remaining_time": "3:13:19"} +{"current_steps": 4638, "total_steps": 6840, "loss": 0.650983989238739, "lr": 5.155709933440714e-06, "epoch": 1.3561924257932447, "percentage": 67.81, "elapsed_time": "6:47:00", "remaining_time": "3:13:14"} +{"current_steps": 4639, "total_steps": 6840, "loss": 0.5631625652313232, "lr": 5.151480950194762e-06, "epoch": 1.35648486620851, "percentage": 67.82, "elapsed_time": "6:47:06", "remaining_time": "3:13:09"} +{"current_steps": 4640, "total_steps": 6840, "loss": 0.48153650760650635, "lr": 5.147253100260659e-06, "epoch": 1.3567773066237754, "percentage": 67.84, "elapsed_time": "6:47:10", "remaining_time": "3:13:03"} +{"current_steps": 4641, "total_steps": 6840, "loss": 0.43598422408103943, "lr": 5.143026384626637e-06, "epoch": 1.357069747039041, "percentage": 67.85, "elapsed_time": "6:47:16", "remaining_time": "3:12:58"} +{"current_steps": 4642, "total_steps": 6840, "loss": 
0.5323987007141113, "lr": 5.138800804280668e-06, "epoch": 1.3573621874543063, "percentage": 67.87, "elapsed_time": "6:47:20", "remaining_time": "3:12:52"} +{"current_steps": 4643, "total_steps": 6840, "loss": 0.5386587977409363, "lr": 5.134576360210454e-06, "epoch": 1.3576546278695716, "percentage": 67.88, "elapsed_time": "6:47:24", "remaining_time": "3:12:46"} +{"current_steps": 4644, "total_steps": 6840, "loss": 0.4913867115974426, "lr": 5.130353053403434e-06, "epoch": 1.357947068284837, "percentage": 67.89, "elapsed_time": "6:47:29", "remaining_time": "3:12:41"} +{"current_steps": 4645, "total_steps": 6840, "loss": 0.6516048908233643, "lr": 5.12613088484678e-06, "epoch": 1.3582395087001022, "percentage": 67.91, "elapsed_time": "6:47:35", "remaining_time": "3:12:36"} +{"current_steps": 4646, "total_steps": 6840, "loss": 0.5290599465370178, "lr": 5.121909855527398e-06, "epoch": 1.3585319491153678, "percentage": 67.92, "elapsed_time": "6:47:40", "remaining_time": "3:12:30"} +{"current_steps": 4647, "total_steps": 6840, "loss": 0.7909928560256958, "lr": 5.117689966431927e-06, "epoch": 1.3588243895306331, "percentage": 67.94, "elapsed_time": "6:47:45", "remaining_time": "3:12:25"} +{"current_steps": 4648, "total_steps": 6840, "loss": 0.4751276969909668, "lr": 5.113471218546746e-06, "epoch": 1.3591168299458984, "percentage": 67.95, "elapsed_time": "6:47:51", "remaining_time": "3:12:20"} +{"current_steps": 4649, "total_steps": 6840, "loss": 0.4542301893234253, "lr": 5.109253612857954e-06, "epoch": 1.359409270361164, "percentage": 67.97, "elapsed_time": "6:47:55", "remaining_time": "3:12:14"} +{"current_steps": 4650, "total_steps": 6840, "loss": 0.5355349779129028, "lr": 5.105037150351393e-06, "epoch": 1.3597017107764293, "percentage": 67.98, "elapsed_time": "6:48:01", "remaining_time": "3:12:09"} +{"current_steps": 4651, "total_steps": 6840, "loss": 0.4994719326496124, "lr": 5.100821832012637e-06, "epoch": 1.3599941511916946, "percentage": 68.0, "elapsed_time": 
"6:48:06", "remaining_time": "3:12:04"} +{"current_steps": 4652, "total_steps": 6840, "loss": 0.6171674728393555, "lr": 5.096607658826989e-06, "epoch": 1.3602865916069602, "percentage": 68.01, "elapsed_time": "6:48:11", "remaining_time": "3:11:59"} +{"current_steps": 4653, "total_steps": 6840, "loss": 0.5386878252029419, "lr": 5.092394631779487e-06, "epoch": 1.3605790320222255, "percentage": 68.03, "elapsed_time": "6:48:15", "remaining_time": "3:11:53"} +{"current_steps": 4654, "total_steps": 6840, "loss": 0.4495810270309448, "lr": 5.088182751854903e-06, "epoch": 1.3608714724374908, "percentage": 68.04, "elapsed_time": "6:48:22", "remaining_time": "3:11:48"} +{"current_steps": 4655, "total_steps": 6840, "loss": 0.5540642142295837, "lr": 5.083972020037735e-06, "epoch": 1.3611639128527562, "percentage": 68.06, "elapsed_time": "6:48:27", "remaining_time": "3:11:43"} +{"current_steps": 4656, "total_steps": 6840, "loss": 0.6020554900169373, "lr": 5.079762437312219e-06, "epoch": 1.3614563532680215, "percentage": 68.07, "elapsed_time": "6:48:31", "remaining_time": "3:11:37"} +{"current_steps": 4657, "total_steps": 6840, "loss": 0.47981250286102295, "lr": 5.075554004662316e-06, "epoch": 1.361748793683287, "percentage": 68.08, "elapsed_time": "6:48:36", "remaining_time": "3:11:32"} +{"current_steps": 4658, "total_steps": 6840, "loss": 0.6206443905830383, "lr": 5.071346723071724e-06, "epoch": 1.3620412340985524, "percentage": 68.1, "elapsed_time": "6:48:42", "remaining_time": "3:11:27"} +{"current_steps": 4659, "total_steps": 6840, "loss": 0.46899446845054626, "lr": 5.067140593523869e-06, "epoch": 1.3623336745138177, "percentage": 68.11, "elapsed_time": "6:48:48", "remaining_time": "3:11:22"} +{"current_steps": 4660, "total_steps": 6840, "loss": 0.5695985555648804, "lr": 5.062935617001912e-06, "epoch": 1.3626261149290833, "percentage": 68.13, "elapsed_time": "6:48:55", "remaining_time": "3:11:17"} +{"current_steps": 4661, "total_steps": 6840, "loss": 0.5524671077728271, 
"lr": 5.058731794488732e-06, "epoch": 1.3629185553443486, "percentage": 68.14, "elapsed_time": "6:49:00", "remaining_time": "3:11:12"} +{"current_steps": 4662, "total_steps": 6840, "loss": 0.4655245244503021, "lr": 5.054529126966953e-06, "epoch": 1.363210995759614, "percentage": 68.16, "elapsed_time": "6:49:05", "remaining_time": "3:11:07"} +{"current_steps": 4663, "total_steps": 6840, "loss": 0.5617693662643433, "lr": 5.050327615418921e-06, "epoch": 1.3635034361748795, "percentage": 68.17, "elapsed_time": "6:49:12", "remaining_time": "3:11:02"} +{"current_steps": 4664, "total_steps": 6840, "loss": 0.52044677734375, "lr": 5.046127260826714e-06, "epoch": 1.3637958765901448, "percentage": 68.19, "elapsed_time": "6:49:16", "remaining_time": "3:10:56"} +{"current_steps": 4665, "total_steps": 6840, "loss": 0.4567520022392273, "lr": 5.041928064172139e-06, "epoch": 1.3640883170054101, "percentage": 68.2, "elapsed_time": "6:49:21", "remaining_time": "3:10:51"} +{"current_steps": 4666, "total_steps": 6840, "loss": 0.5942729711532593, "lr": 5.037730026436736e-06, "epoch": 1.3643807574206757, "percentage": 68.22, "elapsed_time": "6:49:26", "remaining_time": "3:10:46"} +{"current_steps": 4667, "total_steps": 6840, "loss": 0.3824811279773712, "lr": 5.033533148601766e-06, "epoch": 1.364673197835941, "percentage": 68.23, "elapsed_time": "6:49:30", "remaining_time": "3:10:40"} +{"current_steps": 4668, "total_steps": 6840, "loss": 0.4710771441459656, "lr": 5.029337431648227e-06, "epoch": 1.3649656382512063, "percentage": 68.25, "elapsed_time": "6:49:36", "remaining_time": "3:10:35"} +{"current_steps": 4669, "total_steps": 6840, "loss": 0.6617978811264038, "lr": 5.02514287655684e-06, "epoch": 1.3652580786664716, "percentage": 68.26, "elapsed_time": "6:49:42", "remaining_time": "3:10:30"} +{"current_steps": 4670, "total_steps": 6840, "loss": 0.5237355828285217, "lr": 5.020949484308058e-06, "epoch": 1.365550519081737, "percentage": 68.27, "elapsed_time": "6:49:48", "remaining_time": 
"3:10:25"} +{"current_steps": 4671, "total_steps": 6840, "loss": 0.4544803500175476, "lr": 5.016757255882065e-06, "epoch": 1.3658429594970025, "percentage": 68.29, "elapsed_time": "6:49:54", "remaining_time": "3:10:20"} +{"current_steps": 4672, "total_steps": 6840, "loss": 0.5854490399360657, "lr": 5.012566192258763e-06, "epoch": 1.3661353999122678, "percentage": 68.3, "elapsed_time": "6:49:59", "remaining_time": "3:10:15"} +{"current_steps": 4673, "total_steps": 6840, "loss": 0.6275635361671448, "lr": 5.008376294417787e-06, "epoch": 1.3664278403275332, "percentage": 68.32, "elapsed_time": "6:50:05", "remaining_time": "3:10:10"} +{"current_steps": 4674, "total_steps": 6840, "loss": 0.5160082578659058, "lr": 5.004187563338504e-06, "epoch": 1.3667202807427987, "percentage": 68.33, "elapsed_time": "6:50:09", "remaining_time": "3:10:04"} +{"current_steps": 4675, "total_steps": 6840, "loss": 0.5203640460968018, "lr": 5.000000000000003e-06, "epoch": 1.367012721158064, "percentage": 68.35, "elapsed_time": "6:50:14", "remaining_time": "3:09:58"} +{"current_steps": 4676, "total_steps": 6840, "loss": 0.6836066246032715, "lr": 4.9958136053811e-06, "epoch": 1.3673051615733294, "percentage": 68.36, "elapsed_time": "6:50:19", "remaining_time": "3:09:53"} +{"current_steps": 4677, "total_steps": 6840, "loss": 0.5566641092300415, "lr": 4.991628380460343e-06, "epoch": 1.367597601988595, "percentage": 68.38, "elapsed_time": "6:50:24", "remaining_time": "3:09:48"} +{"current_steps": 4678, "total_steps": 6840, "loss": 0.5618000030517578, "lr": 4.9874443262159984e-06, "epoch": 1.3678900424038603, "percentage": 68.39, "elapsed_time": "6:50:29", "remaining_time": "3:09:42"} +{"current_steps": 4679, "total_steps": 6840, "loss": 0.4605063796043396, "lr": 4.983261443626068e-06, "epoch": 1.3681824828191256, "percentage": 68.41, "elapsed_time": "6:50:34", "remaining_time": "3:09:37"} +{"current_steps": 4680, "total_steps": 6840, "loss": 0.48282021284103394, "lr": 4.97907973366827e-06, "epoch": 
1.3684749232343911, "percentage": 68.42, "elapsed_time": "6:50:38", "remaining_time": "3:09:31"} +{"current_steps": 4681, "total_steps": 6840, "loss": 0.42356133460998535, "lr": 4.974899197320059e-06, "epoch": 1.3687673636496565, "percentage": 68.44, "elapsed_time": "6:50:44", "remaining_time": "3:09:26"} +{"current_steps": 4682, "total_steps": 6840, "loss": 0.459377646446228, "lr": 4.97071983555861e-06, "epoch": 1.3690598040649218, "percentage": 68.45, "elapsed_time": "6:50:51", "remaining_time": "3:09:22"} +{"current_steps": 4683, "total_steps": 6840, "loss": 0.5539775490760803, "lr": 4.966541649360819e-06, "epoch": 1.369352244480187, "percentage": 68.46, "elapsed_time": "6:50:55", "remaining_time": "3:09:16"} +{"current_steps": 4684, "total_steps": 6840, "loss": 0.5593239068984985, "lr": 4.962364639703311e-06, "epoch": 1.3696446848954524, "percentage": 68.48, "elapsed_time": "6:51:00", "remaining_time": "3:09:10"} +{"current_steps": 4685, "total_steps": 6840, "loss": 0.5425251722335815, "lr": 4.958188807562441e-06, "epoch": 1.369937125310718, "percentage": 68.49, "elapsed_time": "6:51:05", "remaining_time": "3:09:05"} +{"current_steps": 4686, "total_steps": 6840, "loss": 0.5183289051055908, "lr": 4.954014153914282e-06, "epoch": 1.3702295657259833, "percentage": 68.51, "elapsed_time": "6:51:10", "remaining_time": "3:09:00"} +{"current_steps": 4687, "total_steps": 6840, "loss": 0.5278980731964111, "lr": 4.9498406797346345e-06, "epoch": 1.3705220061412486, "percentage": 68.52, "elapsed_time": "6:51:15", "remaining_time": "3:08:55"} +{"current_steps": 4688, "total_steps": 6840, "loss": 0.4857858419418335, "lr": 4.9456683859990185e-06, "epoch": 1.3708144465565142, "percentage": 68.54, "elapsed_time": "6:51:22", "remaining_time": "3:08:50"} +{"current_steps": 4689, "total_steps": 6840, "loss": 0.4889591336250305, "lr": 4.94149727368269e-06, "epoch": 1.3711068869717795, "percentage": 68.55, "elapsed_time": "6:51:27", "remaining_time": "3:08:45"} +{"current_steps": 
4690, "total_steps": 6840, "loss": 0.5475220680236816, "lr": 4.937327343760617e-06, "epoch": 1.3713993273870448, "percentage": 68.57, "elapsed_time": "6:51:32", "remaining_time": "3:08:39"} +{"current_steps": 4691, "total_steps": 6840, "loss": 0.5794380903244019, "lr": 4.933158597207501e-06, "epoch": 1.3716917678023104, "percentage": 68.58, "elapsed_time": "6:51:39", "remaining_time": "3:08:35"} +{"current_steps": 4692, "total_steps": 6840, "loss": 0.42212024331092834, "lr": 4.928991034997752e-06, "epoch": 1.3719842082175757, "percentage": 68.6, "elapsed_time": "6:51:43", "remaining_time": "3:08:29"} +{"current_steps": 4693, "total_steps": 6840, "loss": 0.6091631054878235, "lr": 4.924824658105516e-06, "epoch": 1.372276648632841, "percentage": 68.61, "elapsed_time": "6:51:48", "remaining_time": "3:08:24"} +{"current_steps": 4694, "total_steps": 6840, "loss": 0.544279158115387, "lr": 4.9206594675046595e-06, "epoch": 1.3725690890481064, "percentage": 68.63, "elapsed_time": "6:51:54", "remaining_time": "3:08:18"} +{"current_steps": 4695, "total_steps": 6840, "loss": 0.46237099170684814, "lr": 4.916495464168768e-06, "epoch": 1.3728615294633717, "percentage": 68.64, "elapsed_time": "6:52:00", "remaining_time": "3:08:13"} +{"current_steps": 4696, "total_steps": 6840, "loss": 0.5615352392196655, "lr": 4.912332649071154e-06, "epoch": 1.3731539698786372, "percentage": 68.65, "elapsed_time": "6:52:04", "remaining_time": "3:08:08"} +{"current_steps": 4697, "total_steps": 6840, "loss": 0.5552200078964233, "lr": 4.90817102318485e-06, "epoch": 1.3734464102939026, "percentage": 68.67, "elapsed_time": "6:52:08", "remaining_time": "3:08:02"} +{"current_steps": 4698, "total_steps": 6840, "loss": 0.5466557741165161, "lr": 4.904010587482612e-06, "epoch": 1.373738850709168, "percentage": 68.68, "elapsed_time": "6:52:12", "remaining_time": "3:07:56"} +{"current_steps": 4699, "total_steps": 6840, "loss": 0.6131544709205627, "lr": 4.8998513429369135e-06, "epoch": 1.3740312911244335, 
"percentage": 68.7, "elapsed_time": "6:52:18", "remaining_time": "3:07:51"} +{"current_steps": 4700, "total_steps": 6840, "loss": 0.5264796018600464, "lr": 4.895693290519954e-06, "epoch": 1.3743237315396988, "percentage": 68.71, "elapsed_time": "6:52:23", "remaining_time": "3:07:46"} +{"current_steps": 4701, "total_steps": 6840, "loss": 0.5179097652435303, "lr": 4.891536431203653e-06, "epoch": 1.374616171954964, "percentage": 68.73, "elapsed_time": "6:52:33", "remaining_time": "3:07:43"} +{"current_steps": 4702, "total_steps": 6840, "loss": 0.46007782220840454, "lr": 4.887380765959655e-06, "epoch": 1.3749086123702297, "percentage": 68.74, "elapsed_time": "6:52:39", "remaining_time": "3:07:37"} +{"current_steps": 4703, "total_steps": 6840, "loss": 0.48182815313339233, "lr": 4.8832262957593145e-06, "epoch": 1.375201052785495, "percentage": 68.76, "elapsed_time": "6:52:45", "remaining_time": "3:07:33"} +{"current_steps": 4704, "total_steps": 6840, "loss": 0.5334529280662537, "lr": 4.879073021573717e-06, "epoch": 1.3754934932007603, "percentage": 68.77, "elapsed_time": "6:52:50", "remaining_time": "3:07:27"} +{"current_steps": 4705, "total_steps": 6840, "loss": 0.5984899997711182, "lr": 4.874920944373665e-06, "epoch": 1.3757859336160259, "percentage": 68.79, "elapsed_time": "6:52:54", "remaining_time": "3:07:22"} +{"current_steps": 4706, "total_steps": 6840, "loss": 0.46676474809646606, "lr": 4.870770065129681e-06, "epoch": 1.3760783740312912, "percentage": 68.8, "elapsed_time": "6:52:59", "remaining_time": "3:07:16"} +{"current_steps": 4707, "total_steps": 6840, "loss": 0.4608241617679596, "lr": 4.866620384812008e-06, "epoch": 1.3763708144465565, "percentage": 68.82, "elapsed_time": "6:53:04", "remaining_time": "3:07:11"} +{"current_steps": 4708, "total_steps": 6840, "loss": 0.5877207517623901, "lr": 4.862471904390609e-06, "epoch": 1.3766632548618218, "percentage": 68.83, "elapsed_time": "6:53:09", "remaining_time": "3:07:05"} +{"current_steps": 4709, "total_steps": 
6840, "loss": 0.5243252515792847, "lr": 4.858324624835164e-06, "epoch": 1.3769556952770872, "percentage": 68.85, "elapsed_time": "6:53:14", "remaining_time": "3:07:00"} +{"current_steps": 4710, "total_steps": 6840, "loss": 0.528606653213501, "lr": 4.854178547115078e-06, "epoch": 1.3772481356923527, "percentage": 68.86, "elapsed_time": "6:53:20", "remaining_time": "3:06:55"} +{"current_steps": 4711, "total_steps": 6840, "loss": 0.46468549966812134, "lr": 4.850033672199469e-06, "epoch": 1.377540576107618, "percentage": 68.87, "elapsed_time": "6:53:25", "remaining_time": "3:06:50"} +{"current_steps": 4712, "total_steps": 6840, "loss": 0.5368300676345825, "lr": 4.8458900010571765e-06, "epoch": 1.3778330165228834, "percentage": 68.89, "elapsed_time": "6:53:30", "remaining_time": "3:06:44"} +{"current_steps": 4713, "total_steps": 6840, "loss": 0.5156906843185425, "lr": 4.8417475346567635e-06, "epoch": 1.378125456938149, "percentage": 68.9, "elapsed_time": "6:53:36", "remaining_time": "3:06:39"} +{"current_steps": 4714, "total_steps": 6840, "loss": 0.5899196863174438, "lr": 4.837606273966496e-06, "epoch": 1.3784178973534142, "percentage": 68.92, "elapsed_time": "6:53:41", "remaining_time": "3:06:34"} +{"current_steps": 4715, "total_steps": 6840, "loss": 0.5820844769477844, "lr": 4.833466219954376e-06, "epoch": 1.3787103377686796, "percentage": 68.93, "elapsed_time": "6:53:46", "remaining_time": "3:06:28"} +{"current_steps": 4716, "total_steps": 6840, "loss": 0.4926246404647827, "lr": 4.829327373588113e-06, "epoch": 1.3790027781839451, "percentage": 68.95, "elapsed_time": "6:53:50", "remaining_time": "3:06:23"} +{"current_steps": 4717, "total_steps": 6840, "loss": 0.5417006611824036, "lr": 4.825189735835138e-06, "epoch": 1.3792952185992104, "percentage": 68.96, "elapsed_time": "6:53:56", "remaining_time": "3:06:18"} +{"current_steps": 4718, "total_steps": 6840, "loss": 0.4130229949951172, "lr": 4.821053307662599e-06, "epoch": 1.3795876590144758, "percentage": 68.98, 
"elapsed_time": "6:54:01", "remaining_time": "3:06:12"} +{"current_steps": 4719, "total_steps": 6840, "loss": 0.4553627371788025, "lr": 4.8169180900373615e-06, "epoch": 1.3798800994297413, "percentage": 68.99, "elapsed_time": "6:54:07", "remaining_time": "3:06:07"} +{"current_steps": 4720, "total_steps": 6840, "loss": 0.523567259311676, "lr": 4.812784083926005e-06, "epoch": 1.3801725398450067, "percentage": 69.01, "elapsed_time": "6:54:13", "remaining_time": "3:06:02"} +{"current_steps": 4721, "total_steps": 6840, "loss": 0.4643239378929138, "lr": 4.808651290294832e-06, "epoch": 1.380464980260272, "percentage": 69.02, "elapsed_time": "6:54:19", "remaining_time": "3:05:57"} +{"current_steps": 4722, "total_steps": 6840, "loss": 0.4631537199020386, "lr": 4.804519710109856e-06, "epoch": 1.3807574206755373, "percentage": 69.04, "elapsed_time": "6:54:23", "remaining_time": "3:05:52"} +{"current_steps": 4723, "total_steps": 6840, "loss": 0.5304736495018005, "lr": 4.8003893443368075e-06, "epoch": 1.3810498610908026, "percentage": 69.05, "elapsed_time": "6:54:28", "remaining_time": "3:05:46"} +{"current_steps": 4724, "total_steps": 6840, "loss": 0.4357796907424927, "lr": 4.79626019394114e-06, "epoch": 1.3813423015060682, "percentage": 69.06, "elapsed_time": "6:54:32", "remaining_time": "3:05:41"} +{"current_steps": 4725, "total_steps": 6840, "loss": 0.6693407297134399, "lr": 4.7921322598880095e-06, "epoch": 1.3816347419213335, "percentage": 69.08, "elapsed_time": "6:54:38", "remaining_time": "3:05:36"} +{"current_steps": 4726, "total_steps": 6840, "loss": 0.5333320498466492, "lr": 4.788005543142299e-06, "epoch": 1.3819271823365988, "percentage": 69.09, "elapsed_time": "6:54:44", "remaining_time": "3:05:31"} +{"current_steps": 4727, "total_steps": 6840, "loss": 0.5782167911529541, "lr": 4.783880044668603e-06, "epoch": 1.3822196227518644, "percentage": 69.11, "elapsed_time": "6:54:49", "remaining_time": "3:05:25"} +{"current_steps": 4728, "total_steps": 6840, "loss": 
0.581318199634552, "lr": 4.779755765431231e-06, "epoch": 1.3825120631671297, "percentage": 69.12, "elapsed_time": "6:54:54", "remaining_time": "3:05:20"} +{"current_steps": 4729, "total_steps": 6840, "loss": 0.5812945365905762, "lr": 4.775632706394211e-06, "epoch": 1.382804503582395, "percentage": 69.14, "elapsed_time": "6:54:59", "remaining_time": "3:05:14"} +{"current_steps": 4730, "total_steps": 6840, "loss": 0.460615873336792, "lr": 4.771510868521279e-06, "epoch": 1.3830969439976606, "percentage": 69.15, "elapsed_time": "6:55:03", "remaining_time": "3:05:09"} +{"current_steps": 4731, "total_steps": 6840, "loss": 0.5934186577796936, "lr": 4.767390252775894e-06, "epoch": 1.383389384412926, "percentage": 69.17, "elapsed_time": "6:55:08", "remaining_time": "3:05:03"} +{"current_steps": 4732, "total_steps": 6840, "loss": 0.4928584098815918, "lr": 4.763270860121222e-06, "epoch": 1.3836818248281912, "percentage": 69.18, "elapsed_time": "6:55:13", "remaining_time": "3:04:58"} +{"current_steps": 4733, "total_steps": 6840, "loss": 0.505489706993103, "lr": 4.759152691520146e-06, "epoch": 1.3839742652434566, "percentage": 69.2, "elapsed_time": "6:55:17", "remaining_time": "3:04:52"} +{"current_steps": 4734, "total_steps": 6840, "loss": 0.5679354667663574, "lr": 4.755035747935264e-06, "epoch": 1.3842667056587221, "percentage": 69.21, "elapsed_time": "6:55:22", "remaining_time": "3:04:47"} +{"current_steps": 4735, "total_steps": 6840, "loss": 0.5744746923446655, "lr": 4.750920030328889e-06, "epoch": 1.3845591460739874, "percentage": 69.23, "elapsed_time": "6:55:28", "remaining_time": "3:04:42"} +{"current_steps": 4736, "total_steps": 6840, "loss": 0.4953685402870178, "lr": 4.7468055396630395e-06, "epoch": 1.3848515864892528, "percentage": 69.24, "elapsed_time": "6:55:33", "remaining_time": "3:04:36"} +{"current_steps": 4737, "total_steps": 6840, "loss": 0.6083461046218872, "lr": 4.742692276899454e-06, "epoch": 1.385144026904518, "percentage": 69.25, "elapsed_time": 
"6:55:38", "remaining_time": "3:04:31"} +{"current_steps": 4738, "total_steps": 6840, "loss": 0.4980735778808594, "lr": 4.738580242999584e-06, "epoch": 1.3854364673197836, "percentage": 69.27, "elapsed_time": "6:55:44", "remaining_time": "3:04:26"} +{"current_steps": 4739, "total_steps": 6840, "loss": 0.46363019943237305, "lr": 4.734469438924594e-06, "epoch": 1.385728907735049, "percentage": 69.28, "elapsed_time": "6:55:50", "remaining_time": "3:04:21"} +{"current_steps": 4740, "total_steps": 6840, "loss": 0.5946298837661743, "lr": 4.730359865635355e-06, "epoch": 1.3860213481503143, "percentage": 69.3, "elapsed_time": "6:55:55", "remaining_time": "3:04:16"} +{"current_steps": 4741, "total_steps": 6840, "loss": 0.5630123615264893, "lr": 4.726251524092459e-06, "epoch": 1.3863137885655799, "percentage": 69.31, "elapsed_time": "6:56:01", "remaining_time": "3:04:11"} +{"current_steps": 4742, "total_steps": 6840, "loss": 0.5353481769561768, "lr": 4.7221444152562045e-06, "epoch": 1.3866062289808452, "percentage": 69.33, "elapsed_time": "6:56:06", "remaining_time": "3:04:06"} +{"current_steps": 4743, "total_steps": 6840, "loss": 0.5170711874961853, "lr": 4.718038540086602e-06, "epoch": 1.3868986693961105, "percentage": 69.34, "elapsed_time": "6:56:11", "remaining_time": "3:04:00"} +{"current_steps": 4744, "total_steps": 6840, "loss": 0.600492000579834, "lr": 4.713933899543377e-06, "epoch": 1.387191109811376, "percentage": 69.36, "elapsed_time": "6:56:15", "remaining_time": "3:03:54"} +{"current_steps": 4745, "total_steps": 6840, "loss": 0.5291938781738281, "lr": 4.709830494585962e-06, "epoch": 1.3874835502266414, "percentage": 69.37, "elapsed_time": "6:56:19", "remaining_time": "3:03:48"} +{"current_steps": 4746, "total_steps": 6840, "loss": 0.5664317011833191, "lr": 4.7057283261735055e-06, "epoch": 1.3877759906419067, "percentage": 69.39, "elapsed_time": "6:56:24", "remaining_time": "3:03:43"} +{"current_steps": 4747, "total_steps": 6840, "loss": 0.606655478477478, "lr": 
4.701627395264866e-06, "epoch": 1.388068431057172, "percentage": 69.4, "elapsed_time": "6:56:29", "remaining_time": "3:03:38"} +{"current_steps": 4748, "total_steps": 6840, "loss": 0.6160893440246582, "lr": 4.697527702818604e-06, "epoch": 1.3883608714724374, "percentage": 69.42, "elapsed_time": "6:56:34", "remaining_time": "3:03:32"} +{"current_steps": 4749, "total_steps": 6840, "loss": 0.45944249629974365, "lr": 4.693429249793002e-06, "epoch": 1.388653311887703, "percentage": 69.43, "elapsed_time": "6:56:39", "remaining_time": "3:03:27"} +{"current_steps": 4750, "total_steps": 6840, "loss": 0.5737302303314209, "lr": 4.689332037146049e-06, "epoch": 1.3889457523029682, "percentage": 69.44, "elapsed_time": "6:56:44", "remaining_time": "3:03:21"} +{"current_steps": 4751, "total_steps": 6840, "loss": 0.4075150787830353, "lr": 4.685236065835443e-06, "epoch": 1.3892381927182336, "percentage": 69.46, "elapsed_time": "6:56:48", "remaining_time": "3:03:16"} +{"current_steps": 4752, "total_steps": 6840, "loss": 0.5832744836807251, "lr": 4.681141336818592e-06, "epoch": 1.3895306331334991, "percentage": 69.47, "elapsed_time": "6:56:53", "remaining_time": "3:03:10"} +{"current_steps": 4753, "total_steps": 6840, "loss": 0.5444560647010803, "lr": 4.6770478510526155e-06, "epoch": 1.3898230735487644, "percentage": 69.49, "elapsed_time": "6:56:58", "remaining_time": "3:03:05"} +{"current_steps": 4754, "total_steps": 6840, "loss": 0.6087433695793152, "lr": 4.672955609494339e-06, "epoch": 1.3901155139640298, "percentage": 69.5, "elapsed_time": "6:57:04", "remaining_time": "3:03:00"} +{"current_steps": 4755, "total_steps": 6840, "loss": 0.3781468868255615, "lr": 4.6688646131002995e-06, "epoch": 1.3904079543792953, "percentage": 69.52, "elapsed_time": "6:57:09", "remaining_time": "3:02:55"} +{"current_steps": 4756, "total_steps": 6840, "loss": 0.43719804286956787, "lr": 4.664774862826742e-06, "epoch": 1.3907003947945606, "percentage": 69.53, "elapsed_time": "6:57:14", "remaining_time": 
"3:02:49"} +{"current_steps": 4757, "total_steps": 6840, "loss": 0.550011932849884, "lr": 4.660686359629623e-06, "epoch": 1.390992835209826, "percentage": 69.55, "elapsed_time": "6:57:20", "remaining_time": "3:02:44"} +{"current_steps": 4758, "total_steps": 6840, "loss": 0.6060909032821655, "lr": 4.656599104464607e-06, "epoch": 1.3912852756250915, "percentage": 69.56, "elapsed_time": "6:57:24", "remaining_time": "3:02:39"} +{"current_steps": 4759, "total_steps": 6840, "loss": 0.5169791579246521, "lr": 4.652513098287058e-06, "epoch": 1.3915777160403568, "percentage": 69.58, "elapsed_time": "6:57:29", "remaining_time": "3:02:33"} +{"current_steps": 4760, "total_steps": 6840, "loss": 0.43063026666641235, "lr": 4.6484283420520594e-06, "epoch": 1.3918701564556222, "percentage": 69.59, "elapsed_time": "6:57:33", "remaining_time": "3:02:27"} +{"current_steps": 4761, "total_steps": 6840, "loss": 0.5426993370056152, "lr": 4.644344836714397e-06, "epoch": 1.3921625968708875, "percentage": 69.61, "elapsed_time": "6:57:40", "remaining_time": "3:02:23"} +{"current_steps": 4762, "total_steps": 6840, "loss": 0.5260995030403137, "lr": 4.6402625832285665e-06, "epoch": 1.3924550372861528, "percentage": 69.62, "elapsed_time": "6:57:45", "remaining_time": "3:02:18"} +{"current_steps": 4763, "total_steps": 6840, "loss": 0.5206680297851562, "lr": 4.63618158254877e-06, "epoch": 1.3927474777014184, "percentage": 69.63, "elapsed_time": "6:57:51", "remaining_time": "3:02:13"} +{"current_steps": 4764, "total_steps": 6840, "loss": 0.5250430703163147, "lr": 4.632101835628912e-06, "epoch": 1.3930399181166837, "percentage": 69.65, "elapsed_time": "6:57:56", "remaining_time": "3:02:07"} +{"current_steps": 4765, "total_steps": 6840, "loss": 0.5409445762634277, "lr": 4.628023343422616e-06, "epoch": 1.393332358531949, "percentage": 69.66, "elapsed_time": "6:58:00", "remaining_time": "3:02:01"} +{"current_steps": 4766, "total_steps": 6840, "loss": 0.4676284193992615, "lr": 4.6239461068832056e-06, 
"epoch": 1.3936247989472146, "percentage": 69.68, "elapsed_time": "6:58:07", "remaining_time": "3:01:57"} +{"current_steps": 4767, "total_steps": 6840, "loss": 0.6019079089164734, "lr": 4.6198701269637014e-06, "epoch": 1.39391723936248, "percentage": 69.69, "elapsed_time": "6:58:13", "remaining_time": "3:01:52"} +{"current_steps": 4768, "total_steps": 6840, "loss": 0.5434615612030029, "lr": 4.615795404616844e-06, "epoch": 1.3942096797777452, "percentage": 69.71, "elapsed_time": "6:58:17", "remaining_time": "3:01:46"} +{"current_steps": 4769, "total_steps": 6840, "loss": 0.5817157030105591, "lr": 4.611721940795074e-06, "epoch": 1.3945021201930108, "percentage": 69.72, "elapsed_time": "6:58:22", "remaining_time": "3:01:41"} +{"current_steps": 4770, "total_steps": 6840, "loss": 0.5601100921630859, "lr": 4.607649736450539e-06, "epoch": 1.3947945606082761, "percentage": 69.74, "elapsed_time": "6:58:27", "remaining_time": "3:01:35"} +{"current_steps": 4771, "total_steps": 6840, "loss": 0.5955039262771606, "lr": 4.6035787925350915e-06, "epoch": 1.3950870010235414, "percentage": 69.75, "elapsed_time": "6:58:33", "remaining_time": "3:01:30"} +{"current_steps": 4772, "total_steps": 6840, "loss": 0.47491732239723206, "lr": 4.5995091100002905e-06, "epoch": 1.3953794414388068, "percentage": 69.77, "elapsed_time": "6:58:38", "remaining_time": "3:01:25"} +{"current_steps": 4773, "total_steps": 6840, "loss": 0.5451281070709229, "lr": 4.595440689797402e-06, "epoch": 1.3956718818540723, "percentage": 69.78, "elapsed_time": "6:58:43", "remaining_time": "3:01:19"} +{"current_steps": 4774, "total_steps": 6840, "loss": 0.3973035514354706, "lr": 4.591373532877389e-06, "epoch": 1.3959643222693376, "percentage": 69.8, "elapsed_time": "6:58:48", "remaining_time": "3:01:14"} +{"current_steps": 4775, "total_steps": 6840, "loss": 0.604694128036499, "lr": 4.587307640190929e-06, "epoch": 1.396256762684603, "percentage": 69.81, "elapsed_time": "6:58:53", "remaining_time": "3:01:09"} 
+{"current_steps": 4776, "total_steps": 6840, "loss": 0.4120032489299774, "lr": 4.583243012688397e-06, "epoch": 1.3965492030998683, "percentage": 69.82, "elapsed_time": "6:58:58", "remaining_time": "3:01:03"} +{"current_steps": 4777, "total_steps": 6840, "loss": 0.4864089787006378, "lr": 4.579179651319878e-06, "epoch": 1.3968416435151338, "percentage": 69.84, "elapsed_time": "6:59:03", "remaining_time": "3:00:58"} +{"current_steps": 4778, "total_steps": 6840, "loss": 0.5774982571601868, "lr": 4.57511755703516e-06, "epoch": 1.3971340839303992, "percentage": 69.85, "elapsed_time": "6:59:08", "remaining_time": "3:00:53"} +{"current_steps": 4779, "total_steps": 6840, "loss": 0.48220688104629517, "lr": 4.571056730783725e-06, "epoch": 1.3974265243456645, "percentage": 69.87, "elapsed_time": "6:59:14", "remaining_time": "3:00:48"} +{"current_steps": 4780, "total_steps": 6840, "loss": 0.4636304974555969, "lr": 4.566997173514771e-06, "epoch": 1.39771896476093, "percentage": 69.88, "elapsed_time": "6:59:21", "remaining_time": "3:00:43"} +{"current_steps": 4781, "total_steps": 6840, "loss": 0.500522792339325, "lr": 4.562938886177194e-06, "epoch": 1.3980114051761954, "percentage": 69.9, "elapsed_time": "6:59:27", "remaining_time": "3:00:38"} +{"current_steps": 4782, "total_steps": 6840, "loss": 0.5322657823562622, "lr": 4.558881869719595e-06, "epoch": 1.3983038455914607, "percentage": 69.91, "elapsed_time": "6:59:32", "remaining_time": "3:00:33"} +{"current_steps": 4783, "total_steps": 6840, "loss": 0.5013759136199951, "lr": 4.554826125090276e-06, "epoch": 1.3985962860067263, "percentage": 69.93, "elapsed_time": "6:59:36", "remaining_time": "3:00:27"} +{"current_steps": 4784, "total_steps": 6840, "loss": 0.4261836111545563, "lr": 4.550771653237242e-06, "epoch": 1.3988887264219916, "percentage": 69.94, "elapsed_time": "6:59:40", "remaining_time": "3:00:21"} +{"current_steps": 4785, "total_steps": 6840, "loss": 0.6181522607803345, "lr": 4.546718455108205e-06, "epoch": 
1.399181166837257, "percentage": 69.96, "elapsed_time": "6:59:46", "remaining_time": "3:00:16"} +{"current_steps": 4786, "total_steps": 6840, "loss": 0.6267478466033936, "lr": 4.54266653165057e-06, "epoch": 1.3994736072525222, "percentage": 69.97, "elapsed_time": "6:59:51", "remaining_time": "3:00:11"} +{"current_steps": 4787, "total_steps": 6840, "loss": 0.5382452607154846, "lr": 4.5386158838114535e-06, "epoch": 1.3997660476677876, "percentage": 69.99, "elapsed_time": "6:59:57", "remaining_time": "3:00:06"} +{"current_steps": 4788, "total_steps": 6840, "loss": 0.5973625183105469, "lr": 4.534566512537668e-06, "epoch": 1.400058488083053, "percentage": 70.0, "elapsed_time": "7:00:02", "remaining_time": "3:00:01"} +{"current_steps": 4789, "total_steps": 6840, "loss": 0.57401442527771, "lr": 4.530518418775734e-06, "epoch": 1.4003509284983184, "percentage": 70.01, "elapsed_time": "7:00:09", "remaining_time": "2:59:56"} +{"current_steps": 4790, "total_steps": 6840, "loss": 0.5712965726852417, "lr": 4.52647160347186e-06, "epoch": 1.4006433689135838, "percentage": 70.03, "elapsed_time": "7:00:14", "remaining_time": "2:59:51"} +{"current_steps": 4791, "total_steps": 6840, "loss": 0.5678268671035767, "lr": 4.52242606757197e-06, "epoch": 1.4009358093288493, "percentage": 70.04, "elapsed_time": "7:00:19", "remaining_time": "2:59:45"} +{"current_steps": 4792, "total_steps": 6840, "loss": 0.4798399806022644, "lr": 4.518381812021682e-06, "epoch": 1.4012282497441146, "percentage": 70.06, "elapsed_time": "7:00:23", "remaining_time": "2:59:40"} +{"current_steps": 4793, "total_steps": 6840, "loss": 0.48918360471725464, "lr": 4.514338837766317e-06, "epoch": 1.40152069015938, "percentage": 70.07, "elapsed_time": "7:00:29", "remaining_time": "2:59:34"} +{"current_steps": 4794, "total_steps": 6840, "loss": 0.47836846113204956, "lr": 4.510297145750894e-06, "epoch": 1.4018131305746455, "percentage": 70.09, "elapsed_time": "7:00:35", "remaining_time": "2:59:29"} +{"current_steps": 4795, 
"total_steps": 6840, "loss": 0.4956067204475403, "lr": 4.506256736920136e-06, "epoch": 1.4021055709899108, "percentage": 70.1, "elapsed_time": "7:00:40", "remaining_time": "2:59:24"} +{"current_steps": 4796, "total_steps": 6840, "loss": 0.39146924018859863, "lr": 4.502217612218463e-06, "epoch": 1.4023980114051762, "percentage": 70.12, "elapsed_time": "7:00:46", "remaining_time": "2:59:19"} +{"current_steps": 4797, "total_steps": 6840, "loss": 0.46657800674438477, "lr": 4.498179772589998e-06, "epoch": 1.4026904518204417, "percentage": 70.13, "elapsed_time": "7:00:51", "remaining_time": "2:59:14"} +{"current_steps": 4798, "total_steps": 6840, "loss": 0.4949738383293152, "lr": 4.4941432189785574e-06, "epoch": 1.402982892235707, "percentage": 70.15, "elapsed_time": "7:00:57", "remaining_time": "2:59:09"} +{"current_steps": 4799, "total_steps": 6840, "loss": 0.5256912708282471, "lr": 4.490107952327663e-06, "epoch": 1.4032753326509724, "percentage": 70.16, "elapsed_time": "7:01:02", "remaining_time": "2:59:04"} +{"current_steps": 4800, "total_steps": 6840, "loss": 0.38139551877975464, "lr": 4.486073973580539e-06, "epoch": 1.4035677730662377, "percentage": 70.18, "elapsed_time": "7:01:07", "remaining_time": "2:58:58"} +{"current_steps": 4801, "total_steps": 6840, "loss": 0.5014597177505493, "lr": 4.482041283680095e-06, "epoch": 1.403860213481503, "percentage": 70.19, "elapsed_time": "7:01:17", "remaining_time": "2:58:55"} +{"current_steps": 4802, "total_steps": 6840, "loss": 0.5497276186943054, "lr": 4.478009883568951e-06, "epoch": 1.4041526538967686, "percentage": 70.2, "elapsed_time": "7:01:21", "remaining_time": "2:58:49"} +{"current_steps": 4803, "total_steps": 6840, "loss": 0.6098340749740601, "lr": 4.473979774189422e-06, "epoch": 1.404445094312034, "percentage": 70.22, "elapsed_time": "7:01:26", "remaining_time": "2:58:44"} +{"current_steps": 4804, "total_steps": 6840, "loss": 0.40206801891326904, "lr": 4.469950956483522e-06, "epoch": 1.4047375347272992, 
"percentage": 70.23, "elapsed_time": "7:01:31", "remaining_time": "2:58:39"} +{"current_steps": 4805, "total_steps": 6840, "loss": 0.5362050533294678, "lr": 4.465923431392962e-06, "epoch": 1.4050299751425648, "percentage": 70.25, "elapsed_time": "7:01:37", "remaining_time": "2:58:34"} +{"current_steps": 4806, "total_steps": 6840, "loss": 0.5688962936401367, "lr": 4.461897199859153e-06, "epoch": 1.40532241555783, "percentage": 70.26, "elapsed_time": "7:01:43", "remaining_time": "2:58:29"} +{"current_steps": 4807, "total_steps": 6840, "loss": 0.5270779132843018, "lr": 4.457872262823202e-06, "epoch": 1.4056148559730954, "percentage": 70.28, "elapsed_time": "7:01:47", "remaining_time": "2:58:23"} +{"current_steps": 4808, "total_steps": 6840, "loss": 0.5656974911689758, "lr": 4.453848621225913e-06, "epoch": 1.405907296388361, "percentage": 70.29, "elapsed_time": "7:01:51", "remaining_time": "2:58:17"} +{"current_steps": 4809, "total_steps": 6840, "loss": 0.44072896242141724, "lr": 4.449826276007786e-06, "epoch": 1.4061997368036263, "percentage": 70.31, "elapsed_time": "7:01:57", "remaining_time": "2:58:12"} +{"current_steps": 4810, "total_steps": 6840, "loss": 0.5851765871047974, "lr": 4.445805228109022e-06, "epoch": 1.4064921772188916, "percentage": 70.32, "elapsed_time": "7:02:03", "remaining_time": "2:58:07"} +{"current_steps": 4811, "total_steps": 6840, "loss": 0.6174030303955078, "lr": 4.441785478469519e-06, "epoch": 1.406784617634157, "percentage": 70.34, "elapsed_time": "7:02:08", "remaining_time": "2:58:01"} +{"current_steps": 4812, "total_steps": 6840, "loss": 0.542346715927124, "lr": 4.437767028028863e-06, "epoch": 1.4070770580494225, "percentage": 70.35, "elapsed_time": "7:02:13", "remaining_time": "2:57:56"} +{"current_steps": 4813, "total_steps": 6840, "loss": 0.4964073598384857, "lr": 4.433749877726345e-06, "epoch": 1.4073694984646878, "percentage": 70.37, "elapsed_time": "7:02:19", "remaining_time": "2:57:51"} +{"current_steps": 4814, "total_steps": 6840, 
"loss": 0.5309566259384155, "lr": 4.429734028500951e-06, "epoch": 1.4076619388799532, "percentage": 70.38, "elapsed_time": "7:02:25", "remaining_time": "2:57:46"} +{"current_steps": 4815, "total_steps": 6840, "loss": 0.5799233913421631, "lr": 4.425719481291359e-06, "epoch": 1.4079543792952185, "percentage": 70.39, "elapsed_time": "7:02:29", "remaining_time": "2:57:40"} +{"current_steps": 4816, "total_steps": 6840, "loss": 0.37344229221343994, "lr": 4.4217062370359456e-06, "epoch": 1.408246819710484, "percentage": 70.41, "elapsed_time": "7:02:34", "remaining_time": "2:57:35"} +{"current_steps": 4817, "total_steps": 6840, "loss": 0.5752555727958679, "lr": 4.417694296672783e-06, "epoch": 1.4085392601257494, "percentage": 70.42, "elapsed_time": "7:02:40", "remaining_time": "2:57:30"} +{"current_steps": 4818, "total_steps": 6840, "loss": 0.61701500415802, "lr": 4.413683661139638e-06, "epoch": 1.4088317005410147, "percentage": 70.44, "elapsed_time": "7:02:46", "remaining_time": "2:57:25"} +{"current_steps": 4819, "total_steps": 6840, "loss": 0.4163259267807007, "lr": 4.409674331373972e-06, "epoch": 1.4091241409562802, "percentage": 70.45, "elapsed_time": "7:02:51", "remaining_time": "2:57:20"} +{"current_steps": 4820, "total_steps": 6840, "loss": 0.46583253145217896, "lr": 4.40566630831294e-06, "epoch": 1.4094165813715456, "percentage": 70.47, "elapsed_time": "7:02:58", "remaining_time": "2:57:15"} +{"current_steps": 4821, "total_steps": 6840, "loss": 0.5230617523193359, "lr": 4.401659592893396e-06, "epoch": 1.409709021786811, "percentage": 70.48, "elapsed_time": "7:03:03", "remaining_time": "2:57:10"} +{"current_steps": 4822, "total_steps": 6840, "loss": 0.6351375579833984, "lr": 4.397654186051887e-06, "epoch": 1.4100014622020764, "percentage": 70.5, "elapsed_time": "7:03:09", "remaining_time": "2:57:05"} +{"current_steps": 4823, "total_steps": 6840, "loss": 0.5895766615867615, "lr": 4.3936500887246445e-06, "epoch": 1.4102939026173418, "percentage": 70.51, 
"elapsed_time": "7:03:16", "remaining_time": "2:57:00"} +{"current_steps": 4824, "total_steps": 6840, "loss": 0.49772539734840393, "lr": 4.389647301847607e-06, "epoch": 1.410586343032607, "percentage": 70.53, "elapsed_time": "7:03:22", "remaining_time": "2:56:55"} +{"current_steps": 4825, "total_steps": 6840, "loss": 0.593197226524353, "lr": 4.385645826356402e-06, "epoch": 1.4108787834478724, "percentage": 70.54, "elapsed_time": "7:03:27", "remaining_time": "2:56:50"} +{"current_steps": 4826, "total_steps": 6840, "loss": 0.4971385598182678, "lr": 4.381645663186348e-06, "epoch": 1.4111712238631378, "percentage": 70.56, "elapsed_time": "7:03:32", "remaining_time": "2:56:45"} +{"current_steps": 4827, "total_steps": 6840, "loss": 0.5452263951301575, "lr": 4.3776468132724605e-06, "epoch": 1.4114636642784033, "percentage": 70.57, "elapsed_time": "7:03:37", "remaining_time": "2:56:39"} +{"current_steps": 4828, "total_steps": 6840, "loss": 0.6085976362228394, "lr": 4.373649277549446e-06, "epoch": 1.4117561046936686, "percentage": 70.58, "elapsed_time": "7:03:43", "remaining_time": "2:56:34"} +{"current_steps": 4829, "total_steps": 6840, "loss": 0.5594700574874878, "lr": 4.369653056951705e-06, "epoch": 1.412048545108934, "percentage": 70.6, "elapsed_time": "7:03:48", "remaining_time": "2:56:29"} +{"current_steps": 4830, "total_steps": 6840, "loss": 0.5099719166755676, "lr": 4.365658152413328e-06, "epoch": 1.4123409855241995, "percentage": 70.61, "elapsed_time": "7:03:53", "remaining_time": "2:56:24"} +{"current_steps": 4831, "total_steps": 6840, "loss": 0.5683532953262329, "lr": 4.3616645648681e-06, "epoch": 1.4126334259394648, "percentage": 70.63, "elapsed_time": "7:03:57", "remaining_time": "2:56:18"} +{"current_steps": 4832, "total_steps": 6840, "loss": 0.5311406850814819, "lr": 4.3576722952495e-06, "epoch": 1.4129258663547302, "percentage": 70.64, "elapsed_time": "7:04:01", "remaining_time": "2:56:12"} +{"current_steps": 4833, "total_steps": 6840, "loss": 
0.5299100875854492, "lr": 4.353681344490693e-06, "epoch": 1.4132183067699957, "percentage": 70.66, "elapsed_time": "7:04:06", "remaining_time": "2:56:07"} +{"current_steps": 4834, "total_steps": 6840, "loss": 0.5531362891197205, "lr": 4.349691713524546e-06, "epoch": 1.413510747185261, "percentage": 70.67, "elapsed_time": "7:04:11", "remaining_time": "2:56:01"} +{"current_steps": 4835, "total_steps": 6840, "loss": 0.5315259099006653, "lr": 4.345703403283603e-06, "epoch": 1.4138031876005264, "percentage": 70.69, "elapsed_time": "7:04:15", "remaining_time": "2:55:56"} +{"current_steps": 4836, "total_steps": 6840, "loss": 0.583083987236023, "lr": 4.341716414700112e-06, "epoch": 1.414095628015792, "percentage": 70.7, "elapsed_time": "7:04:19", "remaining_time": "2:55:50"} +{"current_steps": 4837, "total_steps": 6840, "loss": 0.5273857116699219, "lr": 4.337730748706005e-06, "epoch": 1.4143880684310572, "percentage": 70.72, "elapsed_time": "7:04:23", "remaining_time": "2:55:44"} +{"current_steps": 4838, "total_steps": 6840, "loss": 0.4903373718261719, "lr": 4.333746406232908e-06, "epoch": 1.4146805088463226, "percentage": 70.73, "elapsed_time": "7:04:28", "remaining_time": "2:55:38"} +{"current_steps": 4839, "total_steps": 6840, "loss": 0.5807479619979858, "lr": 4.329763388212134e-06, "epoch": 1.414972949261588, "percentage": 70.75, "elapsed_time": "7:04:34", "remaining_time": "2:55:34"} +{"current_steps": 4840, "total_steps": 6840, "loss": 0.5613743662834167, "lr": 4.325781695574695e-06, "epoch": 1.4152653896768532, "percentage": 70.76, "elapsed_time": "7:04:41", "remaining_time": "2:55:29"} +{"current_steps": 4841, "total_steps": 6840, "loss": 0.5801016092300415, "lr": 4.321801329251286e-06, "epoch": 1.4155578300921188, "percentage": 70.77, "elapsed_time": "7:04:46", "remaining_time": "2:55:24"} +{"current_steps": 4842, "total_steps": 6840, "loss": 0.6412584781646729, "lr": 4.3178222901722956e-06, "epoch": 1.415850270507384, "percentage": 70.79, "elapsed_time": 
"7:04:52", "remaining_time": "2:55:19"} +{"current_steps": 4843, "total_steps": 6840, "loss": 0.5687737464904785, "lr": 4.313844579267793e-06, "epoch": 1.4161427109226494, "percentage": 70.8, "elapsed_time": "7:04:57", "remaining_time": "2:55:13"} +{"current_steps": 4844, "total_steps": 6840, "loss": 0.5668497085571289, "lr": 4.309868197467548e-06, "epoch": 1.416435151337915, "percentage": 70.82, "elapsed_time": "7:05:02", "remaining_time": "2:55:08"} +{"current_steps": 4845, "total_steps": 6840, "loss": 0.5814717411994934, "lr": 4.305893145701015e-06, "epoch": 1.4167275917531803, "percentage": 70.83, "elapsed_time": "7:05:07", "remaining_time": "2:55:02"} +{"current_steps": 4846, "total_steps": 6840, "loss": 0.5974467992782593, "lr": 4.301919424897339e-06, "epoch": 1.4170200321684456, "percentage": 70.85, "elapsed_time": "7:05:12", "remaining_time": "2:54:57"} +{"current_steps": 4847, "total_steps": 6840, "loss": 0.48333030939102173, "lr": 4.297947035985351e-06, "epoch": 1.4173124725837112, "percentage": 70.86, "elapsed_time": "7:05:17", "remaining_time": "2:54:52"} +{"current_steps": 4848, "total_steps": 6840, "loss": 0.5851039886474609, "lr": 4.293975979893576e-06, "epoch": 1.4176049129989765, "percentage": 70.88, "elapsed_time": "7:05:23", "remaining_time": "2:54:47"} +{"current_steps": 4849, "total_steps": 6840, "loss": 0.5510480403900146, "lr": 4.290006257550221e-06, "epoch": 1.4178973534142418, "percentage": 70.89, "elapsed_time": "7:05:28", "remaining_time": "2:54:42"} +{"current_steps": 4850, "total_steps": 6840, "loss": 0.6053529977798462, "lr": 4.286037869883187e-06, "epoch": 1.4181897938295072, "percentage": 70.91, "elapsed_time": "7:05:34", "remaining_time": "2:54:36"} +{"current_steps": 4851, "total_steps": 6840, "loss": 0.471671462059021, "lr": 4.282070817820059e-06, "epoch": 1.4184822342447727, "percentage": 70.92, "elapsed_time": "7:05:41", "remaining_time": "2:54:32"} +{"current_steps": 4852, "total_steps": 6840, "loss": 0.4864043593406677, "lr": 
4.278105102288113e-06, "epoch": 1.418774674660038, "percentage": 70.94, "elapsed_time": "7:05:47", "remaining_time": "2:54:27"} +{"current_steps": 4853, "total_steps": 6840, "loss": 0.6283255815505981, "lr": 4.274140724214311e-06, "epoch": 1.4190671150753034, "percentage": 70.95, "elapsed_time": "7:05:52", "remaining_time": "2:54:22"} +{"current_steps": 4854, "total_steps": 6840, "loss": 0.4990651607513428, "lr": 4.270177684525299e-06, "epoch": 1.4193595554905687, "percentage": 70.96, "elapsed_time": "7:05:56", "remaining_time": "2:54:16"} +{"current_steps": 4855, "total_steps": 6840, "loss": 0.6053239703178406, "lr": 4.2662159841474145e-06, "epoch": 1.4196519959058342, "percentage": 70.98, "elapsed_time": "7:06:01", "remaining_time": "2:54:11"} +{"current_steps": 4856, "total_steps": 6840, "loss": 0.45790988206863403, "lr": 4.262255624006683e-06, "epoch": 1.4199444363210996, "percentage": 70.99, "elapsed_time": "7:06:07", "remaining_time": "2:54:06"} +{"current_steps": 4857, "total_steps": 6840, "loss": 0.49944519996643066, "lr": 4.2582966050288125e-06, "epoch": 1.420236876736365, "percentage": 71.01, "elapsed_time": "7:06:12", "remaining_time": "2:54:00"} +{"current_steps": 4858, "total_steps": 6840, "loss": 0.5365482568740845, "lr": 4.2543389281392e-06, "epoch": 1.4205293171516304, "percentage": 71.02, "elapsed_time": "7:06:17", "remaining_time": "2:53:55"} +{"current_steps": 4859, "total_steps": 6840, "loss": 0.7763599157333374, "lr": 4.2503825942629285e-06, "epoch": 1.4208217575668958, "percentage": 71.04, "elapsed_time": "7:06:24", "remaining_time": "2:53:50"} +{"current_steps": 4860, "total_steps": 6840, "loss": 0.6125203371047974, "lr": 4.246427604324768e-06, "epoch": 1.421114197982161, "percentage": 71.05, "elapsed_time": "7:06:28", "remaining_time": "2:53:44"} +{"current_steps": 4861, "total_steps": 6840, "loss": 0.6634939312934875, "lr": 4.242473959249172e-06, "epoch": 1.4214066383974266, "percentage": 71.07, "elapsed_time": "7:06:33", "remaining_time": 
"2:53:39"} +{"current_steps": 4862, "total_steps": 6840, "loss": 0.5117735862731934, "lr": 4.238521659960283e-06, "epoch": 1.421699078812692, "percentage": 71.08, "elapsed_time": "7:06:38", "remaining_time": "2:53:34"} +{"current_steps": 4863, "total_steps": 6840, "loss": 0.5700962543487549, "lr": 4.234570707381925e-06, "epoch": 1.4219915192279573, "percentage": 71.1, "elapsed_time": "7:06:43", "remaining_time": "2:53:28"} +{"current_steps": 4864, "total_steps": 6840, "loss": 0.5443791151046753, "lr": 4.23062110243761e-06, "epoch": 1.4222839596432226, "percentage": 71.11, "elapsed_time": "7:06:49", "remaining_time": "2:53:23"} +{"current_steps": 4865, "total_steps": 6840, "loss": 0.5474614500999451, "lr": 4.226672846050538e-06, "epoch": 1.422576400058488, "percentage": 71.13, "elapsed_time": "7:06:55", "remaining_time": "2:53:19"} +{"current_steps": 4866, "total_steps": 6840, "loss": 0.5938940048217773, "lr": 4.222725939143582e-06, "epoch": 1.4228688404737535, "percentage": 71.14, "elapsed_time": "7:07:00", "remaining_time": "2:53:13"} +{"current_steps": 4867, "total_steps": 6840, "loss": 0.5010229349136353, "lr": 4.21878038263931e-06, "epoch": 1.4231612808890188, "percentage": 71.15, "elapsed_time": "7:07:04", "remaining_time": "2:53:07"} +{"current_steps": 4868, "total_steps": 6840, "loss": 0.5186876058578491, "lr": 4.214836177459975e-06, "epoch": 1.4234537213042842, "percentage": 71.17, "elapsed_time": "7:07:10", "remaining_time": "2:53:02"} +{"current_steps": 4869, "total_steps": 6840, "loss": 0.5998060703277588, "lr": 4.210893324527507e-06, "epoch": 1.4237461617195497, "percentage": 71.18, "elapsed_time": "7:07:15", "remaining_time": "2:52:57"} +{"current_steps": 4870, "total_steps": 6840, "loss": 0.5127147436141968, "lr": 4.206951824763528e-06, "epoch": 1.424038602134815, "percentage": 71.2, "elapsed_time": "7:07:21", "remaining_time": "2:52:52"} +{"current_steps": 4871, "total_steps": 6840, "loss": 0.5134439468383789, "lr": 4.203011679089336e-06, "epoch": 
1.4243310425500804, "percentage": 71.21, "elapsed_time": "7:07:27", "remaining_time": "2:52:47"} +{"current_steps": 4872, "total_steps": 6840, "loss": 0.6244111657142639, "lr": 4.199072888425919e-06, "epoch": 1.424623482965346, "percentage": 71.23, "elapsed_time": "7:07:32", "remaining_time": "2:52:41"} +{"current_steps": 4873, "total_steps": 6840, "loss": 0.4431127905845642, "lr": 4.195135453693944e-06, "epoch": 1.4249159233806112, "percentage": 71.24, "elapsed_time": "7:07:36", "remaining_time": "2:52:36"} +{"current_steps": 4874, "total_steps": 6840, "loss": 0.6479794979095459, "lr": 4.191199375813761e-06, "epoch": 1.4252083637958766, "percentage": 71.26, "elapsed_time": "7:07:42", "remaining_time": "2:52:31"} +{"current_steps": 4875, "total_steps": 6840, "loss": 0.6386070847511292, "lr": 4.187264655705407e-06, "epoch": 1.4255008042111421, "percentage": 71.27, "elapsed_time": "7:07:47", "remaining_time": "2:52:26"} +{"current_steps": 4876, "total_steps": 6840, "loss": 0.5201597213745117, "lr": 4.183331294288603e-06, "epoch": 1.4257932446264074, "percentage": 71.29, "elapsed_time": "7:07:52", "remaining_time": "2:52:20"} +{"current_steps": 4877, "total_steps": 6840, "loss": 0.46355581283569336, "lr": 4.179399292482737e-06, "epoch": 1.4260856850416728, "percentage": 71.3, "elapsed_time": "7:07:58", "remaining_time": "2:52:15"} +{"current_steps": 4878, "total_steps": 6840, "loss": 0.5360985398292542, "lr": 4.175468651206898e-06, "epoch": 1.426378125456938, "percentage": 71.32, "elapsed_time": "7:08:05", "remaining_time": "2:52:10"} +{"current_steps": 4879, "total_steps": 6840, "loss": 0.5545670390129089, "lr": 4.171539371379847e-06, "epoch": 1.4266705658722034, "percentage": 71.33, "elapsed_time": "7:08:09", "remaining_time": "2:52:05"} +{"current_steps": 4880, "total_steps": 6840, "loss": 0.445978581905365, "lr": 4.167611453920031e-06, "epoch": 1.426963006287469, "percentage": 71.35, "elapsed_time": "7:08:15", "remaining_time": "2:52:00"} +{"current_steps": 4881, 
"total_steps": 6840, "loss": 0.5242947340011597, "lr": 4.163684899745576e-06, "epoch": 1.4272554467027343, "percentage": 71.36, "elapsed_time": "7:08:20", "remaining_time": "2:51:54"} +{"current_steps": 4882, "total_steps": 6840, "loss": 0.5544728636741638, "lr": 4.15975970977429e-06, "epoch": 1.4275478871179996, "percentage": 71.37, "elapsed_time": "7:08:25", "remaining_time": "2:51:49"} +{"current_steps": 4883, "total_steps": 6840, "loss": 0.5400837063789368, "lr": 4.1558358849236626e-06, "epoch": 1.4278403275332652, "percentage": 71.39, "elapsed_time": "7:08:30", "remaining_time": "2:51:44"} +{"current_steps": 4884, "total_steps": 6840, "loss": 0.5201395153999329, "lr": 4.151913426110864e-06, "epoch": 1.4281327679485305, "percentage": 71.4, "elapsed_time": "7:08:35", "remaining_time": "2:51:38"} +{"current_steps": 4885, "total_steps": 6840, "loss": 0.4414210319519043, "lr": 4.147992334252745e-06, "epoch": 1.4284252083637958, "percentage": 71.42, "elapsed_time": "7:08:40", "remaining_time": "2:51:33"} +{"current_steps": 4886, "total_steps": 6840, "loss": 0.6590272188186646, "lr": 4.144072610265838e-06, "epoch": 1.4287176487790614, "percentage": 71.43, "elapsed_time": "7:08:44", "remaining_time": "2:51:27"} +{"current_steps": 4887, "total_steps": 6840, "loss": 0.4734429717063904, "lr": 4.140154255066356e-06, "epoch": 1.4290100891943267, "percentage": 71.45, "elapsed_time": "7:08:50", "remaining_time": "2:51:22"} +{"current_steps": 4888, "total_steps": 6840, "loss": 0.45204073190689087, "lr": 4.136237269570186e-06, "epoch": 1.429302529609592, "percentage": 71.46, "elapsed_time": "7:08:56", "remaining_time": "2:51:17"} +{"current_steps": 4889, "total_steps": 6840, "loss": 0.6570174694061279, "lr": 4.132321654692901e-06, "epoch": 1.4295949700248574, "percentage": 71.48, "elapsed_time": "7:09:02", "remaining_time": "2:51:12"} +{"current_steps": 4890, "total_steps": 6840, "loss": 0.5159077644348145, "lr": 4.128407411349754e-06, "epoch": 1.429887410440123, "percentage": 
71.49, "elapsed_time": "7:09:07", "remaining_time": "2:51:07"} +{"current_steps": 4891, "total_steps": 6840, "loss": 0.5778994560241699, "lr": 4.124494540455674e-06, "epoch": 1.4301798508553882, "percentage": 71.51, "elapsed_time": "7:09:14", "remaining_time": "2:51:02"} +{"current_steps": 4892, "total_steps": 6840, "loss": 0.4740722179412842, "lr": 4.120583042925273e-06, "epoch": 1.4304722912706536, "percentage": 71.52, "elapsed_time": "7:09:19", "remaining_time": "2:50:57"} +{"current_steps": 4893, "total_steps": 6840, "loss": 0.5561014413833618, "lr": 4.116672919672837e-06, "epoch": 1.430764731685919, "percentage": 71.54, "elapsed_time": "7:09:25", "remaining_time": "2:50:52"} +{"current_steps": 4894, "total_steps": 6840, "loss": 0.4834856688976288, "lr": 4.112764171612335e-06, "epoch": 1.4310571721011844, "percentage": 71.55, "elapsed_time": "7:09:32", "remaining_time": "2:50:47"} +{"current_steps": 4895, "total_steps": 6840, "loss": 0.5565547943115234, "lr": 4.108856799657412e-06, "epoch": 1.4313496125164498, "percentage": 71.56, "elapsed_time": "7:09:38", "remaining_time": "2:50:42"} +{"current_steps": 4896, "total_steps": 6840, "loss": 0.5401065349578857, "lr": 4.104950804721395e-06, "epoch": 1.431642052931715, "percentage": 71.58, "elapsed_time": "7:09:42", "remaining_time": "2:50:37"} +{"current_steps": 4897, "total_steps": 6840, "loss": 0.4792686700820923, "lr": 4.101046187717284e-06, "epoch": 1.4319344933469806, "percentage": 71.59, "elapsed_time": "7:09:49", "remaining_time": "2:50:32"} +{"current_steps": 4898, "total_steps": 6840, "loss": 0.5255981683731079, "lr": 4.097142949557764e-06, "epoch": 1.432226933762246, "percentage": 71.61, "elapsed_time": "7:09:54", "remaining_time": "2:50:27"} +{"current_steps": 4899, "total_steps": 6840, "loss": 0.5535293817520142, "lr": 4.093241091155187e-06, "epoch": 1.4325193741775113, "percentage": 71.62, "elapsed_time": "7:10:00", "remaining_time": "2:50:22"} +{"current_steps": 4900, "total_steps": 6840, "loss": 
0.5235373973846436, "lr": 4.089340613421589e-06, "epoch": 1.4328118145927768, "percentage": 71.64, "elapsed_time": "7:10:05", "remaining_time": "2:50:16"} +{"current_steps": 4901, "total_steps": 6840, "loss": 0.5538134574890137, "lr": 4.085441517268687e-06, "epoch": 1.4331042550080422, "percentage": 71.65, "elapsed_time": "7:10:14", "remaining_time": "2:50:13"} +{"current_steps": 4902, "total_steps": 6840, "loss": 0.5394395589828491, "lr": 4.081543803607869e-06, "epoch": 1.4333966954233075, "percentage": 71.67, "elapsed_time": "7:10:20", "remaining_time": "2:50:08"} +{"current_steps": 4903, "total_steps": 6840, "loss": 0.522742509841919, "lr": 4.077647473350201e-06, "epoch": 1.4336891358385728, "percentage": 71.68, "elapsed_time": "7:10:25", "remaining_time": "2:50:02"} +{"current_steps": 4904, "total_steps": 6840, "loss": 0.559830367565155, "lr": 4.073752527406429e-06, "epoch": 1.4339815762538382, "percentage": 71.7, "elapsed_time": "7:10:30", "remaining_time": "2:49:57"} +{"current_steps": 4905, "total_steps": 6840, "loss": 0.42535799741744995, "lr": 4.069858966686971e-06, "epoch": 1.4342740166691037, "percentage": 71.71, "elapsed_time": "7:10:35", "remaining_time": "2:49:52"} +{"current_steps": 4906, "total_steps": 6840, "loss": 0.6075177192687988, "lr": 4.065966792101924e-06, "epoch": 1.434566457084369, "percentage": 71.73, "elapsed_time": "7:10:41", "remaining_time": "2:49:47"} +{"current_steps": 4907, "total_steps": 6840, "loss": 0.5010570883750916, "lr": 4.06207600456106e-06, "epoch": 1.4348588974996344, "percentage": 71.74, "elapsed_time": "7:10:45", "remaining_time": "2:49:41"} +{"current_steps": 4908, "total_steps": 6840, "loss": 0.571307897567749, "lr": 4.058186604973826e-06, "epoch": 1.4351513379149, "percentage": 71.75, "elapsed_time": "7:10:52", "remaining_time": "2:49:36"} +{"current_steps": 4909, "total_steps": 6840, "loss": 0.4918866455554962, "lr": 4.0542985942493505e-06, "epoch": 1.4354437783301652, "percentage": 71.77, "elapsed_time": "7:10:58", 
"remaining_time": "2:49:31"} +{"current_steps": 4910, "total_steps": 6840, "loss": 0.6588176488876343, "lr": 4.050411973296425e-06, "epoch": 1.4357362187454306, "percentage": 71.78, "elapsed_time": "7:11:04", "remaining_time": "2:49:26"} +{"current_steps": 4911, "total_steps": 6840, "loss": 0.7341527938842773, "lr": 4.046526743023526e-06, "epoch": 1.436028659160696, "percentage": 71.8, "elapsed_time": "7:11:08", "remaining_time": "2:49:21"} +{"current_steps": 4912, "total_steps": 6840, "loss": 0.5233849287033081, "lr": 4.042642904338801e-06, "epoch": 1.4363210995759614, "percentage": 71.81, "elapsed_time": "7:11:13", "remaining_time": "2:49:15"} +{"current_steps": 4913, "total_steps": 6840, "loss": 0.5144373178482056, "lr": 4.038760458150079e-06, "epoch": 1.4366135399912268, "percentage": 71.83, "elapsed_time": "7:11:18", "remaining_time": "2:49:10"} +{"current_steps": 4914, "total_steps": 6840, "loss": 0.4520954489707947, "lr": 4.034879405364853e-06, "epoch": 1.4369059804064923, "percentage": 71.84, "elapsed_time": "7:11:24", "remaining_time": "2:49:05"} +{"current_steps": 4915, "total_steps": 6840, "loss": 0.5632743835449219, "lr": 4.030999746890295e-06, "epoch": 1.4371984208217576, "percentage": 71.86, "elapsed_time": "7:11:30", "remaining_time": "2:49:00"} +{"current_steps": 4916, "total_steps": 6840, "loss": 0.49681180715560913, "lr": 4.027121483633257e-06, "epoch": 1.437490861237023, "percentage": 71.87, "elapsed_time": "7:11:34", "remaining_time": "2:48:54"} +{"current_steps": 4917, "total_steps": 6840, "loss": 0.5182398557662964, "lr": 4.023244616500257e-06, "epoch": 1.4377833016522883, "percentage": 71.89, "elapsed_time": "7:11:38", "remaining_time": "2:48:48"} +{"current_steps": 4918, "total_steps": 6840, "loss": 0.5686701536178589, "lr": 4.019369146397493e-06, "epoch": 1.4380757420675536, "percentage": 71.9, "elapsed_time": "7:11:44", "remaining_time": "2:48:43"} +{"current_steps": 4919, "total_steps": 6840, "loss": 0.5668520927429199, "lr": 
4.015495074230823e-06, "epoch": 1.4383681824828192, "percentage": 71.92, "elapsed_time": "7:11:50", "remaining_time": "2:48:38"} +{"current_steps": 4920, "total_steps": 6840, "loss": 0.4511116147041321, "lr": 4.011622400905794e-06, "epoch": 1.4386606228980845, "percentage": 71.93, "elapsed_time": "7:11:56", "remaining_time": "2:48:33"} +{"current_steps": 4921, "total_steps": 6840, "loss": 0.4736326336860657, "lr": 4.007751127327618e-06, "epoch": 1.4389530633133498, "percentage": 71.94, "elapsed_time": "7:12:00", "remaining_time": "2:48:27"} +{"current_steps": 4922, "total_steps": 6840, "loss": 0.5705248117446899, "lr": 4.003881254401183e-06, "epoch": 1.4392455037286154, "percentage": 71.96, "elapsed_time": "7:12:05", "remaining_time": "2:48:22"} +{"current_steps": 4923, "total_steps": 6840, "loss": 0.45527490973472595, "lr": 4.000012783031047e-06, "epoch": 1.4395379441438807, "percentage": 71.97, "elapsed_time": "7:12:11", "remaining_time": "2:48:17"} +{"current_steps": 4924, "total_steps": 6840, "loss": 0.4926735758781433, "lr": 3.996145714121444e-06, "epoch": 1.439830384559146, "percentage": 71.99, "elapsed_time": "7:12:18", "remaining_time": "2:48:12"} +{"current_steps": 4925, "total_steps": 6840, "loss": 0.42700374126434326, "lr": 3.992280048576276e-06, "epoch": 1.4401228249744116, "percentage": 72.0, "elapsed_time": "7:12:23", "remaining_time": "2:48:07"} +{"current_steps": 4926, "total_steps": 6840, "loss": 0.5833145976066589, "lr": 3.988415787299118e-06, "epoch": 1.440415265389677, "percentage": 72.02, "elapsed_time": "7:12:27", "remaining_time": "2:48:02"} +{"current_steps": 4927, "total_steps": 6840, "loss": 0.5290282964706421, "lr": 3.98455293119322e-06, "epoch": 1.4407077058049422, "percentage": 72.03, "elapsed_time": "7:12:32", "remaining_time": "2:47:56"} +{"current_steps": 4928, "total_steps": 6840, "loss": 0.4489266872406006, "lr": 3.9806914811614984e-06, "epoch": 1.4410001462202076, "percentage": 72.05, "elapsed_time": "7:12:37", "remaining_time": 
"2:47:51"} +{"current_steps": 4929, "total_steps": 6840, "loss": 0.5630865097045898, "lr": 3.97683143810655e-06, "epoch": 1.441292586635473, "percentage": 72.06, "elapsed_time": "7:12:42", "remaining_time": "2:47:45"} +{"current_steps": 4930, "total_steps": 6840, "loss": 0.5962105989456177, "lr": 3.972972802930627e-06, "epoch": 1.4415850270507384, "percentage": 72.08, "elapsed_time": "7:12:49", "remaining_time": "2:47:41"} +{"current_steps": 4931, "total_steps": 6840, "loss": 0.6059410572052002, "lr": 3.9691155765356674e-06, "epoch": 1.4418774674660038, "percentage": 72.09, "elapsed_time": "7:12:54", "remaining_time": "2:47:35"} +{"current_steps": 4932, "total_steps": 6840, "loss": 0.5476605296134949, "lr": 3.965259759823272e-06, "epoch": 1.442169907881269, "percentage": 72.11, "elapsed_time": "7:12:59", "remaining_time": "2:47:30"} +{"current_steps": 4933, "total_steps": 6840, "loss": 0.70278000831604, "lr": 3.961405353694716e-06, "epoch": 1.4424623482965346, "percentage": 72.12, "elapsed_time": "7:13:04", "remaining_time": "2:47:25"} +{"current_steps": 4934, "total_steps": 6840, "loss": 0.5838963389396667, "lr": 3.9575523590509445e-06, "epoch": 1.4427547887118, "percentage": 72.13, "elapsed_time": "7:13:08", "remaining_time": "2:47:19"} +{"current_steps": 4935, "total_steps": 6840, "loss": 0.508273720741272, "lr": 3.95370077679257e-06, "epoch": 1.4430472291270653, "percentage": 72.15, "elapsed_time": "7:13:14", "remaining_time": "2:47:14"} +{"current_steps": 4936, "total_steps": 6840, "loss": 0.5053583383560181, "lr": 3.949850607819876e-06, "epoch": 1.4433396695423308, "percentage": 72.16, "elapsed_time": "7:13:20", "remaining_time": "2:47:09"} +{"current_steps": 4937, "total_steps": 6840, "loss": 0.5729954242706299, "lr": 3.946001853032818e-06, "epoch": 1.4436321099575962, "percentage": 72.18, "elapsed_time": "7:13:25", "remaining_time": "2:47:03"} +{"current_steps": 4938, "total_steps": 6840, "loss": 0.5261870622634888, "lr": 3.942154513331018e-06, "epoch": 
1.4439245503728615, "percentage": 72.19, "elapsed_time": "7:13:29", "remaining_time": "2:46:58"} +{"current_steps": 4939, "total_steps": 6840, "loss": 0.34802311658859253, "lr": 3.9383085896137675e-06, "epoch": 1.444216990788127, "percentage": 72.21, "elapsed_time": "7:13:36", "remaining_time": "2:46:53"} +{"current_steps": 4940, "total_steps": 6840, "loss": 0.48302024602890015, "lr": 3.934464082780032e-06, "epoch": 1.4445094312033924, "percentage": 72.22, "elapsed_time": "7:13:41", "remaining_time": "2:46:48"} +{"current_steps": 4941, "total_steps": 6840, "loss": 0.6649061441421509, "lr": 3.930620993728434e-06, "epoch": 1.4448018716186577, "percentage": 72.24, "elapsed_time": "7:13:46", "remaining_time": "2:46:42"} +{"current_steps": 4942, "total_steps": 6840, "loss": 0.5945848822593689, "lr": 3.926779323357278e-06, "epoch": 1.445094312033923, "percentage": 72.25, "elapsed_time": "7:13:51", "remaining_time": "2:46:37"} +{"current_steps": 4943, "total_steps": 6840, "loss": 0.4783032536506653, "lr": 3.922939072564528e-06, "epoch": 1.4453867524491884, "percentage": 72.27, "elapsed_time": "7:13:58", "remaining_time": "2:46:32"} +{"current_steps": 4944, "total_steps": 6840, "loss": 0.4619516134262085, "lr": 3.919100242247821e-06, "epoch": 1.445679192864454, "percentage": 72.28, "elapsed_time": "7:14:02", "remaining_time": "2:46:27"} +{"current_steps": 4945, "total_steps": 6840, "loss": 0.5652358531951904, "lr": 3.915262833304461e-06, "epoch": 1.4459716332797192, "percentage": 72.3, "elapsed_time": "7:14:09", "remaining_time": "2:46:22"} +{"current_steps": 4946, "total_steps": 6840, "loss": 0.4523610472679138, "lr": 3.911426846631416e-06, "epoch": 1.4462640736949846, "percentage": 72.31, "elapsed_time": "7:14:15", "remaining_time": "2:46:17"} +{"current_steps": 4947, "total_steps": 6840, "loss": 0.4914482831954956, "lr": 3.9075922831253276e-06, "epoch": 1.44655651411025, "percentage": 72.32, "elapsed_time": "7:14:20", "remaining_time": "2:46:12"} +{"current_steps": 
4948, "total_steps": 6840, "loss": 0.4060005247592926, "lr": 3.9037591436825005e-06, "epoch": 1.4468489545255154, "percentage": 72.34, "elapsed_time": "7:14:25", "remaining_time": "2:46:06"} +{"current_steps": 4949, "total_steps": 6840, "loss": 0.49987125396728516, "lr": 3.899927429198908e-06, "epoch": 1.4471413949407808, "percentage": 72.35, "elapsed_time": "7:14:31", "remaining_time": "2:46:01"} +{"current_steps": 4950, "total_steps": 6840, "loss": 0.6205358505249023, "lr": 3.896097140570189e-06, "epoch": 1.4474338353560463, "percentage": 72.37, "elapsed_time": "7:14:36", "remaining_time": "2:45:56"} +{"current_steps": 4951, "total_steps": 6840, "loss": 0.5302955508232117, "lr": 3.892268278691651e-06, "epoch": 1.4477262757713116, "percentage": 72.38, "elapsed_time": "7:14:43", "remaining_time": "2:45:51"} +{"current_steps": 4952, "total_steps": 6840, "loss": 0.5225962400436401, "lr": 3.888440844458272e-06, "epoch": 1.448018716186577, "percentage": 72.4, "elapsed_time": "7:14:49", "remaining_time": "2:45:46"} +{"current_steps": 4953, "total_steps": 6840, "loss": 0.5030089616775513, "lr": 3.884614838764682e-06, "epoch": 1.4483111566018425, "percentage": 72.41, "elapsed_time": "7:14:54", "remaining_time": "2:45:41"} +{"current_steps": 4954, "total_steps": 6840, "loss": 0.6060030460357666, "lr": 3.880790262505192e-06, "epoch": 1.4486035970171078, "percentage": 72.43, "elapsed_time": "7:15:00", "remaining_time": "2:45:36"} +{"current_steps": 4955, "total_steps": 6840, "loss": 0.5244846343994141, "lr": 3.8769671165737725e-06, "epoch": 1.4488960374323732, "percentage": 72.44, "elapsed_time": "7:15:05", "remaining_time": "2:45:31"} +{"current_steps": 4956, "total_steps": 6840, "loss": 0.46979671716690063, "lr": 3.873145401864061e-06, "epoch": 1.4491884778476385, "percentage": 72.46, "elapsed_time": "7:15:11", "remaining_time": "2:45:26"} +{"current_steps": 4957, "total_steps": 6840, "loss": 0.5201131105422974, "lr": 3.8693251192693596e-06, "epoch": 1.4494809182629038, 
"percentage": 72.47, "elapsed_time": "7:15:17", "remaining_time": "2:45:21"} +{"current_steps": 4958, "total_steps": 6840, "loss": 0.5124838352203369, "lr": 3.865506269682638e-06, "epoch": 1.4497733586781694, "percentage": 72.49, "elapsed_time": "7:15:22", "remaining_time": "2:45:15"} +{"current_steps": 4959, "total_steps": 6840, "loss": 0.5613473653793335, "lr": 3.861688853996525e-06, "epoch": 1.4500657990934347, "percentage": 72.5, "elapsed_time": "7:15:26", "remaining_time": "2:45:10"} +{"current_steps": 4960, "total_steps": 6840, "loss": 0.46196621656417847, "lr": 3.857872873103322e-06, "epoch": 1.4503582395087, "percentage": 72.51, "elapsed_time": "7:15:30", "remaining_time": "2:45:04"} +{"current_steps": 4961, "total_steps": 6840, "loss": 0.6427509784698486, "lr": 3.8540583278949905e-06, "epoch": 1.4506506799239656, "percentage": 72.53, "elapsed_time": "7:15:34", "remaining_time": "2:44:58"} +{"current_steps": 4962, "total_steps": 6840, "loss": 0.6306381821632385, "lr": 3.850245219263157e-06, "epoch": 1.450943120339231, "percentage": 72.54, "elapsed_time": "7:15:41", "remaining_time": "2:44:53"} +{"current_steps": 4963, "total_steps": 6840, "loss": 0.46638673543930054, "lr": 3.846433548099114e-06, "epoch": 1.4512355607544962, "percentage": 72.56, "elapsed_time": "7:15:46", "remaining_time": "2:44:48"} +{"current_steps": 4964, "total_steps": 6840, "loss": 0.4950143098831177, "lr": 3.842623315293814e-06, "epoch": 1.4515280011697618, "percentage": 72.57, "elapsed_time": "7:15:51", "remaining_time": "2:44:43"} +{"current_steps": 4965, "total_steps": 6840, "loss": 0.45698249340057373, "lr": 3.838814521737875e-06, "epoch": 1.451820441585027, "percentage": 72.59, "elapsed_time": "7:15:56", "remaining_time": "2:44:37"} +{"current_steps": 4966, "total_steps": 6840, "loss": 0.6068260669708252, "lr": 3.8350071683215814e-06, "epoch": 1.4521128820002924, "percentage": 72.6, "elapsed_time": "7:16:02", "remaining_time": "2:44:32"} +{"current_steps": 4967, "total_steps": 
6840, "loss": 0.5264104008674622, "lr": 3.831201255934879e-06, "epoch": 1.4524053224155578, "percentage": 72.62, "elapsed_time": "7:16:07", "remaining_time": "2:44:27"} +{"current_steps": 4968, "total_steps": 6840, "loss": 0.5198315978050232, "lr": 3.827396785467375e-06, "epoch": 1.4526977628308233, "percentage": 72.63, "elapsed_time": "7:16:13", "remaining_time": "2:44:22"} +{"current_steps": 4969, "total_steps": 6840, "loss": 0.504194438457489, "lr": 3.823593757808342e-06, "epoch": 1.4529902032460886, "percentage": 72.65, "elapsed_time": "7:16:19", "remaining_time": "2:44:17"} +{"current_steps": 4970, "total_steps": 6840, "loss": 0.5018986463546753, "lr": 3.819792173846717e-06, "epoch": 1.453282643661354, "percentage": 72.66, "elapsed_time": "7:16:24", "remaining_time": "2:44:12"} +{"current_steps": 4971, "total_steps": 6840, "loss": 0.4847358465194702, "lr": 3.8159920344710936e-06, "epoch": 1.4535750840766193, "percentage": 72.68, "elapsed_time": "7:16:30", "remaining_time": "2:44:07"} +{"current_steps": 4972, "total_steps": 6840, "loss": 0.547623872756958, "lr": 3.812193340569733e-06, "epoch": 1.4538675244918848, "percentage": 72.69, "elapsed_time": "7:16:34", "remaining_time": "2:44:01"} +{"current_steps": 4973, "total_steps": 6840, "loss": 0.534354031085968, "lr": 3.8083960930305562e-06, "epoch": 1.4541599649071502, "percentage": 72.7, "elapsed_time": "7:16:40", "remaining_time": "2:43:56"} +{"current_steps": 4974, "total_steps": 6840, "loss": 0.6123033165931702, "lr": 3.8046002927411506e-06, "epoch": 1.4544524053224155, "percentage": 72.72, "elapsed_time": "7:16:44", "remaining_time": "2:43:50"} +{"current_steps": 4975, "total_steps": 6840, "loss": 0.5222622752189636, "lr": 3.8008059405887553e-06, "epoch": 1.454744845737681, "percentage": 72.73, "elapsed_time": "7:16:50", "remaining_time": "2:43:45"} +{"current_steps": 4976, "total_steps": 6840, "loss": 0.5568759441375732, "lr": 3.7970130374602785e-06, "epoch": 1.4550372861529464, "percentage": 72.75, 
"elapsed_time": "7:16:55", "remaining_time": "2:43:40"} +{"current_steps": 4977, "total_steps": 6840, "loss": 0.5458661317825317, "lr": 3.7932215842422903e-06, "epoch": 1.4553297265682117, "percentage": 72.76, "elapsed_time": "7:17:01", "remaining_time": "2:43:35"} +{"current_steps": 4978, "total_steps": 6840, "loss": 0.48293566703796387, "lr": 3.789431581821019e-06, "epoch": 1.4556221669834772, "percentage": 72.78, "elapsed_time": "7:17:06", "remaining_time": "2:43:29"} +{"current_steps": 4979, "total_steps": 6840, "loss": 0.647431492805481, "lr": 3.7856430310823546e-06, "epoch": 1.4559146073987426, "percentage": 72.79, "elapsed_time": "7:17:11", "remaining_time": "2:43:24"} +{"current_steps": 4980, "total_steps": 6840, "loss": 0.48039543628692627, "lr": 3.7818559329118475e-06, "epoch": 1.456207047814008, "percentage": 72.81, "elapsed_time": "7:17:16", "remaining_time": "2:43:19"} +{"current_steps": 4981, "total_steps": 6840, "loss": 0.6705803871154785, "lr": 3.7780702881947084e-06, "epoch": 1.4564994882292732, "percentage": 72.82, "elapsed_time": "7:17:21", "remaining_time": "2:43:13"} +{"current_steps": 4982, "total_steps": 6840, "loss": 0.564405083656311, "lr": 3.7742860978158103e-06, "epoch": 1.4567919286445385, "percentage": 72.84, "elapsed_time": "7:17:27", "remaining_time": "2:43:08"} +{"current_steps": 4983, "total_steps": 6840, "loss": 0.5208612084388733, "lr": 3.7705033626596844e-06, "epoch": 1.457084369059804, "percentage": 72.85, "elapsed_time": "7:17:32", "remaining_time": "2:43:03"} +{"current_steps": 4984, "total_steps": 6840, "loss": 0.6230732202529907, "lr": 3.766722083610521e-06, "epoch": 1.4573768094750694, "percentage": 72.87, "elapsed_time": "7:17:37", "remaining_time": "2:42:58"} +{"current_steps": 4985, "total_steps": 6840, "loss": 0.5741504430770874, "lr": 3.7629422615521747e-06, "epoch": 1.4576692498903348, "percentage": 72.88, "elapsed_time": "7:17:42", "remaining_time": "2:42:52"} +{"current_steps": 4986, "total_steps": 6840, "loss": 
0.5321571826934814, "lr": 3.75916389736815e-06, "epoch": 1.4579616903056003, "percentage": 72.89, "elapsed_time": "7:17:48", "remaining_time": "2:42:47"} +{"current_steps": 4987, "total_steps": 6840, "loss": 0.6367009878158569, "lr": 3.7553869919416186e-06, "epoch": 1.4582541307208656, "percentage": 72.91, "elapsed_time": "7:17:53", "remaining_time": "2:42:42"} +{"current_steps": 4988, "total_steps": 6840, "loss": 0.5736235976219177, "lr": 3.75161154615541e-06, "epoch": 1.458546571136131, "percentage": 72.92, "elapsed_time": "7:17:57", "remaining_time": "2:42:36"} +{"current_steps": 4989, "total_steps": 6840, "loss": 0.5799358487129211, "lr": 3.7478375608920127e-06, "epoch": 1.4588390115513965, "percentage": 72.94, "elapsed_time": "7:18:02", "remaining_time": "2:42:31"} +{"current_steps": 4990, "total_steps": 6840, "loss": 0.6065561771392822, "lr": 3.7440650370335675e-06, "epoch": 1.4591314519666618, "percentage": 72.95, "elapsed_time": "7:18:07", "remaining_time": "2:42:25"} +{"current_steps": 4991, "total_steps": 6840, "loss": 0.5182442665100098, "lr": 3.740293975461886e-06, "epoch": 1.4594238923819272, "percentage": 72.97, "elapsed_time": "7:18:15", "remaining_time": "2:42:21"} +{"current_steps": 4992, "total_steps": 6840, "loss": 0.5065605640411377, "lr": 3.736524377058429e-06, "epoch": 1.4597163327971927, "percentage": 72.98, "elapsed_time": "7:18:20", "remaining_time": "2:42:16"} +{"current_steps": 4993, "total_steps": 6840, "loss": 0.44326460361480713, "lr": 3.7327562427043163e-06, "epoch": 1.460008773212458, "percentage": 73.0, "elapsed_time": "7:18:25", "remaining_time": "2:42:11"} +{"current_steps": 4994, "total_steps": 6840, "loss": 0.6192547082901001, "lr": 3.7289895732803306e-06, "epoch": 1.4603012136277234, "percentage": 73.01, "elapsed_time": "7:18:31", "remaining_time": "2:42:05"} +{"current_steps": 4995, "total_steps": 6840, "loss": 0.5487738847732544, "lr": 3.725224369666899e-06, "epoch": 1.4605936540429887, "percentage": 73.03, "elapsed_time": 
"7:18:37", "remaining_time": "2:42:00"} +{"current_steps": 4996, "total_steps": 6840, "loss": 0.558982253074646, "lr": 3.7214606327441203e-06, "epoch": 1.460886094458254, "percentage": 73.04, "elapsed_time": "7:18:41", "remaining_time": "2:41:55"} +{"current_steps": 4997, "total_steps": 6840, "loss": 0.5277853012084961, "lr": 3.717698363391744e-06, "epoch": 1.4611785348735196, "percentage": 73.06, "elapsed_time": "7:18:46", "remaining_time": "2:41:49"} +{"current_steps": 4998, "total_steps": 6840, "loss": 0.6561184525489807, "lr": 3.7139375624891795e-06, "epoch": 1.461470975288785, "percentage": 73.07, "elapsed_time": "7:18:51", "remaining_time": "2:41:44"} +{"current_steps": 4999, "total_steps": 6840, "loss": 0.46555888652801514, "lr": 3.710178230915489e-06, "epoch": 1.4617634157040502, "percentage": 73.08, "elapsed_time": "7:18:56", "remaining_time": "2:41:38"} +{"current_steps": 5000, "total_steps": 6840, "loss": 0.5808060765266418, "lr": 3.706420369549394e-06, "epoch": 1.4620558561193158, "percentage": 73.1, "elapsed_time": "7:19:01", "remaining_time": "2:41:33"} +{"current_steps": 5001, "total_steps": 6840, "loss": 0.5407893061637878, "lr": 3.7026639792692722e-06, "epoch": 1.462348296534581, "percentage": 73.11, "elapsed_time": "7:19:11", "remaining_time": "2:41:30"} +{"current_steps": 5002, "total_steps": 6840, "loss": 0.538393497467041, "lr": 3.6989090609531574e-06, "epoch": 1.4626407369498464, "percentage": 73.13, "elapsed_time": "7:19:16", "remaining_time": "2:41:24"} +{"current_steps": 5003, "total_steps": 6840, "loss": 0.530704140663147, "lr": 3.6951556154787373e-06, "epoch": 1.462933177365112, "percentage": 73.14, "elapsed_time": "7:19:20", "remaining_time": "2:41:19"} +{"current_steps": 5004, "total_steps": 6840, "loss": 0.43352627754211426, "lr": 3.691403643723359e-06, "epoch": 1.4632256177803773, "percentage": 73.16, "elapsed_time": "7:19:25", "remaining_time": "2:41:13"} +{"current_steps": 5005, "total_steps": 6840, "loss": 0.6047205924987793, "lr": 
3.687653146564025e-06, "epoch": 1.4635180581956426, "percentage": 73.17, "elapsed_time": "7:19:30", "remaining_time": "2:41:08"} +{"current_steps": 5006, "total_steps": 6840, "loss": 0.44708865880966187, "lr": 3.6839041248773857e-06, "epoch": 1.463810498610908, "percentage": 73.19, "elapsed_time": "7:19:35", "remaining_time": "2:41:03"} +{"current_steps": 5007, "total_steps": 6840, "loss": 0.5653451681137085, "lr": 3.680156579539753e-06, "epoch": 1.4641029390261735, "percentage": 73.2, "elapsed_time": "7:19:42", "remaining_time": "2:40:58"} +{"current_steps": 5008, "total_steps": 6840, "loss": 0.49293750524520874, "lr": 3.6764105114270966e-06, "epoch": 1.4643953794414388, "percentage": 73.22, "elapsed_time": "7:19:48", "remaining_time": "2:40:53"} +{"current_steps": 5009, "total_steps": 6840, "loss": 0.5761851072311401, "lr": 3.672665921415034e-06, "epoch": 1.4646878198567042, "percentage": 73.23, "elapsed_time": "7:19:52", "remaining_time": "2:40:47"} +{"current_steps": 5010, "total_steps": 6840, "loss": 0.5188437700271606, "lr": 3.668922810378841e-06, "epoch": 1.4649802602719695, "percentage": 73.25, "elapsed_time": "7:19:58", "remaining_time": "2:40:42"} +{"current_steps": 5011, "total_steps": 6840, "loss": 0.5159400701522827, "lr": 3.6651811791934476e-06, "epoch": 1.465272700687235, "percentage": 73.26, "elapsed_time": "7:20:04", "remaining_time": "2:40:37"} +{"current_steps": 5012, "total_steps": 6840, "loss": 0.478866845369339, "lr": 3.6614410287334377e-06, "epoch": 1.4655651411025004, "percentage": 73.27, "elapsed_time": "7:20:10", "remaining_time": "2:40:32"} +{"current_steps": 5013, "total_steps": 6840, "loss": 0.5509926080703735, "lr": 3.6577023598730486e-06, "epoch": 1.4658575815177657, "percentage": 73.29, "elapsed_time": "7:20:15", "remaining_time": "2:40:27"} +{"current_steps": 5014, "total_steps": 6840, "loss": 0.4872981309890747, "lr": 3.6539651734861705e-06, "epoch": 1.4661500219330312, "percentage": 73.3, "elapsed_time": "7:20:20", 
"remaining_time": "2:40:21"} +{"current_steps": 5015, "total_steps": 6840, "loss": 0.47478264570236206, "lr": 3.6502294704463493e-06, "epoch": 1.4664424623482966, "percentage": 73.32, "elapsed_time": "7:20:25", "remaining_time": "2:40:16"} +{"current_steps": 5016, "total_steps": 6840, "loss": 0.5140335559844971, "lr": 3.646495251626785e-06, "epoch": 1.4667349027635619, "percentage": 73.33, "elapsed_time": "7:20:31", "remaining_time": "2:40:11"} +{"current_steps": 5017, "total_steps": 6840, "loss": 0.41033172607421875, "lr": 3.6427625179003223e-06, "epoch": 1.4670273431788274, "percentage": 73.35, "elapsed_time": "7:20:35", "remaining_time": "2:40:05"} +{"current_steps": 5018, "total_steps": 6840, "loss": 0.4290558099746704, "lr": 3.639031270139468e-06, "epoch": 1.4673197835940928, "percentage": 73.36, "elapsed_time": "7:20:39", "remaining_time": "2:40:00"} +{"current_steps": 5019, "total_steps": 6840, "loss": 0.5903435349464417, "lr": 3.635301509216379e-06, "epoch": 1.467612224009358, "percentage": 73.38, "elapsed_time": "7:20:44", "remaining_time": "2:39:54"} +{"current_steps": 5020, "total_steps": 6840, "loss": 0.6410748958587646, "lr": 3.6315732360028655e-06, "epoch": 1.4679046644246234, "percentage": 73.39, "elapsed_time": "7:20:48", "remaining_time": "2:39:49"} +{"current_steps": 5021, "total_steps": 6840, "loss": 0.5499910712242126, "lr": 3.6278464513703858e-06, "epoch": 1.4681971048398887, "percentage": 73.41, "elapsed_time": "7:20:54", "remaining_time": "2:39:43"} +{"current_steps": 5022, "total_steps": 6840, "loss": 0.4980154037475586, "lr": 3.624121156190056e-06, "epoch": 1.4684895452551543, "percentage": 73.42, "elapsed_time": "7:20:59", "remaining_time": "2:39:38"} +{"current_steps": 5023, "total_steps": 6840, "loss": 0.5910995006561279, "lr": 3.6203973513326395e-06, "epoch": 1.4687819856704196, "percentage": 73.44, "elapsed_time": "7:21:03", "remaining_time": "2:39:32"} +{"current_steps": 5024, "total_steps": 6840, "loss": 0.6003058552742004, "lr": 
3.6166750376685534e-06, "epoch": 1.469074426085685, "percentage": 73.45, "elapsed_time": "7:21:07", "remaining_time": "2:39:27"} +{"current_steps": 5025, "total_steps": 6840, "loss": 0.5655561685562134, "lr": 3.6129542160678655e-06, "epoch": 1.4693668665009505, "percentage": 73.46, "elapsed_time": "7:21:13", "remaining_time": "2:39:21"} +{"current_steps": 5026, "total_steps": 6840, "loss": 0.713152289390564, "lr": 3.609234887400297e-06, "epoch": 1.4696593069162158, "percentage": 73.48, "elapsed_time": "7:21:19", "remaining_time": "2:39:17"} +{"current_steps": 5027, "total_steps": 6840, "loss": 0.41018784046173096, "lr": 3.605517052535219e-06, "epoch": 1.4699517473314812, "percentage": 73.49, "elapsed_time": "7:21:25", "remaining_time": "2:39:12"} +{"current_steps": 5028, "total_steps": 6840, "loss": 0.5852759480476379, "lr": 3.6018007123416486e-06, "epoch": 1.4702441877467467, "percentage": 73.51, "elapsed_time": "7:21:31", "remaining_time": "2:39:07"} +{"current_steps": 5029, "total_steps": 6840, "loss": 0.5942279696464539, "lr": 3.598085867688259e-06, "epoch": 1.470536628162012, "percentage": 73.52, "elapsed_time": "7:21:36", "remaining_time": "2:39:01"} +{"current_steps": 5030, "total_steps": 6840, "loss": 0.6265639662742615, "lr": 3.594372519443374e-06, "epoch": 1.4708290685772774, "percentage": 73.54, "elapsed_time": "7:21:41", "remaining_time": "2:38:56"} +{"current_steps": 5031, "total_steps": 6840, "loss": 0.4539163112640381, "lr": 3.5906606684749668e-06, "epoch": 1.471121508992543, "percentage": 73.55, "elapsed_time": "7:21:45", "remaining_time": "2:38:50"} +{"current_steps": 5032, "total_steps": 6840, "loss": 0.5682815909385681, "lr": 3.586950315650658e-06, "epoch": 1.4714139494078082, "percentage": 73.57, "elapsed_time": "7:21:50", "remaining_time": "2:38:45"} +{"current_steps": 5033, "total_steps": 6840, "loss": 0.5188582539558411, "lr": 3.583241461837721e-06, "epoch": 1.4717063898230736, "percentage": 73.58, "elapsed_time": "7:21:55", "remaining_time": 
"2:38:40"} +{"current_steps": 5034, "total_steps": 6840, "loss": 0.501958966255188, "lr": 3.5795341079030777e-06, "epoch": 1.4719988302383389, "percentage": 73.6, "elapsed_time": "7:22:01", "remaining_time": "2:38:34"} +{"current_steps": 5035, "total_steps": 6840, "loss": 0.5748735666275024, "lr": 3.5758282547132995e-06, "epoch": 1.4722912706536042, "percentage": 73.61, "elapsed_time": "7:22:06", "remaining_time": "2:38:29"} +{"current_steps": 5036, "total_steps": 6840, "loss": 0.5796875357627869, "lr": 3.5721239031346067e-06, "epoch": 1.4725837110688698, "percentage": 73.63, "elapsed_time": "7:22:13", "remaining_time": "2:38:24"} +{"current_steps": 5037, "total_steps": 6840, "loss": 0.457103431224823, "lr": 3.56842105403287e-06, "epoch": 1.472876151484135, "percentage": 73.64, "elapsed_time": "7:22:18", "remaining_time": "2:38:19"} +{"current_steps": 5038, "total_steps": 6840, "loss": 0.5300487875938416, "lr": 3.564719708273607e-06, "epoch": 1.4731685918994004, "percentage": 73.65, "elapsed_time": "7:22:23", "remaining_time": "2:38:14"} +{"current_steps": 5039, "total_steps": 6840, "loss": 0.48143619298934937, "lr": 3.5610198667219886e-06, "epoch": 1.473461032314666, "percentage": 73.67, "elapsed_time": "7:22:28", "remaining_time": "2:38:08"} +{"current_steps": 5040, "total_steps": 6840, "loss": 0.5523685216903687, "lr": 3.557321530242824e-06, "epoch": 1.4737534727299313, "percentage": 73.68, "elapsed_time": "7:22:33", "remaining_time": "2:38:03"} +{"current_steps": 5041, "total_steps": 6840, "loss": 0.5820931196212769, "lr": 3.5536246997005785e-06, "epoch": 1.4740459131451966, "percentage": 73.7, "elapsed_time": "7:22:38", "remaining_time": "2:37:58"} +{"current_steps": 5042, "total_steps": 6840, "loss": 0.6287394762039185, "lr": 3.5499293759593656e-06, "epoch": 1.4743383535604622, "percentage": 73.71, "elapsed_time": "7:22:44", "remaining_time": "2:37:52"} +{"current_steps": 5043, "total_steps": 6840, "loss": 0.4621254801750183, "lr": 3.5462355598829433e-06, 
"epoch": 1.4746307939757275, "percentage": 73.73, "elapsed_time": "7:22:50", "remaining_time": "2:37:48"} +{"current_steps": 5044, "total_steps": 6840, "loss": 0.5571160316467285, "lr": 3.5425432523347205e-06, "epoch": 1.4749232343909928, "percentage": 73.74, "elapsed_time": "7:22:57", "remaining_time": "2:37:43"} +{"current_steps": 5045, "total_steps": 6840, "loss": 0.4135715365409851, "lr": 3.5388524541777492e-06, "epoch": 1.4752156748062581, "percentage": 73.76, "elapsed_time": "7:23:01", "remaining_time": "2:37:37"} +{"current_steps": 5046, "total_steps": 6840, "loss": 0.524153470993042, "lr": 3.535163166274733e-06, "epoch": 1.4755081152215237, "percentage": 73.77, "elapsed_time": "7:23:06", "remaining_time": "2:37:32"} +{"current_steps": 5047, "total_steps": 6840, "loss": 0.6330267786979675, "lr": 3.5314753894880205e-06, "epoch": 1.475800555636789, "percentage": 73.79, "elapsed_time": "7:23:11", "remaining_time": "2:37:26"} +{"current_steps": 5048, "total_steps": 6840, "loss": 0.46210330724716187, "lr": 3.527789124679605e-06, "epoch": 1.4760929960520544, "percentage": 73.8, "elapsed_time": "7:23:16", "remaining_time": "2:37:21"} +{"current_steps": 5049, "total_steps": 6840, "loss": 0.49293309450149536, "lr": 3.524104372711131e-06, "epoch": 1.4763854364673197, "percentage": 73.82, "elapsed_time": "7:23:22", "remaining_time": "2:37:16"} +{"current_steps": 5050, "total_steps": 6840, "loss": 0.6196815967559814, "lr": 3.520421134443889e-06, "epoch": 1.4766778768825852, "percentage": 73.83, "elapsed_time": "7:23:26", "remaining_time": "2:37:10"} +{"current_steps": 5051, "total_steps": 6840, "loss": 0.42622530460357666, "lr": 3.5167394107388064e-06, "epoch": 1.4769703172978506, "percentage": 73.85, "elapsed_time": "7:23:32", "remaining_time": "2:37:05"} +{"current_steps": 5052, "total_steps": 6840, "loss": 0.4475107491016388, "lr": 3.513059202456468e-06, "epoch": 1.4772627577131159, "percentage": 73.86, "elapsed_time": "7:23:35", "remaining_time": "2:36:59"} 
+{"current_steps": 5053, "total_steps": 6840, "loss": 0.4295683205127716, "lr": 3.5093805104571e-06, "epoch": 1.4775551981283814, "percentage": 73.87, "elapsed_time": "7:23:40", "remaining_time": "2:36:54"} +{"current_steps": 5054, "total_steps": 6840, "loss": 0.5331642627716064, "lr": 3.505703335600573e-06, "epoch": 1.4778476385436468, "percentage": 73.89, "elapsed_time": "7:23:46", "remaining_time": "2:36:49"} +{"current_steps": 5055, "total_steps": 6840, "loss": 0.5615599155426025, "lr": 3.5020276787464058e-06, "epoch": 1.478140078958912, "percentage": 73.9, "elapsed_time": "7:23:51", "remaining_time": "2:36:44"} +{"current_steps": 5056, "total_steps": 6840, "loss": 0.5611366033554077, "lr": 3.4983535407537618e-06, "epoch": 1.4784325193741776, "percentage": 73.92, "elapsed_time": "7:23:57", "remaining_time": "2:36:38"} +{"current_steps": 5057, "total_steps": 6840, "loss": 0.5891577005386353, "lr": 3.494680922481445e-06, "epoch": 1.478724959789443, "percentage": 73.93, "elapsed_time": "7:24:02", "remaining_time": "2:36:33"} +{"current_steps": 5058, "total_steps": 6840, "loss": 0.5583761930465698, "lr": 3.491009824787911e-06, "epoch": 1.4790174002047083, "percentage": 73.95, "elapsed_time": "7:24:07", "remaining_time": "2:36:28"} +{"current_steps": 5059, "total_steps": 6840, "loss": 0.5001339912414551, "lr": 3.4873402485312548e-06, "epoch": 1.4793098406199736, "percentage": 73.96, "elapsed_time": "7:24:14", "remaining_time": "2:36:23"} +{"current_steps": 5060, "total_steps": 6840, "loss": 0.5050641894340515, "lr": 3.4836721945692175e-06, "epoch": 1.479602281035239, "percentage": 73.98, "elapsed_time": "7:24:20", "remaining_time": "2:36:18"} +{"current_steps": 5061, "total_steps": 6840, "loss": 0.5377815365791321, "lr": 3.4800056637591885e-06, "epoch": 1.4798947214505045, "percentage": 73.99, "elapsed_time": "7:24:26", "remaining_time": "2:36:13"} +{"current_steps": 5062, "total_steps": 6840, "loss": 0.5517662763595581, "lr": 3.4763406569581892e-06, "epoch": 
1.4801871618657698, "percentage": 74.01, "elapsed_time": "7:24:31", "remaining_time": "2:36:08"} +{"current_steps": 5063, "total_steps": 6840, "loss": 0.5908320546150208, "lr": 3.4726771750228984e-06, "epoch": 1.4804796022810351, "percentage": 74.02, "elapsed_time": "7:24:36", "remaining_time": "2:36:02"} +{"current_steps": 5064, "total_steps": 6840, "loss": 0.5169299840927124, "lr": 3.4690152188096293e-06, "epoch": 1.4807720426963007, "percentage": 74.04, "elapsed_time": "7:24:41", "remaining_time": "2:35:57"} +{"current_steps": 5065, "total_steps": 6840, "loss": 0.6198064088821411, "lr": 3.4653547891743457e-06, "epoch": 1.481064483111566, "percentage": 74.05, "elapsed_time": "7:24:47", "remaining_time": "2:35:52"} +{"current_steps": 5066, "total_steps": 6840, "loss": 0.4971558153629303, "lr": 3.4616958869726436e-06, "epoch": 1.4813569235268313, "percentage": 74.06, "elapsed_time": "7:24:51", "remaining_time": "2:35:46"} +{"current_steps": 5067, "total_steps": 6840, "loss": 0.556640088558197, "lr": 3.4580385130597794e-06, "epoch": 1.481649363942097, "percentage": 74.08, "elapsed_time": "7:24:55", "remaining_time": "2:35:41"} +{"current_steps": 5068, "total_steps": 6840, "loss": 0.5336956977844238, "lr": 3.4543826682906358e-06, "epoch": 1.4819418043573622, "percentage": 74.09, "elapsed_time": "7:25:00", "remaining_time": "2:35:35"} +{"current_steps": 5069, "total_steps": 6840, "loss": 0.5185145735740662, "lr": 3.4507283535197454e-06, "epoch": 1.4822342447726276, "percentage": 74.11, "elapsed_time": "7:25:05", "remaining_time": "2:35:30"} +{"current_steps": 5070, "total_steps": 6840, "loss": 0.5460748672485352, "lr": 3.447075569601287e-06, "epoch": 1.482526685187893, "percentage": 74.12, "elapsed_time": "7:25:11", "remaining_time": "2:35:25"} +{"current_steps": 5071, "total_steps": 6840, "loss": 0.5860699415206909, "lr": 3.4434243173890667e-06, "epoch": 1.4828191256031584, "percentage": 74.14, "elapsed_time": "7:25:16", "remaining_time": "2:35:19"} 
+{"current_steps": 5072, "total_steps": 6840, "loss": 0.5818450450897217, "lr": 3.4397745977365482e-06, "epoch": 1.4831115660184238, "percentage": 74.15, "elapsed_time": "7:25:22", "remaining_time": "2:35:15"} +{"current_steps": 5073, "total_steps": 6840, "loss": 0.4205876588821411, "lr": 3.4361264114968316e-06, "epoch": 1.483404006433689, "percentage": 74.17, "elapsed_time": "7:25:27", "remaining_time": "2:35:09"} +{"current_steps": 5074, "total_steps": 6840, "loss": 0.5503501892089844, "lr": 3.4324797595226567e-06, "epoch": 1.4836964468489544, "percentage": 74.18, "elapsed_time": "7:25:32", "remaining_time": "2:35:04"} +{"current_steps": 5075, "total_steps": 6840, "loss": 0.5388503074645996, "lr": 3.4288346426664063e-06, "epoch": 1.48398888726422, "percentage": 74.2, "elapsed_time": "7:25:39", "remaining_time": "2:34:59"} +{"current_steps": 5076, "total_steps": 6840, "loss": 0.5866841673851013, "lr": 3.4251910617801054e-06, "epoch": 1.4842813276794853, "percentage": 74.21, "elapsed_time": "7:25:44", "remaining_time": "2:34:54"} +{"current_steps": 5077, "total_steps": 6840, "loss": 0.5377970337867737, "lr": 3.4215490177154176e-06, "epoch": 1.4845737680947506, "percentage": 74.23, "elapsed_time": "7:25:49", "remaining_time": "2:34:48"} +{"current_steps": 5078, "total_steps": 6840, "loss": 0.6311028003692627, "lr": 3.41790851132365e-06, "epoch": 1.4848662085100162, "percentage": 74.24, "elapsed_time": "7:25:54", "remaining_time": "2:34:43"} +{"current_steps": 5079, "total_steps": 6840, "loss": 0.5226441621780396, "lr": 3.414269543455747e-06, "epoch": 1.4851586489252815, "percentage": 74.25, "elapsed_time": "7:26:00", "remaining_time": "2:34:38"} +{"current_steps": 5080, "total_steps": 6840, "loss": 0.6306775212287903, "lr": 3.410632114962298e-06, "epoch": 1.4854510893405468, "percentage": 74.27, "elapsed_time": "7:26:06", "remaining_time": "2:34:33"} +{"current_steps": 5081, "total_steps": 6840, "loss": 0.5432136058807373, "lr": 3.406996226693531e-06, "epoch": 
1.4857435297558124, "percentage": 74.28, "elapsed_time": "7:26:12", "remaining_time": "2:34:28"} +{"current_steps": 5082, "total_steps": 6840, "loss": 0.4218754470348358, "lr": 3.403361879499305e-06, "epoch": 1.4860359701710777, "percentage": 74.3, "elapsed_time": "7:26:17", "remaining_time": "2:34:23"} +{"current_steps": 5083, "total_steps": 6840, "loss": 0.5121650099754333, "lr": 3.3997290742291335e-06, "epoch": 1.486328410586343, "percentage": 74.31, "elapsed_time": "7:26:23", "remaining_time": "2:34:18"} +{"current_steps": 5084, "total_steps": 6840, "loss": 0.5489382743835449, "lr": 3.39609781173216e-06, "epoch": 1.4866208510016083, "percentage": 74.33, "elapsed_time": "7:26:28", "remaining_time": "2:34:12"} +{"current_steps": 5085, "total_steps": 6840, "loss": 0.4190993309020996, "lr": 3.3924680928571694e-06, "epoch": 1.486913291416874, "percentage": 74.34, "elapsed_time": "7:26:32", "remaining_time": "2:34:07"} +{"current_steps": 5086, "total_steps": 6840, "loss": 0.5927796363830566, "lr": 3.388839918452589e-06, "epoch": 1.4872057318321392, "percentage": 74.36, "elapsed_time": "7:26:37", "remaining_time": "2:34:01"} +{"current_steps": 5087, "total_steps": 6840, "loss": 0.43746429681777954, "lr": 3.3852132893664803e-06, "epoch": 1.4874981722474045, "percentage": 74.37, "elapsed_time": "7:26:42", "remaining_time": "2:33:56"} +{"current_steps": 5088, "total_steps": 6840, "loss": 0.41599413752555847, "lr": 3.381588206446548e-06, "epoch": 1.4877906126626699, "percentage": 74.39, "elapsed_time": "7:26:47", "remaining_time": "2:33:51"} +{"current_steps": 5089, "total_steps": 6840, "loss": 0.5803484320640564, "lr": 3.3779646705401305e-06, "epoch": 1.4880830530779354, "percentage": 74.4, "elapsed_time": "7:26:52", "remaining_time": "2:33:45"} +{"current_steps": 5090, "total_steps": 6840, "loss": 0.5277384519577026, "lr": 3.3743426824942082e-06, "epoch": 1.4883754934932008, "percentage": 74.42, "elapsed_time": "7:26:58", "remaining_time": "2:33:40"} +{"current_steps": 
5091, "total_steps": 6840, "loss": 0.5842317342758179, "lr": 3.370722243155401e-06, "epoch": 1.488667933908466, "percentage": 74.43, "elapsed_time": "7:27:04", "remaining_time": "2:33:35"} +{"current_steps": 5092, "total_steps": 6840, "loss": 0.5394416451454163, "lr": 3.367103353369965e-06, "epoch": 1.4889603743237316, "percentage": 74.44, "elapsed_time": "7:27:08", "remaining_time": "2:33:29"} +{"current_steps": 5093, "total_steps": 6840, "loss": 0.5457144975662231, "lr": 3.3634860139837877e-06, "epoch": 1.489252814738997, "percentage": 74.46, "elapsed_time": "7:27:14", "remaining_time": "2:33:24"} +{"current_steps": 5094, "total_steps": 6840, "loss": 0.49552473425865173, "lr": 3.3598702258424044e-06, "epoch": 1.4895452551542623, "percentage": 74.47, "elapsed_time": "7:27:18", "remaining_time": "2:33:19"} +{"current_steps": 5095, "total_steps": 6840, "loss": 0.5922214984893799, "lr": 3.3562559897909842e-06, "epoch": 1.4898376955695278, "percentage": 74.49, "elapsed_time": "7:27:24", "remaining_time": "2:33:14"} +{"current_steps": 5096, "total_steps": 6840, "loss": 0.5844507217407227, "lr": 3.35264330667433e-06, "epoch": 1.4901301359847932, "percentage": 74.5, "elapsed_time": "7:27:30", "remaining_time": "2:33:08"} +{"current_steps": 5097, "total_steps": 6840, "loss": 0.5096029043197632, "lr": 3.3490321773368872e-06, "epoch": 1.4904225764000585, "percentage": 74.52, "elapsed_time": "7:27:35", "remaining_time": "2:33:03"} +{"current_steps": 5098, "total_steps": 6840, "loss": 0.6343984603881836, "lr": 3.345422602622734e-06, "epoch": 1.4907150168153238, "percentage": 74.53, "elapsed_time": "7:27:41", "remaining_time": "2:32:58"} +{"current_steps": 5099, "total_steps": 6840, "loss": 0.5319832563400269, "lr": 3.3418145833755875e-06, "epoch": 1.4910074572305891, "percentage": 74.55, "elapsed_time": "7:27:46", "remaining_time": "2:32:53"} +{"current_steps": 5100, "total_steps": 6840, "loss": 0.6453676819801331, "lr": 3.3382081204388006e-06, "epoch": 1.4912998976458547, 
"percentage": 74.56, "elapsed_time": "7:27:52", "remaining_time": "2:32:48"} +{"current_steps": 5101, "total_steps": 6840, "loss": 0.5129305720329285, "lr": 3.33460321465536e-06, "epoch": 1.49159233806112, "percentage": 74.58, "elapsed_time": "7:28:01", "remaining_time": "2:32:44"} +{"current_steps": 5102, "total_steps": 6840, "loss": 0.5680958032608032, "lr": 3.3309998668678912e-06, "epoch": 1.4918847784763853, "percentage": 74.59, "elapsed_time": "7:28:07", "remaining_time": "2:32:39"} +{"current_steps": 5103, "total_steps": 6840, "loss": 0.5959445834159851, "lr": 3.32739807791866e-06, "epoch": 1.492177218891651, "percentage": 74.61, "elapsed_time": "7:28:12", "remaining_time": "2:32:33"} +{"current_steps": 5104, "total_steps": 6840, "loss": 0.5549102425575256, "lr": 3.3237978486495536e-06, "epoch": 1.4924696593069162, "percentage": 74.62, "elapsed_time": "7:28:17", "remaining_time": "2:32:28"} +{"current_steps": 5105, "total_steps": 6840, "loss": 0.4219816327095032, "lr": 3.3201991799021084e-06, "epoch": 1.4927620997221815, "percentage": 74.63, "elapsed_time": "7:28:23", "remaining_time": "2:32:23"} +{"current_steps": 5106, "total_steps": 6840, "loss": 0.46013522148132324, "lr": 3.3166020725174906e-06, "epoch": 1.493054540137447, "percentage": 74.65, "elapsed_time": "7:28:27", "remaining_time": "2:32:17"} +{"current_steps": 5107, "total_steps": 6840, "loss": 0.6013174057006836, "lr": 3.3130065273365033e-06, "epoch": 1.4933469805527124, "percentage": 74.66, "elapsed_time": "7:28:32", "remaining_time": "2:32:12"} +{"current_steps": 5108, "total_steps": 6840, "loss": 0.7097996473312378, "lr": 3.3094125451995827e-06, "epoch": 1.4936394209679777, "percentage": 74.68, "elapsed_time": "7:28:38", "remaining_time": "2:32:07"} +{"current_steps": 5109, "total_steps": 6840, "loss": 0.6704884767532349, "lr": 3.305820126946799e-06, "epoch": 1.4939318613832433, "percentage": 74.69, "elapsed_time": "7:28:45", "remaining_time": "2:32:02"} +{"current_steps": 5110, "total_steps": 
6840, "loss": 0.5211119651794434, "lr": 3.3022292734178605e-06, "epoch": 1.4942243017985086, "percentage": 74.71, "elapsed_time": "7:28:49", "remaining_time": "2:31:57"} +{"current_steps": 5111, "total_steps": 6840, "loss": 0.5830427408218384, "lr": 3.2986399854521065e-06, "epoch": 1.494516742213774, "percentage": 74.72, "elapsed_time": "7:28:55", "remaining_time": "2:31:52"} +{"current_steps": 5112, "total_steps": 6840, "loss": 0.5647883415222168, "lr": 3.2950522638885106e-06, "epoch": 1.4948091826290393, "percentage": 74.74, "elapsed_time": "7:29:02", "remaining_time": "2:31:47"} +{"current_steps": 5113, "total_steps": 6840, "loss": 0.46678125858306885, "lr": 3.2914661095656807e-06, "epoch": 1.4951016230443046, "percentage": 74.75, "elapsed_time": "7:29:07", "remaining_time": "2:31:42"} +{"current_steps": 5114, "total_steps": 6840, "loss": 0.5391934514045715, "lr": 3.287881523321863e-06, "epoch": 1.4953940634595702, "percentage": 74.77, "elapsed_time": "7:29:13", "remaining_time": "2:31:36"} +{"current_steps": 5115, "total_steps": 6840, "loss": 0.4039243459701538, "lr": 3.284298505994926e-06, "epoch": 1.4956865038748355, "percentage": 74.78, "elapsed_time": "7:29:18", "remaining_time": "2:31:31"} +{"current_steps": 5116, "total_steps": 6840, "loss": 0.6187412738800049, "lr": 3.2807170584223802e-06, "epoch": 1.4959789442901008, "percentage": 74.8, "elapsed_time": "7:29:24", "remaining_time": "2:31:26"} +{"current_steps": 5117, "total_steps": 6840, "loss": 0.5165137648582458, "lr": 3.277137181441369e-06, "epoch": 1.4962713847053664, "percentage": 74.81, "elapsed_time": "7:29:30", "remaining_time": "2:31:21"} +{"current_steps": 5118, "total_steps": 6840, "loss": 0.6315420866012573, "lr": 3.273558875888665e-06, "epoch": 1.4965638251206317, "percentage": 74.82, "elapsed_time": "7:29:35", "remaining_time": "2:31:16"} +{"current_steps": 5119, "total_steps": 6840, "loss": 0.5522993206977844, "lr": 3.269982142600677e-06, "epoch": 1.496856265535897, "percentage": 74.84, 
"elapsed_time": "7:29:41", "remaining_time": "2:31:11"} +{"current_steps": 5120, "total_steps": 6840, "loss": 0.5751636028289795, "lr": 3.266406982413444e-06, "epoch": 1.4971487059511626, "percentage": 74.85, "elapsed_time": "7:29:47", "remaining_time": "2:31:06"} +{"current_steps": 5121, "total_steps": 6840, "loss": 0.5552358031272888, "lr": 3.262833396162637e-06, "epoch": 1.4974411463664279, "percentage": 74.87, "elapsed_time": "7:29:52", "remaining_time": "2:31:00"} +{"current_steps": 5122, "total_steps": 6840, "loss": 0.4971257150173187, "lr": 3.259261384683562e-06, "epoch": 1.4977335867816932, "percentage": 74.88, "elapsed_time": "7:29:58", "remaining_time": "2:30:55"} +{"current_steps": 5123, "total_steps": 6840, "loss": 0.3803454637527466, "lr": 3.2556909488111533e-06, "epoch": 1.4980260271969585, "percentage": 74.9, "elapsed_time": "7:30:01", "remaining_time": "2:30:49"} +{"current_steps": 5124, "total_steps": 6840, "loss": 0.45348531007766724, "lr": 3.25212208937998e-06, "epoch": 1.498318467612224, "percentage": 74.91, "elapsed_time": "7:30:07", "remaining_time": "2:30:44"} +{"current_steps": 5125, "total_steps": 6840, "loss": 0.4839708209037781, "lr": 3.2485548072242403e-06, "epoch": 1.4986109080274894, "percentage": 74.93, "elapsed_time": "7:30:13", "remaining_time": "2:30:39"} +{"current_steps": 5126, "total_steps": 6840, "loss": 0.4743500351905823, "lr": 3.244989103177768e-06, "epoch": 1.4989033484427547, "percentage": 74.94, "elapsed_time": "7:30:18", "remaining_time": "2:30:34"} +{"current_steps": 5127, "total_steps": 6840, "loss": 0.558182954788208, "lr": 3.241424978074018e-06, "epoch": 1.49919578885802, "percentage": 74.96, "elapsed_time": "7:30:23", "remaining_time": "2:30:28"} +{"current_steps": 5128, "total_steps": 6840, "loss": 0.41309911012649536, "lr": 3.2378624327460874e-06, "epoch": 1.4994882292732856, "percentage": 74.97, "elapsed_time": "7:30:30", "remaining_time": "2:30:24"} +{"current_steps": 5129, "total_steps": 6840, "loss": 
0.5627751350402832, "lr": 3.2343014680266984e-06, "epoch": 1.499780669688551, "percentage": 74.99, "elapsed_time": "7:30:34", "remaining_time": "2:30:18"} +{"current_steps": 5130, "total_steps": 6840, "loss": 0.5374714732170105, "lr": 3.230742084748204e-06, "epoch": 1.5000731101038163, "percentage": 75.0, "elapsed_time": "7:30:40", "remaining_time": "2:30:13"} +{"current_steps": 5131, "total_steps": 6840, "loss": 0.4264039993286133, "lr": 3.2271842837425917e-06, "epoch": 1.5003655505190818, "percentage": 75.01, "elapsed_time": "7:30:45", "remaining_time": "2:30:08"} +{"current_steps": 5132, "total_steps": 6840, "loss": 0.44204217195510864, "lr": 3.223628065841472e-06, "epoch": 1.5006579909343472, "percentage": 75.03, "elapsed_time": "7:30:49", "remaining_time": "2:30:02"} +{"current_steps": 5133, "total_steps": 6840, "loss": 0.5322041511535645, "lr": 3.220073431876092e-06, "epoch": 1.5009504313496125, "percentage": 75.04, "elapsed_time": "7:30:55", "remaining_time": "2:29:57"} +{"current_steps": 5134, "total_steps": 6840, "loss": 0.4741417169570923, "lr": 3.216520382677324e-06, "epoch": 1.501242871764878, "percentage": 75.06, "elapsed_time": "7:31:01", "remaining_time": "2:29:52"} +{"current_steps": 5135, "total_steps": 6840, "loss": 0.7069851756095886, "lr": 3.212968919075672e-06, "epoch": 1.5015353121801431, "percentage": 75.07, "elapsed_time": "7:31:06", "remaining_time": "2:29:47"} +{"current_steps": 5136, "total_steps": 6840, "loss": 0.6049044132232666, "lr": 3.2094190419012694e-06, "epoch": 1.5018277525954087, "percentage": 75.09, "elapsed_time": "7:31:11", "remaining_time": "2:29:41"} +{"current_steps": 5137, "total_steps": 6840, "loss": 0.556586503982544, "lr": 3.2058707519838817e-06, "epoch": 1.5021201930106742, "percentage": 75.1, "elapsed_time": "7:31:17", "remaining_time": "2:29:36"} +{"current_steps": 5138, "total_steps": 6840, "loss": 0.46489936113357544, "lr": 3.202324050152894e-06, "epoch": 1.5024126334259393, "percentage": 75.12, "elapsed_time": 
"7:31:24", "remaining_time": "2:29:31"} +{"current_steps": 5139, "total_steps": 6840, "loss": 0.5332333445549011, "lr": 3.1987789372373292e-06, "epoch": 1.5027050738412049, "percentage": 75.13, "elapsed_time": "7:31:30", "remaining_time": "2:29:26"} +{"current_steps": 5140, "total_steps": 6840, "loss": 0.5547586679458618, "lr": 3.1952354140658346e-06, "epoch": 1.5029975142564702, "percentage": 75.15, "elapsed_time": "7:31:34", "remaining_time": "2:29:21"} +{"current_steps": 5141, "total_steps": 6840, "loss": 0.5500372648239136, "lr": 3.1916934814666858e-06, "epoch": 1.5032899546717355, "percentage": 75.16, "elapsed_time": "7:31:37", "remaining_time": "2:29:15"} +{"current_steps": 5142, "total_steps": 6840, "loss": 0.5065571069717407, "lr": 3.1881531402677934e-06, "epoch": 1.503582395087001, "percentage": 75.18, "elapsed_time": "7:31:42", "remaining_time": "2:29:09"} +{"current_steps": 5143, "total_steps": 6840, "loss": 0.5942833423614502, "lr": 3.1846143912966887e-06, "epoch": 1.5038748355022664, "percentage": 75.19, "elapsed_time": "7:31:47", "remaining_time": "2:29:04"} +{"current_steps": 5144, "total_steps": 6840, "loss": 0.4089720547199249, "lr": 3.181077235380531e-06, "epoch": 1.5041672759175317, "percentage": 75.2, "elapsed_time": "7:31:52", "remaining_time": "2:28:59"} +{"current_steps": 5145, "total_steps": 6840, "loss": 0.5360317230224609, "lr": 3.1775416733461107e-06, "epoch": 1.5044597163327973, "percentage": 75.22, "elapsed_time": "7:31:57", "remaining_time": "2:28:53"} +{"current_steps": 5146, "total_steps": 6840, "loss": 0.5403856635093689, "lr": 3.174007706019845e-06, "epoch": 1.5047521567480626, "percentage": 75.23, "elapsed_time": "7:32:02", "remaining_time": "2:28:48"} +{"current_steps": 5147, "total_steps": 6840, "loss": 0.5377147197723389, "lr": 3.1704753342277727e-06, "epoch": 1.505044597163328, "percentage": 75.25, "elapsed_time": "7:32:07", "remaining_time": "2:28:43"} +{"current_steps": 5148, "total_steps": 6840, "loss": 0.49888312816619873, 
"lr": 3.166944558795567e-06, "epoch": 1.5053370375785935, "percentage": 75.26, "elapsed_time": "7:32:12", "remaining_time": "2:28:37"} +{"current_steps": 5149, "total_steps": 6840, "loss": 0.5105957984924316, "lr": 3.1634153805485245e-06, "epoch": 1.5056294779938586, "percentage": 75.28, "elapsed_time": "7:32:17", "remaining_time": "2:28:32"} +{"current_steps": 5150, "total_steps": 6840, "loss": 0.5653882026672363, "lr": 3.1598878003115694e-06, "epoch": 1.5059219184091241, "percentage": 75.29, "elapsed_time": "7:32:23", "remaining_time": "2:28:27"} +{"current_steps": 5151, "total_steps": 6840, "loss": 0.5293145179748535, "lr": 3.1563618189092536e-06, "epoch": 1.5062143588243895, "percentage": 75.31, "elapsed_time": "7:32:27", "remaining_time": "2:28:21"} +{"current_steps": 5152, "total_steps": 6840, "loss": 0.5852463841438293, "lr": 3.1528374371657524e-06, "epoch": 1.5065067992396548, "percentage": 75.32, "elapsed_time": "7:32:33", "remaining_time": "2:28:16"} +{"current_steps": 5153, "total_steps": 6840, "loss": 0.5986759662628174, "lr": 3.1493146559048683e-06, "epoch": 1.5067992396549204, "percentage": 75.34, "elapsed_time": "7:32:39", "remaining_time": "2:28:11"} +{"current_steps": 5154, "total_steps": 6840, "loss": 0.6363133788108826, "lr": 3.1457934759500298e-06, "epoch": 1.5070916800701857, "percentage": 75.35, "elapsed_time": "7:32:45", "remaining_time": "2:28:06"} +{"current_steps": 5155, "total_steps": 6840, "loss": 0.4757901430130005, "lr": 3.1422738981242927e-06, "epoch": 1.507384120485451, "percentage": 75.37, "elapsed_time": "7:32:50", "remaining_time": "2:28:01"} +{"current_steps": 5156, "total_steps": 6840, "loss": 0.7614980936050415, "lr": 3.1387559232503374e-06, "epoch": 1.5076765609007166, "percentage": 75.38, "elapsed_time": "7:32:55", "remaining_time": "2:27:55"} +{"current_steps": 5157, "total_steps": 6840, "loss": 0.5739883184432983, "lr": 3.13523955215047e-06, "epoch": 1.5079690013159819, "percentage": 75.39, "elapsed_time": "7:33:00", 
"remaining_time": "2:27:50"} +{"current_steps": 5158, "total_steps": 6840, "loss": 0.5893388390541077, "lr": 3.131724785646616e-06, "epoch": 1.5082614417312472, "percentage": 75.41, "elapsed_time": "7:33:05", "remaining_time": "2:27:44"} +{"current_steps": 5159, "total_steps": 6840, "loss": 0.5809957981109619, "lr": 3.1282116245603333e-06, "epoch": 1.5085538821465128, "percentage": 75.42, "elapsed_time": "7:33:11", "remaining_time": "2:27:39"} +{"current_steps": 5160, "total_steps": 6840, "loss": 0.5651090741157532, "lr": 3.124700069712803e-06, "epoch": 1.508846322561778, "percentage": 75.44, "elapsed_time": "7:33:17", "remaining_time": "2:27:35"} +{"current_steps": 5161, "total_steps": 6840, "loss": 0.3736303448677063, "lr": 3.1211901219248273e-06, "epoch": 1.5091387629770434, "percentage": 75.45, "elapsed_time": "7:33:21", "remaining_time": "2:27:29"} +{"current_steps": 5162, "total_steps": 6840, "loss": 0.5501068234443665, "lr": 3.117681782016838e-06, "epoch": 1.509431203392309, "percentage": 75.47, "elapsed_time": "7:33:26", "remaining_time": "2:27:23"} +{"current_steps": 5163, "total_steps": 6840, "loss": 0.6210630536079407, "lr": 3.1141750508088865e-06, "epoch": 1.509723643807574, "percentage": 75.48, "elapsed_time": "7:33:31", "remaining_time": "2:27:18"} +{"current_steps": 5164, "total_steps": 6840, "loss": 0.5722042322158813, "lr": 3.110669929120651e-06, "epoch": 1.5100160842228396, "percentage": 75.5, "elapsed_time": "7:33:36", "remaining_time": "2:27:13"} +{"current_steps": 5165, "total_steps": 6840, "loss": 0.5813776254653931, "lr": 3.107166417771431e-06, "epoch": 1.510308524638105, "percentage": 75.51, "elapsed_time": "7:33:41", "remaining_time": "2:27:07"} +{"current_steps": 5166, "total_steps": 6840, "loss": 0.4911368787288666, "lr": 3.1036645175801515e-06, "epoch": 1.5106009650533703, "percentage": 75.53, "elapsed_time": "7:33:47", "remaining_time": "2:27:02"} +{"current_steps": 5167, "total_steps": 6840, "loss": 0.5136172771453857, "lr": 
3.100164229365361e-06, "epoch": 1.5108934054686358, "percentage": 75.54, "elapsed_time": "7:33:53", "remaining_time": "2:26:57"} +{"current_steps": 5168, "total_steps": 6840, "loss": 0.5746543407440186, "lr": 3.096665553945234e-06, "epoch": 1.5111858458839011, "percentage": 75.56, "elapsed_time": "7:33:58", "remaining_time": "2:26:52"} +{"current_steps": 5169, "total_steps": 6840, "loss": 0.4949193000793457, "lr": 3.0931684921375572e-06, "epoch": 1.5114782862991665, "percentage": 75.57, "elapsed_time": "7:34:02", "remaining_time": "2:26:46"} +{"current_steps": 5170, "total_steps": 6840, "loss": 0.5732932090759277, "lr": 3.089673044759751e-06, "epoch": 1.511770726714432, "percentage": 75.58, "elapsed_time": "7:34:06", "remaining_time": "2:26:41"} +{"current_steps": 5171, "total_steps": 6840, "loss": 0.5329696536064148, "lr": 3.086179212628855e-06, "epoch": 1.5120631671296973, "percentage": 75.6, "elapsed_time": "7:34:12", "remaining_time": "2:26:36"} +{"current_steps": 5172, "total_steps": 6840, "loss": 0.631770670413971, "lr": 3.082686996561531e-06, "epoch": 1.5123556075449627, "percentage": 75.61, "elapsed_time": "7:34:17", "remaining_time": "2:26:30"} +{"current_steps": 5173, "total_steps": 6840, "loss": 0.5183405876159668, "lr": 3.0791963973740646e-06, "epoch": 1.5126480479602282, "percentage": 75.63, "elapsed_time": "7:34:23", "remaining_time": "2:26:25"} +{"current_steps": 5174, "total_steps": 6840, "loss": 0.5616034269332886, "lr": 3.075707415882361e-06, "epoch": 1.5129404883754933, "percentage": 75.64, "elapsed_time": "7:34:29", "remaining_time": "2:26:20"} +{"current_steps": 5175, "total_steps": 6840, "loss": 0.48513877391815186, "lr": 3.0722200529019477e-06, "epoch": 1.5132329287907589, "percentage": 75.66, "elapsed_time": "7:34:34", "remaining_time": "2:26:15"} +{"current_steps": 5176, "total_steps": 6840, "loss": 0.5226399898529053, "lr": 3.068734309247976e-06, "epoch": 1.5135253692060244, "percentage": 75.67, "elapsed_time": "7:34:39", "remaining_time": 
"2:26:09"} +{"current_steps": 5177, "total_steps": 6840, "loss": 0.48817533254623413, "lr": 3.0652501857352167e-06, "epoch": 1.5138178096212895, "percentage": 75.69, "elapsed_time": "7:34:45", "remaining_time": "2:26:04"} +{"current_steps": 5178, "total_steps": 6840, "loss": 0.4163327217102051, "lr": 3.061767683178063e-06, "epoch": 1.514110250036555, "percentage": 75.7, "elapsed_time": "7:34:49", "remaining_time": "2:25:59"} +{"current_steps": 5179, "total_steps": 6840, "loss": 0.5984256267547607, "lr": 3.058286802390531e-06, "epoch": 1.5144026904518204, "percentage": 75.72, "elapsed_time": "7:34:53", "remaining_time": "2:25:53"} +{"current_steps": 5180, "total_steps": 6840, "loss": 0.47233515977859497, "lr": 3.054807544186249e-06, "epoch": 1.5146951308670857, "percentage": 75.73, "elapsed_time": "7:34:58", "remaining_time": "2:25:48"} +{"current_steps": 5181, "total_steps": 6840, "loss": 0.5545482635498047, "lr": 3.0513299093784766e-06, "epoch": 1.5149875712823513, "percentage": 75.75, "elapsed_time": "7:35:03", "remaining_time": "2:25:42"} +{"current_steps": 5182, "total_steps": 6840, "loss": 0.46200019121170044, "lr": 3.047853898780089e-06, "epoch": 1.5152800116976166, "percentage": 75.76, "elapsed_time": "7:35:08", "remaining_time": "2:25:37"} +{"current_steps": 5183, "total_steps": 6840, "loss": 0.6146235466003418, "lr": 3.0443795132035824e-06, "epoch": 1.515572452112882, "percentage": 75.77, "elapsed_time": "7:35:14", "remaining_time": "2:25:32"} +{"current_steps": 5184, "total_steps": 6840, "loss": 0.5653461217880249, "lr": 3.040906753461075e-06, "epoch": 1.5158648925281475, "percentage": 75.79, "elapsed_time": "7:35:19", "remaining_time": "2:25:27"} +{"current_steps": 5185, "total_steps": 6840, "loss": 0.6514929533004761, "lr": 3.0374356203643008e-06, "epoch": 1.5161573329434128, "percentage": 75.8, "elapsed_time": "7:35:25", "remaining_time": "2:25:21"} +{"current_steps": 5186, "total_steps": 6840, "loss": 0.48213401436805725, "lr": 3.033966114724618e-06, 
"epoch": 1.5164497733586781, "percentage": 75.82, "elapsed_time": "7:35:30", "remaining_time": "2:25:16"} +{"current_steps": 5187, "total_steps": 6840, "loss": 0.4935530424118042, "lr": 3.0304982373530013e-06, "epoch": 1.5167422137739437, "percentage": 75.83, "elapsed_time": "7:35:35", "remaining_time": "2:25:11"} +{"current_steps": 5188, "total_steps": 6840, "loss": 0.6435343027114868, "lr": 3.0270319890600465e-06, "epoch": 1.5170346541892088, "percentage": 75.85, "elapsed_time": "7:35:40", "remaining_time": "2:25:05"} +{"current_steps": 5189, "total_steps": 6840, "loss": 0.49350717663764954, "lr": 3.0235673706559675e-06, "epoch": 1.5173270946044743, "percentage": 75.86, "elapsed_time": "7:35:46", "remaining_time": "2:25:00"} +{"current_steps": 5190, "total_steps": 6840, "loss": 0.4745938777923584, "lr": 3.0201043829506015e-06, "epoch": 1.5176195350197397, "percentage": 75.88, "elapsed_time": "7:35:51", "remaining_time": "2:24:55"} +{"current_steps": 5191, "total_steps": 6840, "loss": 0.5867031812667847, "lr": 3.0166430267533944e-06, "epoch": 1.517911975435005, "percentage": 75.89, "elapsed_time": "7:35:57", "remaining_time": "2:24:50"} +{"current_steps": 5192, "total_steps": 6840, "loss": 0.5477231740951538, "lr": 3.01318330287342e-06, "epoch": 1.5182044158502705, "percentage": 75.91, "elapsed_time": "7:36:02", "remaining_time": "2:24:45"} +{"current_steps": 5193, "total_steps": 6840, "loss": 0.5788818597793579, "lr": 3.0097252121193687e-06, "epoch": 1.5184968562655359, "percentage": 75.92, "elapsed_time": "7:36:08", "remaining_time": "2:24:40"} +{"current_steps": 5194, "total_steps": 6840, "loss": 0.4967714548110962, "lr": 3.0062687552995475e-06, "epoch": 1.5187892966808012, "percentage": 75.94, "elapsed_time": "7:36:13", "remaining_time": "2:24:34"} +{"current_steps": 5195, "total_steps": 6840, "loss": 0.6427319645881653, "lr": 3.002813933221882e-06, "epoch": 1.5190817370960668, "percentage": 75.95, "elapsed_time": "7:36:18", "remaining_time": "2:24:29"} 
+{"current_steps": 5196, "total_steps": 6840, "loss": 0.5615307688713074, "lr": 2.999360746693916e-06, "epoch": 1.519374177511332, "percentage": 75.96, "elapsed_time": "7:36:23", "remaining_time": "2:24:24"} +{"current_steps": 5197, "total_steps": 6840, "loss": 0.6646313667297363, "lr": 2.9959091965228102e-06, "epoch": 1.5196666179265974, "percentage": 75.98, "elapsed_time": "7:36:28", "remaining_time": "2:24:18"} +{"current_steps": 5198, "total_steps": 6840, "loss": 0.47536247968673706, "lr": 2.9924592835153454e-06, "epoch": 1.519959058341863, "percentage": 75.99, "elapsed_time": "7:36:33", "remaining_time": "2:24:13"} +{"current_steps": 5199, "total_steps": 6840, "loss": 0.5850256681442261, "lr": 2.9890110084779157e-06, "epoch": 1.5202514987571283, "percentage": 76.01, "elapsed_time": "7:36:38", "remaining_time": "2:24:08"} +{"current_steps": 5200, "total_steps": 6840, "loss": 0.5724887251853943, "lr": 2.985564372216536e-06, "epoch": 1.5205439391723936, "percentage": 76.02, "elapsed_time": "7:36:45", "remaining_time": "2:24:03"} +{"current_steps": 5201, "total_steps": 6840, "loss": 0.5052510499954224, "lr": 2.9821193755368383e-06, "epoch": 1.5208363795876592, "percentage": 76.04, "elapsed_time": "7:36:53", "remaining_time": "2:23:58"} +{"current_steps": 5202, "total_steps": 6840, "loss": 0.439144492149353, "lr": 2.9786760192440644e-06, "epoch": 1.5211288200029243, "percentage": 76.05, "elapsed_time": "7:36:58", "remaining_time": "2:23:53"} +{"current_steps": 5203, "total_steps": 6840, "loss": 0.4560511112213135, "lr": 2.97523430414308e-06, "epoch": 1.5214212604181898, "percentage": 76.07, "elapsed_time": "7:37:03", "remaining_time": "2:23:48"} +{"current_steps": 5204, "total_steps": 6840, "loss": 0.6848068237304688, "lr": 2.9717942310383664e-06, "epoch": 1.5217137008334551, "percentage": 76.08, "elapsed_time": "7:37:08", "remaining_time": "2:23:42"} +{"current_steps": 5205, "total_steps": 6840, "loss": 0.5541313886642456, "lr": 2.9683558007340184e-06, "epoch": 
1.5220061412487205, "percentage": 76.1, "elapsed_time": "7:37:14", "remaining_time": "2:23:37"} +{"current_steps": 5206, "total_steps": 6840, "loss": 0.5117338299751282, "lr": 2.964919014033749e-06, "epoch": 1.522298581663986, "percentage": 76.11, "elapsed_time": "7:37:19", "remaining_time": "2:23:32"} +{"current_steps": 5207, "total_steps": 6840, "loss": 0.5164151191711426, "lr": 2.9614838717408866e-06, "epoch": 1.5225910220792513, "percentage": 76.13, "elapsed_time": "7:37:24", "remaining_time": "2:23:26"} +{"current_steps": 5208, "total_steps": 6840, "loss": 0.5461020469665527, "lr": 2.9580503746583744e-06, "epoch": 1.5228834624945167, "percentage": 76.14, "elapsed_time": "7:37:29", "remaining_time": "2:23:21"} +{"current_steps": 5209, "total_steps": 6840, "loss": 0.4265401065349579, "lr": 2.9546185235887705e-06, "epoch": 1.5231759029097822, "percentage": 76.15, "elapsed_time": "7:37:36", "remaining_time": "2:23:16"} +{"current_steps": 5210, "total_steps": 6840, "loss": 0.47372496128082275, "lr": 2.9511883193342505e-06, "epoch": 1.5234683433250475, "percentage": 76.17, "elapsed_time": "7:37:39", "remaining_time": "2:23:11"} +{"current_steps": 5211, "total_steps": 6840, "loss": 0.43951019644737244, "lr": 2.9477597626966036e-06, "epoch": 1.5237607837403129, "percentage": 76.18, "elapsed_time": "7:37:44", "remaining_time": "2:23:05"} +{"current_steps": 5212, "total_steps": 6840, "loss": 0.6514073610305786, "lr": 2.9443328544772343e-06, "epoch": 1.5240532241555784, "percentage": 76.2, "elapsed_time": "7:37:49", "remaining_time": "2:23:00"} +{"current_steps": 5213, "total_steps": 6840, "loss": 0.523013710975647, "lr": 2.940907595477164e-06, "epoch": 1.5243456645708435, "percentage": 76.21, "elapsed_time": "7:37:55", "remaining_time": "2:22:55"} +{"current_steps": 5214, "total_steps": 6840, "loss": 0.4945281744003296, "lr": 2.9374839864970194e-06, "epoch": 1.524638104986109, "percentage": 76.23, "elapsed_time": "7:38:01", "remaining_time": "2:22:50"} 
+{"current_steps": 5215, "total_steps": 6840, "loss": 0.5768609046936035, "lr": 2.9340620283370525e-06, "epoch": 1.5249305454013746, "percentage": 76.24, "elapsed_time": "7:38:05", "remaining_time": "2:22:44"} +{"current_steps": 5216, "total_steps": 6840, "loss": 0.45644205808639526, "lr": 2.930641721797125e-06, "epoch": 1.5252229858166397, "percentage": 76.26, "elapsed_time": "7:38:10", "remaining_time": "2:22:39"} +{"current_steps": 5217, "total_steps": 6840, "loss": 0.590227484703064, "lr": 2.92722306767671e-06, "epoch": 1.5255154262319053, "percentage": 76.27, "elapsed_time": "7:38:17", "remaining_time": "2:22:34"} +{"current_steps": 5218, "total_steps": 6840, "loss": 0.5618122816085815, "lr": 2.9238060667749014e-06, "epoch": 1.5258078666471706, "percentage": 76.29, "elapsed_time": "7:38:22", "remaining_time": "2:22:29"} +{"current_steps": 5219, "total_steps": 6840, "loss": 0.6431877613067627, "lr": 2.9203907198904027e-06, "epoch": 1.526100307062436, "percentage": 76.3, "elapsed_time": "7:38:28", "remaining_time": "2:22:23"} +{"current_steps": 5220, "total_steps": 6840, "loss": 0.5019941329956055, "lr": 2.916977027821527e-06, "epoch": 1.5263927474777015, "percentage": 76.32, "elapsed_time": "7:38:33", "remaining_time": "2:22:18"} +{"current_steps": 5221, "total_steps": 6840, "loss": 0.5413016080856323, "lr": 2.913564991366209e-06, "epoch": 1.5266851878929668, "percentage": 76.33, "elapsed_time": "7:38:38", "remaining_time": "2:22:13"} +{"current_steps": 5222, "total_steps": 6840, "loss": 0.6546905636787415, "lr": 2.9101546113219846e-06, "epoch": 1.5269776283082321, "percentage": 76.35, "elapsed_time": "7:38:44", "remaining_time": "2:22:08"} +{"current_steps": 5223, "total_steps": 6840, "loss": 0.5689815878868103, "lr": 2.906745888486013e-06, "epoch": 1.5272700687234977, "percentage": 76.36, "elapsed_time": "7:38:49", "remaining_time": "2:22:02"} +{"current_steps": 5224, "total_steps": 6840, "loss": 0.5134810209274292, "lr": 2.9033388236550632e-06, "epoch": 
1.527562509138763, "percentage": 76.37, "elapsed_time": "7:38:54", "remaining_time": "2:21:57"} +{"current_steps": 5225, "total_steps": 6840, "loss": 0.4880787134170532, "lr": 2.8999334176255143e-06, "epoch": 1.5278549495540283, "percentage": 76.39, "elapsed_time": "7:39:00", "remaining_time": "2:21:52"} +{"current_steps": 5226, "total_steps": 6840, "loss": 0.4345950782299042, "lr": 2.89652967119336e-06, "epoch": 1.528147389969294, "percentage": 76.4, "elapsed_time": "7:39:05", "remaining_time": "2:21:47"} +{"current_steps": 5227, "total_steps": 6840, "loss": 0.43327242136001587, "lr": 2.893127585154205e-06, "epoch": 1.528439830384559, "percentage": 76.42, "elapsed_time": "7:39:10", "remaining_time": "2:21:41"} +{"current_steps": 5228, "total_steps": 6840, "loss": 0.6423674821853638, "lr": 2.889727160303266e-06, "epoch": 1.5287322707998245, "percentage": 76.43, "elapsed_time": "7:39:17", "remaining_time": "2:21:36"} +{"current_steps": 5229, "total_steps": 6840, "loss": 0.5263554453849792, "lr": 2.886328397435374e-06, "epoch": 1.5290247112150899, "percentage": 76.45, "elapsed_time": "7:39:22", "remaining_time": "2:21:31"} +{"current_steps": 5230, "total_steps": 6840, "loss": 0.4111948013305664, "lr": 2.882931297344965e-06, "epoch": 1.5293171516303552, "percentage": 76.46, "elapsed_time": "7:39:26", "remaining_time": "2:21:26"} +{"current_steps": 5231, "total_steps": 6840, "loss": 0.43803131580352783, "lr": 2.8795358608260936e-06, "epoch": 1.5296095920456207, "percentage": 76.48, "elapsed_time": "7:39:30", "remaining_time": "2:21:20"} +{"current_steps": 5232, "total_steps": 6840, "loss": 0.4708956778049469, "lr": 2.8761420886724223e-06, "epoch": 1.529902032460886, "percentage": 76.49, "elapsed_time": "7:39:37", "remaining_time": "2:21:15"} +{"current_steps": 5233, "total_steps": 6840, "loss": 0.5268635749816895, "lr": 2.8727499816772265e-06, "epoch": 1.5301944728761514, "percentage": 76.51, "elapsed_time": "7:39:41", "remaining_time": "2:21:09"} +{"current_steps": 
5234, "total_steps": 6840, "loss": 0.5092788934707642, "lr": 2.869359540633385e-06, "epoch": 1.530486913291417, "percentage": 76.52, "elapsed_time": "7:39:46", "remaining_time": "2:21:04"} +{"current_steps": 5235, "total_steps": 6840, "loss": 0.4603293836116791, "lr": 2.8659707663333958e-06, "epoch": 1.5307793537066823, "percentage": 76.54, "elapsed_time": "7:39:51", "remaining_time": "2:20:59"} +{"current_steps": 5236, "total_steps": 6840, "loss": 0.545462965965271, "lr": 2.8625836595693646e-06, "epoch": 1.5310717941219476, "percentage": 76.55, "elapsed_time": "7:39:55", "remaining_time": "2:20:53"} +{"current_steps": 5237, "total_steps": 6840, "loss": 0.511603832244873, "lr": 2.8591982211330073e-06, "epoch": 1.5313642345372132, "percentage": 76.56, "elapsed_time": "7:39:59", "remaining_time": "2:20:48"} +{"current_steps": 5238, "total_steps": 6840, "loss": 0.5076707601547241, "lr": 2.8558144518156485e-06, "epoch": 1.5316566749524785, "percentage": 76.58, "elapsed_time": "7:40:05", "remaining_time": "2:20:42"} +{"current_steps": 5239, "total_steps": 6840, "loss": 0.5923745632171631, "lr": 2.852432352408224e-06, "epoch": 1.5319491153677438, "percentage": 76.59, "elapsed_time": "7:40:10", "remaining_time": "2:20:37"} +{"current_steps": 5240, "total_steps": 6840, "loss": 0.5588465332984924, "lr": 2.849051923701279e-06, "epoch": 1.5322415557830094, "percentage": 76.61, "elapsed_time": "7:40:14", "remaining_time": "2:20:31"} +{"current_steps": 5241, "total_steps": 6840, "loss": 0.6681923270225525, "lr": 2.845673166484969e-06, "epoch": 1.5325339961982745, "percentage": 76.62, "elapsed_time": "7:40:19", "remaining_time": "2:20:26"} +{"current_steps": 5242, "total_steps": 6840, "loss": 0.5702543258666992, "lr": 2.8422960815490564e-06, "epoch": 1.53282643661354, "percentage": 76.64, "elapsed_time": "7:40:23", "remaining_time": "2:20:20"} +{"current_steps": 5243, "total_steps": 6840, "loss": 0.5401744842529297, "lr": 2.8389206696829165e-06, "epoch": 1.5331188770288053, 
"percentage": 76.65, "elapsed_time": "7:40:29", "remaining_time": "2:20:15"} +{"current_steps": 5244, "total_steps": 6840, "loss": 0.43371906876564026, "lr": 2.8355469316755324e-06, "epoch": 1.5334113174440707, "percentage": 76.67, "elapsed_time": "7:40:34", "remaining_time": "2:20:10"} +{"current_steps": 5245, "total_steps": 6840, "loss": 0.5598163604736328, "lr": 2.8321748683154893e-06, "epoch": 1.5337037578593362, "percentage": 76.68, "elapsed_time": "7:40:40", "remaining_time": "2:20:05"} +{"current_steps": 5246, "total_steps": 6840, "loss": 0.5836831331253052, "lr": 2.8288044803909896e-06, "epoch": 1.5339961982746015, "percentage": 76.7, "elapsed_time": "7:40:45", "remaining_time": "2:20:00"} +{"current_steps": 5247, "total_steps": 6840, "loss": 0.5308898687362671, "lr": 2.8254357686898404e-06, "epoch": 1.5342886386898669, "percentage": 76.71, "elapsed_time": "7:40:49", "remaining_time": "2:19:54"} +{"current_steps": 5248, "total_steps": 6840, "loss": 0.6104828119277954, "lr": 2.822068733999459e-06, "epoch": 1.5345810791051324, "percentage": 76.73, "elapsed_time": "7:40:55", "remaining_time": "2:19:49"} +{"current_steps": 5249, "total_steps": 6840, "loss": 0.48373985290527344, "lr": 2.8187033771068685e-06, "epoch": 1.5348735195203977, "percentage": 76.74, "elapsed_time": "7:41:00", "remaining_time": "2:19:43"} +{"current_steps": 5250, "total_steps": 6840, "loss": 0.5213532447814941, "lr": 2.8153396987987e-06, "epoch": 1.535165959935663, "percentage": 76.75, "elapsed_time": "7:41:05", "remaining_time": "2:19:38"} +{"current_steps": 5251, "total_steps": 6840, "loss": 0.5241051912307739, "lr": 2.811977699861195e-06, "epoch": 1.5354584003509286, "percentage": 76.77, "elapsed_time": "7:41:10", "remaining_time": "2:19:33"} +{"current_steps": 5252, "total_steps": 6840, "loss": 0.48321712017059326, "lr": 2.8086173810801974e-06, "epoch": 1.5357508407661937, "percentage": 76.78, "elapsed_time": "7:41:16", "remaining_time": "2:19:28"} +{"current_steps": 5253, 
"total_steps": 6840, "loss": 0.5352765917778015, "lr": 2.8052587432411626e-06, "epoch": 1.5360432811814593, "percentage": 76.8, "elapsed_time": "7:41:21", "remaining_time": "2:19:22"} +{"current_steps": 5254, "total_steps": 6840, "loss": 0.5402188301086426, "lr": 2.8019017871291522e-06, "epoch": 1.5363357215967248, "percentage": 76.81, "elapsed_time": "7:41:27", "remaining_time": "2:19:17"} +{"current_steps": 5255, "total_steps": 6840, "loss": 0.4398813545703888, "lr": 2.798546513528837e-06, "epoch": 1.53662816201199, "percentage": 76.83, "elapsed_time": "7:41:32", "remaining_time": "2:19:12"} +{"current_steps": 5256, "total_steps": 6840, "loss": 0.5661803483963013, "lr": 2.7951929232244855e-06, "epoch": 1.5369206024272555, "percentage": 76.84, "elapsed_time": "7:41:36", "remaining_time": "2:19:06"} +{"current_steps": 5257, "total_steps": 6840, "loss": 0.5051732063293457, "lr": 2.791841016999982e-06, "epoch": 1.5372130428425208, "percentage": 76.86, "elapsed_time": "7:41:40", "remaining_time": "2:19:01"} +{"current_steps": 5258, "total_steps": 6840, "loss": 0.5712389945983887, "lr": 2.788490795638815e-06, "epoch": 1.5375054832577861, "percentage": 76.87, "elapsed_time": "7:41:45", "remaining_time": "2:18:55"} +{"current_steps": 5259, "total_steps": 6840, "loss": 0.6257319450378418, "lr": 2.7851422599240773e-06, "epoch": 1.5377979236730517, "percentage": 76.89, "elapsed_time": "7:41:49", "remaining_time": "2:18:50"} +{"current_steps": 5260, "total_steps": 6840, "loss": 0.5788396596908569, "lr": 2.7817954106384704e-06, "epoch": 1.538090364088317, "percentage": 76.9, "elapsed_time": "7:41:55", "remaining_time": "2:18:45"} +{"current_steps": 5261, "total_steps": 6840, "loss": 0.37253260612487793, "lr": 2.7784502485642985e-06, "epoch": 1.5383828045035823, "percentage": 76.92, "elapsed_time": "7:42:01", "remaining_time": "2:18:40"} +{"current_steps": 5262, "total_steps": 6840, "loss": 0.6547001004219055, "lr": 2.7751067744834726e-06, "epoch": 1.5386752449188479, 
"percentage": 76.93, "elapsed_time": "7:42:06", "remaining_time": "2:18:34"} +{"current_steps": 5263, "total_steps": 6840, "loss": 0.510914146900177, "lr": 2.77176498917751e-06, "epoch": 1.5389676853341132, "percentage": 76.94, "elapsed_time": "7:42:11", "remaining_time": "2:18:29"} +{"current_steps": 5264, "total_steps": 6840, "loss": 0.4387754201889038, "lr": 2.7684248934275327e-06, "epoch": 1.5392601257493785, "percentage": 76.96, "elapsed_time": "7:42:17", "remaining_time": "2:18:24"} +{"current_steps": 5265, "total_steps": 6840, "loss": 0.5640195608139038, "lr": 2.765086488014268e-06, "epoch": 1.539552566164644, "percentage": 76.97, "elapsed_time": "7:42:23", "remaining_time": "2:18:19"} +{"current_steps": 5266, "total_steps": 6840, "loss": 0.5780993103981018, "lr": 2.7617497737180508e-06, "epoch": 1.5398450065799092, "percentage": 76.99, "elapsed_time": "7:42:28", "remaining_time": "2:18:13"} +{"current_steps": 5267, "total_steps": 6840, "loss": 0.5190057158470154, "lr": 2.758414751318813e-06, "epoch": 1.5401374469951747, "percentage": 77.0, "elapsed_time": "7:42:34", "remaining_time": "2:18:09"} +{"current_steps": 5268, "total_steps": 6840, "loss": 0.4204869270324707, "lr": 2.7550814215960964e-06, "epoch": 1.54042988741044, "percentage": 77.02, "elapsed_time": "7:42:39", "remaining_time": "2:18:03"} +{"current_steps": 5269, "total_steps": 6840, "loss": 0.5649294853210449, "lr": 2.7517497853290477e-06, "epoch": 1.5407223278257054, "percentage": 77.03, "elapsed_time": "7:42:44", "remaining_time": "2:17:58"} +{"current_steps": 5270, "total_steps": 6840, "loss": 0.49142545461654663, "lr": 2.748419843296416e-06, "epoch": 1.541014768240971, "percentage": 77.05, "elapsed_time": "7:42:49", "remaining_time": "2:17:52"} +{"current_steps": 5271, "total_steps": 6840, "loss": 0.483539879322052, "lr": 2.745091596276557e-06, "epoch": 1.5413072086562363, "percentage": 77.06, "elapsed_time": "7:42:55", "remaining_time": "2:17:47"} +{"current_steps": 5272, "total_steps": 
6840, "loss": 0.5400283336639404, "lr": 2.7417650450474253e-06, "epoch": 1.5415996490715016, "percentage": 77.08, "elapsed_time": "7:43:00", "remaining_time": "2:17:42"} +{"current_steps": 5273, "total_steps": 6840, "loss": 0.5490765571594238, "lr": 2.7384401903865844e-06, "epoch": 1.5418920894867671, "percentage": 77.09, "elapsed_time": "7:43:06", "remaining_time": "2:17:37"} +{"current_steps": 5274, "total_steps": 6840, "loss": 0.5434873700141907, "lr": 2.7351170330711975e-06, "epoch": 1.5421845299020325, "percentage": 77.11, "elapsed_time": "7:43:11", "remaining_time": "2:17:32"} +{"current_steps": 5275, "total_steps": 6840, "loss": 0.6195025444030762, "lr": 2.7317955738780333e-06, "epoch": 1.5424769703172978, "percentage": 77.12, "elapsed_time": "7:43:17", "remaining_time": "2:17:26"} +{"current_steps": 5276, "total_steps": 6840, "loss": 0.5552260875701904, "lr": 2.728475813583462e-06, "epoch": 1.5427694107325634, "percentage": 77.13, "elapsed_time": "7:43:22", "remaining_time": "2:17:21"} +{"current_steps": 5277, "total_steps": 6840, "loss": 0.5430501699447632, "lr": 2.725157752963461e-06, "epoch": 1.5430618511478287, "percentage": 77.15, "elapsed_time": "7:43:28", "remaining_time": "2:17:16"} +{"current_steps": 5278, "total_steps": 6840, "loss": 0.633337676525116, "lr": 2.7218413927936006e-06, "epoch": 1.543354291563094, "percentage": 77.16, "elapsed_time": "7:43:34", "remaining_time": "2:17:11"} +{"current_steps": 5279, "total_steps": 6840, "loss": 0.4974183738231659, "lr": 2.718526733849062e-06, "epoch": 1.5436467319783596, "percentage": 77.18, "elapsed_time": "7:43:38", "remaining_time": "2:17:05"} +{"current_steps": 5280, "total_steps": 6840, "loss": 0.5840449929237366, "lr": 2.715213776904628e-06, "epoch": 1.5439391723936247, "percentage": 77.19, "elapsed_time": "7:43:42", "remaining_time": "2:17:00"} +{"current_steps": 5281, "total_steps": 6840, "loss": 0.4684101343154907, "lr": 2.7119025227346807e-06, "epoch": 1.5442316128088902, "percentage": 77.21, 
"elapsed_time": "7:43:48", "remaining_time": "2:16:55"} +{"current_steps": 5282, "total_steps": 6840, "loss": 0.48402637243270874, "lr": 2.7085929721132078e-06, "epoch": 1.5445240532241555, "percentage": 77.22, "elapsed_time": "7:43:54", "remaining_time": "2:16:50"} +{"current_steps": 5283, "total_steps": 6840, "loss": 0.6122831106185913, "lr": 2.7052851258137936e-06, "epoch": 1.5448164936394209, "percentage": 77.24, "elapsed_time": "7:44:00", "remaining_time": "2:16:44"} +{"current_steps": 5284, "total_steps": 6840, "loss": 0.5731217861175537, "lr": 2.701978984609629e-06, "epoch": 1.5451089340546864, "percentage": 77.25, "elapsed_time": "7:44:04", "remaining_time": "2:16:39"} +{"current_steps": 5285, "total_steps": 6840, "loss": 0.5610803961753845, "lr": 2.6986745492735044e-06, "epoch": 1.5454013744699517, "percentage": 77.27, "elapsed_time": "7:44:09", "remaining_time": "2:16:34"} +{"current_steps": 5286, "total_steps": 6840, "loss": 0.46112626791000366, "lr": 2.695371820577811e-06, "epoch": 1.545693814885217, "percentage": 77.28, "elapsed_time": "7:44:14", "remaining_time": "2:16:28"} +{"current_steps": 5287, "total_steps": 6840, "loss": 0.5368741154670715, "lr": 2.692070799294542e-06, "epoch": 1.5459862553004826, "percentage": 77.3, "elapsed_time": "7:44:19", "remaining_time": "2:16:23"} +{"current_steps": 5288, "total_steps": 6840, "loss": 0.5991438627243042, "lr": 2.688771486195293e-06, "epoch": 1.546278695715748, "percentage": 77.31, "elapsed_time": "7:44:24", "remaining_time": "2:16:17"} +{"current_steps": 5289, "total_steps": 6840, "loss": 0.5751149654388428, "lr": 2.685473882051254e-06, "epoch": 1.5465711361310133, "percentage": 77.32, "elapsed_time": "7:44:28", "remaining_time": "2:16:12"} +{"current_steps": 5290, "total_steps": 6840, "loss": 0.6055437326431274, "lr": 2.682177987633221e-06, "epoch": 1.5468635765462788, "percentage": 77.34, "elapsed_time": "7:44:33", "remaining_time": "2:16:07"} +{"current_steps": 5291, "total_steps": 6840, "loss": 
0.6009221076965332, "lr": 2.6788838037115916e-06, "epoch": 1.547156016961544, "percentage": 77.35, "elapsed_time": "7:44:37", "remaining_time": "2:16:01"} +{"current_steps": 5292, "total_steps": 6840, "loss": 0.6071531772613525, "lr": 2.6755913310563585e-06, "epoch": 1.5474484573768095, "percentage": 77.37, "elapsed_time": "7:44:41", "remaining_time": "2:15:55"} +{"current_steps": 5293, "total_steps": 6840, "loss": 0.4102080464363098, "lr": 2.6723005704371164e-06, "epoch": 1.547740897792075, "percentage": 77.38, "elapsed_time": "7:44:46", "remaining_time": "2:15:50"} +{"current_steps": 5294, "total_steps": 6840, "loss": 0.48021870851516724, "lr": 2.6690115226230663e-06, "epoch": 1.5480333382073401, "percentage": 77.4, "elapsed_time": "7:44:52", "remaining_time": "2:15:45"} +{"current_steps": 5295, "total_steps": 6840, "loss": 0.4893236458301544, "lr": 2.665724188382999e-06, "epoch": 1.5483257786226057, "percentage": 77.41, "elapsed_time": "7:44:58", "remaining_time": "2:15:40"} +{"current_steps": 5296, "total_steps": 6840, "loss": 0.6365019083023071, "lr": 2.6624385684853095e-06, "epoch": 1.548618219037871, "percentage": 77.43, "elapsed_time": "7:45:05", "remaining_time": "2:15:35"} +{"current_steps": 5297, "total_steps": 6840, "loss": 0.46510767936706543, "lr": 2.659154663697995e-06, "epoch": 1.5489106594531363, "percentage": 77.44, "elapsed_time": "7:45:09", "remaining_time": "2:15:29"} +{"current_steps": 5298, "total_steps": 6840, "loss": 0.4355175495147705, "lr": 2.655872474788641e-06, "epoch": 1.5492030998684019, "percentage": 77.46, "elapsed_time": "7:45:16", "remaining_time": "2:15:25"} +{"current_steps": 5299, "total_steps": 6840, "loss": 0.5180836915969849, "lr": 2.6525920025244432e-06, "epoch": 1.5494955402836672, "percentage": 77.47, "elapsed_time": "7:45:21", "remaining_time": "2:15:19"} +{"current_steps": 5300, "total_steps": 6840, "loss": 0.5597968101501465, "lr": 2.6493132476721927e-06, "epoch": 1.5497879806989325, "percentage": 77.49, 
"elapsed_time": "7:45:25", "remaining_time": "2:15:14"} +{"current_steps": 5301, "total_steps": 6840, "loss": 0.6581016778945923, "lr": 2.646036210998276e-06, "epoch": 1.550080421114198, "percentage": 77.5, "elapsed_time": "7:45:34", "remaining_time": "2:15:10"} +{"current_steps": 5302, "total_steps": 6840, "loss": 0.4875848889350891, "lr": 2.642760893268684e-06, "epoch": 1.5503728615294634, "percentage": 77.51, "elapsed_time": "7:45:39", "remaining_time": "2:15:04"} +{"current_steps": 5303, "total_steps": 6840, "loss": 0.4410843253135681, "lr": 2.639487295248999e-06, "epoch": 1.5506653019447287, "percentage": 77.53, "elapsed_time": "7:45:45", "remaining_time": "2:14:59"} +{"current_steps": 5304, "total_steps": 6840, "loss": 0.5829580426216125, "lr": 2.6362154177044076e-06, "epoch": 1.5509577423599943, "percentage": 77.54, "elapsed_time": "7:45:50", "remaining_time": "2:14:54"} +{"current_steps": 5305, "total_steps": 6840, "loss": 0.6281459927558899, "lr": 2.6329452613996886e-06, "epoch": 1.5512501827752594, "percentage": 77.56, "elapsed_time": "7:45:56", "remaining_time": "2:14:49"} +{"current_steps": 5306, "total_steps": 6840, "loss": 0.525640606880188, "lr": 2.629676827099222e-06, "epoch": 1.551542623190525, "percentage": 77.57, "elapsed_time": "7:46:02", "remaining_time": "2:14:44"} +{"current_steps": 5307, "total_steps": 6840, "loss": 0.5219406485557556, "lr": 2.626410115566985e-06, "epoch": 1.5518350636057903, "percentage": 77.59, "elapsed_time": "7:46:08", "remaining_time": "2:14:38"} +{"current_steps": 5308, "total_steps": 6840, "loss": 0.5120927691459656, "lr": 2.623145127566555e-06, "epoch": 1.5521275040210556, "percentage": 77.6, "elapsed_time": "7:46:13", "remaining_time": "2:14:33"} +{"current_steps": 5309, "total_steps": 6840, "loss": 0.586410641670227, "lr": 2.6198818638610967e-06, "epoch": 1.5524199444363211, "percentage": 77.62, "elapsed_time": "7:46:19", "remaining_time": "2:14:28"} +{"current_steps": 5310, "total_steps": 6840, "loss": 
0.5014485120773315, "lr": 2.6166203252133803e-06, "epoch": 1.5527123848515865, "percentage": 77.63, "elapsed_time": "7:46:26", "remaining_time": "2:14:23"} +{"current_steps": 5311, "total_steps": 6840, "loss": 0.5087070465087891, "lr": 2.6133605123857707e-06, "epoch": 1.5530048252668518, "percentage": 77.65, "elapsed_time": "7:46:30", "remaining_time": "2:14:18"} +{"current_steps": 5312, "total_steps": 6840, "loss": 0.5829774737358093, "lr": 2.610102426140231e-06, "epoch": 1.5532972656821173, "percentage": 77.66, "elapsed_time": "7:46:36", "remaining_time": "2:14:13"} +{"current_steps": 5313, "total_steps": 6840, "loss": 0.5273870229721069, "lr": 2.6068460672383166e-06, "epoch": 1.5535897060973827, "percentage": 77.68, "elapsed_time": "7:46:40", "remaining_time": "2:14:07"} +{"current_steps": 5314, "total_steps": 6840, "loss": 0.528778076171875, "lr": 2.603591436441183e-06, "epoch": 1.553882146512648, "percentage": 77.69, "elapsed_time": "7:46:46", "remaining_time": "2:14:02"} +{"current_steps": 5315, "total_steps": 6840, "loss": 0.4914259612560272, "lr": 2.600338534509581e-06, "epoch": 1.5541745869279135, "percentage": 77.7, "elapsed_time": "7:46:52", "remaining_time": "2:13:57"} +{"current_steps": 5316, "total_steps": 6840, "loss": 0.48063480854034424, "lr": 2.597087362203855e-06, "epoch": 1.5544670273431789, "percentage": 77.72, "elapsed_time": "7:46:56", "remaining_time": "2:13:51"} +{"current_steps": 5317, "total_steps": 6840, "loss": 0.4406088888645172, "lr": 2.593837920283949e-06, "epoch": 1.5547594677584442, "percentage": 77.73, "elapsed_time": "7:47:01", "remaining_time": "2:13:46"} +{"current_steps": 5318, "total_steps": 6840, "loss": 0.5027159452438354, "lr": 2.590590209509398e-06, "epoch": 1.5550519081737098, "percentage": 77.75, "elapsed_time": "7:47:06", "remaining_time": "2:13:41"} +{"current_steps": 5319, "total_steps": 6840, "loss": 0.3894188404083252, "lr": 2.5873442306393357e-06, "epoch": 1.5553443485889749, "percentage": 77.76, "elapsed_time": 
"7:47:11", "remaining_time": "2:13:35"} +{"current_steps": 5320, "total_steps": 6840, "loss": 0.5393104553222656, "lr": 2.584099984432492e-06, "epoch": 1.5556367890042404, "percentage": 77.78, "elapsed_time": "7:47:16", "remaining_time": "2:13:30"} +{"current_steps": 5321, "total_steps": 6840, "loss": 0.5701737999916077, "lr": 2.580857471647186e-06, "epoch": 1.5559292294195057, "percentage": 77.79, "elapsed_time": "7:47:22", "remaining_time": "2:13:25"} +{"current_steps": 5322, "total_steps": 6840, "loss": 0.6173145174980164, "lr": 2.577616693041336e-06, "epoch": 1.556221669834771, "percentage": 77.81, "elapsed_time": "7:47:29", "remaining_time": "2:13:20"} +{"current_steps": 5323, "total_steps": 6840, "loss": 0.534600555896759, "lr": 2.5743776493724548e-06, "epoch": 1.5565141102500366, "percentage": 77.82, "elapsed_time": "7:47:34", "remaining_time": "2:13:15"} +{"current_steps": 5324, "total_steps": 6840, "loss": 0.5205268859863281, "lr": 2.571140341397651e-06, "epoch": 1.556806550665302, "percentage": 77.84, "elapsed_time": "7:47:39", "remaining_time": "2:13:09"} +{"current_steps": 5325, "total_steps": 6840, "loss": 0.5631835460662842, "lr": 2.5679047698736224e-06, "epoch": 1.5570989910805673, "percentage": 77.85, "elapsed_time": "7:47:45", "remaining_time": "2:13:04"} +{"current_steps": 5326, "total_steps": 6840, "loss": 0.5855015516281128, "lr": 2.564670935556667e-06, "epoch": 1.5573914314958328, "percentage": 77.87, "elapsed_time": "7:47:49", "remaining_time": "2:12:59"} +{"current_steps": 5327, "total_steps": 6840, "loss": 0.5219928026199341, "lr": 2.5614388392026735e-06, "epoch": 1.5576838719110981, "percentage": 77.88, "elapsed_time": "7:47:55", "remaining_time": "2:12:54"} +{"current_steps": 5328, "total_steps": 6840, "loss": 0.50178462266922, "lr": 2.5582084815671225e-06, "epoch": 1.5579763123263635, "percentage": 77.89, "elapsed_time": "7:48:00", "remaining_time": "2:12:48"} +{"current_steps": 5329, "total_steps": 6840, "loss": 0.643866777420044, "lr": 
2.554979863405094e-06, "epoch": 1.558268752741629, "percentage": 77.91, "elapsed_time": "7:48:06", "remaining_time": "2:12:43"} +{"current_steps": 5330, "total_steps": 6840, "loss": 0.4976714849472046, "lr": 2.5517529854712543e-06, "epoch": 1.5585611931568941, "percentage": 77.92, "elapsed_time": "7:48:10", "remaining_time": "2:12:38"} +{"current_steps": 5331, "total_steps": 6840, "loss": 0.47352612018585205, "lr": 2.5485278485198716e-06, "epoch": 1.5588536335721597, "percentage": 77.94, "elapsed_time": "7:48:16", "remaining_time": "2:12:32"} +{"current_steps": 5332, "total_steps": 6840, "loss": 0.6319230794906616, "lr": 2.5453044533047955e-06, "epoch": 1.5591460739874252, "percentage": 77.95, "elapsed_time": "7:48:21", "remaining_time": "2:12:27"} +{"current_steps": 5333, "total_steps": 6840, "loss": 0.724555253982544, "lr": 2.5420828005794786e-06, "epoch": 1.5594385144026903, "percentage": 77.97, "elapsed_time": "7:48:26", "remaining_time": "2:12:22"} +{"current_steps": 5334, "total_steps": 6840, "loss": 0.6235928535461426, "lr": 2.5388628910969625e-06, "epoch": 1.5597309548179559, "percentage": 77.98, "elapsed_time": "7:48:30", "remaining_time": "2:12:16"} +{"current_steps": 5335, "total_steps": 6840, "loss": 0.47880417108535767, "lr": 2.5356447256098805e-06, "epoch": 1.5600233952332212, "percentage": 78.0, "elapsed_time": "7:48:34", "remaining_time": "2:12:11"} +{"current_steps": 5336, "total_steps": 6840, "loss": 0.3986828029155731, "lr": 2.53242830487046e-06, "epoch": 1.5603158356484865, "percentage": 78.01, "elapsed_time": "7:48:40", "remaining_time": "2:12:06"} +{"current_steps": 5337, "total_steps": 6840, "loss": 0.515389084815979, "lr": 2.529213629630519e-06, "epoch": 1.560608276063752, "percentage": 78.03, "elapsed_time": "7:48:45", "remaining_time": "2:12:00"} +{"current_steps": 5338, "total_steps": 6840, "loss": 0.611845076084137, "lr": 2.52600070064147e-06, "epoch": 1.5609007164790174, "percentage": 78.04, "elapsed_time": "7:48:51", "remaining_time": 
"2:11:55"} +{"current_steps": 5339, "total_steps": 6840, "loss": 0.4417461156845093, "lr": 2.522789518654314e-06, "epoch": 1.5611931568942827, "percentage": 78.06, "elapsed_time": "7:48:56", "remaining_time": "2:11:50"} +{"current_steps": 5340, "total_steps": 6840, "loss": 0.5082979798316956, "lr": 2.519580084419646e-06, "epoch": 1.5614855973095483, "percentage": 78.07, "elapsed_time": "7:49:02", "remaining_time": "2:11:45"} +{"current_steps": 5341, "total_steps": 6840, "loss": 0.4535973072052002, "lr": 2.516372398687652e-06, "epoch": 1.5617780377248136, "percentage": 78.08, "elapsed_time": "7:49:06", "remaining_time": "2:11:39"} +{"current_steps": 5342, "total_steps": 6840, "loss": 0.5528950095176697, "lr": 2.513166462208111e-06, "epoch": 1.562070478140079, "percentage": 78.1, "elapsed_time": "7:49:11", "remaining_time": "2:11:34"} +{"current_steps": 5343, "total_steps": 6840, "loss": 0.6272662281990051, "lr": 2.5099622757303865e-06, "epoch": 1.5623629185553445, "percentage": 78.11, "elapsed_time": "7:49:16", "remaining_time": "2:11:28"} +{"current_steps": 5344, "total_steps": 6840, "loss": 0.602135181427002, "lr": 2.506759840003439e-06, "epoch": 1.5626553589706096, "percentage": 78.13, "elapsed_time": "7:49:22", "remaining_time": "2:11:23"} +{"current_steps": 5345, "total_steps": 6840, "loss": 0.6336733102798462, "lr": 2.5035591557758197e-06, "epoch": 1.5629477993858751, "percentage": 78.14, "elapsed_time": "7:49:27", "remaining_time": "2:11:18"} +{"current_steps": 5346, "total_steps": 6840, "loss": 0.5819063186645508, "lr": 2.500360223795668e-06, "epoch": 1.5632402398011405, "percentage": 78.16, "elapsed_time": "7:49:34", "remaining_time": "2:11:13"} +{"current_steps": 5347, "total_steps": 6840, "loss": 0.6384624242782593, "lr": 2.4971630448107166e-06, "epoch": 1.5635326802164058, "percentage": 78.17, "elapsed_time": "7:49:38", "remaining_time": "2:11:08"} +{"current_steps": 5348, "total_steps": 6840, "loss": 0.5495754480361938, "lr": 2.493967619568285e-06, 
"epoch": 1.5638251206316713, "percentage": 78.19, "elapsed_time": "7:49:44", "remaining_time": "2:11:02"} +{"current_steps": 5349, "total_steps": 6840, "loss": 0.5661545395851135, "lr": 2.490773948815284e-06, "epoch": 1.5641175610469367, "percentage": 78.2, "elapsed_time": "7:49:49", "remaining_time": "2:10:57"} +{"current_steps": 5350, "total_steps": 6840, "loss": 0.47731083631515503, "lr": 2.487582033298217e-06, "epoch": 1.564410001462202, "percentage": 78.22, "elapsed_time": "7:49:54", "remaining_time": "2:10:52"} +{"current_steps": 5351, "total_steps": 6840, "loss": 0.5081999897956848, "lr": 2.4843918737631724e-06, "epoch": 1.5647024418774675, "percentage": 78.23, "elapsed_time": "7:49:58", "remaining_time": "2:10:46"} +{"current_steps": 5352, "total_steps": 6840, "loss": 0.4803314208984375, "lr": 2.481203470955832e-06, "epoch": 1.5649948822927329, "percentage": 78.25, "elapsed_time": "7:50:02", "remaining_time": "2:10:41"} +{"current_steps": 5353, "total_steps": 6840, "loss": 0.5049692392349243, "lr": 2.4780168256214687e-06, "epoch": 1.5652873227079982, "percentage": 78.26, "elapsed_time": "7:50:08", "remaining_time": "2:10:35"} +{"current_steps": 5354, "total_steps": 6840, "loss": 0.46404945850372314, "lr": 2.4748319385049346e-06, "epoch": 1.5655797631232637, "percentage": 78.27, "elapsed_time": "7:50:13", "remaining_time": "2:10:30"} +{"current_steps": 5355, "total_steps": 6840, "loss": 0.426737904548645, "lr": 2.471648810350681e-06, "epoch": 1.565872203538529, "percentage": 78.29, "elapsed_time": "7:50:18", "remaining_time": "2:10:25"} +{"current_steps": 5356, "total_steps": 6840, "loss": 0.511459231376648, "lr": 2.4684674419027445e-06, "epoch": 1.5661646439537944, "percentage": 78.3, "elapsed_time": "7:50:24", "remaining_time": "2:10:20"} +{"current_steps": 5357, "total_steps": 6840, "loss": 0.5199254155158997, "lr": 2.4652878339047516e-06, "epoch": 1.56645708436906, "percentage": 78.32, "elapsed_time": "7:50:30", "remaining_time": "2:10:15"} 
+{"current_steps": 5358, "total_steps": 6840, "loss": 0.6220999360084534, "lr": 2.4621099870999156e-06, "epoch": 1.566749524784325, "percentage": 78.33, "elapsed_time": "7:50:36", "remaining_time": "2:10:10"} +{"current_steps": 5359, "total_steps": 6840, "loss": 0.598499059677124, "lr": 2.4589339022310386e-06, "epoch": 1.5670419651995906, "percentage": 78.35, "elapsed_time": "7:50:41", "remaining_time": "2:10:04"} +{"current_steps": 5360, "total_steps": 6840, "loss": 0.4726351499557495, "lr": 2.455759580040512e-06, "epoch": 1.567334405614856, "percentage": 78.36, "elapsed_time": "7:50:46", "remaining_time": "2:09:59"} +{"current_steps": 5361, "total_steps": 6840, "loss": 0.4492379426956177, "lr": 2.452587021270314e-06, "epoch": 1.5676268460301213, "percentage": 78.38, "elapsed_time": "7:50:51", "remaining_time": "2:09:54"} +{"current_steps": 5362, "total_steps": 6840, "loss": 0.46546655893325806, "lr": 2.4494162266620105e-06, "epoch": 1.5679192864453868, "percentage": 78.39, "elapsed_time": "7:50:56", "remaining_time": "2:09:48"} +{"current_steps": 5363, "total_steps": 6840, "loss": 0.45048198103904724, "lr": 2.446247196956756e-06, "epoch": 1.5682117268606521, "percentage": 78.41, "elapsed_time": "7:51:01", "remaining_time": "2:09:43"} +{"current_steps": 5364, "total_steps": 6840, "loss": 0.543383002281189, "lr": 2.4430799328952935e-06, "epoch": 1.5685041672759175, "percentage": 78.42, "elapsed_time": "7:51:07", "remaining_time": "2:09:38"} +{"current_steps": 5365, "total_steps": 6840, "loss": 0.560661256313324, "lr": 2.4399144352179484e-06, "epoch": 1.568796607691183, "percentage": 78.44, "elapsed_time": "7:51:12", "remaining_time": "2:09:32"} +{"current_steps": 5366, "total_steps": 6840, "loss": 0.4915887117385864, "lr": 2.4367507046646367e-06, "epoch": 1.5690890481064483, "percentage": 78.45, "elapsed_time": "7:51:18", "remaining_time": "2:09:27"} +{"current_steps": 5367, "total_steps": 6840, "loss": 0.576668918132782, "lr": 2.433588741974863e-06, "epoch": 
1.5693814885217137, "percentage": 78.46, "elapsed_time": "7:51:24", "remaining_time": "2:09:22"} +{"current_steps": 5368, "total_steps": 6840, "loss": 0.615422248840332, "lr": 2.4304285478877134e-06, "epoch": 1.5696739289369792, "percentage": 78.48, "elapsed_time": "7:51:28", "remaining_time": "2:09:17"} +{"current_steps": 5369, "total_steps": 6840, "loss": 0.505649209022522, "lr": 2.4272701231418706e-06, "epoch": 1.5699663693522443, "percentage": 78.49, "elapsed_time": "7:51:32", "remaining_time": "2:09:11"} +{"current_steps": 5370, "total_steps": 6840, "loss": 0.4803265929222107, "lr": 2.424113468475593e-06, "epoch": 1.5702588097675099, "percentage": 78.51, "elapsed_time": "7:51:38", "remaining_time": "2:09:06"} +{"current_steps": 5371, "total_steps": 6840, "loss": 0.43251073360443115, "lr": 2.4209585846267293e-06, "epoch": 1.5705512501827754, "percentage": 78.52, "elapsed_time": "7:51:44", "remaining_time": "2:09:01"} +{"current_steps": 5372, "total_steps": 6840, "loss": 0.6021081209182739, "lr": 2.417805472332716e-06, "epoch": 1.5708436905980405, "percentage": 78.54, "elapsed_time": "7:51:50", "remaining_time": "2:08:56"} +{"current_steps": 5373, "total_steps": 6840, "loss": 0.5236715078353882, "lr": 2.414654132330575e-06, "epoch": 1.571136131013306, "percentage": 78.55, "elapsed_time": "7:51:54", "remaining_time": "2:08:50"} +{"current_steps": 5374, "total_steps": 6840, "loss": 0.45632290840148926, "lr": 2.4115045653569092e-06, "epoch": 1.5714285714285714, "percentage": 78.57, "elapsed_time": "7:51:59", "remaining_time": "2:08:45"} +{"current_steps": 5375, "total_steps": 6840, "loss": 0.5745086669921875, "lr": 2.408356772147912e-06, "epoch": 1.5717210118438367, "percentage": 78.58, "elapsed_time": "7:52:06", "remaining_time": "2:08:40"} +{"current_steps": 5376, "total_steps": 6840, "loss": 0.6032901406288147, "lr": 2.405210753439361e-06, "epoch": 1.5720134522591023, "percentage": 78.6, "elapsed_time": "7:52:11", "remaining_time": "2:08:35"} +{"current_steps": 
5377, "total_steps": 6840, "loss": 0.579899787902832, "lr": 2.40206650996662e-06, "epoch": 1.5723058926743676, "percentage": 78.61, "elapsed_time": "7:52:15", "remaining_time": "2:08:29"} +{"current_steps": 5378, "total_steps": 6840, "loss": 0.5920897722244263, "lr": 2.3989240424646355e-06, "epoch": 1.572598333089633, "percentage": 78.63, "elapsed_time": "7:52:21", "remaining_time": "2:08:24"} +{"current_steps": 5379, "total_steps": 6840, "loss": 0.5080469846725464, "lr": 2.395783351667941e-06, "epoch": 1.5728907735048985, "percentage": 78.64, "elapsed_time": "7:52:25", "remaining_time": "2:08:19"} +{"current_steps": 5380, "total_steps": 6840, "loss": 0.6438730955123901, "lr": 2.392644438310654e-06, "epoch": 1.5731832139201638, "percentage": 78.65, "elapsed_time": "7:52:31", "remaining_time": "2:08:13"} +{"current_steps": 5381, "total_steps": 6840, "loss": 0.6496621370315552, "lr": 2.389507303126475e-06, "epoch": 1.5734756543354291, "percentage": 78.67, "elapsed_time": "7:52:36", "remaining_time": "2:08:08"} +{"current_steps": 5382, "total_steps": 6840, "loss": 0.5780459642410278, "lr": 2.3863719468486925e-06, "epoch": 1.5737680947506947, "percentage": 78.68, "elapsed_time": "7:52:43", "remaining_time": "2:08:03"} +{"current_steps": 5383, "total_steps": 6840, "loss": 0.47817176580429077, "lr": 2.3832383702101747e-06, "epoch": 1.5740605351659598, "percentage": 78.7, "elapsed_time": "7:52:49", "remaining_time": "2:07:58"} +{"current_steps": 5384, "total_steps": 6840, "loss": 0.565629243850708, "lr": 2.3801065739433816e-06, "epoch": 1.5743529755812253, "percentage": 78.71, "elapsed_time": "7:52:55", "remaining_time": "2:07:53"} +{"current_steps": 5385, "total_steps": 6840, "loss": 0.6291453838348389, "lr": 2.376976558780343e-06, "epoch": 1.5746454159964907, "percentage": 78.73, "elapsed_time": "7:53:01", "remaining_time": "2:07:48"} +{"current_steps": 5386, "total_steps": 6840, "loss": 0.5309170484542847, "lr": 2.3738483254526856e-06, "epoch": 1.574937856411756, 
"percentage": 78.74, "elapsed_time": "7:53:07", "remaining_time": "2:07:43"} +{"current_steps": 5387, "total_steps": 6840, "loss": 0.36860692501068115, "lr": 2.370721874691614e-06, "epoch": 1.5752302968270215, "percentage": 78.76, "elapsed_time": "7:53:12", "remaining_time": "2:07:38"} +{"current_steps": 5388, "total_steps": 6840, "loss": 0.4871997833251953, "lr": 2.3675972072279172e-06, "epoch": 1.5755227372422869, "percentage": 78.77, "elapsed_time": "7:53:16", "remaining_time": "2:07:32"} +{"current_steps": 5389, "total_steps": 6840, "loss": 0.5318939685821533, "lr": 2.3644743237919674e-06, "epoch": 1.5758151776575522, "percentage": 78.79, "elapsed_time": "7:53:21", "remaining_time": "2:07:27"} +{"current_steps": 5390, "total_steps": 6840, "loss": 0.5851289629936218, "lr": 2.3613532251137205e-06, "epoch": 1.5761076180728177, "percentage": 78.8, "elapsed_time": "7:53:26", "remaining_time": "2:07:21"} +{"current_steps": 5391, "total_steps": 6840, "loss": 0.5535321235656738, "lr": 2.358233911922713e-06, "epoch": 1.576400058488083, "percentage": 78.82, "elapsed_time": "7:53:30", "remaining_time": "2:07:16"} +{"current_steps": 5392, "total_steps": 6840, "loss": 0.5443980693817139, "lr": 2.3551163849480664e-06, "epoch": 1.5766924989033484, "percentage": 78.83, "elapsed_time": "7:53:36", "remaining_time": "2:07:11"} +{"current_steps": 5393, "total_steps": 6840, "loss": 0.6381241083145142, "lr": 2.352000644918483e-06, "epoch": 1.576984939318614, "percentage": 78.85, "elapsed_time": "7:53:40", "remaining_time": "2:07:05"} +{"current_steps": 5394, "total_steps": 6840, "loss": 0.5710772275924683, "lr": 2.348886692562248e-06, "epoch": 1.5772773797338793, "percentage": 78.86, "elapsed_time": "7:53:44", "remaining_time": "2:06:59"} +{"current_steps": 5395, "total_steps": 6840, "loss": 0.5507428050041199, "lr": 2.3457745286072307e-06, "epoch": 1.5775698201491446, "percentage": 78.87, "elapsed_time": "7:53:51", "remaining_time": "2:06:55"} +{"current_steps": 5396, 
"total_steps": 6840, "loss": 0.4475744366645813, "lr": 2.342664153780878e-06, "epoch": 1.5778622605644101, "percentage": 78.89, "elapsed_time": "7:53:57", "remaining_time": "2:06:50"} +{"current_steps": 5397, "total_steps": 6840, "loss": 0.5237560868263245, "lr": 2.339555568810221e-06, "epoch": 1.5781547009796753, "percentage": 78.9, "elapsed_time": "7:54:02", "remaining_time": "2:06:44"} +{"current_steps": 5398, "total_steps": 6840, "loss": 0.513353705406189, "lr": 2.3364487744218735e-06, "epoch": 1.5784471413949408, "percentage": 78.92, "elapsed_time": "7:54:06", "remaining_time": "2:06:39"} +{"current_steps": 5399, "total_steps": 6840, "loss": 0.5986731052398682, "lr": 2.3333437713420305e-06, "epoch": 1.5787395818102061, "percentage": 78.93, "elapsed_time": "7:54:11", "remaining_time": "2:06:33"} +{"current_steps": 5400, "total_steps": 6840, "loss": 0.5834506750106812, "lr": 2.330240560296466e-06, "epoch": 1.5790320222254715, "percentage": 78.95, "elapsed_time": "7:54:16", "remaining_time": "2:06:28"} +{"current_steps": 5401, "total_steps": 6840, "loss": 0.4756021499633789, "lr": 2.3271391420105384e-06, "epoch": 1.579324462640737, "percentage": 78.96, "elapsed_time": "7:54:27", "remaining_time": "2:06:24"} +{"current_steps": 5402, "total_steps": 6840, "loss": 0.5524263978004456, "lr": 2.3240395172091847e-06, "epoch": 1.5796169030560023, "percentage": 78.98, "elapsed_time": "7:54:32", "remaining_time": "2:06:19"} +{"current_steps": 5403, "total_steps": 6840, "loss": 0.5689926743507385, "lr": 2.320941686616922e-06, "epoch": 1.5799093434712677, "percentage": 78.99, "elapsed_time": "7:54:36", "remaining_time": "2:06:13"} +{"current_steps": 5404, "total_steps": 6840, "loss": 0.5737600326538086, "lr": 2.317845650957852e-06, "epoch": 1.5802017838865332, "percentage": 79.01, "elapsed_time": "7:54:41", "remaining_time": "2:06:08"} +{"current_steps": 5405, "total_steps": 6840, "loss": 0.585626482963562, "lr": 2.314751410955652e-06, "epoch": 1.5804942243017985, 
"percentage": 79.02, "elapsed_time": "7:54:45", "remaining_time": "2:06:02"} +{"current_steps": 5406, "total_steps": 6840, "loss": 0.4410518407821655, "lr": 2.3116589673335833e-06, "epoch": 1.5807866647170639, "percentage": 79.04, "elapsed_time": "7:54:51", "remaining_time": "2:05:57"} +{"current_steps": 5407, "total_steps": 6840, "loss": 0.49071764945983887, "lr": 2.308568320814487e-06, "epoch": 1.5810791051323294, "percentage": 79.05, "elapsed_time": "7:54:57", "remaining_time": "2:05:52"} +{"current_steps": 5408, "total_steps": 6840, "loss": 0.5332186818122864, "lr": 2.3054794721207796e-06, "epoch": 1.5813715455475945, "percentage": 79.06, "elapsed_time": "7:55:02", "remaining_time": "2:05:47"} +{"current_steps": 5409, "total_steps": 6840, "loss": 0.4655637741088867, "lr": 2.3023924219744607e-06, "epoch": 1.58166398596286, "percentage": 79.08, "elapsed_time": "7:55:07", "remaining_time": "2:05:41"} +{"current_steps": 5410, "total_steps": 6840, "loss": 0.4226027727127075, "lr": 2.2993071710971115e-06, "epoch": 1.5819564263781256, "percentage": 79.09, "elapsed_time": "7:55:12", "remaining_time": "2:05:36"} +{"current_steps": 5411, "total_steps": 6840, "loss": 0.5582948923110962, "lr": 2.2962237202098903e-06, "epoch": 1.5822488667933907, "percentage": 79.11, "elapsed_time": "7:55:18", "remaining_time": "2:05:31"} +{"current_steps": 5412, "total_steps": 6840, "loss": 0.6695314645767212, "lr": 2.293142070033535e-06, "epoch": 1.5825413072086563, "percentage": 79.12, "elapsed_time": "7:55:22", "remaining_time": "2:05:25"} +{"current_steps": 5413, "total_steps": 6840, "loss": 0.39315858483314514, "lr": 2.2900622212883617e-06, "epoch": 1.5828337476239216, "percentage": 79.14, "elapsed_time": "7:55:29", "remaining_time": "2:05:20"} +{"current_steps": 5414, "total_steps": 6840, "loss": 0.5034759044647217, "lr": 2.2869841746942666e-06, "epoch": 1.583126188039187, "percentage": 79.15, "elapsed_time": "7:55:34", "remaining_time": "2:05:15"} +{"current_steps": 5415, 
"total_steps": 6840, "loss": 0.6739548444747925, "lr": 2.2839079309707256e-06, "epoch": 1.5834186284544525, "percentage": 79.17, "elapsed_time": "7:55:40", "remaining_time": "2:05:10"} +{"current_steps": 5416, "total_steps": 6840, "loss": 0.4091438949108124, "lr": 2.2808334908367914e-06, "epoch": 1.5837110688697178, "percentage": 79.18, "elapsed_time": "7:55:44", "remaining_time": "2:05:05"} +{"current_steps": 5417, "total_steps": 6840, "loss": 0.5543409585952759, "lr": 2.277760855011094e-06, "epoch": 1.5840035092849831, "percentage": 79.2, "elapsed_time": "7:55:48", "remaining_time": "2:04:59"} +{"current_steps": 5418, "total_steps": 6840, "loss": 0.44680702686309814, "lr": 2.2746900242118487e-06, "epoch": 1.5842959497002487, "percentage": 79.21, "elapsed_time": "7:55:53", "remaining_time": "2:04:54"} +{"current_steps": 5419, "total_steps": 6840, "loss": 0.604156494140625, "lr": 2.271620999156837e-06, "epoch": 1.584588390115514, "percentage": 79.23, "elapsed_time": "7:55:59", "remaining_time": "2:04:49"} +{"current_steps": 5420, "total_steps": 6840, "loss": 0.6055774688720703, "lr": 2.268553780563427e-06, "epoch": 1.5848808305307793, "percentage": 79.24, "elapsed_time": "7:56:04", "remaining_time": "2:04:43"} +{"current_steps": 5421, "total_steps": 6840, "loss": 0.5826502442359924, "lr": 2.265488369148563e-06, "epoch": 1.5851732709460449, "percentage": 79.25, "elapsed_time": "7:56:10", "remaining_time": "2:04:38"} +{"current_steps": 5422, "total_steps": 6840, "loss": 0.61782306432724, "lr": 2.2624247656287658e-06, "epoch": 1.58546571136131, "percentage": 79.27, "elapsed_time": "7:56:16", "remaining_time": "2:04:33"} +{"current_steps": 5423, "total_steps": 6840, "loss": 0.5561526417732239, "lr": 2.2593629707201348e-06, "epoch": 1.5857581517765755, "percentage": 79.28, "elapsed_time": "7:56:22", "remaining_time": "2:04:28"} +{"current_steps": 5424, "total_steps": 6840, "loss": 0.6122138500213623, "lr": 2.2563029851383447e-06, "epoch": 1.5860505921918409, 
"percentage": 79.3, "elapsed_time": "7:56:27", "remaining_time": "2:04:23"} +{"current_steps": 5425, "total_steps": 6840, "loss": 0.5694067478179932, "lr": 2.2532448095986504e-06, "epoch": 1.5863430326071062, "percentage": 79.31, "elapsed_time": "7:56:32", "remaining_time": "2:04:17"} +{"current_steps": 5426, "total_steps": 6840, "loss": 0.5243874788284302, "lr": 2.2501884448158804e-06, "epoch": 1.5866354730223717, "percentage": 79.33, "elapsed_time": "7:56:37", "remaining_time": "2:04:12"} +{"current_steps": 5427, "total_steps": 6840, "loss": 0.5144485831260681, "lr": 2.2471338915044414e-06, "epoch": 1.586927913437637, "percentage": 79.34, "elapsed_time": "7:56:43", "remaining_time": "2:04:07"} +{"current_steps": 5428, "total_steps": 6840, "loss": 0.5013881325721741, "lr": 2.244081150378318e-06, "epoch": 1.5872203538529024, "percentage": 79.36, "elapsed_time": "7:56:47", "remaining_time": "2:04:01"} +{"current_steps": 5429, "total_steps": 6840, "loss": 0.45199382305145264, "lr": 2.2410302221510704e-06, "epoch": 1.587512794268168, "percentage": 79.37, "elapsed_time": "7:56:52", "remaining_time": "2:03:56"} +{"current_steps": 5430, "total_steps": 6840, "loss": 0.4699060022830963, "lr": 2.2379811075358315e-06, "epoch": 1.5878052346834333, "percentage": 79.39, "elapsed_time": "7:56:56", "remaining_time": "2:03:50"} +{"current_steps": 5431, "total_steps": 6840, "loss": 0.6530928611755371, "lr": 2.234933807245314e-06, "epoch": 1.5880976750986986, "percentage": 79.4, "elapsed_time": "7:57:02", "remaining_time": "2:03:45"} +{"current_steps": 5432, "total_steps": 6840, "loss": 0.653563380241394, "lr": 2.2318883219918075e-06, "epoch": 1.5883901155139641, "percentage": 79.42, "elapsed_time": "7:57:06", "remaining_time": "2:03:40"} +{"current_steps": 5433, "total_steps": 6840, "loss": 0.5283595323562622, "lr": 2.2288446524871743e-06, "epoch": 1.5886825559292295, "percentage": 79.43, "elapsed_time": "7:57:11", "remaining_time": "2:03:34"} +{"current_steps": 5434, 
"total_steps": 6840, "loss": 0.4382442831993103, "lr": 2.2258027994428543e-06, "epoch": 1.5889749963444948, "percentage": 79.44, "elapsed_time": "7:57:17", "remaining_time": "2:03:29"} +{"current_steps": 5435, "total_steps": 6840, "loss": 0.427448570728302, "lr": 2.2227627635698624e-06, "epoch": 1.5892674367597603, "percentage": 79.46, "elapsed_time": "7:57:22", "remaining_time": "2:03:24"} +{"current_steps": 5436, "total_steps": 6840, "loss": 0.5794345140457153, "lr": 2.2197245455787875e-06, "epoch": 1.5895598771750254, "percentage": 79.47, "elapsed_time": "7:57:27", "remaining_time": "2:03:19"} +{"current_steps": 5437, "total_steps": 6840, "loss": 0.4996277987957001, "lr": 2.2166881461797953e-06, "epoch": 1.589852317590291, "percentage": 79.49, "elapsed_time": "7:57:34", "remaining_time": "2:03:14"} +{"current_steps": 5438, "total_steps": 6840, "loss": 0.580248236656189, "lr": 2.213653566082625e-06, "epoch": 1.5901447580055563, "percentage": 79.5, "elapsed_time": "7:57:39", "remaining_time": "2:03:08"} +{"current_steps": 5439, "total_steps": 6840, "loss": 0.5173758864402771, "lr": 2.210620805996594e-06, "epoch": 1.5904371984208217, "percentage": 79.52, "elapsed_time": "7:57:45", "remaining_time": "2:03:03"} +{"current_steps": 5440, "total_steps": 6840, "loss": 0.5336873531341553, "lr": 2.2075898666305908e-06, "epoch": 1.5907296388360872, "percentage": 79.53, "elapsed_time": "7:57:50", "remaining_time": "2:02:58"} +{"current_steps": 5441, "total_steps": 6840, "loss": 0.44921910762786865, "lr": 2.204560748693074e-06, "epoch": 1.5910220792513525, "percentage": 79.55, "elapsed_time": "7:57:56", "remaining_time": "2:02:53"} +{"current_steps": 5442, "total_steps": 6840, "loss": 0.46475526690483093, "lr": 2.201533452892086e-06, "epoch": 1.5913145196666179, "percentage": 79.56, "elapsed_time": "7:58:00", "remaining_time": "2:02:47"} +{"current_steps": 5443, "total_steps": 6840, "loss": 0.6213991045951843, "lr": 2.1985079799352383e-06, "epoch": 1.5916069600818834, 
"percentage": 79.58, "elapsed_time": "7:58:06", "remaining_time": "2:02:42"} +{"current_steps": 5444, "total_steps": 6840, "loss": 0.5271334648132324, "lr": 2.1954843305297138e-06, "epoch": 1.5918994004971487, "percentage": 79.59, "elapsed_time": "7:58:10", "remaining_time": "2:02:37"} +{"current_steps": 5445, "total_steps": 6840, "loss": 0.6957610249519348, "lr": 2.192462505382277e-06, "epoch": 1.592191840912414, "percentage": 79.61, "elapsed_time": "7:58:16", "remaining_time": "2:02:31"} +{"current_steps": 5446, "total_steps": 6840, "loss": 0.4935681223869324, "lr": 2.1894425051992587e-06, "epoch": 1.5924842813276796, "percentage": 79.62, "elapsed_time": "7:58:21", "remaining_time": "2:02:26"} +{"current_steps": 5447, "total_steps": 6840, "loss": 0.7389976978302002, "lr": 2.1864243306865663e-06, "epoch": 1.5927767217429447, "percentage": 79.63, "elapsed_time": "7:58:27", "remaining_time": "2:02:21"} +{"current_steps": 5448, "total_steps": 6840, "loss": 0.4711039662361145, "lr": 2.183407982549679e-06, "epoch": 1.5930691621582103, "percentage": 79.65, "elapsed_time": "7:58:32", "remaining_time": "2:02:16"} +{"current_steps": 5449, "total_steps": 6840, "loss": 0.5640024542808533, "lr": 2.180393461493654e-06, "epoch": 1.5933616025734758, "percentage": 79.66, "elapsed_time": "7:58:37", "remaining_time": "2:02:10"} +{"current_steps": 5450, "total_steps": 6840, "loss": 0.5471343994140625, "lr": 2.1773807682231095e-06, "epoch": 1.593654042988741, "percentage": 79.68, "elapsed_time": "7:58:42", "remaining_time": "2:02:05"} +{"current_steps": 5451, "total_steps": 6840, "loss": 0.4971361756324768, "lr": 2.1743699034422483e-06, "epoch": 1.5939464834040065, "percentage": 79.69, "elapsed_time": "7:58:47", "remaining_time": "2:02:00"} +{"current_steps": 5452, "total_steps": 6840, "loss": 0.6338681578636169, "lr": 2.1713608678548414e-06, "epoch": 1.5942389238192718, "percentage": 79.71, "elapsed_time": "7:58:52", "remaining_time": "2:01:54"} +{"current_steps": 5453, 
"total_steps": 6840, "loss": 0.5218038558959961, "lr": 2.168353662164233e-06, "epoch": 1.5945313642345371, "percentage": 79.72, "elapsed_time": "7:58:57", "remaining_time": "2:01:49"} +{"current_steps": 5454, "total_steps": 6840, "loss": 0.44414108991622925, "lr": 2.165348287073339e-06, "epoch": 1.5948238046498027, "percentage": 79.74, "elapsed_time": "7:59:03", "remaining_time": "2:01:44"} +{"current_steps": 5455, "total_steps": 6840, "loss": 0.5994665622711182, "lr": 2.162344743284647e-06, "epoch": 1.595116245065068, "percentage": 79.75, "elapsed_time": "7:59:09", "remaining_time": "2:01:39"} +{"current_steps": 5456, "total_steps": 6840, "loss": 0.6745023727416992, "lr": 2.159343031500217e-06, "epoch": 1.5954086854803333, "percentage": 79.77, "elapsed_time": "7:59:14", "remaining_time": "2:01:34"} +{"current_steps": 5457, "total_steps": 6840, "loss": 0.4678364396095276, "lr": 2.1563431524216825e-06, "epoch": 1.5957011258955989, "percentage": 79.78, "elapsed_time": "7:59:19", "remaining_time": "2:01:28"} +{"current_steps": 5458, "total_steps": 6840, "loss": 0.5792031288146973, "lr": 2.1533451067502464e-06, "epoch": 1.5959935663108642, "percentage": 79.8, "elapsed_time": "7:59:25", "remaining_time": "2:01:23"} +{"current_steps": 5459, "total_steps": 6840, "loss": 0.48152512311935425, "lr": 2.1503488951866822e-06, "epoch": 1.5962860067261295, "percentage": 79.81, "elapsed_time": "7:59:31", "remaining_time": "2:01:18"} +{"current_steps": 5460, "total_steps": 6840, "loss": 0.4407780170440674, "lr": 2.147354518431339e-06, "epoch": 1.596578447141395, "percentage": 79.82, "elapsed_time": "7:59:37", "remaining_time": "2:01:13"} +{"current_steps": 5461, "total_steps": 6840, "loss": 0.41062241792678833, "lr": 2.1443619771841308e-06, "epoch": 1.5968708875566602, "percentage": 79.84, "elapsed_time": "7:59:42", "remaining_time": "2:01:08"} +{"current_steps": 5462, "total_steps": 6840, "loss": 0.4564778208732605, "lr": 2.1413712721445478e-06, "epoch": 1.5971633279719257, 
"percentage": 79.85, "elapsed_time": "7:59:47", "remaining_time": "2:01:02"} +{"current_steps": 5463, "total_steps": 6840, "loss": 0.4347888231277466, "lr": 2.1383824040116474e-06, "epoch": 1.597455768387191, "percentage": 79.87, "elapsed_time": "7:59:51", "remaining_time": "2:00:57"} +{"current_steps": 5464, "total_steps": 6840, "loss": 0.574216902256012, "lr": 2.1353953734840615e-06, "epoch": 1.5977482088024564, "percentage": 79.88, "elapsed_time": "7:59:58", "remaining_time": "2:00:52"} +{"current_steps": 5465, "total_steps": 6840, "loss": 0.46540650725364685, "lr": 2.1324101812599884e-06, "epoch": 1.598040649217722, "percentage": 79.9, "elapsed_time": "8:00:02", "remaining_time": "2:00:46"} +{"current_steps": 5466, "total_steps": 6840, "loss": 0.5446870923042297, "lr": 2.129426828037201e-06, "epoch": 1.5983330896329873, "percentage": 79.91, "elapsed_time": "8:00:07", "remaining_time": "2:00:41"} +{"current_steps": 5467, "total_steps": 6840, "loss": 0.5442406535148621, "lr": 2.126445314513038e-06, "epoch": 1.5986255300482526, "percentage": 79.93, "elapsed_time": "8:00:12", "remaining_time": "2:00:36"} +{"current_steps": 5468, "total_steps": 6840, "loss": 0.48960334062576294, "lr": 2.1234656413844114e-06, "epoch": 1.5989179704635181, "percentage": 79.94, "elapsed_time": "8:00:16", "remaining_time": "2:00:30"} +{"current_steps": 5469, "total_steps": 6840, "loss": 0.5053935647010803, "lr": 2.1204878093477998e-06, "epoch": 1.5992104108787835, "percentage": 79.96, "elapsed_time": "8:00:21", "remaining_time": "2:00:25"} +{"current_steps": 5470, "total_steps": 6840, "loss": 0.5984711647033691, "lr": 2.117511819099256e-06, "epoch": 1.5995028512940488, "percentage": 79.97, "elapsed_time": "8:00:25", "remaining_time": "2:00:19"} +{"current_steps": 5471, "total_steps": 6840, "loss": 0.6060935258865356, "lr": 2.1145376713344e-06, "epoch": 1.5997952917093143, "percentage": 79.99, "elapsed_time": "8:00:32", "remaining_time": "2:00:14"} +{"current_steps": 5472, "total_steps": 
6840, "loss": 0.5640311241149902, "lr": 2.111565366748416e-06, "epoch": 1.6000877321245797, "percentage": 80.0, "elapsed_time": "8:00:37", "remaining_time": "2:00:09"} +{"current_steps": 5473, "total_steps": 6840, "loss": 0.5127131342887878, "lr": 2.1085949060360654e-06, "epoch": 1.600380172539845, "percentage": 80.01, "elapsed_time": "8:00:42", "remaining_time": "2:00:04"} +{"current_steps": 5474, "total_steps": 6840, "loss": 0.5630159378051758, "lr": 2.1056262898916747e-06, "epoch": 1.6006726129551105, "percentage": 80.03, "elapsed_time": "8:00:48", "remaining_time": "1:59:58"} +{"current_steps": 5475, "total_steps": 6840, "loss": 0.4511195421218872, "lr": 2.1026595190091403e-06, "epoch": 1.6009650533703756, "percentage": 80.04, "elapsed_time": "8:00:54", "remaining_time": "1:59:53"} +{"current_steps": 5476, "total_steps": 6840, "loss": 0.47073638439178467, "lr": 2.099694594081927e-06, "epoch": 1.6012574937856412, "percentage": 80.06, "elapsed_time": "8:00:59", "remaining_time": "1:59:48"} +{"current_steps": 5477, "total_steps": 6840, "loss": 0.47757452726364136, "lr": 2.0967315158030675e-06, "epoch": 1.6015499342009065, "percentage": 80.07, "elapsed_time": "8:01:05", "remaining_time": "1:59:43"} +{"current_steps": 5478, "total_steps": 6840, "loss": 0.4703200161457062, "lr": 2.093770284865164e-06, "epoch": 1.6018423746161718, "percentage": 80.09, "elapsed_time": "8:01:10", "remaining_time": "1:59:38"} +{"current_steps": 5479, "total_steps": 6840, "loss": 0.47457796335220337, "lr": 2.090810901960385e-06, "epoch": 1.6021348150314374, "percentage": 80.1, "elapsed_time": "8:01:15", "remaining_time": "1:59:32"} +{"current_steps": 5480, "total_steps": 6840, "loss": 0.4907105267047882, "lr": 2.087853367780469e-06, "epoch": 1.6024272554467027, "percentage": 80.12, "elapsed_time": "8:01:21", "remaining_time": "1:59:27"} +{"current_steps": 5481, "total_steps": 6840, "loss": 0.5329782962799072, "lr": 2.0848976830167224e-06, "epoch": 1.602719695861968, "percentage": 80.13, 
"elapsed_time": "8:01:25", "remaining_time": "1:59:22"} +{"current_steps": 5482, "total_steps": 6840, "loss": 0.45858579874038696, "lr": 2.0819438483600197e-06, "epoch": 1.6030121362772336, "percentage": 80.15, "elapsed_time": "8:01:29", "remaining_time": "1:59:16"} +{"current_steps": 5483, "total_steps": 6840, "loss": 0.47545814514160156, "lr": 2.0789918645007977e-06, "epoch": 1.603304576692499, "percentage": 80.16, "elapsed_time": "8:01:35", "remaining_time": "1:59:11"} +{"current_steps": 5484, "total_steps": 6840, "loss": 0.5482660531997681, "lr": 2.076041732129066e-06, "epoch": 1.6035970171077643, "percentage": 80.18, "elapsed_time": "8:01:41", "remaining_time": "1:59:06"} +{"current_steps": 5485, "total_steps": 6840, "loss": 0.5252633094787598, "lr": 2.0730934519344025e-06, "epoch": 1.6038894575230298, "percentage": 80.19, "elapsed_time": "8:01:47", "remaining_time": "1:59:01"} +{"current_steps": 5486, "total_steps": 6840, "loss": 0.5400367379188538, "lr": 2.0701470246059472e-06, "epoch": 1.604181897938295, "percentage": 80.2, "elapsed_time": "8:01:53", "remaining_time": "1:58:56"} +{"current_steps": 5487, "total_steps": 6840, "loss": 0.4788953363895416, "lr": 2.0672024508324107e-06, "epoch": 1.6044743383535605, "percentage": 80.22, "elapsed_time": "8:01:58", "remaining_time": "1:58:50"} +{"current_steps": 5488, "total_steps": 6840, "loss": 0.5430850982666016, "lr": 2.0642597313020685e-06, "epoch": 1.604766778768826, "percentage": 80.23, "elapsed_time": "8:02:03", "remaining_time": "1:58:45"} +{"current_steps": 5489, "total_steps": 6840, "loss": 0.5833520293235779, "lr": 2.061318866702765e-06, "epoch": 1.6050592191840911, "percentage": 80.25, "elapsed_time": "8:02:08", "remaining_time": "1:58:40"} +{"current_steps": 5490, "total_steps": 6840, "loss": 0.5854958295822144, "lr": 2.058379857721908e-06, "epoch": 1.6053516595993567, "percentage": 80.26, "elapsed_time": "8:02:14", "remaining_time": "1:58:34"} +{"current_steps": 5491, "total_steps": 6840, "loss": 
0.5577352643013, "lr": 2.0554427050464742e-06, "epoch": 1.605644100014622, "percentage": 80.28, "elapsed_time": "8:02:19", "remaining_time": "1:58:29"} +{"current_steps": 5492, "total_steps": 6840, "loss": 0.5328816175460815, "lr": 2.052507409363004e-06, "epoch": 1.6059365404298873, "percentage": 80.29, "elapsed_time": "8:02:23", "remaining_time": "1:58:24"} +{"current_steps": 5493, "total_steps": 6840, "loss": 0.5606744289398193, "lr": 2.0495739713576046e-06, "epoch": 1.6062289808451529, "percentage": 80.31, "elapsed_time": "8:02:29", "remaining_time": "1:58:19"} +{"current_steps": 5494, "total_steps": 6840, "loss": 0.541358470916748, "lr": 2.0466423917159526e-06, "epoch": 1.6065214212604182, "percentage": 80.32, "elapsed_time": "8:02:33", "remaining_time": "1:58:13"} +{"current_steps": 5495, "total_steps": 6840, "loss": 0.6578946709632874, "lr": 2.0437126711232826e-06, "epoch": 1.6068138616756835, "percentage": 80.34, "elapsed_time": "8:02:37", "remaining_time": "1:58:07"} +{"current_steps": 5496, "total_steps": 6840, "loss": 0.5967978239059448, "lr": 2.0407848102644002e-06, "epoch": 1.607106302090949, "percentage": 80.35, "elapsed_time": "8:02:42", "remaining_time": "1:58:02"} +{"current_steps": 5497, "total_steps": 6840, "loss": 0.46947693824768066, "lr": 2.037858809823675e-06, "epoch": 1.6073987425062144, "percentage": 80.37, "elapsed_time": "8:02:48", "remaining_time": "1:57:57"} +{"current_steps": 5498, "total_steps": 6840, "loss": 0.5014760494232178, "lr": 2.0349346704850436e-06, "epoch": 1.6076911829214797, "percentage": 80.38, "elapsed_time": "8:02:53", "remaining_time": "1:57:52"} +{"current_steps": 5499, "total_steps": 6840, "loss": 0.4399675726890564, "lr": 2.0320123929320033e-06, "epoch": 1.6079836233367453, "percentage": 80.39, "elapsed_time": "8:02:57", "remaining_time": "1:57:46"} +{"current_steps": 5500, "total_steps": 6840, "loss": 0.4729107618331909, "lr": 2.0290919778476214e-06, "epoch": 1.6082760637520104, "percentage": 80.41, "elapsed_time": 
"8:03:01", "remaining_time": "1:57:41"} +{"current_steps": 5501, "total_steps": 6840, "loss": 0.5669134259223938, "lr": 2.0261734259145248e-06, "epoch": 1.608568504167276, "percentage": 80.42, "elapsed_time": "8:03:12", "remaining_time": "1:57:37"} +{"current_steps": 5502, "total_steps": 6840, "loss": 0.4200817942619324, "lr": 2.0232567378149082e-06, "epoch": 1.6088609445825413, "percentage": 80.44, "elapsed_time": "8:03:18", "remaining_time": "1:57:32"} +{"current_steps": 5503, "total_steps": 6840, "loss": 0.6057849526405334, "lr": 2.0203419142305303e-06, "epoch": 1.6091533849978066, "percentage": 80.45, "elapsed_time": "8:03:24", "remaining_time": "1:57:26"} +{"current_steps": 5504, "total_steps": 6840, "loss": 0.5644170045852661, "lr": 2.017428955842713e-06, "epoch": 1.6094458254130721, "percentage": 80.47, "elapsed_time": "8:03:29", "remaining_time": "1:57:21"} +{"current_steps": 5505, "total_steps": 6840, "loss": 0.6368730068206787, "lr": 2.014517863332345e-06, "epoch": 1.6097382658283375, "percentage": 80.48, "elapsed_time": "8:03:35", "remaining_time": "1:57:16"} +{"current_steps": 5506, "total_steps": 6840, "loss": 0.4829355478286743, "lr": 2.0116086373798704e-06, "epoch": 1.6100307062436028, "percentage": 80.5, "elapsed_time": "8:03:40", "remaining_time": "1:57:11"} +{"current_steps": 5507, "total_steps": 6840, "loss": 0.5604796409606934, "lr": 2.0087012786653072e-06, "epoch": 1.6103231466588683, "percentage": 80.51, "elapsed_time": "8:03:45", "remaining_time": "1:57:05"} +{"current_steps": 5508, "total_steps": 6840, "loss": 0.5594274997711182, "lr": 2.005795787868232e-06, "epoch": 1.6106155870741337, "percentage": 80.53, "elapsed_time": "8:03:52", "remaining_time": "1:57:00"} +{"current_steps": 5509, "total_steps": 6840, "loss": 0.5553449988365173, "lr": 2.0028921656677857e-06, "epoch": 1.610908027489399, "percentage": 80.54, "elapsed_time": "8:03:57", "remaining_time": "1:56:55"} +{"current_steps": 5510, "total_steps": 6840, "loss": 0.5056631565093994, 
"lr": 1.999990412742673e-06, "epoch": 1.6112004679046645, "percentage": 80.56, "elapsed_time": "8:04:02", "remaining_time": "1:56:50"} +{"current_steps": 5511, "total_steps": 6840, "loss": 0.432037353515625, "lr": 1.9970905297711606e-06, "epoch": 1.6114929083199299, "percentage": 80.57, "elapsed_time": "8:04:07", "remaining_time": "1:56:44"} +{"current_steps": 5512, "total_steps": 6840, "loss": 0.5152974128723145, "lr": 1.9941925174310773e-06, "epoch": 1.6117853487351952, "percentage": 80.58, "elapsed_time": "8:04:13", "remaining_time": "1:56:39"} +{"current_steps": 5513, "total_steps": 6840, "loss": 0.59015291929245, "lr": 1.9912963763998185e-06, "epoch": 1.6120777891504607, "percentage": 80.6, "elapsed_time": "8:04:18", "remaining_time": "1:56:34"} +{"current_steps": 5514, "total_steps": 6840, "loss": 0.564031720161438, "lr": 1.9884021073543368e-06, "epoch": 1.6123702295657258, "percentage": 80.61, "elapsed_time": "8:04:23", "remaining_time": "1:56:29"} +{"current_steps": 5515, "total_steps": 6840, "loss": 0.5930228233337402, "lr": 1.985509710971152e-06, "epoch": 1.6126626699809914, "percentage": 80.63, "elapsed_time": "8:04:28", "remaining_time": "1:56:23"} +{"current_steps": 5516, "total_steps": 6840, "loss": 0.540229082107544, "lr": 1.9826191879263446e-06, "epoch": 1.6129551103962567, "percentage": 80.64, "elapsed_time": "8:04:33", "remaining_time": "1:56:18"} +{"current_steps": 5517, "total_steps": 6840, "loss": 0.5473166704177856, "lr": 1.9797305388955547e-06, "epoch": 1.613247550811522, "percentage": 80.66, "elapsed_time": "8:04:39", "remaining_time": "1:56:13"} +{"current_steps": 5518, "total_steps": 6840, "loss": 0.5814535617828369, "lr": 1.976843764553986e-06, "epoch": 1.6135399912267876, "percentage": 80.67, "elapsed_time": "8:04:44", "remaining_time": "1:56:07"} +{"current_steps": 5519, "total_steps": 6840, "loss": 0.4892576038837433, "lr": 1.973958865576403e-06, "epoch": 1.613832431642053, "percentage": 80.69, "elapsed_time": "8:04:49", 
"remaining_time": "1:56:02"} +{"current_steps": 5520, "total_steps": 6840, "loss": 0.5416869521141052, "lr": 1.97107584263714e-06, "epoch": 1.6141248720573182, "percentage": 80.7, "elapsed_time": "8:04:55", "remaining_time": "1:55:57"} +{"current_steps": 5521, "total_steps": 6840, "loss": 0.5956105589866638, "lr": 1.9681946964100807e-06, "epoch": 1.6144173124725838, "percentage": 80.72, "elapsed_time": "8:04:59", "remaining_time": "1:55:51"} +{"current_steps": 5522, "total_steps": 6840, "loss": 0.5722565650939941, "lr": 1.9653154275686782e-06, "epoch": 1.6147097528878491, "percentage": 80.73, "elapsed_time": "8:05:04", "remaining_time": "1:55:46"} +{"current_steps": 5523, "total_steps": 6840, "loss": 0.3984888195991516, "lr": 1.962438036785942e-06, "epoch": 1.6150021933031145, "percentage": 80.75, "elapsed_time": "8:05:10", "remaining_time": "1:55:41"} +{"current_steps": 5524, "total_steps": 6840, "loss": 0.601211428642273, "lr": 1.959562524734445e-06, "epoch": 1.61529463371838, "percentage": 80.76, "elapsed_time": "8:05:14", "remaining_time": "1:55:36"} +{"current_steps": 5525, "total_steps": 6840, "loss": 0.4803691506385803, "lr": 1.9566888920863247e-06, "epoch": 1.615587074133645, "percentage": 80.77, "elapsed_time": "8:05:17", "remaining_time": "1:55:30"} +{"current_steps": 5526, "total_steps": 6840, "loss": 0.6914256811141968, "lr": 1.9538171395132688e-06, "epoch": 1.6158795145489107, "percentage": 80.79, "elapsed_time": "8:05:23", "remaining_time": "1:55:25"} +{"current_steps": 5527, "total_steps": 6840, "loss": 0.49076569080352783, "lr": 1.950947267686536e-06, "epoch": 1.6161719549641762, "percentage": 80.8, "elapsed_time": "8:05:28", "remaining_time": "1:55:19"} +{"current_steps": 5528, "total_steps": 6840, "loss": 0.45781368017196655, "lr": 1.9480792772769384e-06, "epoch": 1.6164643953794413, "percentage": 80.82, "elapsed_time": "8:05:34", "remaining_time": "1:55:14"} +{"current_steps": 5529, "total_steps": 6840, "loss": 0.5257985591888428, "lr": 
1.9452131689548547e-06, "epoch": 1.6167568357947069, "percentage": 80.83, "elapsed_time": "8:05:39", "remaining_time": "1:55:09"} +{"current_steps": 5530, "total_steps": 6840, "loss": 0.4170517921447754, "lr": 1.9423489433902186e-06, "epoch": 1.6170492762099722, "percentage": 80.85, "elapsed_time": "8:05:45", "remaining_time": "1:55:04"} +{"current_steps": 5531, "total_steps": 6840, "loss": 0.5612319707870483, "lr": 1.939486601252525e-06, "epoch": 1.6173417166252375, "percentage": 80.86, "elapsed_time": "8:05:48", "remaining_time": "1:54:58"} +{"current_steps": 5532, "total_steps": 6840, "loss": 0.4543185234069824, "lr": 1.93662614321083e-06, "epoch": 1.617634157040503, "percentage": 80.88, "elapsed_time": "8:05:55", "remaining_time": "1:54:53"} +{"current_steps": 5533, "total_steps": 6840, "loss": 0.5506256222724915, "lr": 1.933767569933749e-06, "epoch": 1.6179265974557684, "percentage": 80.89, "elapsed_time": "8:06:00", "remaining_time": "1:54:48"} +{"current_steps": 5534, "total_steps": 6840, "loss": 0.5411139130592346, "lr": 1.930910882089454e-06, "epoch": 1.6182190378710337, "percentage": 80.91, "elapsed_time": "8:06:05", "remaining_time": "1:54:42"} +{"current_steps": 5535, "total_steps": 6840, "loss": 0.5332196950912476, "lr": 1.9280560803456794e-06, "epoch": 1.6185114782862993, "percentage": 80.92, "elapsed_time": "8:06:11", "remaining_time": "1:54:37"} +{"current_steps": 5536, "total_steps": 6840, "loss": 0.5159808993339539, "lr": 1.92520316536972e-06, "epoch": 1.6188039187015646, "percentage": 80.94, "elapsed_time": "8:06:17", "remaining_time": "1:54:32"} +{"current_steps": 5537, "total_steps": 6840, "loss": 0.5483378767967224, "lr": 1.9223521378284227e-06, "epoch": 1.61909635911683, "percentage": 80.95, "elapsed_time": "8:06:23", "remaining_time": "1:54:27"} +{"current_steps": 5538, "total_steps": 6840, "loss": 0.6451961994171143, "lr": 1.9195029983882008e-06, "epoch": 1.6193887995320955, "percentage": 80.96, "elapsed_time": "8:06:28", "remaining_time": 
"1:54:22"} +{"current_steps": 5539, "total_steps": 6840, "loss": 0.5904289484024048, "lr": 1.9166557477150227e-06, "epoch": 1.6196812399473606, "percentage": 80.98, "elapsed_time": "8:06:33", "remaining_time": "1:54:16"} +{"current_steps": 5540, "total_steps": 6840, "loss": 0.6688845753669739, "lr": 1.9138103864744164e-06, "epoch": 1.6199736803626261, "percentage": 80.99, "elapsed_time": "8:06:39", "remaining_time": "1:54:11"} +{"current_steps": 5541, "total_steps": 6840, "loss": 0.6299211382865906, "lr": 1.910966915331467e-06, "epoch": 1.6202661207778914, "percentage": 81.01, "elapsed_time": "8:06:43", "remaining_time": "1:54:06"} +{"current_steps": 5542, "total_steps": 6840, "loss": 0.5502864122390747, "lr": 1.908125334950819e-06, "epoch": 1.6205585611931568, "percentage": 81.02, "elapsed_time": "8:06:47", "remaining_time": "1:54:00"} +{"current_steps": 5543, "total_steps": 6840, "loss": 0.5332865118980408, "lr": 1.905285645996674e-06, "epoch": 1.6208510016084223, "percentage": 81.04, "elapsed_time": "8:06:52", "remaining_time": "1:53:55"} +{"current_steps": 5544, "total_steps": 6840, "loss": 0.43371304869651794, "lr": 1.9024478491327936e-06, "epoch": 1.6211434420236877, "percentage": 81.05, "elapsed_time": "8:06:56", "remaining_time": "1:53:49"} +{"current_steps": 5545, "total_steps": 6840, "loss": 0.6992501616477966, "lr": 1.8996119450224936e-06, "epoch": 1.621435882438953, "percentage": 81.07, "elapsed_time": "8:07:02", "remaining_time": "1:53:44"} +{"current_steps": 5546, "total_steps": 6840, "loss": 0.46558254957199097, "lr": 1.8967779343286507e-06, "epoch": 1.6217283228542185, "percentage": 81.08, "elapsed_time": "8:07:06", "remaining_time": "1:53:39"} +{"current_steps": 5547, "total_steps": 6840, "loss": 0.48943620920181274, "lr": 1.8939458177136994e-06, "epoch": 1.6220207632694839, "percentage": 81.1, "elapsed_time": "8:07:10", "remaining_time": "1:53:33"} +{"current_steps": 5548, "total_steps": 6840, "loss": 0.6041419506072998, "lr": 
1.8911155958396256e-06, "epoch": 1.6223132036847492, "percentage": 81.11, "elapsed_time": "8:07:16", "remaining_time": "1:53:28"} +{"current_steps": 5549, "total_steps": 6840, "loss": 0.5695589780807495, "lr": 1.8882872693679787e-06, "epoch": 1.6226056441000147, "percentage": 81.13, "elapsed_time": "8:07:22", "remaining_time": "1:53:23"} +{"current_steps": 5550, "total_steps": 6840, "loss": 0.45147764682769775, "lr": 1.8854608389598616e-06, "epoch": 1.62289808451528, "percentage": 81.14, "elapsed_time": "8:07:26", "remaining_time": "1:53:17"} +{"current_steps": 5551, "total_steps": 6840, "loss": 0.45560893416404724, "lr": 1.8826363052759367e-06, "epoch": 1.6231905249305454, "percentage": 81.15, "elapsed_time": "8:07:31", "remaining_time": "1:53:12"} +{"current_steps": 5552, "total_steps": 6840, "loss": 0.5714661478996277, "lr": 1.8798136689764213e-06, "epoch": 1.623482965345811, "percentage": 81.17, "elapsed_time": "8:07:36", "remaining_time": "1:53:07"} +{"current_steps": 5553, "total_steps": 6840, "loss": 0.6074325442314148, "lr": 1.8769929307210889e-06, "epoch": 1.623775405761076, "percentage": 81.18, "elapsed_time": "8:07:41", "remaining_time": "1:53:01"} +{"current_steps": 5554, "total_steps": 6840, "loss": 0.5406322479248047, "lr": 1.8741740911692708e-06, "epoch": 1.6240678461763416, "percentage": 81.2, "elapsed_time": "8:07:46", "remaining_time": "1:52:56"} +{"current_steps": 5555, "total_steps": 6840, "loss": 0.6098664999008179, "lr": 1.8713571509798524e-06, "epoch": 1.624360286591607, "percentage": 81.21, "elapsed_time": "8:07:53", "remaining_time": "1:52:51"} +{"current_steps": 5556, "total_steps": 6840, "loss": 0.4424859881401062, "lr": 1.8685421108112778e-06, "epoch": 1.6246527270068722, "percentage": 81.23, "elapsed_time": "8:07:59", "remaining_time": "1:52:46"} +{"current_steps": 5557, "total_steps": 6840, "loss": 0.5893913507461548, "lr": 1.8657289713215442e-06, "epoch": 1.6249451674221378, "percentage": 81.24, "elapsed_time": "8:08:04", 
"remaining_time": "1:52:41"} +{"current_steps": 5558, "total_steps": 6840, "loss": 0.5462610125541687, "lr": 1.862917733168208e-06, "epoch": 1.6252376078374031, "percentage": 81.26, "elapsed_time": "8:08:09", "remaining_time": "1:52:35"} +{"current_steps": 5559, "total_steps": 6840, "loss": 0.6044303178787231, "lr": 1.8601083970083766e-06, "epoch": 1.6255300482526684, "percentage": 81.27, "elapsed_time": "8:08:14", "remaining_time": "1:52:30"} +{"current_steps": 5560, "total_steps": 6840, "loss": 0.4110264778137207, "lr": 1.857300963498715e-06, "epoch": 1.625822488667934, "percentage": 81.29, "elapsed_time": "8:08:20", "remaining_time": "1:52:25"} +{"current_steps": 5561, "total_steps": 6840, "loss": 0.5640783309936523, "lr": 1.8544954332954445e-06, "epoch": 1.6261149290831993, "percentage": 81.3, "elapsed_time": "8:08:25", "remaining_time": "1:52:20"} +{"current_steps": 5562, "total_steps": 6840, "loss": 0.43247851729393005, "lr": 1.851691807054342e-06, "epoch": 1.6264073694984647, "percentage": 81.32, "elapsed_time": "8:08:29", "remaining_time": "1:52:14"} +{"current_steps": 5563, "total_steps": 6840, "loss": 0.4909735918045044, "lr": 1.8488900854307367e-06, "epoch": 1.6266998099137302, "percentage": 81.33, "elapsed_time": "8:08:36", "remaining_time": "1:52:09"} +{"current_steps": 5564, "total_steps": 6840, "loss": 0.5705426335334778, "lr": 1.8460902690795135e-06, "epoch": 1.6269922503289953, "percentage": 81.35, "elapsed_time": "8:08:42", "remaining_time": "1:52:04"} +{"current_steps": 5565, "total_steps": 6840, "loss": 0.6455575823783875, "lr": 1.8432923586551144e-06, "epoch": 1.6272846907442609, "percentage": 81.36, "elapsed_time": "8:08:48", "remaining_time": "1:51:59"} +{"current_steps": 5566, "total_steps": 6840, "loss": 0.4156351089477539, "lr": 1.8404963548115318e-06, "epoch": 1.6275771311595264, "percentage": 81.37, "elapsed_time": "8:08:54", "remaining_time": "1:51:54"} +{"current_steps": 5567, "total_steps": 6840, "loss": 0.5497896075248718, "lr": 
1.8377022582023163e-06, "epoch": 1.6278695715747915, "percentage": 81.39, "elapsed_time": "8:08:59", "remaining_time": "1:51:49"} +{"current_steps": 5568, "total_steps": 6840, "loss": 0.5237758159637451, "lr": 1.8349100694805711e-06, "epoch": 1.628162011990057, "percentage": 81.4, "elapsed_time": "8:09:05", "remaining_time": "1:51:43"} +{"current_steps": 5569, "total_steps": 6840, "loss": 0.5140771865844727, "lr": 1.832119789298954e-06, "epoch": 1.6284544524053224, "percentage": 81.42, "elapsed_time": "8:09:09", "remaining_time": "1:51:38"} +{"current_steps": 5570, "total_steps": 6840, "loss": 0.5942349433898926, "lr": 1.8293314183096721e-06, "epoch": 1.6287468928205877, "percentage": 81.43, "elapsed_time": "8:09:13", "remaining_time": "1:51:32"} +{"current_steps": 5571, "total_steps": 6840, "loss": 0.6316613554954529, "lr": 1.8265449571644933e-06, "epoch": 1.6290393332358533, "percentage": 81.45, "elapsed_time": "8:09:19", "remaining_time": "1:51:27"} +{"current_steps": 5572, "total_steps": 6840, "loss": 0.4789954423904419, "lr": 1.823760406514735e-06, "epoch": 1.6293317736511186, "percentage": 81.46, "elapsed_time": "8:09:24", "remaining_time": "1:51:22"} +{"current_steps": 5573, "total_steps": 6840, "loss": 0.596744179725647, "lr": 1.8209777670112706e-06, "epoch": 1.629624214066384, "percentage": 81.48, "elapsed_time": "8:09:28", "remaining_time": "1:51:16"} +{"current_steps": 5574, "total_steps": 6840, "loss": 0.5785890817642212, "lr": 1.8181970393045223e-06, "epoch": 1.6299166544816495, "percentage": 81.49, "elapsed_time": "8:09:33", "remaining_time": "1:51:11"} +{"current_steps": 5575, "total_steps": 6840, "loss": 0.5399461388587952, "lr": 1.8154182240444706e-06, "epoch": 1.6302090948969148, "percentage": 81.51, "elapsed_time": "8:09:38", "remaining_time": "1:51:06"} +{"current_steps": 5576, "total_steps": 6840, "loss": 0.5251961946487427, "lr": 1.812641321880645e-06, "epoch": 1.6305015353121801, "percentage": 81.52, "elapsed_time": "8:09:43", 
"remaining_time": "1:51:00"} +{"current_steps": 5577, "total_steps": 6840, "loss": 0.6094855070114136, "lr": 1.8098663334621314e-06, "epoch": 1.6307939757274457, "percentage": 81.54, "elapsed_time": "8:09:48", "remaining_time": "1:50:55"} +{"current_steps": 5578, "total_steps": 6840, "loss": 0.5586157441139221, "lr": 1.8070932594375656e-06, "epoch": 1.6310864161427108, "percentage": 81.55, "elapsed_time": "8:09:54", "remaining_time": "1:50:50"} +{"current_steps": 5579, "total_steps": 6840, "loss": 0.5572035312652588, "lr": 1.804322100455136e-06, "epoch": 1.6313788565579763, "percentage": 81.56, "elapsed_time": "8:09:58", "remaining_time": "1:50:44"} +{"current_steps": 5580, "total_steps": 6840, "loss": 0.5567929148674011, "lr": 1.801552857162585e-06, "epoch": 1.6316712969732416, "percentage": 81.58, "elapsed_time": "8:10:04", "remaining_time": "1:50:39"} +{"current_steps": 5581, "total_steps": 6840, "loss": 0.4823629558086395, "lr": 1.79878553020721e-06, "epoch": 1.631963737388507, "percentage": 81.59, "elapsed_time": "8:10:08", "remaining_time": "1:50:34"} +{"current_steps": 5582, "total_steps": 6840, "loss": 0.52935791015625, "lr": 1.7960201202358495e-06, "epoch": 1.6322561778037725, "percentage": 81.61, "elapsed_time": "8:10:14", "remaining_time": "1:50:29"} +{"current_steps": 5583, "total_steps": 6840, "loss": 0.5486055016517639, "lr": 1.7932566278949049e-06, "epoch": 1.6325486182190379, "percentage": 81.62, "elapsed_time": "8:10:19", "remaining_time": "1:50:23"} +{"current_steps": 5584, "total_steps": 6840, "loss": 0.5606030225753784, "lr": 1.7904950538303256e-06, "epoch": 1.6328410586343032, "percentage": 81.64, "elapsed_time": "8:10:25", "remaining_time": "1:50:18"} +{"current_steps": 5585, "total_steps": 6840, "loss": 0.5394873023033142, "lr": 1.7877353986876134e-06, "epoch": 1.6331334990495687, "percentage": 81.65, "elapsed_time": "8:10:32", "remaining_time": "1:50:13"} +{"current_steps": 5586, "total_steps": 6840, "loss": 0.6015416383743286, "lr": 
1.7849776631118198e-06, "epoch": 1.633425939464834, "percentage": 81.67, "elapsed_time": "8:10:39", "remaining_time": "1:50:08"} +{"current_steps": 5587, "total_steps": 6840, "loss": 0.3476119041442871, "lr": 1.7822218477475496e-06, "epoch": 1.6337183798800994, "percentage": 81.68, "elapsed_time": "8:10:45", "remaining_time": "1:50:03"} +{"current_steps": 5588, "total_steps": 6840, "loss": 0.43062901496887207, "lr": 1.7794679532389569e-06, "epoch": 1.634010820295365, "percentage": 81.7, "elapsed_time": "8:10:49", "remaining_time": "1:49:58"} +{"current_steps": 5589, "total_steps": 6840, "loss": 0.5267136096954346, "lr": 1.7767159802297497e-06, "epoch": 1.6343032607106303, "percentage": 81.71, "elapsed_time": "8:10:54", "remaining_time": "1:49:52"} +{"current_steps": 5590, "total_steps": 6840, "loss": 0.40477365255355835, "lr": 1.7739659293631828e-06, "epoch": 1.6345957011258956, "percentage": 81.73, "elapsed_time": "8:10:59", "remaining_time": "1:49:47"} +{"current_steps": 5591, "total_steps": 6840, "loss": 0.5166594386100769, "lr": 1.7712178012820657e-06, "epoch": 1.6348881415411611, "percentage": 81.74, "elapsed_time": "8:11:04", "remaining_time": "1:49:42"} +{"current_steps": 5592, "total_steps": 6840, "loss": 0.577332615852356, "lr": 1.768471596628757e-06, "epoch": 1.6351805819564262, "percentage": 81.75, "elapsed_time": "8:11:09", "remaining_time": "1:49:36"} +{"current_steps": 5593, "total_steps": 6840, "loss": 0.6265558004379272, "lr": 1.7657273160451626e-06, "epoch": 1.6354730223716918, "percentage": 81.77, "elapsed_time": "8:11:15", "remaining_time": "1:49:31"} +{"current_steps": 5594, "total_steps": 6840, "loss": 0.46483689546585083, "lr": 1.7629849601727422e-06, "epoch": 1.6357654627869571, "percentage": 81.78, "elapsed_time": "8:11:20", "remaining_time": "1:49:26"} +{"current_steps": 5595, "total_steps": 6840, "loss": 0.5217114090919495, "lr": 1.760244529652504e-06, "epoch": 1.6360579032022224, "percentage": 81.8, "elapsed_time": "8:11:25", 
"remaining_time": "1:49:21"} +{"current_steps": 5596, "total_steps": 6840, "loss": 0.40754032135009766, "lr": 1.7575060251250098e-06, "epoch": 1.636350343617488, "percentage": 81.81, "elapsed_time": "8:11:31", "remaining_time": "1:49:15"} +{"current_steps": 5597, "total_steps": 6840, "loss": 0.5153856873512268, "lr": 1.7547694472303677e-06, "epoch": 1.6366427840327533, "percentage": 81.83, "elapsed_time": "8:11:35", "remaining_time": "1:49:10"} +{"current_steps": 5598, "total_steps": 6840, "loss": 0.47374534606933594, "lr": 1.7520347966082352e-06, "epoch": 1.6369352244480186, "percentage": 81.84, "elapsed_time": "8:11:39", "remaining_time": "1:49:04"} +{"current_steps": 5599, "total_steps": 6840, "loss": 0.375232070684433, "lr": 1.7493020738978205e-06, "epoch": 1.6372276648632842, "percentage": 81.86, "elapsed_time": "8:11:44", "remaining_time": "1:48:59"} +{"current_steps": 5600, "total_steps": 6840, "loss": 0.5731218457221985, "lr": 1.746571279737884e-06, "epoch": 1.6375201052785495, "percentage": 81.87, "elapsed_time": "8:11:50", "remaining_time": "1:48:54"} +{"current_steps": 5601, "total_steps": 6840, "loss": 0.4908478260040283, "lr": 1.7438424147667267e-06, "epoch": 1.6378125456938148, "percentage": 81.89, "elapsed_time": "8:11:59", "remaining_time": "1:48:49"} +{"current_steps": 5602, "total_steps": 6840, "loss": 0.6114420890808105, "lr": 1.741115479622205e-06, "epoch": 1.6381049861090804, "percentage": 81.9, "elapsed_time": "8:12:03", "remaining_time": "1:48:44"} +{"current_steps": 5603, "total_steps": 6840, "loss": 0.6207842826843262, "lr": 1.738390474941727e-06, "epoch": 1.6383974265243455, "percentage": 81.92, "elapsed_time": "8:12:08", "remaining_time": "1:48:39"} +{"current_steps": 5604, "total_steps": 6840, "loss": 0.4745057225227356, "lr": 1.7356674013622431e-06, "epoch": 1.638689866939611, "percentage": 81.93, "elapsed_time": "8:12:12", "remaining_time": "1:48:33"} +{"current_steps": 5605, "total_steps": 6840, "loss": 0.5501791834831238, "lr": 
1.7329462595202573e-06, "epoch": 1.6389823073548766, "percentage": 81.94, "elapsed_time": "8:12:18", "remaining_time": "1:48:28"} +{"current_steps": 5606, "total_steps": 6840, "loss": 0.5497169494628906, "lr": 1.7302270500518181e-06, "epoch": 1.6392747477701417, "percentage": 81.96, "elapsed_time": "8:12:23", "remaining_time": "1:48:23"} +{"current_steps": 5607, "total_steps": 6840, "loss": 0.4439499080181122, "lr": 1.7275097735925239e-06, "epoch": 1.6395671881854073, "percentage": 81.97, "elapsed_time": "8:12:28", "remaining_time": "1:48:17"} +{"current_steps": 5608, "total_steps": 6840, "loss": 0.5869239568710327, "lr": 1.7247944307775245e-06, "epoch": 1.6398596286006726, "percentage": 81.99, "elapsed_time": "8:12:35", "remaining_time": "1:48:12"} +{"current_steps": 5609, "total_steps": 6840, "loss": 0.6979252099990845, "lr": 1.722081022241512e-06, "epoch": 1.640152069015938, "percentage": 82.0, "elapsed_time": "8:12:41", "remaining_time": "1:48:07"} +{"current_steps": 5610, "total_steps": 6840, "loss": 0.43291550874710083, "lr": 1.719369548618729e-06, "epoch": 1.6404445094312035, "percentage": 82.02, "elapsed_time": "8:12:48", "remaining_time": "1:48:02"} +{"current_steps": 5611, "total_steps": 6840, "loss": 0.5670255422592163, "lr": 1.7166600105429676e-06, "epoch": 1.6407369498464688, "percentage": 82.03, "elapsed_time": "8:12:52", "remaining_time": "1:47:57"} +{"current_steps": 5612, "total_steps": 6840, "loss": 0.5956759452819824, "lr": 1.7139524086475679e-06, "epoch": 1.6410293902617341, "percentage": 82.05, "elapsed_time": "8:12:57", "remaining_time": "1:47:52"} +{"current_steps": 5613, "total_steps": 6840, "loss": 0.624202311038971, "lr": 1.71124674356541e-06, "epoch": 1.6413218306769997, "percentage": 82.06, "elapsed_time": "8:13:02", "remaining_time": "1:47:46"} +{"current_steps": 5614, "total_steps": 6840, "loss": 0.5394845604896545, "lr": 1.7085430159289295e-06, "epoch": 1.641614271092265, "percentage": 82.08, "elapsed_time": "8:13:08", 
"remaining_time": "1:47:41"} +{"current_steps": 5615, "total_steps": 6840, "loss": 0.5320364236831665, "lr": 1.7058412263701063e-06, "epoch": 1.6419067115075303, "percentage": 82.09, "elapsed_time": "8:13:14", "remaining_time": "1:47:36"} +{"current_steps": 5616, "total_steps": 6840, "loss": 0.5889087915420532, "lr": 1.7031413755204673e-06, "epoch": 1.6421991519227959, "percentage": 82.11, "elapsed_time": "8:13:19", "remaining_time": "1:47:31"} +{"current_steps": 5617, "total_steps": 6840, "loss": 0.63529372215271, "lr": 1.7004434640110857e-06, "epoch": 1.642491592338061, "percentage": 82.12, "elapsed_time": "8:13:24", "remaining_time": "1:47:25"} +{"current_steps": 5618, "total_steps": 6840, "loss": 0.48696887493133545, "lr": 1.6977474924725823e-06, "epoch": 1.6427840327533265, "percentage": 82.13, "elapsed_time": "8:13:30", "remaining_time": "1:47:20"} +{"current_steps": 5619, "total_steps": 6840, "loss": 0.5998564958572388, "lr": 1.6950534615351234e-06, "epoch": 1.6430764731685918, "percentage": 82.15, "elapsed_time": "8:13:36", "remaining_time": "1:47:15"} +{"current_steps": 5620, "total_steps": 6840, "loss": 0.5256673693656921, "lr": 1.6923613718284237e-06, "epoch": 1.6433689135838572, "percentage": 82.16, "elapsed_time": "8:13:42", "remaining_time": "1:47:10"} +{"current_steps": 5621, "total_steps": 6840, "loss": 0.4609792232513428, "lr": 1.6896712239817425e-06, "epoch": 1.6436613539991227, "percentage": 82.18, "elapsed_time": "8:13:47", "remaining_time": "1:47:05"} +{"current_steps": 5622, "total_steps": 6840, "loss": 0.6816249489784241, "lr": 1.6869830186238846e-06, "epoch": 1.643953794414388, "percentage": 82.19, "elapsed_time": "8:13:53", "remaining_time": "1:47:00"} +{"current_steps": 5623, "total_steps": 6840, "loss": 0.4622993767261505, "lr": 1.6842967563832036e-06, "epoch": 1.6442462348296534, "percentage": 82.21, "elapsed_time": "8:13:59", "remaining_time": "1:46:54"} +{"current_steps": 5624, "total_steps": 6840, "loss": 0.5089092254638672, "lr": 
1.6816124378875942e-06, "epoch": 1.644538675244919, "percentage": 82.22, "elapsed_time": "8:14:04", "remaining_time": "1:46:49"} +{"current_steps": 5625, "total_steps": 6840, "loss": 0.49178463220596313, "lr": 1.6789300637645e-06, "epoch": 1.6448311156601843, "percentage": 82.24, "elapsed_time": "8:14:11", "remaining_time": "1:46:44"} +{"current_steps": 5626, "total_steps": 6840, "loss": 0.4943847358226776, "lr": 1.676249634640912e-06, "epoch": 1.6451235560754496, "percentage": 82.25, "elapsed_time": "8:14:15", "remaining_time": "1:46:39"} +{"current_steps": 5627, "total_steps": 6840, "loss": 0.38509243726730347, "lr": 1.6735711511433606e-06, "epoch": 1.6454159964907151, "percentage": 82.27, "elapsed_time": "8:14:20", "remaining_time": "1:46:33"} +{"current_steps": 5628, "total_steps": 6840, "loss": 0.4765651822090149, "lr": 1.6708946138979288e-06, "epoch": 1.6457084369059805, "percentage": 82.28, "elapsed_time": "8:14:24", "remaining_time": "1:46:28"} +{"current_steps": 5629, "total_steps": 6840, "loss": 0.5667406916618347, "lr": 1.6682200235302383e-06, "epoch": 1.6460008773212458, "percentage": 82.3, "elapsed_time": "8:14:29", "remaining_time": "1:46:22"} +{"current_steps": 5630, "total_steps": 6840, "loss": 0.702905535697937, "lr": 1.66554738066546e-06, "epoch": 1.6462933177365113, "percentage": 82.31, "elapsed_time": "8:14:33", "remaining_time": "1:46:17"} +{"current_steps": 5631, "total_steps": 6840, "loss": 0.5056663155555725, "lr": 1.6628766859283064e-06, "epoch": 1.6465857581517764, "percentage": 82.32, "elapsed_time": "8:14:38", "remaining_time": "1:46:12"} +{"current_steps": 5632, "total_steps": 6840, "loss": 0.3949700593948364, "lr": 1.660207939943037e-06, "epoch": 1.646878198567042, "percentage": 82.34, "elapsed_time": "8:14:44", "remaining_time": "1:46:06"} +{"current_steps": 5633, "total_steps": 6840, "loss": 0.5562522411346436, "lr": 1.6575411433334553e-06, "epoch": 1.6471706389823073, "percentage": 82.35, "elapsed_time": "8:14:48", "remaining_time": 
"1:46:01"} +{"current_steps": 5634, "total_steps": 6840, "loss": 0.5046012997627258, "lr": 1.6548762967229104e-06, "epoch": 1.6474630793975726, "percentage": 82.37, "elapsed_time": "8:14:53", "remaining_time": "1:45:56"} +{"current_steps": 5635, "total_steps": 6840, "loss": 0.510586678981781, "lr": 1.6522134007342894e-06, "epoch": 1.6477555198128382, "percentage": 82.38, "elapsed_time": "8:14:57", "remaining_time": "1:45:50"} +{"current_steps": 5636, "total_steps": 6840, "loss": 0.5587502717971802, "lr": 1.649552455990031e-06, "epoch": 1.6480479602281035, "percentage": 82.4, "elapsed_time": "8:15:03", "remaining_time": "1:45:45"} +{"current_steps": 5637, "total_steps": 6840, "loss": 0.4774302840232849, "lr": 1.6468934631121147e-06, "epoch": 1.6483404006433688, "percentage": 82.41, "elapsed_time": "8:15:09", "remaining_time": "1:45:40"} +{"current_steps": 5638, "total_steps": 6840, "loss": 0.5969966650009155, "lr": 1.644236422722063e-06, "epoch": 1.6486328410586344, "percentage": 82.43, "elapsed_time": "8:15:14", "remaining_time": "1:45:35"} +{"current_steps": 5639, "total_steps": 6840, "loss": 0.6344267129898071, "lr": 1.6415813354409438e-06, "epoch": 1.6489252814738997, "percentage": 82.44, "elapsed_time": "8:15:18", "remaining_time": "1:45:29"} +{"current_steps": 5640, "total_steps": 6840, "loss": 0.5252394676208496, "lr": 1.638928201889367e-06, "epoch": 1.649217721889165, "percentage": 82.46, "elapsed_time": "8:15:23", "remaining_time": "1:45:24"} +{"current_steps": 5641, "total_steps": 6840, "loss": 0.5092496871948242, "lr": 1.636277022687488e-06, "epoch": 1.6495101623044306, "percentage": 82.47, "elapsed_time": "8:15:28", "remaining_time": "1:45:18"} +{"current_steps": 5642, "total_steps": 6840, "loss": 0.6530938148498535, "lr": 1.633627798455002e-06, "epoch": 1.6498026027196957, "percentage": 82.49, "elapsed_time": "8:15:34", "remaining_time": "1:45:13"} +{"current_steps": 5643, "total_steps": 6840, "loss": 0.5152128338813782, "lr": 1.6309805298111492e-06, 
"epoch": 1.6500950431349612, "percentage": 82.5, "elapsed_time": "8:15:38", "remaining_time": "1:45:08"} +{"current_steps": 5644, "total_steps": 6840, "loss": 0.4670040011405945, "lr": 1.6283352173747148e-06, "epoch": 1.6503874835502268, "percentage": 82.51, "elapsed_time": "8:15:43", "remaining_time": "1:45:02"} +{"current_steps": 5645, "total_steps": 6840, "loss": 0.47373896837234497, "lr": 1.625691861764024e-06, "epoch": 1.650679923965492, "percentage": 82.53, "elapsed_time": "8:15:47", "remaining_time": "1:44:57"} +{"current_steps": 5646, "total_steps": 6840, "loss": 0.44277217984199524, "lr": 1.6230504635969413e-06, "epoch": 1.6509723643807575, "percentage": 82.54, "elapsed_time": "8:15:52", "remaining_time": "1:44:52"} +{"current_steps": 5647, "total_steps": 6840, "loss": 0.6000313758850098, "lr": 1.6204110234908798e-06, "epoch": 1.6512648047960228, "percentage": 82.56, "elapsed_time": "8:15:58", "remaining_time": "1:44:46"} +{"current_steps": 5648, "total_steps": 6840, "loss": 0.5950880646705627, "lr": 1.6177735420627939e-06, "epoch": 1.651557245211288, "percentage": 82.57, "elapsed_time": "8:16:03", "remaining_time": "1:44:41"} +{"current_steps": 5649, "total_steps": 6840, "loss": 0.6308536529541016, "lr": 1.6151380199291767e-06, "epoch": 1.6518496856265537, "percentage": 82.59, "elapsed_time": "8:16:09", "remaining_time": "1:44:36"} +{"current_steps": 5650, "total_steps": 6840, "loss": 0.619708240032196, "lr": 1.6125044577060667e-06, "epoch": 1.652142126041819, "percentage": 82.6, "elapsed_time": "8:16:15", "remaining_time": "1:44:31"} +{"current_steps": 5651, "total_steps": 6840, "loss": 0.4912105202674866, "lr": 1.6098728560090438e-06, "epoch": 1.6524345664570843, "percentage": 82.62, "elapsed_time": "8:16:21", "remaining_time": "1:44:26"} +{"current_steps": 5652, "total_steps": 6840, "loss": 0.5078046917915344, "lr": 1.607243215453227e-06, "epoch": 1.6527270068723499, "percentage": 82.63, "elapsed_time": "8:16:28", "remaining_time": "1:44:21"} 
+{"current_steps": 5653, "total_steps": 6840, "loss": 0.4845188856124878, "lr": 1.60461553665328e-06, "epoch": 1.6530194472876152, "percentage": 82.65, "elapsed_time": "8:16:33", "remaining_time": "1:44:15"} +{"current_steps": 5654, "total_steps": 6840, "loss": 0.6323055028915405, "lr": 1.6019898202234075e-06, "epoch": 1.6533118877028805, "percentage": 82.66, "elapsed_time": "8:16:38", "remaining_time": "1:44:10"} +{"current_steps": 5655, "total_steps": 6840, "loss": 0.5700039863586426, "lr": 1.5993660667773524e-06, "epoch": 1.653604328118146, "percentage": 82.68, "elapsed_time": "8:16:42", "remaining_time": "1:44:05"} +{"current_steps": 5656, "total_steps": 6840, "loss": 0.5415322780609131, "lr": 1.596744276928406e-06, "epoch": 1.6538967685334112, "percentage": 82.69, "elapsed_time": "8:16:47", "remaining_time": "1:43:59"} +{"current_steps": 5657, "total_steps": 6840, "loss": 0.47339457273483276, "lr": 1.5941244512893894e-06, "epoch": 1.6541892089486767, "percentage": 82.7, "elapsed_time": "8:16:52", "remaining_time": "1:43:54"} +{"current_steps": 5658, "total_steps": 6840, "loss": 0.5391967296600342, "lr": 1.5915065904726735e-06, "epoch": 1.654481649363942, "percentage": 82.72, "elapsed_time": "8:16:57", "remaining_time": "1:43:49"} +{"current_steps": 5659, "total_steps": 6840, "loss": 0.4832335114479065, "lr": 1.5888906950901683e-06, "epoch": 1.6547740897792074, "percentage": 82.73, "elapsed_time": "8:17:04", "remaining_time": "1:43:44"} +{"current_steps": 5660, "total_steps": 6840, "loss": 0.4539526104927063, "lr": 1.5862767657533217e-06, "epoch": 1.655066530194473, "percentage": 82.75, "elapsed_time": "8:17:09", "remaining_time": "1:43:38"} +{"current_steps": 5661, "total_steps": 6840, "loss": 0.5261383056640625, "lr": 1.583664803073125e-06, "epoch": 1.6553589706097382, "percentage": 82.76, "elapsed_time": "8:17:14", "remaining_time": "1:43:33"} +{"current_steps": 5662, "total_steps": 6840, "loss": 0.44060665369033813, "lr": 1.5810548076601096e-06, "epoch": 
1.6556514110250036, "percentage": 82.78, "elapsed_time": "8:17:21", "remaining_time": "1:43:28"} +{"current_steps": 5663, "total_steps": 6840, "loss": 0.4202715754508972, "lr": 1.578446780124344e-06, "epoch": 1.6559438514402691, "percentage": 82.79, "elapsed_time": "8:17:26", "remaining_time": "1:43:23"} +{"current_steps": 5664, "total_steps": 6840, "loss": 0.4736124873161316, "lr": 1.57584072107544e-06, "epoch": 1.6562362918555344, "percentage": 82.81, "elapsed_time": "8:17:31", "remaining_time": "1:43:17"} +{"current_steps": 5665, "total_steps": 6840, "loss": 0.46696585416793823, "lr": 1.5732366311225466e-06, "epoch": 1.6565287322707998, "percentage": 82.82, "elapsed_time": "8:17:35", "remaining_time": "1:43:12"} +{"current_steps": 5666, "total_steps": 6840, "loss": 0.566236138343811, "lr": 1.570634510874356e-06, "epoch": 1.6568211726860653, "percentage": 82.84, "elapsed_time": "8:17:40", "remaining_time": "1:43:07"} +{"current_steps": 5667, "total_steps": 6840, "loss": 0.5486587882041931, "lr": 1.568034360939098e-06, "epoch": 1.6571136131013307, "percentage": 82.85, "elapsed_time": "8:17:45", "remaining_time": "1:43:01"} +{"current_steps": 5668, "total_steps": 6840, "loss": 0.49735748767852783, "lr": 1.5654361819245423e-06, "epoch": 1.657406053516596, "percentage": 82.87, "elapsed_time": "8:17:50", "remaining_time": "1:42:56"} +{"current_steps": 5669, "total_steps": 6840, "loss": 0.6306062340736389, "lr": 1.562839974437993e-06, "epoch": 1.6576984939318615, "percentage": 82.88, "elapsed_time": "8:17:56", "remaining_time": "1:42:51"} +{"current_steps": 5670, "total_steps": 6840, "loss": 0.5062750577926636, "lr": 1.5602457390863e-06, "epoch": 1.6579909343471266, "percentage": 82.89, "elapsed_time": "8:18:02", "remaining_time": "1:42:46"} +{"current_steps": 5671, "total_steps": 6840, "loss": 0.5037271976470947, "lr": 1.5576534764758522e-06, "epoch": 1.6582833747623922, "percentage": 82.91, "elapsed_time": "8:18:09", "remaining_time": "1:42:41"} +{"current_steps": 
5672, "total_steps": 6840, "loss": 0.5749099254608154, "lr": 1.5550631872125743e-06, "epoch": 1.6585758151776575, "percentage": 82.92, "elapsed_time": "8:18:15", "remaining_time": "1:42:36"} +{"current_steps": 5673, "total_steps": 6840, "loss": 0.5241814255714417, "lr": 1.5524748719019312e-06, "epoch": 1.6588682555929228, "percentage": 82.94, "elapsed_time": "8:18:20", "remaining_time": "1:42:30"} +{"current_steps": 5674, "total_steps": 6840, "loss": 0.5410301685333252, "lr": 1.5498885311489243e-06, "epoch": 1.6591606960081884, "percentage": 82.95, "elapsed_time": "8:18:24", "remaining_time": "1:42:25"} +{"current_steps": 5675, "total_steps": 6840, "loss": 0.6363968253135681, "lr": 1.5473041655580956e-06, "epoch": 1.6594531364234537, "percentage": 82.97, "elapsed_time": "8:18:29", "remaining_time": "1:42:20"} +{"current_steps": 5676, "total_steps": 6840, "loss": 0.5476758480072021, "lr": 1.5447217757335264e-06, "epoch": 1.659745576838719, "percentage": 82.98, "elapsed_time": "8:18:34", "remaining_time": "1:42:14"} +{"current_steps": 5677, "total_steps": 6840, "loss": 0.5375553369522095, "lr": 1.5421413622788328e-06, "epoch": 1.6600380172539846, "percentage": 83.0, "elapsed_time": "8:18:40", "remaining_time": "1:42:09"} +{"current_steps": 5678, "total_steps": 6840, "loss": 0.6227232217788696, "lr": 1.53956292579717e-06, "epoch": 1.66033045766925, "percentage": 83.01, "elapsed_time": "8:18:45", "remaining_time": "1:42:04"} +{"current_steps": 5679, "total_steps": 6840, "loss": 0.5981070399284363, "lr": 1.5369864668912327e-06, "epoch": 1.6606228980845152, "percentage": 83.03, "elapsed_time": "8:18:49", "remaining_time": "1:41:58"} +{"current_steps": 5680, "total_steps": 6840, "loss": 0.5535466074943542, "lr": 1.5344119861632535e-06, "epoch": 1.6609153384997808, "percentage": 83.04, "elapsed_time": "8:18:55", "remaining_time": "1:41:53"} +{"current_steps": 5681, "total_steps": 6840, "loss": 0.5562780499458313, "lr": 1.5318394842150009e-06, "epoch": 1.661207778915046, 
"percentage": 83.06, "elapsed_time": "8:19:00", "remaining_time": "1:41:48"} +{"current_steps": 5682, "total_steps": 6840, "loss": 0.5937552452087402, "lr": 1.5292689616477808e-06, "epoch": 1.6615002193303114, "percentage": 83.07, "elapsed_time": "8:19:05", "remaining_time": "1:41:42"} +{"current_steps": 5683, "total_steps": 6840, "loss": 0.5154576301574707, "lr": 1.526700419062439e-06, "epoch": 1.661792659745577, "percentage": 83.08, "elapsed_time": "8:19:10", "remaining_time": "1:41:37"} +{"current_steps": 5684, "total_steps": 6840, "loss": 0.6887973546981812, "lr": 1.5241338570593557e-06, "epoch": 1.662085100160842, "percentage": 83.1, "elapsed_time": "8:19:17", "remaining_time": "1:41:32"} +{"current_steps": 5685, "total_steps": 6840, "loss": 0.5365385413169861, "lr": 1.5215692762384481e-06, "epoch": 1.6623775405761076, "percentage": 83.11, "elapsed_time": "8:19:22", "remaining_time": "1:41:27"} +{"current_steps": 5686, "total_steps": 6840, "loss": 0.5906165838241577, "lr": 1.519006677199173e-06, "epoch": 1.662669980991373, "percentage": 83.13, "elapsed_time": "8:19:27", "remaining_time": "1:41:22"} +{"current_steps": 5687, "total_steps": 6840, "loss": 0.5752634406089783, "lr": 1.5164460605405252e-06, "epoch": 1.6629624214066383, "percentage": 83.14, "elapsed_time": "8:19:33", "remaining_time": "1:41:16"} +{"current_steps": 5688, "total_steps": 6840, "loss": 0.6265667676925659, "lr": 1.5138874268610259e-06, "epoch": 1.6632548618219039, "percentage": 83.16, "elapsed_time": "8:19:38", "remaining_time": "1:41:11"} +{"current_steps": 5689, "total_steps": 6840, "loss": 0.5032769441604614, "lr": 1.5113307767587449e-06, "epoch": 1.6635473022371692, "percentage": 83.17, "elapsed_time": "8:19:43", "remaining_time": "1:41:06"} +{"current_steps": 5690, "total_steps": 6840, "loss": 0.5740037560462952, "lr": 1.5087761108312837e-06, "epoch": 1.6638397426524345, "percentage": 83.19, "elapsed_time": "8:19:48", "remaining_time": "1:41:00"} +{"current_steps": 5691, 
"total_steps": 6840, "loss": 0.5745523571968079, "lr": 1.5062234296757782e-06, "epoch": 1.6641321830677, "percentage": 83.2, "elapsed_time": "8:19:54", "remaining_time": "1:40:55"} +{"current_steps": 5692, "total_steps": 6840, "loss": 0.4448510408401489, "lr": 1.5036727338889035e-06, "epoch": 1.6644246234829654, "percentage": 83.22, "elapsed_time": "8:19:59", "remaining_time": "1:40:50"} +{"current_steps": 5693, "total_steps": 6840, "loss": 0.5142196416854858, "lr": 1.5011240240668678e-06, "epoch": 1.6647170638982307, "percentage": 83.23, "elapsed_time": "8:20:03", "remaining_time": "1:40:44"} +{"current_steps": 5694, "total_steps": 6840, "loss": 0.3317479193210602, "lr": 1.4985773008054184e-06, "epoch": 1.6650095043134963, "percentage": 83.25, "elapsed_time": "8:20:08", "remaining_time": "1:40:39"} +{"current_steps": 5695, "total_steps": 6840, "loss": 0.5721619129180908, "lr": 1.4960325646998353e-06, "epoch": 1.6653019447287614, "percentage": 83.26, "elapsed_time": "8:20:14", "remaining_time": "1:40:34"} +{"current_steps": 5696, "total_steps": 6840, "loss": 0.4937021732330322, "lr": 1.4934898163449341e-06, "epoch": 1.665594385144027, "percentage": 83.27, "elapsed_time": "8:20:20", "remaining_time": "1:40:29"} +{"current_steps": 5697, "total_steps": 6840, "loss": 0.6414870023727417, "lr": 1.4909490563350694e-06, "epoch": 1.6658868255592922, "percentage": 83.29, "elapsed_time": "8:20:24", "remaining_time": "1:40:23"} +{"current_steps": 5698, "total_steps": 6840, "loss": 0.6265281438827515, "lr": 1.4884102852641258e-06, "epoch": 1.6661792659745576, "percentage": 83.3, "elapsed_time": "8:20:30", "remaining_time": "1:40:18"} +{"current_steps": 5699, "total_steps": 6840, "loss": 0.5908917784690857, "lr": 1.48587350372553e-06, "epoch": 1.6664717063898231, "percentage": 83.32, "elapsed_time": "8:20:36", "remaining_time": "1:40:13"} +{"current_steps": 5700, "total_steps": 6840, "loss": 0.6098382472991943, "lr": 1.4833387123122334e-06, "epoch": 1.6667641468050884, 
"percentage": 83.33, "elapsed_time": "8:20:41", "remaining_time": "1:40:08"} +{"current_steps": 5701, "total_steps": 6840, "loss": 0.5106536746025085, "lr": 1.4808059116167306e-06, "epoch": 1.6670565872203538, "percentage": 83.35, "elapsed_time": "8:20:50", "remaining_time": "1:40:03"} +{"current_steps": 5702, "total_steps": 6840, "loss": 0.5548620820045471, "lr": 1.4782751022310481e-06, "epoch": 1.6673490276356193, "percentage": 83.36, "elapsed_time": "8:20:55", "remaining_time": "1:39:58"} +{"current_steps": 5703, "total_steps": 6840, "loss": 0.4596245288848877, "lr": 1.4757462847467475e-06, "epoch": 1.6676414680508846, "percentage": 83.38, "elapsed_time": "8:21:01", "remaining_time": "1:39:53"} +{"current_steps": 5704, "total_steps": 6840, "loss": 0.6000612378120422, "lr": 1.4732194597549244e-06, "epoch": 1.66793390846615, "percentage": 83.39, "elapsed_time": "8:21:05", "remaining_time": "1:39:47"} +{"current_steps": 5705, "total_steps": 6840, "loss": 0.5522277355194092, "lr": 1.4706946278462097e-06, "epoch": 1.6682263488814155, "percentage": 83.41, "elapsed_time": "8:21:12", "remaining_time": "1:39:42"} +{"current_steps": 5706, "total_steps": 6840, "loss": 0.4765724837779999, "lr": 1.468171789610766e-06, "epoch": 1.6685187892966808, "percentage": 83.42, "elapsed_time": "8:21:17", "remaining_time": "1:39:37"} +{"current_steps": 5707, "total_steps": 6840, "loss": 0.564188539981842, "lr": 1.4656509456382927e-06, "epoch": 1.6688112297119462, "percentage": 83.44, "elapsed_time": "8:21:22", "remaining_time": "1:39:32"} +{"current_steps": 5708, "total_steps": 6840, "loss": 0.4910390377044678, "lr": 1.4631320965180208e-06, "epoch": 1.6691036701272117, "percentage": 83.45, "elapsed_time": "8:21:26", "remaining_time": "1:39:26"} +{"current_steps": 5709, "total_steps": 6840, "loss": 0.5992041826248169, "lr": 1.4606152428387166e-06, "epoch": 1.6693961105424768, "percentage": 83.46, "elapsed_time": "8:21:31", "remaining_time": "1:39:21"} +{"current_steps": 5710, 
"total_steps": 6840, "loss": 0.4873291850090027, "lr": 1.4581003851886811e-06, "epoch": 1.6696885509577424, "percentage": 83.48, "elapsed_time": "8:21:36", "remaining_time": "1:39:16"} +{"current_steps": 5711, "total_steps": 6840, "loss": 0.6487013101577759, "lr": 1.4555875241557426e-06, "epoch": 1.6699809913730077, "percentage": 83.49, "elapsed_time": "8:21:40", "remaining_time": "1:39:10"} +{"current_steps": 5712, "total_steps": 6840, "loss": 0.4624609351158142, "lr": 1.4530766603272695e-06, "epoch": 1.670273431788273, "percentage": 83.51, "elapsed_time": "8:21:46", "remaining_time": "1:39:05"} +{"current_steps": 5713, "total_steps": 6840, "loss": 0.5765592455863953, "lr": 1.4505677942901609e-06, "epoch": 1.6705658722035386, "percentage": 83.52, "elapsed_time": "8:21:52", "remaining_time": "1:39:00"} +{"current_steps": 5714, "total_steps": 6840, "loss": 0.6730339527130127, "lr": 1.4480609266308488e-06, "epoch": 1.670858312618804, "percentage": 83.54, "elapsed_time": "8:21:57", "remaining_time": "1:38:54"} +{"current_steps": 5715, "total_steps": 6840, "loss": 0.6381770372390747, "lr": 1.445556057935299e-06, "epoch": 1.6711507530340692, "percentage": 83.55, "elapsed_time": "8:22:03", "remaining_time": "1:38:49"} +{"current_steps": 5716, "total_steps": 6840, "loss": 0.6236029863357544, "lr": 1.4430531887890076e-06, "epoch": 1.6714431934493348, "percentage": 83.57, "elapsed_time": "8:22:09", "remaining_time": "1:38:44"} +{"current_steps": 5717, "total_steps": 6840, "loss": 0.521639347076416, "lr": 1.4405523197770076e-06, "epoch": 1.6717356338646001, "percentage": 83.58, "elapsed_time": "8:22:15", "remaining_time": "1:38:39"} +{"current_steps": 5718, "total_steps": 6840, "loss": 0.5912468433380127, "lr": 1.4380534514838596e-06, "epoch": 1.6720280742798654, "percentage": 83.6, "elapsed_time": "8:22:21", "remaining_time": "1:38:34"} +{"current_steps": 5719, "total_steps": 6840, "loss": 0.5533329248428345, "lr": 1.4355565844936602e-06, "epoch": 1.672320514695131, 
"percentage": 83.61, "elapsed_time": "8:22:26", "remaining_time": "1:38:29"} +{"current_steps": 5720, "total_steps": 6840, "loss": 0.5901006460189819, "lr": 1.4330617193900365e-06, "epoch": 1.672612955110396, "percentage": 83.63, "elapsed_time": "8:22:31", "remaining_time": "1:38:23"} +{"current_steps": 5721, "total_steps": 6840, "loss": 0.5083344578742981, "lr": 1.4305688567561503e-06, "epoch": 1.6729053955256616, "percentage": 83.64, "elapsed_time": "8:22:36", "remaining_time": "1:38:18"} +{"current_steps": 5722, "total_steps": 6840, "loss": 0.4443317651748657, "lr": 1.4280779971746894e-06, "epoch": 1.6731978359409272, "percentage": 83.65, "elapsed_time": "8:22:42", "remaining_time": "1:38:13"} +{"current_steps": 5723, "total_steps": 6840, "loss": 0.6355078220367432, "lr": 1.4255891412278778e-06, "epoch": 1.6734902763561923, "percentage": 83.67, "elapsed_time": "8:22:48", "remaining_time": "1:38:08"} +{"current_steps": 5724, "total_steps": 6840, "loss": 0.5403381586074829, "lr": 1.423102289497471e-06, "epoch": 1.6737827167714578, "percentage": 83.68, "elapsed_time": "8:22:53", "remaining_time": "1:38:02"} +{"current_steps": 5725, "total_steps": 6840, "loss": 0.5272151231765747, "lr": 1.4206174425647556e-06, "epoch": 1.6740751571867232, "percentage": 83.7, "elapsed_time": "8:22:59", "remaining_time": "1:37:57"} +{"current_steps": 5726, "total_steps": 6840, "loss": 0.6750346422195435, "lr": 1.41813460101055e-06, "epoch": 1.6743675976019885, "percentage": 83.71, "elapsed_time": "8:23:03", "remaining_time": "1:37:52"} +{"current_steps": 5727, "total_steps": 6840, "loss": 0.552655816078186, "lr": 1.4156537654152026e-06, "epoch": 1.674660038017254, "percentage": 83.73, "elapsed_time": "8:23:07", "remaining_time": "1:37:46"} +{"current_steps": 5728, "total_steps": 6840, "loss": 0.3947732448577881, "lr": 1.4131749363585933e-06, "epoch": 1.6749524784325194, "percentage": 83.74, "elapsed_time": "8:23:12", "remaining_time": "1:37:41"} +{"current_steps": 5729, "total_steps": 
6840, "loss": 0.4910270571708679, "lr": 1.4106981144201337e-06, "epoch": 1.6752449188477847, "percentage": 83.76, "elapsed_time": "8:23:18", "remaining_time": "1:37:36"} +{"current_steps": 5730, "total_steps": 6840, "loss": 0.5717943906784058, "lr": 1.408223300178767e-06, "epoch": 1.6755373592630503, "percentage": 83.77, "elapsed_time": "8:23:23", "remaining_time": "1:37:30"} +{"current_steps": 5731, "total_steps": 6840, "loss": 0.4993055462837219, "lr": 1.4057504942129652e-06, "epoch": 1.6758297996783156, "percentage": 83.79, "elapsed_time": "8:23:29", "remaining_time": "1:37:25"} +{"current_steps": 5732, "total_steps": 6840, "loss": 0.4772619605064392, "lr": 1.4032796971007322e-06, "epoch": 1.676122240093581, "percentage": 83.8, "elapsed_time": "8:23:35", "remaining_time": "1:37:20"} +{"current_steps": 5733, "total_steps": 6840, "loss": 0.3824518322944641, "lr": 1.400810909419601e-06, "epoch": 1.6764146805088465, "percentage": 83.82, "elapsed_time": "8:23:41", "remaining_time": "1:37:15"} +{"current_steps": 5734, "total_steps": 6840, "loss": 0.5302368402481079, "lr": 1.398344131746634e-06, "epoch": 1.6767071209241116, "percentage": 83.83, "elapsed_time": "8:23:45", "remaining_time": "1:37:10"} +{"current_steps": 5735, "total_steps": 6840, "loss": 0.5776697397232056, "lr": 1.3958793646584279e-06, "epoch": 1.676999561339377, "percentage": 83.85, "elapsed_time": "8:23:51", "remaining_time": "1:37:04"} +{"current_steps": 5736, "total_steps": 6840, "loss": 0.53890061378479, "lr": 1.3934166087311063e-06, "epoch": 1.6772920017546424, "percentage": 83.86, "elapsed_time": "8:23:55", "remaining_time": "1:36:59"} +{"current_steps": 5737, "total_steps": 6840, "loss": 0.47210827469825745, "lr": 1.3909558645403243e-06, "epoch": 1.6775844421699078, "percentage": 83.87, "elapsed_time": "8:24:01", "remaining_time": "1:36:54"} +{"current_steps": 5738, "total_steps": 6840, "loss": 0.6020913124084473, "lr": 1.388497132661264e-06, "epoch": 1.6778768825851733, "percentage": 83.89, 
"elapsed_time": "8:24:07", "remaining_time": "1:36:49"} +{"current_steps": 5739, "total_steps": 6840, "loss": 0.4244590997695923, "lr": 1.3860404136686411e-06, "epoch": 1.6781693230004386, "percentage": 83.9, "elapsed_time": "8:24:12", "remaining_time": "1:36:43"} +{"current_steps": 5740, "total_steps": 6840, "loss": 0.5969624519348145, "lr": 1.3835857081366965e-06, "epoch": 1.678461763415704, "percentage": 83.92, "elapsed_time": "8:24:17", "remaining_time": "1:36:38"} +{"current_steps": 5741, "total_steps": 6840, "loss": 0.6573030352592468, "lr": 1.3811330166392057e-06, "epoch": 1.6787542038309695, "percentage": 83.93, "elapsed_time": "8:24:22", "remaining_time": "1:36:33"} +{"current_steps": 5742, "total_steps": 6840, "loss": 0.4251132905483246, "lr": 1.3786823397494675e-06, "epoch": 1.6790466442462348, "percentage": 83.95, "elapsed_time": "8:24:27", "remaining_time": "1:36:27"} +{"current_steps": 5743, "total_steps": 6840, "loss": 0.5555700659751892, "lr": 1.3762336780403163e-06, "epoch": 1.6793390846615002, "percentage": 83.96, "elapsed_time": "8:24:34", "remaining_time": "1:36:22"} +{"current_steps": 5744, "total_steps": 6840, "loss": 0.5651364326477051, "lr": 1.3737870320841073e-06, "epoch": 1.6796315250767657, "percentage": 83.98, "elapsed_time": "8:24:38", "remaining_time": "1:36:17"} +{"current_steps": 5745, "total_steps": 6840, "loss": 0.6283698678016663, "lr": 1.371342402452731e-06, "epoch": 1.679923965492031, "percentage": 83.99, "elapsed_time": "8:24:43", "remaining_time": "1:36:12"} +{"current_steps": 5746, "total_steps": 6840, "loss": 0.47864413261413574, "lr": 1.3688997897176037e-06, "epoch": 1.6802164059072964, "percentage": 84.01, "elapsed_time": "8:24:49", "remaining_time": "1:36:06"} +{"current_steps": 5747, "total_steps": 6840, "loss": 0.6254131197929382, "lr": 1.366459194449674e-06, "epoch": 1.680508846322562, "percentage": 84.02, "elapsed_time": "8:24:55", "remaining_time": "1:36:01"} +{"current_steps": 5748, "total_steps": 6840, "loss": 
0.35147637128829956, "lr": 1.364020617219415e-06, "epoch": 1.680801286737827, "percentage": 84.04, "elapsed_time": "8:25:01", "remaining_time": "1:35:56"} +{"current_steps": 5749, "total_steps": 6840, "loss": 0.6126410961151123, "lr": 1.3615840585968287e-06, "epoch": 1.6810937271530926, "percentage": 84.05, "elapsed_time": "8:25:05", "remaining_time": "1:35:51"} +{"current_steps": 5750, "total_steps": 6840, "loss": 0.5807974338531494, "lr": 1.359149519151447e-06, "epoch": 1.681386167568358, "percentage": 84.06, "elapsed_time": "8:25:09", "remaining_time": "1:35:45"} +{"current_steps": 5751, "total_steps": 6840, "loss": 0.5033349990844727, "lr": 1.3567169994523277e-06, "epoch": 1.6816786079836232, "percentage": 84.08, "elapsed_time": "8:25:15", "remaining_time": "1:35:40"} +{"current_steps": 5752, "total_steps": 6840, "loss": 0.47656023502349854, "lr": 1.3542865000680604e-06, "epoch": 1.6819710483988888, "percentage": 84.09, "elapsed_time": "8:25:22", "remaining_time": "1:35:35"} +{"current_steps": 5753, "total_steps": 6840, "loss": 0.5137293338775635, "lr": 1.3518580215667542e-06, "epoch": 1.682263488814154, "percentage": 84.11, "elapsed_time": "8:25:28", "remaining_time": "1:35:30"} +{"current_steps": 5754, "total_steps": 6840, "loss": 0.4636800289154053, "lr": 1.3494315645160539e-06, "epoch": 1.6825559292294194, "percentage": 84.12, "elapsed_time": "8:25:33", "remaining_time": "1:35:25"} +{"current_steps": 5755, "total_steps": 6840, "loss": 0.5825523138046265, "lr": 1.3470071294831289e-06, "epoch": 1.682848369644685, "percentage": 84.14, "elapsed_time": "8:25:37", "remaining_time": "1:35:19"} +{"current_steps": 5756, "total_steps": 6840, "loss": 0.49282288551330566, "lr": 1.344584717034677e-06, "epoch": 1.6831408100599503, "percentage": 84.15, "elapsed_time": "8:25:42", "remaining_time": "1:35:14"} +{"current_steps": 5757, "total_steps": 6840, "loss": 0.5551935434341431, "lr": 1.3421643277369211e-06, "epoch": 1.6834332504752156, "percentage": 84.17, 
"elapsed_time": "8:25:46", "remaining_time": "1:35:08"} +{"current_steps": 5758, "total_steps": 6840, "loss": 0.6423832178115845, "lr": 1.339745962155613e-06, "epoch": 1.6837256908904812, "percentage": 84.18, "elapsed_time": "8:25:52", "remaining_time": "1:35:03"} +{"current_steps": 5759, "total_steps": 6840, "loss": 0.6178075671195984, "lr": 1.3373296208560316e-06, "epoch": 1.6840181313057463, "percentage": 84.2, "elapsed_time": "8:25:57", "remaining_time": "1:34:58"} +{"current_steps": 5760, "total_steps": 6840, "loss": 0.6781176328659058, "lr": 1.3349153044029816e-06, "epoch": 1.6843105717210118, "percentage": 84.21, "elapsed_time": "8:26:02", "remaining_time": "1:34:53"} +{"current_steps": 5761, "total_steps": 6840, "loss": 0.6511910557746887, "lr": 1.332503013360794e-06, "epoch": 1.6846030121362774, "percentage": 84.23, "elapsed_time": "8:26:07", "remaining_time": "1:34:47"} +{"current_steps": 5762, "total_steps": 6840, "loss": 0.4980696141719818, "lr": 1.3300927482933279e-06, "epoch": 1.6848954525515425, "percentage": 84.24, "elapsed_time": "8:26:11", "remaining_time": "1:34:42"} +{"current_steps": 5763, "total_steps": 6840, "loss": 0.49176928400993347, "lr": 1.3276845097639702e-06, "epoch": 1.685187892966808, "percentage": 84.25, "elapsed_time": "8:26:16", "remaining_time": "1:34:36"} +{"current_steps": 5764, "total_steps": 6840, "loss": 0.5198799967765808, "lr": 1.3252782983356272e-06, "epoch": 1.6854803333820734, "percentage": 84.27, "elapsed_time": "8:26:20", "remaining_time": "1:34:31"} +{"current_steps": 5765, "total_steps": 6840, "loss": 0.5058869123458862, "lr": 1.322874114570739e-06, "epoch": 1.6857727737973387, "percentage": 84.28, "elapsed_time": "8:26:25", "remaining_time": "1:34:26"} +{"current_steps": 5766, "total_steps": 6840, "loss": 0.46573105454444885, "lr": 1.3204719590312698e-06, "epoch": 1.6860652142126042, "percentage": 84.3, "elapsed_time": "8:26:30", "remaining_time": "1:34:20"} +{"current_steps": 5767, "total_steps": 6840, "loss": 
0.5033260583877563, "lr": 1.3180718322787067e-06, "epoch": 1.6863576546278696, "percentage": 84.31, "elapsed_time": "8:26:36", "remaining_time": "1:34:15"} +{"current_steps": 5768, "total_steps": 6840, "loss": 0.5306515693664551, "lr": 1.3156737348740655e-06, "epoch": 1.686650095043135, "percentage": 84.33, "elapsed_time": "8:26:42", "remaining_time": "1:34:10"} +{"current_steps": 5769, "total_steps": 6840, "loss": 0.45660221576690674, "lr": 1.313277667377888e-06, "epoch": 1.6869425354584004, "percentage": 84.34, "elapsed_time": "8:26:46", "remaining_time": "1:34:04"} +{"current_steps": 5770, "total_steps": 6840, "loss": 0.5353757739067078, "lr": 1.3108836303502392e-06, "epoch": 1.6872349758736658, "percentage": 84.36, "elapsed_time": "8:26:51", "remaining_time": "1:33:59"} +{"current_steps": 5771, "total_steps": 6840, "loss": 0.5415239930152893, "lr": 1.3084916243507118e-06, "epoch": 1.687527416288931, "percentage": 84.37, "elapsed_time": "8:26:58", "remaining_time": "1:33:54"} +{"current_steps": 5772, "total_steps": 6840, "loss": 0.5860229730606079, "lr": 1.3061016499384217e-06, "epoch": 1.6878198567041967, "percentage": 84.39, "elapsed_time": "8:27:03", "remaining_time": "1:33:49"} +{"current_steps": 5773, "total_steps": 6840, "loss": 0.5524891018867493, "lr": 1.3037137076720107e-06, "epoch": 1.6881122971194618, "percentage": 84.4, "elapsed_time": "8:27:07", "remaining_time": "1:33:43"} +{"current_steps": 5774, "total_steps": 6840, "loss": 0.5557498931884766, "lr": 1.3013277981096484e-06, "epoch": 1.6884047375347273, "percentage": 84.42, "elapsed_time": "8:27:12", "remaining_time": "1:33:38"} +{"current_steps": 5775, "total_steps": 6840, "loss": 0.45877397060394287, "lr": 1.2989439218090227e-06, "epoch": 1.6886971779499926, "percentage": 84.43, "elapsed_time": "8:27:16", "remaining_time": "1:33:32"} +{"current_steps": 5776, "total_steps": 6840, "loss": 0.5310335159301758, "lr": 1.2965620793273515e-06, "epoch": 1.688989618365258, "percentage": 84.44, 
"elapsed_time": "8:27:22", "remaining_time": "1:33:27"} +{"current_steps": 5777, "total_steps": 6840, "loss": 0.4855915904045105, "lr": 1.294182271221377e-06, "epoch": 1.6892820587805235, "percentage": 84.46, "elapsed_time": "8:27:28", "remaining_time": "1:33:22"} +{"current_steps": 5778, "total_steps": 6840, "loss": 0.6070747971534729, "lr": 1.2918044980473643e-06, "epoch": 1.6895744991957888, "percentage": 84.47, "elapsed_time": "8:27:34", "remaining_time": "1:33:17"} +{"current_steps": 5779, "total_steps": 6840, "loss": 0.5108609795570374, "lr": 1.2894287603611033e-06, "epoch": 1.6898669396110542, "percentage": 84.49, "elapsed_time": "8:27:38", "remaining_time": "1:33:11"} +{"current_steps": 5780, "total_steps": 6840, "loss": 0.49141189455986023, "lr": 1.2870550587179087e-06, "epoch": 1.6901593800263197, "percentage": 84.5, "elapsed_time": "8:27:44", "remaining_time": "1:33:06"} +{"current_steps": 5781, "total_steps": 6840, "loss": 0.4239678382873535, "lr": 1.2846833936726178e-06, "epoch": 1.690451820441585, "percentage": 84.52, "elapsed_time": "8:27:49", "remaining_time": "1:33:01"} +{"current_steps": 5782, "total_steps": 6840, "loss": 0.6348937153816223, "lr": 1.2823137657795948e-06, "epoch": 1.6907442608568504, "percentage": 84.53, "elapsed_time": "8:27:53", "remaining_time": "1:32:56"} +{"current_steps": 5783, "total_steps": 6840, "loss": 0.4561845064163208, "lr": 1.2799461755927233e-06, "epoch": 1.691036701272116, "percentage": 84.55, "elapsed_time": "8:27:58", "remaining_time": "1:32:50"} +{"current_steps": 5784, "total_steps": 6840, "loss": 0.5663880109786987, "lr": 1.2775806236654153e-06, "epoch": 1.6913291416873812, "percentage": 84.56, "elapsed_time": "8:28:04", "remaining_time": "1:32:45"} +{"current_steps": 5785, "total_steps": 6840, "loss": 0.5200550556182861, "lr": 1.275217110550604e-06, "epoch": 1.6916215821026466, "percentage": 84.58, "elapsed_time": "8:28:09", "remaining_time": "1:32:40"} +{"current_steps": 5786, "total_steps": 6840, "loss": 
0.5401214361190796, "lr": 1.2728556368007461e-06, "epoch": 1.6919140225179121, "percentage": 84.59, "elapsed_time": "8:28:14", "remaining_time": "1:32:35"} +{"current_steps": 5787, "total_steps": 6840, "loss": 0.5409752130508423, "lr": 1.2704962029678202e-06, "epoch": 1.6922064629331772, "percentage": 84.61, "elapsed_time": "8:28:20", "remaining_time": "1:32:29"} +{"current_steps": 5788, "total_steps": 6840, "loss": 0.46215158700942993, "lr": 1.2681388096033298e-06, "epoch": 1.6924989033484428, "percentage": 84.62, "elapsed_time": "8:28:24", "remaining_time": "1:32:24"} +{"current_steps": 5789, "total_steps": 6840, "loss": 0.44687867164611816, "lr": 1.2657834572583027e-06, "epoch": 1.692791343763708, "percentage": 84.63, "elapsed_time": "8:28:29", "remaining_time": "1:32:19"} +{"current_steps": 5790, "total_steps": 6840, "loss": 0.47882723808288574, "lr": 1.2634301464832877e-06, "epoch": 1.6930837841789734, "percentage": 84.65, "elapsed_time": "8:28:35", "remaining_time": "1:32:13"} +{"current_steps": 5791, "total_steps": 6840, "loss": 0.6108201742172241, "lr": 1.2610788778283567e-06, "epoch": 1.693376224594239, "percentage": 84.66, "elapsed_time": "8:28:39", "remaining_time": "1:32:08"} +{"current_steps": 5792, "total_steps": 6840, "loss": 0.45024657249450684, "lr": 1.2587296518431036e-06, "epoch": 1.6936686650095043, "percentage": 84.68, "elapsed_time": "8:28:45", "remaining_time": "1:32:03"} +{"current_steps": 5793, "total_steps": 6840, "loss": 0.6746254563331604, "lr": 1.256382469076648e-06, "epoch": 1.6939611054247696, "percentage": 84.69, "elapsed_time": "8:28:50", "remaining_time": "1:31:57"} +{"current_steps": 5794, "total_steps": 6840, "loss": 0.6439248323440552, "lr": 1.2540373300776264e-06, "epoch": 1.6942535458400352, "percentage": 84.71, "elapsed_time": "8:28:57", "remaining_time": "1:31:52"} +{"current_steps": 5795, "total_steps": 6840, "loss": 0.467510461807251, "lr": 1.251694235394204e-06, "epoch": 1.6945459862553005, "percentage": 84.72, 
"elapsed_time": "8:29:03", "remaining_time": "1:31:47"} +{"current_steps": 5796, "total_steps": 6840, "loss": 0.5509516596794128, "lr": 1.2493531855740626e-06, "epoch": 1.6948384266705658, "percentage": 84.74, "elapsed_time": "8:29:08", "remaining_time": "1:31:42"} +{"current_steps": 5797, "total_steps": 6840, "loss": 0.49178194999694824, "lr": 1.247014181164412e-06, "epoch": 1.6951308670858314, "percentage": 84.75, "elapsed_time": "8:29:13", "remaining_time": "1:31:37"} +{"current_steps": 5798, "total_steps": 6840, "loss": 0.4825005531311035, "lr": 1.2446772227119753e-06, "epoch": 1.6954233075010965, "percentage": 84.77, "elapsed_time": "8:29:19", "remaining_time": "1:31:32"} +{"current_steps": 5799, "total_steps": 6840, "loss": 0.7441064715385437, "lr": 1.242342310763005e-06, "epoch": 1.695715747916362, "percentage": 84.78, "elapsed_time": "8:29:25", "remaining_time": "1:31:26"} +{"current_steps": 5800, "total_steps": 6840, "loss": 0.5020110011100769, "lr": 1.2400094458632717e-06, "epoch": 1.6960081883316276, "percentage": 84.8, "elapsed_time": "8:29:31", "remaining_time": "1:31:21"} +{"current_steps": 5801, "total_steps": 6840, "loss": 0.5439830422401428, "lr": 1.237678628558069e-06, "epoch": 1.6963006287468927, "percentage": 84.81, "elapsed_time": "8:29:41", "remaining_time": "1:31:17"} +{"current_steps": 5802, "total_steps": 6840, "loss": 0.6235179901123047, "lr": 1.235349859392211e-06, "epoch": 1.6965930691621582, "percentage": 84.82, "elapsed_time": "8:29:46", "remaining_time": "1:31:11"} +{"current_steps": 5803, "total_steps": 6840, "loss": 0.6176612377166748, "lr": 1.2330231389100323e-06, "epoch": 1.6968855095774236, "percentage": 84.84, "elapsed_time": "8:29:51", "remaining_time": "1:31:06"} +{"current_steps": 5804, "total_steps": 6840, "loss": 0.5956840515136719, "lr": 1.2306984676553924e-06, "epoch": 1.697177949992689, "percentage": 84.85, "elapsed_time": "8:29:55", "remaining_time": "1:31:01"} +{"current_steps": 5805, "total_steps": 6840, "loss": 
0.5025947690010071, "lr": 1.2283758461716667e-06, "epoch": 1.6974703904079544, "percentage": 84.87, "elapsed_time": "8:30:00", "remaining_time": "1:30:55"} +{"current_steps": 5806, "total_steps": 6840, "loss": 0.5772436857223511, "lr": 1.2260552750017551e-06, "epoch": 1.6977628308232198, "percentage": 84.88, "elapsed_time": "8:30:05", "remaining_time": "1:30:50"} +{"current_steps": 5807, "total_steps": 6840, "loss": 0.4336615204811096, "lr": 1.223736754688075e-06, "epoch": 1.698055271238485, "percentage": 84.9, "elapsed_time": "8:30:10", "remaining_time": "1:30:45"} +{"current_steps": 5808, "total_steps": 6840, "loss": 0.5697668790817261, "lr": 1.221420285772572e-06, "epoch": 1.6983477116537506, "percentage": 84.91, "elapsed_time": "8:30:16", "remaining_time": "1:30:40"} +{"current_steps": 5809, "total_steps": 6840, "loss": 0.4966861605644226, "lr": 1.2191058687966995e-06, "epoch": 1.698640152069016, "percentage": 84.93, "elapsed_time": "8:30:20", "remaining_time": "1:30:34"} +{"current_steps": 5810, "total_steps": 6840, "loss": 0.5805951952934265, "lr": 1.2167935043014411e-06, "epoch": 1.6989325924842813, "percentage": 84.94, "elapsed_time": "8:30:26", "remaining_time": "1:30:29"} +{"current_steps": 5811, "total_steps": 6840, "loss": 0.4669906497001648, "lr": 1.2144831928272994e-06, "epoch": 1.6992250328995469, "percentage": 84.96, "elapsed_time": "8:30:31", "remaining_time": "1:30:24"} +{"current_steps": 5812, "total_steps": 6840, "loss": 0.5630965828895569, "lr": 1.212174934914294e-06, "epoch": 1.699517473314812, "percentage": 84.97, "elapsed_time": "8:30:35", "remaining_time": "1:30:18"} +{"current_steps": 5813, "total_steps": 6840, "loss": 0.5345104932785034, "lr": 1.2098687311019663e-06, "epoch": 1.6998099137300775, "percentage": 84.99, "elapsed_time": "8:30:40", "remaining_time": "1:30:13"} +{"current_steps": 5814, "total_steps": 6840, "loss": 0.5760249495506287, "lr": 1.207564581929378e-06, "epoch": 1.7001023541453428, "percentage": 85.0, "elapsed_time": 
"8:30:45", "remaining_time": "1:30:08"} +{"current_steps": 5815, "total_steps": 6840, "loss": 0.506635308265686, "lr": 1.2052624879351105e-06, "epoch": 1.7003947945606082, "percentage": 85.01, "elapsed_time": "8:30:50", "remaining_time": "1:30:02"} +{"current_steps": 5816, "total_steps": 6840, "loss": 0.5107032656669617, "lr": 1.2029624496572622e-06, "epoch": 1.7006872349758737, "percentage": 85.03, "elapsed_time": "8:30:55", "remaining_time": "1:29:57"} +{"current_steps": 5817, "total_steps": 6840, "loss": 0.5888187885284424, "lr": 1.2006644676334557e-06, "epoch": 1.700979675391139, "percentage": 85.04, "elapsed_time": "8:31:01", "remaining_time": "1:29:52"} +{"current_steps": 5818, "total_steps": 6840, "loss": 0.5326075553894043, "lr": 1.1983685424008285e-06, "epoch": 1.7012721158064044, "percentage": 85.06, "elapsed_time": "8:31:06", "remaining_time": "1:29:46"} +{"current_steps": 5819, "total_steps": 6840, "loss": 0.5097993612289429, "lr": 1.1960746744960417e-06, "epoch": 1.70156455622167, "percentage": 85.07, "elapsed_time": "8:31:11", "remaining_time": "1:29:41"} +{"current_steps": 5820, "total_steps": 6840, "loss": 0.6001093983650208, "lr": 1.1937828644552696e-06, "epoch": 1.7018569966369352, "percentage": 85.09, "elapsed_time": "8:31:18", "remaining_time": "1:29:36"} +{"current_steps": 5821, "total_steps": 6840, "loss": 0.513684093952179, "lr": 1.1914931128142072e-06, "epoch": 1.7021494370522006, "percentage": 85.1, "elapsed_time": "8:31:22", "remaining_time": "1:29:31"} +{"current_steps": 5822, "total_steps": 6840, "loss": 0.4688597321510315, "lr": 1.189205420108076e-06, "epoch": 1.7024418774674661, "percentage": 85.12, "elapsed_time": "8:31:28", "remaining_time": "1:29:25"} +{"current_steps": 5823, "total_steps": 6840, "loss": 0.4537498354911804, "lr": 1.1869197868716075e-06, "epoch": 1.7027343178827314, "percentage": 85.13, "elapsed_time": "8:31:34", "remaining_time": "1:29:20"} +{"current_steps": 5824, "total_steps": 6840, "loss": 0.43031078577041626, 
"lr": 1.1846362136390531e-06, "epoch": 1.7030267582979968, "percentage": 85.15, "elapsed_time": "8:31:39", "remaining_time": "1:29:15"} +{"current_steps": 5825, "total_steps": 6840, "loss": 0.5139330625534058, "lr": 1.182354700944187e-06, "epoch": 1.7033191987132623, "percentage": 85.16, "elapsed_time": "8:31:43", "remaining_time": "1:29:10"} +{"current_steps": 5826, "total_steps": 6840, "loss": 0.6542010307312012, "lr": 1.180075249320296e-06, "epoch": 1.7036116391285274, "percentage": 85.18, "elapsed_time": "8:31:48", "remaining_time": "1:29:04"} +{"current_steps": 5827, "total_steps": 6840, "loss": 0.5371676087379456, "lr": 1.1777978593001903e-06, "epoch": 1.703904079543793, "percentage": 85.19, "elapsed_time": "8:31:54", "remaining_time": "1:28:59"} +{"current_steps": 5828, "total_steps": 6840, "loss": 0.47583359479904175, "lr": 1.1755225314161967e-06, "epoch": 1.7041965199590583, "percentage": 85.2, "elapsed_time": "8:32:00", "remaining_time": "1:28:54"} +{"current_steps": 5829, "total_steps": 6840, "loss": 0.5471247434616089, "lr": 1.173249266200156e-06, "epoch": 1.7044889603743236, "percentage": 85.22, "elapsed_time": "8:32:07", "remaining_time": "1:28:49"} +{"current_steps": 5830, "total_steps": 6840, "loss": 0.5095713138580322, "lr": 1.1709780641834323e-06, "epoch": 1.7047814007895892, "percentage": 85.23, "elapsed_time": "8:32:11", "remaining_time": "1:28:44"} +{"current_steps": 5831, "total_steps": 6840, "loss": 0.41944777965545654, "lr": 1.1687089258969041e-06, "epoch": 1.7050738412048545, "percentage": 85.25, "elapsed_time": "8:32:18", "remaining_time": "1:28:38"} +{"current_steps": 5832, "total_steps": 6840, "loss": 0.42380404472351074, "lr": 1.1664418518709697e-06, "epoch": 1.7053662816201198, "percentage": 85.26, "elapsed_time": "8:32:23", "remaining_time": "1:28:33"} +{"current_steps": 5833, "total_steps": 6840, "loss": 0.5688038468360901, "lr": 1.1641768426355427e-06, "epoch": 1.7056587220353854, "percentage": 85.28, "elapsed_time": "8:32:27", 
"remaining_time": "1:28:28"} +{"current_steps": 5834, "total_steps": 6840, "loss": 0.5432788133621216, "lr": 1.1619138987200562e-06, "epoch": 1.7059511624506507, "percentage": 85.29, "elapsed_time": "8:32:32", "remaining_time": "1:28:22"} +{"current_steps": 5835, "total_steps": 6840, "loss": 0.5408512949943542, "lr": 1.1596530206534606e-06, "epoch": 1.706243602865916, "percentage": 85.31, "elapsed_time": "8:32:37", "remaining_time": "1:28:17"} +{"current_steps": 5836, "total_steps": 6840, "loss": 0.5149247646331787, "lr": 1.1573942089642198e-06, "epoch": 1.7065360432811816, "percentage": 85.32, "elapsed_time": "8:32:43", "remaining_time": "1:28:12"} +{"current_steps": 5837, "total_steps": 6840, "loss": 0.36905592679977417, "lr": 1.1551374641803193e-06, "epoch": 1.7068284836964467, "percentage": 85.34, "elapsed_time": "8:32:48", "remaining_time": "1:28:07"} +{"current_steps": 5838, "total_steps": 6840, "loss": 0.5370720624923706, "lr": 1.152882786829259e-06, "epoch": 1.7071209241117122, "percentage": 85.35, "elapsed_time": "8:32:53", "remaining_time": "1:28:01"} +{"current_steps": 5839, "total_steps": 6840, "loss": 0.4535629153251648, "lr": 1.1506301774380578e-06, "epoch": 1.7074133645269778, "percentage": 85.37, "elapsed_time": "8:33:00", "remaining_time": "1:27:56"} +{"current_steps": 5840, "total_steps": 6840, "loss": 0.5456075668334961, "lr": 1.1483796365332455e-06, "epoch": 1.7077058049422429, "percentage": 85.38, "elapsed_time": "8:33:05", "remaining_time": "1:27:51"} +{"current_steps": 5841, "total_steps": 6840, "loss": 0.5884554386138916, "lr": 1.1461311646408756e-06, "epoch": 1.7079982453575084, "percentage": 85.39, "elapsed_time": "8:33:12", "remaining_time": "1:27:46"} +{"current_steps": 5842, "total_steps": 6840, "loss": 0.605168879032135, "lr": 1.1438847622865125e-06, "epoch": 1.7082906857727738, "percentage": 85.41, "elapsed_time": "8:33:18", "remaining_time": "1:27:41"} +{"current_steps": 5843, "total_steps": 6840, "loss": 0.43739163875579834, "lr": 
1.14164042999524e-06, "epoch": 1.708583126188039, "percentage": 85.42, "elapsed_time": "8:33:23", "remaining_time": "1:27:36"} +{"current_steps": 5844, "total_steps": 6840, "loss": 0.4508574306964874, "lr": 1.1393981682916578e-06, "epoch": 1.7088755666033046, "percentage": 85.44, "elapsed_time": "8:33:27", "remaining_time": "1:27:30"} +{"current_steps": 5845, "total_steps": 6840, "loss": 0.5918034315109253, "lr": 1.1371579776998798e-06, "epoch": 1.70916800701857, "percentage": 85.45, "elapsed_time": "8:33:33", "remaining_time": "1:27:25"} +{"current_steps": 5846, "total_steps": 6840, "loss": 0.5668582320213318, "lr": 1.1349198587435373e-06, "epoch": 1.7094604474338353, "percentage": 85.47, "elapsed_time": "8:33:37", "remaining_time": "1:27:19"} +{"current_steps": 5847, "total_steps": 6840, "loss": 0.6374846696853638, "lr": 1.1326838119457784e-06, "epoch": 1.7097528878491008, "percentage": 85.48, "elapsed_time": "8:33:42", "remaining_time": "1:27:14"} +{"current_steps": 5848, "total_steps": 6840, "loss": 0.5074985027313232, "lr": 1.130449837829264e-06, "epoch": 1.7100453282643662, "percentage": 85.5, "elapsed_time": "8:33:47", "remaining_time": "1:27:09"} +{"current_steps": 5849, "total_steps": 6840, "loss": 0.5012484788894653, "lr": 1.1282179369161717e-06, "epoch": 1.7103377686796315, "percentage": 85.51, "elapsed_time": "8:33:52", "remaining_time": "1:27:03"} +{"current_steps": 5850, "total_steps": 6840, "loss": 0.4417869746685028, "lr": 1.1259881097281977e-06, "epoch": 1.710630209094897, "percentage": 85.53, "elapsed_time": "8:33:57", "remaining_time": "1:26:58"} +{"current_steps": 5851, "total_steps": 6840, "loss": 0.6032637357711792, "lr": 1.1237603567865452e-06, "epoch": 1.7109226495101622, "percentage": 85.54, "elapsed_time": "8:34:01", "remaining_time": "1:26:53"} +{"current_steps": 5852, "total_steps": 6840, "loss": 0.5790234804153442, "lr": 1.121534678611942e-06, "epoch": 1.7112150899254277, "percentage": 85.56, "elapsed_time": "8:34:06", "remaining_time": 
"1:26:47"} +{"current_steps": 5853, "total_steps": 6840, "loss": 0.5436397194862366, "lr": 1.1193110757246251e-06, "epoch": 1.711507530340693, "percentage": 85.57, "elapsed_time": "8:34:10", "remaining_time": "1:26:42"} +{"current_steps": 5854, "total_steps": 6840, "loss": 0.5088083744049072, "lr": 1.11708954864435e-06, "epoch": 1.7117999707559584, "percentage": 85.58, "elapsed_time": "8:34:16", "remaining_time": "1:26:37"} +{"current_steps": 5855, "total_steps": 6840, "loss": 0.5907719135284424, "lr": 1.1148700978903826e-06, "epoch": 1.712092411171224, "percentage": 85.6, "elapsed_time": "8:34:22", "remaining_time": "1:26:32"} +{"current_steps": 5856, "total_steps": 6840, "loss": 0.4744384288787842, "lr": 1.1126527239815078e-06, "epoch": 1.7123848515864892, "percentage": 85.61, "elapsed_time": "8:34:27", "remaining_time": "1:26:26"} +{"current_steps": 5857, "total_steps": 6840, "loss": 0.6644346714019775, "lr": 1.110437427436023e-06, "epoch": 1.7126772920017546, "percentage": 85.63, "elapsed_time": "8:34:31", "remaining_time": "1:26:21"} +{"current_steps": 5858, "total_steps": 6840, "loss": 0.4926042854785919, "lr": 1.10822420877174e-06, "epoch": 1.71296973241702, "percentage": 85.64, "elapsed_time": "8:34:35", "remaining_time": "1:26:15"} +{"current_steps": 5859, "total_steps": 6840, "loss": 0.47684335708618164, "lr": 1.1060130685059845e-06, "epoch": 1.7132621728322854, "percentage": 85.66, "elapsed_time": "8:34:40", "remaining_time": "1:26:10"} +{"current_steps": 5860, "total_steps": 6840, "loss": 0.5574014186859131, "lr": 1.1038040071555988e-06, "epoch": 1.7135546132475508, "percentage": 85.67, "elapsed_time": "8:34:45", "remaining_time": "1:26:05"} +{"current_steps": 5861, "total_steps": 6840, "loss": 0.6276485323905945, "lr": 1.101597025236939e-06, "epoch": 1.7138470536628163, "percentage": 85.69, "elapsed_time": "8:34:49", "remaining_time": "1:25:59"} +{"current_steps": 5862, "total_steps": 6840, "loss": 0.558611273765564, "lr": 1.099392123265869e-06, 
"epoch": 1.7141394940780816, "percentage": 85.7, "elapsed_time": "8:34:53", "remaining_time": "1:25:54"} +{"current_steps": 5863, "total_steps": 6840, "loss": 0.5561566948890686, "lr": 1.097189301757773e-06, "epoch": 1.714431934493347, "percentage": 85.72, "elapsed_time": "8:35:00", "remaining_time": "1:25:49"} +{"current_steps": 5864, "total_steps": 6840, "loss": 0.5360273122787476, "lr": 1.094988561227548e-06, "epoch": 1.7147243749086125, "percentage": 85.73, "elapsed_time": "8:35:04", "remaining_time": "1:25:43"} +{"current_steps": 5865, "total_steps": 6840, "loss": 0.5572026968002319, "lr": 1.0927899021896038e-06, "epoch": 1.7150168153238776, "percentage": 85.75, "elapsed_time": "8:35:07", "remaining_time": "1:25:38"} +{"current_steps": 5866, "total_steps": 6840, "loss": 0.4593105912208557, "lr": 1.0905933251578626e-06, "epoch": 1.7153092557391432, "percentage": 85.76, "elapsed_time": "8:35:13", "remaining_time": "1:25:32"} +{"current_steps": 5867, "total_steps": 6840, "loss": 0.5017558336257935, "lr": 1.0883988306457627e-06, "epoch": 1.7156016961544085, "percentage": 85.77, "elapsed_time": "8:35:18", "remaining_time": "1:25:27"} +{"current_steps": 5868, "total_steps": 6840, "loss": 0.4982030391693115, "lr": 1.0862064191662524e-06, "epoch": 1.7158941365696738, "percentage": 85.79, "elapsed_time": "8:35:24", "remaining_time": "1:25:22"} +{"current_steps": 5869, "total_steps": 6840, "loss": 0.5563114881515503, "lr": 1.0840160912317943e-06, "epoch": 1.7161865769849394, "percentage": 85.8, "elapsed_time": "8:35:29", "remaining_time": "1:25:17"} +{"current_steps": 5870, "total_steps": 6840, "loss": 0.4817348122596741, "lr": 1.0818278473543652e-06, "epoch": 1.7164790174002047, "percentage": 85.82, "elapsed_time": "8:35:33", "remaining_time": "1:25:11"} +{"current_steps": 5871, "total_steps": 6840, "loss": 0.47907108068466187, "lr": 1.079641688045453e-06, "epoch": 1.71677145781547, "percentage": 85.83, "elapsed_time": "8:35:39", "remaining_time": "1:25:06"} 
+{"current_steps": 5872, "total_steps": 6840, "loss": 0.6158252954483032, "lr": 1.0774576138160596e-06, "epoch": 1.7170638982307356, "percentage": 85.85, "elapsed_time": "8:35:44", "remaining_time": "1:25:01"} +{"current_steps": 5873, "total_steps": 6840, "loss": 0.5336505174636841, "lr": 1.0752756251767015e-06, "epoch": 1.717356338646001, "percentage": 85.86, "elapsed_time": "8:35:50", "remaining_time": "1:24:55"} +{"current_steps": 5874, "total_steps": 6840, "loss": 0.5806115865707397, "lr": 1.0730957226374006e-06, "epoch": 1.7176487790612662, "percentage": 85.88, "elapsed_time": "8:35:54", "remaining_time": "1:24:50"} +{"current_steps": 5875, "total_steps": 6840, "loss": 0.3701411485671997, "lr": 1.070917906707698e-06, "epoch": 1.7179412194765318, "percentage": 85.89, "elapsed_time": "8:36:00", "remaining_time": "1:24:45"} +{"current_steps": 5876, "total_steps": 6840, "loss": 0.5779517292976379, "lr": 1.0687421778966445e-06, "epoch": 1.7182336598917969, "percentage": 85.91, "elapsed_time": "8:36:06", "remaining_time": "1:24:40"} +{"current_steps": 5877, "total_steps": 6840, "loss": 0.43965232372283936, "lr": 1.0665685367128041e-06, "epoch": 1.7185261003070624, "percentage": 85.92, "elapsed_time": "8:36:12", "remaining_time": "1:24:35"} +{"current_steps": 5878, "total_steps": 6840, "loss": 0.4768058657646179, "lr": 1.064396983664253e-06, "epoch": 1.718818540722328, "percentage": 85.94, "elapsed_time": "8:36:18", "remaining_time": "1:24:29"} +{"current_steps": 5879, "total_steps": 6840, "loss": 0.5331600904464722, "lr": 1.0622275192585773e-06, "epoch": 1.719110981137593, "percentage": 85.95, "elapsed_time": "8:36:23", "remaining_time": "1:24:24"} +{"current_steps": 5880, "total_steps": 6840, "loss": 0.5495625734329224, "lr": 1.0600601440028758e-06, "epoch": 1.7194034215528586, "percentage": 85.96, "elapsed_time": "8:36:29", "remaining_time": "1:24:19"} +{"current_steps": 5881, "total_steps": 6840, "loss": 0.4244312345981598, "lr": 1.0578948584037608e-06, "epoch": 
1.719695861968124, "percentage": 85.98, "elapsed_time": "8:36:35", "remaining_time": "1:24:14"} +{"current_steps": 5882, "total_steps": 6840, "loss": 0.4618447721004486, "lr": 1.0557316629673531e-06, "epoch": 1.7199883023833893, "percentage": 85.99, "elapsed_time": "8:36:40", "remaining_time": "1:24:09"} +{"current_steps": 5883, "total_steps": 6840, "loss": 0.4226785898208618, "lr": 1.0535705581992873e-06, "epoch": 1.7202807427986548, "percentage": 86.01, "elapsed_time": "8:36:46", "remaining_time": "1:24:03"} +{"current_steps": 5884, "total_steps": 6840, "loss": 0.5813404321670532, "lr": 1.0514115446047101e-06, "epoch": 1.7205731832139202, "percentage": 86.02, "elapsed_time": "8:36:51", "remaining_time": "1:23:58"} +{"current_steps": 5885, "total_steps": 6840, "loss": 0.6700260639190674, "lr": 1.0492546226882738e-06, "epoch": 1.7208656236291855, "percentage": 86.04, "elapsed_time": "8:36:57", "remaining_time": "1:23:53"} +{"current_steps": 5886, "total_steps": 6840, "loss": 0.6024131178855896, "lr": 1.0470997929541494e-06, "epoch": 1.721158064044451, "percentage": 86.05, "elapsed_time": "8:37:02", "remaining_time": "1:23:48"} +{"current_steps": 5887, "total_steps": 6840, "loss": 0.6015123724937439, "lr": 1.0449470559060125e-06, "epoch": 1.7214505044597164, "percentage": 86.07, "elapsed_time": "8:37:07", "remaining_time": "1:23:42"} +{"current_steps": 5888, "total_steps": 6840, "loss": 0.6631267070770264, "lr": 1.0427964120470534e-06, "epoch": 1.7217429448749817, "percentage": 86.08, "elapsed_time": "8:37:11", "remaining_time": "1:23:37"} +{"current_steps": 5889, "total_steps": 6840, "loss": 0.5267488956451416, "lr": 1.0406478618799731e-06, "epoch": 1.7220353852902472, "percentage": 86.1, "elapsed_time": "8:37:16", "remaining_time": "1:23:32"} +{"current_steps": 5890, "total_steps": 6840, "loss": 0.5190263986587524, "lr": 1.038501405906982e-06, "epoch": 1.7223278257055123, "percentage": 86.11, "elapsed_time": "8:37:22", "remaining_time": "1:23:26"} 
+{"current_steps": 5891, "total_steps": 6840, "loss": 0.5253189206123352, "lr": 1.0363570446297999e-06, "epoch": 1.722620266120778, "percentage": 86.13, "elapsed_time": "8:37:26", "remaining_time": "1:23:21"} +{"current_steps": 5892, "total_steps": 6840, "loss": 0.5271278619766235, "lr": 1.0342147785496581e-06, "epoch": 1.7229127065360432, "percentage": 86.14, "elapsed_time": "8:37:32", "remaining_time": "1:23:16"} +{"current_steps": 5893, "total_steps": 6840, "loss": 0.5284109711647034, "lr": 1.0320746081672994e-06, "epoch": 1.7232051469513086, "percentage": 86.15, "elapsed_time": "8:37:37", "remaining_time": "1:23:10"} +{"current_steps": 5894, "total_steps": 6840, "loss": 0.6119050979614258, "lr": 1.0299365339829747e-06, "epoch": 1.723497587366574, "percentage": 86.17, "elapsed_time": "8:37:42", "remaining_time": "1:23:05"} +{"current_steps": 5895, "total_steps": 6840, "loss": 0.42297711968421936, "lr": 1.0278005564964488e-06, "epoch": 1.7237900277818394, "percentage": 86.18, "elapsed_time": "8:37:48", "remaining_time": "1:23:00"} +{"current_steps": 5896, "total_steps": 6840, "loss": 0.5923792123794556, "lr": 1.02566667620699e-06, "epoch": 1.7240824681971048, "percentage": 86.2, "elapsed_time": "8:37:54", "remaining_time": "1:22:55"} +{"current_steps": 5897, "total_steps": 6840, "loss": 0.4999189078807831, "lr": 1.023534893613377e-06, "epoch": 1.7243749086123703, "percentage": 86.21, "elapsed_time": "8:37:59", "remaining_time": "1:22:50"} +{"current_steps": 5898, "total_steps": 6840, "loss": 0.49083560705184937, "lr": 1.0214052092139082e-06, "epoch": 1.7246673490276356, "percentage": 86.23, "elapsed_time": "8:38:03", "remaining_time": "1:22:44"} +{"current_steps": 5899, "total_steps": 6840, "loss": 0.6001632213592529, "lr": 1.0192776235063795e-06, "epoch": 1.724959789442901, "percentage": 86.24, "elapsed_time": "8:38:08", "remaining_time": "1:22:39"} +{"current_steps": 5900, "total_steps": 6840, "loss": 0.4897228479385376, "lr": 1.0171521369881044e-06, "epoch": 
1.7252522298581665, "percentage": 86.26, "elapsed_time": "8:38:13", "remaining_time": "1:22:33"} +{"current_steps": 5901, "total_steps": 6840, "loss": 0.44784292578697205, "lr": 1.0150287501558997e-06, "epoch": 1.7255446702734318, "percentage": 86.27, "elapsed_time": "8:38:23", "remaining_time": "1:22:29"} +{"current_steps": 5902, "total_steps": 6840, "loss": 0.46105432510375977, "lr": 1.0129074635060943e-06, "epoch": 1.7258371106886972, "percentage": 86.29, "elapsed_time": "8:38:27", "remaining_time": "1:22:23"} +{"current_steps": 5903, "total_steps": 6840, "loss": 0.5805546045303345, "lr": 1.0107882775345278e-06, "epoch": 1.7261295511039627, "percentage": 86.3, "elapsed_time": "8:38:32", "remaining_time": "1:22:18"} +{"current_steps": 5904, "total_steps": 6840, "loss": 0.560761570930481, "lr": 1.0086711927365488e-06, "epoch": 1.7264219915192278, "percentage": 86.32, "elapsed_time": "8:38:38", "remaining_time": "1:22:13"} +{"current_steps": 5905, "total_steps": 6840, "loss": 0.533979058265686, "lr": 1.006556209607007e-06, "epoch": 1.7267144319344934, "percentage": 86.33, "elapsed_time": "8:38:43", "remaining_time": "1:22:08"} +{"current_steps": 5906, "total_steps": 6840, "loss": 0.5742807984352112, "lr": 1.004443328640271e-06, "epoch": 1.7270068723497587, "percentage": 86.35, "elapsed_time": "8:38:46", "remaining_time": "1:22:02"} +{"current_steps": 5907, "total_steps": 6840, "loss": 0.5617523789405823, "lr": 1.0023325503302129e-06, "epoch": 1.727299312765024, "percentage": 86.36, "elapsed_time": "8:38:51", "remaining_time": "1:21:57"} +{"current_steps": 5908, "total_steps": 6840, "loss": 0.45596855878829956, "lr": 1.0002238751702143e-06, "epoch": 1.7275917531802896, "percentage": 86.37, "elapsed_time": "8:38:56", "remaining_time": "1:21:51"} +{"current_steps": 5909, "total_steps": 6840, "loss": 0.4917908012866974, "lr": 9.981173036531655e-07, "epoch": 1.727884193595555, "percentage": 86.39, "elapsed_time": "8:39:01", "remaining_time": "1:21:46"} 
+{"current_steps": 5910, "total_steps": 6840, "loss": 0.6204911470413208, "lr": 9.960128362714637e-07, "epoch": 1.7281766340108202, "percentage": 86.4, "elapsed_time": "8:39:06", "remaining_time": "1:21:41"} +{"current_steps": 5911, "total_steps": 6840, "loss": 0.47288352251052856, "lr": 9.93910473517018e-07, "epoch": 1.7284690744260858, "percentage": 86.42, "elapsed_time": "8:39:12", "remaining_time": "1:21:36"} +{"current_steps": 5912, "total_steps": 6840, "loss": 0.48668670654296875, "lr": 9.918102158812404e-07, "epoch": 1.728761514841351, "percentage": 86.43, "elapsed_time": "8:39:16", "remaining_time": "1:21:30"} +{"current_steps": 5913, "total_steps": 6840, "loss": 0.43311381340026855, "lr": 9.89712063855054e-07, "epoch": 1.7290539552566164, "percentage": 86.45, "elapsed_time": "8:39:21", "remaining_time": "1:21:25"} +{"current_steps": 5914, "total_steps": 6840, "loss": 0.5066087245941162, "lr": 9.876160179288886e-07, "epoch": 1.729346395671882, "percentage": 86.46, "elapsed_time": "8:39:25", "remaining_time": "1:21:19"} +{"current_steps": 5915, "total_steps": 6840, "loss": 0.6022528409957886, "lr": 9.855220785926856e-07, "epoch": 1.729638836087147, "percentage": 86.48, "elapsed_time": "8:39:31", "remaining_time": "1:21:14"} +{"current_steps": 5916, "total_steps": 6840, "loss": 0.5288707613945007, "lr": 9.834302463358858e-07, "epoch": 1.7299312765024126, "percentage": 86.49, "elapsed_time": "8:39:36", "remaining_time": "1:21:09"} +{"current_steps": 5917, "total_steps": 6840, "loss": 0.6150302290916443, "lr": 9.813405216474436e-07, "epoch": 1.7302237169176782, "percentage": 86.51, "elapsed_time": "8:39:41", "remaining_time": "1:21:03"} +{"current_steps": 5918, "total_steps": 6840, "loss": 0.5431156158447266, "lr": 9.792529050158218e-07, "epoch": 1.7305161573329433, "percentage": 86.52, "elapsed_time": "8:39:45", "remaining_time": "1:20:58"} +{"current_steps": 5919, "total_steps": 6840, "loss": 0.6844080686569214, "lr": 9.771673969289851e-07, "epoch": 
1.7308085977482088, "percentage": 86.54, "elapsed_time": "8:39:51", "remaining_time": "1:20:53"} +{"current_steps": 5920, "total_steps": 6840, "loss": 0.4778372049331665, "lr": 9.750839978744098e-07, "epoch": 1.7311010381634742, "percentage": 86.55, "elapsed_time": "8:39:54", "remaining_time": "1:20:47"} +{"current_steps": 5921, "total_steps": 6840, "loss": 0.6913809776306152, "lr": 9.73002708339077e-07, "epoch": 1.7313934785787395, "percentage": 86.56, "elapsed_time": "8:39:58", "remaining_time": "1:20:42"} +{"current_steps": 5922, "total_steps": 6840, "loss": 0.6289864778518677, "lr": 9.709235288094765e-07, "epoch": 1.731685918994005, "percentage": 86.58, "elapsed_time": "8:40:04", "remaining_time": "1:20:37"} +{"current_steps": 5923, "total_steps": 6840, "loss": 0.4735794961452484, "lr": 9.68846459771604e-07, "epoch": 1.7319783594092704, "percentage": 86.59, "elapsed_time": "8:40:08", "remaining_time": "1:20:31"} +{"current_steps": 5924, "total_steps": 6840, "loss": 0.53554767370224, "lr": 9.667715017109614e-07, "epoch": 1.7322707998245357, "percentage": 86.61, "elapsed_time": "8:40:14", "remaining_time": "1:20:26"} +{"current_steps": 5925, "total_steps": 6840, "loss": 0.5118460655212402, "lr": 9.64698655112557e-07, "epoch": 1.7325632402398012, "percentage": 86.62, "elapsed_time": "8:40:21", "remaining_time": "1:20:21"} +{"current_steps": 5926, "total_steps": 6840, "loss": 0.5739814043045044, "lr": 9.626279204609079e-07, "epoch": 1.7328556806550666, "percentage": 86.64, "elapsed_time": "8:40:27", "remaining_time": "1:20:16"} +{"current_steps": 5927, "total_steps": 6840, "loss": 0.5716123580932617, "lr": 9.605592982400325e-07, "epoch": 1.733148121070332, "percentage": 86.65, "elapsed_time": "8:40:33", "remaining_time": "1:20:11"} +{"current_steps": 5928, "total_steps": 6840, "loss": 0.5091898441314697, "lr": 9.584927889334605e-07, "epoch": 1.7334405614855974, "percentage": 86.67, "elapsed_time": "8:40:37", "remaining_time": "1:20:05"} +{"current_steps": 5929, 
"total_steps": 6840, "loss": 0.46946650743484497, "lr": 9.564283930242258e-07, "epoch": 1.7337330019008625, "percentage": 86.68, "elapsed_time": "8:40:43", "remaining_time": "1:20:00"} +{"current_steps": 5930, "total_steps": 6840, "loss": 0.6238217353820801, "lr": 9.543661109948688e-07, "epoch": 1.734025442316128, "percentage": 86.7, "elapsed_time": "8:40:48", "remaining_time": "1:19:55"} +{"current_steps": 5931, "total_steps": 6840, "loss": 0.7464175224304199, "lr": 9.52305943327434e-07, "epoch": 1.7343178827313934, "percentage": 86.71, "elapsed_time": "8:40:54", "remaining_time": "1:19:50"} +{"current_steps": 5932, "total_steps": 6840, "loss": 0.6064578294754028, "lr": 9.502478905034751e-07, "epoch": 1.7346103231466588, "percentage": 86.73, "elapsed_time": "8:41:01", "remaining_time": "1:19:45"} +{"current_steps": 5933, "total_steps": 6840, "loss": 0.5703303813934326, "lr": 9.481919530040484e-07, "epoch": 1.7349027635619243, "percentage": 86.74, "elapsed_time": "8:41:07", "remaining_time": "1:19:39"} +{"current_steps": 5934, "total_steps": 6840, "loss": 0.5570278167724609, "lr": 9.461381313097162e-07, "epoch": 1.7351952039771896, "percentage": 86.75, "elapsed_time": "8:41:12", "remaining_time": "1:19:34"} +{"current_steps": 5935, "total_steps": 6840, "loss": 0.54972243309021, "lr": 9.440864259005477e-07, "epoch": 1.735487644392455, "percentage": 86.77, "elapsed_time": "8:41:17", "remaining_time": "1:19:29"} +{"current_steps": 5936, "total_steps": 6840, "loss": 0.5670010447502136, "lr": 9.420368372561161e-07, "epoch": 1.7357800848077205, "percentage": 86.78, "elapsed_time": "8:41:24", "remaining_time": "1:19:24"} +{"current_steps": 5937, "total_steps": 6840, "loss": 0.5306927561759949, "lr": 9.399893658555026e-07, "epoch": 1.7360725252229858, "percentage": 86.8, "elapsed_time": "8:41:30", "remaining_time": "1:19:19"} +{"current_steps": 5938, "total_steps": 6840, "loss": 0.5080308318138123, "lr": 9.379440121772876e-07, "epoch": 1.7363649656382512, "percentage": 
86.81, "elapsed_time": "8:41:36", "remaining_time": "1:19:14"} +{"current_steps": 5939, "total_steps": 6840, "loss": 0.5444519519805908, "lr": 9.359007766995609e-07, "epoch": 1.7366574060535167, "percentage": 86.83, "elapsed_time": "8:41:42", "remaining_time": "1:19:08"} +{"current_steps": 5940, "total_steps": 6840, "loss": 0.5353262424468994, "lr": 9.338596598999172e-07, "epoch": 1.736949846468782, "percentage": 86.84, "elapsed_time": "8:41:48", "remaining_time": "1:19:03"} +{"current_steps": 5941, "total_steps": 6840, "loss": 0.4766794443130493, "lr": 9.318206622554549e-07, "epoch": 1.7372422868840474, "percentage": 86.86, "elapsed_time": "8:41:55", "remaining_time": "1:18:58"} +{"current_steps": 5942, "total_steps": 6840, "loss": 0.4913482666015625, "lr": 9.29783784242777e-07, "epoch": 1.737534727299313, "percentage": 86.87, "elapsed_time": "8:42:02", "remaining_time": "1:18:53"} +{"current_steps": 5943, "total_steps": 6840, "loss": 0.47637009620666504, "lr": 9.277490263379918e-07, "epoch": 1.737827167714578, "percentage": 86.89, "elapsed_time": "8:42:08", "remaining_time": "1:18:48"} +{"current_steps": 5944, "total_steps": 6840, "loss": 0.5122126936912537, "lr": 9.25716389016712e-07, "epoch": 1.7381196081298436, "percentage": 86.9, "elapsed_time": "8:42:14", "remaining_time": "1:18:43"} +{"current_steps": 5945, "total_steps": 6840, "loss": 0.5263532400131226, "lr": 9.236858727540543e-07, "epoch": 1.738412048545109, "percentage": 86.92, "elapsed_time": "8:42:20", "remaining_time": "1:18:38"} +{"current_steps": 5946, "total_steps": 6840, "loss": 0.5214182734489441, "lr": 9.216574780246379e-07, "epoch": 1.7387044889603742, "percentage": 86.93, "elapsed_time": "8:42:26", "remaining_time": "1:18:33"} +{"current_steps": 5947, "total_steps": 6840, "loss": 0.5955429077148438, "lr": 9.196312053025891e-07, "epoch": 1.7389969293756398, "percentage": 86.94, "elapsed_time": "8:42:32", "remaining_time": "1:18:27"} +{"current_steps": 5948, "total_steps": 6840, "loss": 
0.4809807538986206, "lr": 9.176070550615379e-07, "epoch": 1.739289369790905, "percentage": 86.96, "elapsed_time": "8:42:37", "remaining_time": "1:18:22"} +{"current_steps": 5949, "total_steps": 6840, "loss": 0.4769969582557678, "lr": 9.155850277746148e-07, "epoch": 1.7395818102061704, "percentage": 86.97, "elapsed_time": "8:42:42", "remaining_time": "1:18:17"} +{"current_steps": 5950, "total_steps": 6840, "loss": 0.48527538776397705, "lr": 9.135651239144561e-07, "epoch": 1.739874250621436, "percentage": 86.99, "elapsed_time": "8:42:48", "remaining_time": "1:18:12"} +{"current_steps": 5951, "total_steps": 6840, "loss": 0.6703393459320068, "lr": 9.115473439532041e-07, "epoch": 1.7401666910367013, "percentage": 87.0, "elapsed_time": "8:42:53", "remaining_time": "1:18:06"} +{"current_steps": 5952, "total_steps": 6840, "loss": 0.5742951035499573, "lr": 9.095316883625016e-07, "epoch": 1.7404591314519666, "percentage": 87.02, "elapsed_time": "8:42:59", "remaining_time": "1:18:01"} +{"current_steps": 5953, "total_steps": 6840, "loss": 0.6285614967346191, "lr": 9.075181576134961e-07, "epoch": 1.7407515718672322, "percentage": 87.03, "elapsed_time": "8:43:04", "remaining_time": "1:17:56"} +{"current_steps": 5954, "total_steps": 6840, "loss": 0.5872488021850586, "lr": 9.055067521768379e-07, "epoch": 1.7410440122824973, "percentage": 87.05, "elapsed_time": "8:43:10", "remaining_time": "1:17:51"} +{"current_steps": 5955, "total_steps": 6840, "loss": 0.5483776330947876, "lr": 9.034974725226808e-07, "epoch": 1.7413364526977628, "percentage": 87.06, "elapsed_time": "8:43:15", "remaining_time": "1:17:45"} +{"current_steps": 5956, "total_steps": 6840, "loss": 0.4913061261177063, "lr": 9.014903191206825e-07, "epoch": 1.7416288931130284, "percentage": 87.08, "elapsed_time": "8:43:20", "remaining_time": "1:17:40"} +{"current_steps": 5957, "total_steps": 6840, "loss": 0.5431212186813354, "lr": 8.994852924400022e-07, "epoch": 1.7419213335282935, "percentage": 87.09, "elapsed_time": 
"8:43:25", "remaining_time": "1:17:35"} +{"current_steps": 5958, "total_steps": 6840, "loss": 0.5391141176223755, "lr": 8.974823929493015e-07, "epoch": 1.742213773943559, "percentage": 87.11, "elapsed_time": "8:43:30", "remaining_time": "1:17:29"} +{"current_steps": 5959, "total_steps": 6840, "loss": 0.48980265855789185, "lr": 8.954816211167483e-07, "epoch": 1.7425062143588244, "percentage": 87.12, "elapsed_time": "8:43:36", "remaining_time": "1:17:24"} +{"current_steps": 5960, "total_steps": 6840, "loss": 0.6747336387634277, "lr": 8.934829774100118e-07, "epoch": 1.7427986547740897, "percentage": 87.13, "elapsed_time": "8:43:41", "remaining_time": "1:17:19"} +{"current_steps": 5961, "total_steps": 6840, "loss": 0.4911282956600189, "lr": 8.914864622962582e-07, "epoch": 1.7430910951893552, "percentage": 87.15, "elapsed_time": "8:43:45", "remaining_time": "1:17:13"} +{"current_steps": 5962, "total_steps": 6840, "loss": 0.5863965153694153, "lr": 8.894920762421644e-07, "epoch": 1.7433835356046206, "percentage": 87.16, "elapsed_time": "8:43:51", "remaining_time": "1:17:08"} +{"current_steps": 5963, "total_steps": 6840, "loss": 0.5413792729377747, "lr": 8.87499819713904e-07, "epoch": 1.7436759760198859, "percentage": 87.18, "elapsed_time": "8:43:55", "remaining_time": "1:17:03"} +{"current_steps": 5964, "total_steps": 6840, "loss": 0.5288723707199097, "lr": 8.855096931771568e-07, "epoch": 1.7439684164351514, "percentage": 87.19, "elapsed_time": "8:43:59", "remaining_time": "1:16:57"} +{"current_steps": 5965, "total_steps": 6840, "loss": 0.5129783749580383, "lr": 8.835216970971006e-07, "epoch": 1.7442608568504168, "percentage": 87.21, "elapsed_time": "8:44:05", "remaining_time": "1:16:52"} +{"current_steps": 5966, "total_steps": 6840, "loss": 0.5606918334960938, "lr": 8.815358319384193e-07, "epoch": 1.744553297265682, "percentage": 87.22, "elapsed_time": "8:44:10", "remaining_time": "1:16:47"} +{"current_steps": 5967, "total_steps": 6840, "loss": 0.6277288198471069, "lr": 
8.79552098165296e-07, "epoch": 1.7448457376809476, "percentage": 87.24, "elapsed_time": "8:44:15", "remaining_time": "1:16:42"} +{"current_steps": 5968, "total_steps": 6840, "loss": 0.5390176773071289, "lr": 8.775704962414167e-07, "epoch": 1.7451381780962127, "percentage": 87.25, "elapsed_time": "8:44:20", "remaining_time": "1:16:36"} +{"current_steps": 5969, "total_steps": 6840, "loss": 0.680462121963501, "lr": 8.755910266299684e-07, "epoch": 1.7454306185114783, "percentage": 87.27, "elapsed_time": "8:44:25", "remaining_time": "1:16:31"} +{"current_steps": 5970, "total_steps": 6840, "loss": 0.5134397149085999, "lr": 8.736136897936398e-07, "epoch": 1.7457230589267436, "percentage": 87.28, "elapsed_time": "8:44:30", "remaining_time": "1:16:26"} +{"current_steps": 5971, "total_steps": 6840, "loss": 0.6280460357666016, "lr": 8.716384861946248e-07, "epoch": 1.746015499342009, "percentage": 87.3, "elapsed_time": "8:44:36", "remaining_time": "1:16:20"} +{"current_steps": 5972, "total_steps": 6840, "loss": 0.5425370931625366, "lr": 8.696654162946094e-07, "epoch": 1.7463079397572745, "percentage": 87.31, "elapsed_time": "8:44:41", "remaining_time": "1:16:15"} +{"current_steps": 5973, "total_steps": 6840, "loss": 0.5831055045127869, "lr": 8.676944805547882e-07, "epoch": 1.7466003801725398, "percentage": 87.32, "elapsed_time": "8:44:47", "remaining_time": "1:16:10"} +{"current_steps": 5974, "total_steps": 6840, "loss": 0.4615570306777954, "lr": 8.657256794358592e-07, "epoch": 1.7468928205878052, "percentage": 87.34, "elapsed_time": "8:44:53", "remaining_time": "1:16:05"} +{"current_steps": 5975, "total_steps": 6840, "loss": 0.5727440118789673, "lr": 8.637590133980145e-07, "epoch": 1.7471852610030707, "percentage": 87.35, "elapsed_time": "8:44:58", "remaining_time": "1:16:00"} +{"current_steps": 5976, "total_steps": 6840, "loss": 0.5652801990509033, "lr": 8.617944829009517e-07, "epoch": 1.747477701418336, "percentage": 87.37, "elapsed_time": "8:45:04", "remaining_time": 
"1:15:54"} +{"current_steps": 5977, "total_steps": 6840, "loss": 0.42455562949180603, "lr": 8.59832088403868e-07, "epoch": 1.7477701418336014, "percentage": 87.38, "elapsed_time": "8:45:08", "remaining_time": "1:15:49"} +{"current_steps": 5978, "total_steps": 6840, "loss": 0.526951789855957, "lr": 8.578718303654588e-07, "epoch": 1.748062582248867, "percentage": 87.4, "elapsed_time": "8:45:14", "remaining_time": "1:15:44"} +{"current_steps": 5979, "total_steps": 6840, "loss": 0.45547354221343994, "lr": 8.559137092439252e-07, "epoch": 1.7483550226641322, "percentage": 87.41, "elapsed_time": "8:45:19", "remaining_time": "1:15:38"} +{"current_steps": 5980, "total_steps": 6840, "loss": 0.5470790863037109, "lr": 8.539577254969667e-07, "epoch": 1.7486474630793976, "percentage": 87.43, "elapsed_time": "8:45:24", "remaining_time": "1:15:33"} +{"current_steps": 5981, "total_steps": 6840, "loss": 0.5608032941818237, "lr": 8.520038795817798e-07, "epoch": 1.748939903494663, "percentage": 87.44, "elapsed_time": "8:45:29", "remaining_time": "1:15:28"} +{"current_steps": 5982, "total_steps": 6840, "loss": 0.5243809223175049, "lr": 8.500521719550648e-07, "epoch": 1.7492323439099282, "percentage": 87.46, "elapsed_time": "8:45:34", "remaining_time": "1:15:23"} +{"current_steps": 5983, "total_steps": 6840, "loss": 0.5040958523750305, "lr": 8.481026030730222e-07, "epoch": 1.7495247843251938, "percentage": 87.47, "elapsed_time": "8:45:41", "remaining_time": "1:15:18"} +{"current_steps": 5984, "total_steps": 6840, "loss": 0.5026291012763977, "lr": 8.461551733913509e-07, "epoch": 1.749817224740459, "percentage": 87.49, "elapsed_time": "8:45:46", "remaining_time": "1:15:12"} +{"current_steps": 5985, "total_steps": 6840, "loss": 0.5273059010505676, "lr": 8.442098833652523e-07, "epoch": 1.7501096651557244, "percentage": 87.5, "elapsed_time": "8:45:52", "remaining_time": "1:15:07"} +{"current_steps": 5986, "total_steps": 6840, "loss": 0.5811910629272461, "lr": 8.42266733449425e-07, "epoch": 
1.75040210557099, "percentage": 87.51, "elapsed_time": "8:45:56", "remaining_time": "1:15:02"} +{"current_steps": 5987, "total_steps": 6840, "loss": 0.6078274250030518, "lr": 8.4032572409807e-07, "epoch": 1.7506945459862553, "percentage": 87.53, "elapsed_time": "8:46:02", "remaining_time": "1:14:56"} +{"current_steps": 5988, "total_steps": 6840, "loss": 0.5214031338691711, "lr": 8.383868557648833e-07, "epoch": 1.7509869864015206, "percentage": 87.54, "elapsed_time": "8:46:06", "remaining_time": "1:14:51"} +{"current_steps": 5989, "total_steps": 6840, "loss": 0.5464918613433838, "lr": 8.364501289030669e-07, "epoch": 1.7512794268167862, "percentage": 87.56, "elapsed_time": "8:46:11", "remaining_time": "1:14:46"} +{"current_steps": 5990, "total_steps": 6840, "loss": 0.48296278715133667, "lr": 8.345155439653175e-07, "epoch": 1.7515718672320515, "percentage": 87.57, "elapsed_time": "8:46:17", "remaining_time": "1:14:40"} +{"current_steps": 5991, "total_steps": 6840, "loss": 0.5441919565200806, "lr": 8.325831014038344e-07, "epoch": 1.7518643076473168, "percentage": 87.59, "elapsed_time": "8:46:22", "remaining_time": "1:14:35"} +{"current_steps": 5992, "total_steps": 6840, "loss": 0.4623905420303345, "lr": 8.306528016703097e-07, "epoch": 1.7521567480625824, "percentage": 87.6, "elapsed_time": "8:46:28", "remaining_time": "1:14:30"} +{"current_steps": 5993, "total_steps": 6840, "loss": 0.5671495795249939, "lr": 8.287246452159437e-07, "epoch": 1.7524491884778475, "percentage": 87.62, "elapsed_time": "8:46:34", "remaining_time": "1:14:25"} +{"current_steps": 5994, "total_steps": 6840, "loss": 0.5400685667991638, "lr": 8.267986324914278e-07, "epoch": 1.752741628893113, "percentage": 87.63, "elapsed_time": "8:46:39", "remaining_time": "1:14:19"} +{"current_steps": 5995, "total_steps": 6840, "loss": 0.4362148642539978, "lr": 8.24874763946959e-07, "epoch": 1.7530340693083786, "percentage": 87.65, "elapsed_time": "8:46:45", "remaining_time": "1:14:14"} +{"current_steps": 5996, 
"total_steps": 6840, "loss": 0.554877519607544, "lr": 8.229530400322283e-07, "epoch": 1.7533265097236437, "percentage": 87.66, "elapsed_time": "8:46:50", "remaining_time": "1:14:09"} +{"current_steps": 5997, "total_steps": 6840, "loss": 0.5239896774291992, "lr": 8.210334611964266e-07, "epoch": 1.7536189501389092, "percentage": 87.68, "elapsed_time": "8:46:57", "remaining_time": "1:14:04"} +{"current_steps": 5998, "total_steps": 6840, "loss": 0.4729669988155365, "lr": 8.191160278882438e-07, "epoch": 1.7539113905541746, "percentage": 87.69, "elapsed_time": "8:47:02", "remaining_time": "1:13:59"} +{"current_steps": 5999, "total_steps": 6840, "loss": 0.5449322462081909, "lr": 8.172007405558702e-07, "epoch": 1.7542038309694399, "percentage": 87.7, "elapsed_time": "8:47:08", "remaining_time": "1:13:54"} +{"current_steps": 6000, "total_steps": 6840, "loss": 0.5422194600105286, "lr": 8.1528759964699e-07, "epoch": 1.7544962713847054, "percentage": 87.72, "elapsed_time": "8:47:14", "remaining_time": "1:13:48"} +{"current_steps": 6001, "total_steps": 6840, "loss": 0.3854302763938904, "lr": 8.1337660560879e-07, "epoch": 1.7547887117999708, "percentage": 87.73, "elapsed_time": "8:47:23", "remaining_time": "1:13:44"} +{"current_steps": 6002, "total_steps": 6840, "loss": 0.4678449034690857, "lr": 8.114677588879549e-07, "epoch": 1.755081152215236, "percentage": 87.75, "elapsed_time": "8:47:29", "remaining_time": "1:13:38"} +{"current_steps": 6003, "total_steps": 6840, "loss": 0.5462471842765808, "lr": 8.095610599306614e-07, "epoch": 1.7553735926305016, "percentage": 87.76, "elapsed_time": "8:47:34", "remaining_time": "1:13:33"} +{"current_steps": 6004, "total_steps": 6840, "loss": 0.6314511299133301, "lr": 8.076565091825916e-07, "epoch": 1.755666033045767, "percentage": 87.78, "elapsed_time": "8:47:38", "remaining_time": "1:13:28"} +{"current_steps": 6005, "total_steps": 6840, "loss": 0.6373077630996704, "lr": 8.057541070889229e-07, "epoch": 1.7559584734610323, "percentage": 
87.79, "elapsed_time": "8:47:44", "remaining_time": "1:13:23"} +{"current_steps": 6006, "total_steps": 6840, "loss": 0.5279273986816406, "lr": 8.038538540943297e-07, "epoch": 1.7562509138762978, "percentage": 87.81, "elapsed_time": "8:47:50", "remaining_time": "1:13:17"} +{"current_steps": 6007, "total_steps": 6840, "loss": 0.5645443201065063, "lr": 8.019557506429843e-07, "epoch": 1.756543354291563, "percentage": 87.82, "elapsed_time": "8:47:55", "remaining_time": "1:13:12"} +{"current_steps": 6008, "total_steps": 6840, "loss": 0.552385151386261, "lr": 8.000597971785573e-07, "epoch": 1.7568357947068285, "percentage": 87.84, "elapsed_time": "8:48:01", "remaining_time": "1:13:07"} +{"current_steps": 6009, "total_steps": 6840, "loss": 0.5790541172027588, "lr": 7.981659941442154e-07, "epoch": 1.757128235122094, "percentage": 87.85, "elapsed_time": "8:48:05", "remaining_time": "1:13:01"} +{"current_steps": 6010, "total_steps": 6840, "loss": 0.550809383392334, "lr": 7.962743419826247e-07, "epoch": 1.7574206755373591, "percentage": 87.87, "elapsed_time": "8:48:09", "remaining_time": "1:12:56"} +{"current_steps": 6011, "total_steps": 6840, "loss": 0.4659814238548279, "lr": 7.943848411359479e-07, "epoch": 1.7577131159526247, "percentage": 87.88, "elapsed_time": "8:48:15", "remaining_time": "1:12:51"} +{"current_steps": 6012, "total_steps": 6840, "loss": 0.5099040269851685, "lr": 7.924974920458428e-07, "epoch": 1.75800555636789, "percentage": 87.89, "elapsed_time": "8:48:20", "remaining_time": "1:12:45"} +{"current_steps": 6013, "total_steps": 6840, "loss": 0.4819038510322571, "lr": 7.906122951534678e-07, "epoch": 1.7582979967831553, "percentage": 87.91, "elapsed_time": "8:48:26", "remaining_time": "1:12:40"} +{"current_steps": 6014, "total_steps": 6840, "loss": 0.4033840298652649, "lr": 7.887292508994737e-07, "epoch": 1.758590437198421, "percentage": 87.92, "elapsed_time": "8:48:32", "remaining_time": "1:12:35"} +{"current_steps": 6015, "total_steps": 6840, "loss": 
0.6316418647766113, "lr": 7.868483597240117e-07, "epoch": 1.7588828776136862, "percentage": 87.94, "elapsed_time": "8:48:35", "remaining_time": "1:12:30"} +{"current_steps": 6016, "total_steps": 6840, "loss": 0.5141040682792664, "lr": 7.84969622066728e-07, "epoch": 1.7591753180289516, "percentage": 87.95, "elapsed_time": "8:48:41", "remaining_time": "1:12:24"} +{"current_steps": 6017, "total_steps": 6840, "loss": 0.44808077812194824, "lr": 7.830930383667668e-07, "epoch": 1.759467758444217, "percentage": 87.97, "elapsed_time": "8:48:46", "remaining_time": "1:12:19"} +{"current_steps": 6018, "total_steps": 6840, "loss": 0.5661089420318604, "lr": 7.812186090627694e-07, "epoch": 1.7597601988594824, "percentage": 87.98, "elapsed_time": "8:48:51", "remaining_time": "1:12:14"} +{"current_steps": 6019, "total_steps": 6840, "loss": 0.487590491771698, "lr": 7.793463345928697e-07, "epoch": 1.7600526392747478, "percentage": 88.0, "elapsed_time": "8:48:57", "remaining_time": "1:12:09"} +{"current_steps": 6020, "total_steps": 6840, "loss": 0.5775022506713867, "lr": 7.774762153947024e-07, "epoch": 1.7603450796900133, "percentage": 88.01, "elapsed_time": "8:49:01", "remaining_time": "1:12:03"} +{"current_steps": 6021, "total_steps": 6840, "loss": 0.5714563131332397, "lr": 7.756082519053965e-07, "epoch": 1.7606375201052784, "percentage": 88.03, "elapsed_time": "8:49:07", "remaining_time": "1:11:58"} +{"current_steps": 6022, "total_steps": 6840, "loss": 0.6063593626022339, "lr": 7.73742444561576e-07, "epoch": 1.760929960520544, "percentage": 88.04, "elapsed_time": "8:49:11", "remaining_time": "1:11:53"} +{"current_steps": 6023, "total_steps": 6840, "loss": 0.48034632205963135, "lr": 7.718787937993622e-07, "epoch": 1.7612224009358093, "percentage": 88.06, "elapsed_time": "8:49:17", "remaining_time": "1:11:47"} +{"current_steps": 6024, "total_steps": 6840, "loss": 0.6003588438034058, "lr": 7.700173000543742e-07, "epoch": 1.7615148413510746, "percentage": 88.07, "elapsed_time": 
"8:49:22", "remaining_time": "1:11:42"} +{"current_steps": 6025, "total_steps": 6840, "loss": 0.42842140793800354, "lr": 7.681579637617209e-07, "epoch": 1.7618072817663402, "percentage": 88.08, "elapsed_time": "8:49:28", "remaining_time": "1:11:37"} +{"current_steps": 6026, "total_steps": 6840, "loss": 0.5235859155654907, "lr": 7.663007853560145e-07, "epoch": 1.7620997221816055, "percentage": 88.1, "elapsed_time": "8:49:33", "remaining_time": "1:11:32"} +{"current_steps": 6027, "total_steps": 6840, "loss": 0.47140365839004517, "lr": 7.644457652713566e-07, "epoch": 1.7623921625968708, "percentage": 88.11, "elapsed_time": "8:49:38", "remaining_time": "1:11:26"} +{"current_steps": 6028, "total_steps": 6840, "loss": 0.53680419921875, "lr": 7.625929039413483e-07, "epoch": 1.7626846030121364, "percentage": 88.13, "elapsed_time": "8:49:44", "remaining_time": "1:11:21"} +{"current_steps": 6029, "total_steps": 6840, "loss": 0.5280998349189758, "lr": 7.60742201799084e-07, "epoch": 1.7629770434274017, "percentage": 88.14, "elapsed_time": "8:49:50", "remaining_time": "1:11:16"} +{"current_steps": 6030, "total_steps": 6840, "loss": 0.49653276801109314, "lr": 7.588936592771545e-07, "epoch": 1.763269483842667, "percentage": 88.16, "elapsed_time": "8:49:56", "remaining_time": "1:11:11"} +{"current_steps": 6031, "total_steps": 6840, "loss": 0.511070966720581, "lr": 7.570472768076464e-07, "epoch": 1.7635619242579326, "percentage": 88.17, "elapsed_time": "8:50:01", "remaining_time": "1:11:05"} +{"current_steps": 6032, "total_steps": 6840, "loss": 0.6601030826568604, "lr": 7.552030548221379e-07, "epoch": 1.7638543646731977, "percentage": 88.19, "elapsed_time": "8:50:06", "remaining_time": "1:11:00"} +{"current_steps": 6033, "total_steps": 6840, "loss": 0.6216480731964111, "lr": 7.533609937517072e-07, "epoch": 1.7641468050884632, "percentage": 88.2, "elapsed_time": "8:50:10", "remaining_time": "1:10:55"} +{"current_steps": 6034, "total_steps": 6840, "loss": 0.7237618565559387, "lr": 
7.515210940269224e-07, "epoch": 1.7644392455037288, "percentage": 88.22, "elapsed_time": "8:50:15", "remaining_time": "1:10:49"} +{"current_steps": 6035, "total_steps": 6840, "loss": 0.4979498088359833, "lr": 7.496833560778527e-07, "epoch": 1.7647316859189939, "percentage": 88.23, "elapsed_time": "8:50:19", "remaining_time": "1:10:44"} +{"current_steps": 6036, "total_steps": 6840, "loss": 0.49408191442489624, "lr": 7.478477803340533e-07, "epoch": 1.7650241263342594, "percentage": 88.25, "elapsed_time": "8:50:23", "remaining_time": "1:10:38"} +{"current_steps": 6037, "total_steps": 6840, "loss": 0.524259626865387, "lr": 7.460143672245823e-07, "epoch": 1.7653165667495248, "percentage": 88.26, "elapsed_time": "8:50:28", "remaining_time": "1:10:33"} +{"current_steps": 6038, "total_steps": 6840, "loss": 0.625320315361023, "lr": 7.441831171779878e-07, "epoch": 1.76560900716479, "percentage": 88.27, "elapsed_time": "8:50:33", "remaining_time": "1:10:28"} +{"current_steps": 6039, "total_steps": 6840, "loss": 0.5971028804779053, "lr": 7.42354030622312e-07, "epoch": 1.7659014475800556, "percentage": 88.29, "elapsed_time": "8:50:38", "remaining_time": "1:10:22"} +{"current_steps": 6040, "total_steps": 6840, "loss": 0.48935002088546753, "lr": 7.405271079850951e-07, "epoch": 1.766193887995321, "percentage": 88.3, "elapsed_time": "8:50:44", "remaining_time": "1:10:17"} +{"current_steps": 6041, "total_steps": 6840, "loss": 0.46346336603164673, "lr": 7.387023496933687e-07, "epoch": 1.7664863284105863, "percentage": 88.32, "elapsed_time": "8:50:50", "remaining_time": "1:10:12"} +{"current_steps": 6042, "total_steps": 6840, "loss": 0.5135314464569092, "lr": 7.368797561736574e-07, "epoch": 1.7667787688258518, "percentage": 88.33, "elapsed_time": "8:50:56", "remaining_time": "1:10:07"} +{"current_steps": 6043, "total_steps": 6840, "loss": 0.45815128087997437, "lr": 7.350593278519824e-07, "epoch": 1.7670712092411172, "percentage": 88.35, "elapsed_time": "8:51:02", "remaining_time": 
"1:10:02"} +{"current_steps": 6044, "total_steps": 6840, "loss": 0.5663015246391296, "lr": 7.332410651538591e-07, "epoch": 1.7673636496563825, "percentage": 88.36, "elapsed_time": "8:51:06", "remaining_time": "1:09:56"} +{"current_steps": 6045, "total_steps": 6840, "loss": 0.5323490500450134, "lr": 7.314249685042929e-07, "epoch": 1.767656090071648, "percentage": 88.38, "elapsed_time": "8:51:10", "remaining_time": "1:09:51"} +{"current_steps": 6046, "total_steps": 6840, "loss": 0.5489768981933594, "lr": 7.296110383277866e-07, "epoch": 1.7679485304869131, "percentage": 88.39, "elapsed_time": "8:51:15", "remaining_time": "1:09:46"} +{"current_steps": 6047, "total_steps": 6840, "loss": 0.5951086282730103, "lr": 7.277992750483364e-07, "epoch": 1.7682409709021787, "percentage": 88.41, "elapsed_time": "8:51:20", "remaining_time": "1:09:40"} +{"current_steps": 6048, "total_steps": 6840, "loss": 0.48228102922439575, "lr": 7.259896790894271e-07, "epoch": 1.7685334113174442, "percentage": 88.42, "elapsed_time": "8:51:26", "remaining_time": "1:09:35"} +{"current_steps": 6049, "total_steps": 6840, "loss": 0.6318891644477844, "lr": 7.241822508740448e-07, "epoch": 1.7688258517327093, "percentage": 88.44, "elapsed_time": "8:51:31", "remaining_time": "1:09:30"} +{"current_steps": 6050, "total_steps": 6840, "loss": 0.4966656267642975, "lr": 7.223769908246636e-07, "epoch": 1.769118292147975, "percentage": 88.45, "elapsed_time": "8:51:35", "remaining_time": "1:09:24"} +{"current_steps": 6051, "total_steps": 6840, "loss": 0.5645290613174438, "lr": 7.205738993632516e-07, "epoch": 1.7694107325632402, "percentage": 88.46, "elapsed_time": "8:51:40", "remaining_time": "1:09:19"} +{"current_steps": 6052, "total_steps": 6840, "loss": 0.560075044631958, "lr": 7.187729769112717e-07, "epoch": 1.7697031729785055, "percentage": 88.48, "elapsed_time": "8:51:45", "remaining_time": "1:09:14"} +{"current_steps": 6053, "total_steps": 6840, "loss": 0.6375163793563843, "lr": 7.169742238896771e-07, 
"epoch": 1.769995613393771, "percentage": 88.49, "elapsed_time": "8:51:49", "remaining_time": "1:09:08"} +{"current_steps": 6054, "total_steps": 6840, "loss": 0.5620392560958862, "lr": 7.15177640718916e-07, "epoch": 1.7702880538090364, "percentage": 88.51, "elapsed_time": "8:51:56", "remaining_time": "1:09:03"} +{"current_steps": 6055, "total_steps": 6840, "loss": 0.5382653474807739, "lr": 7.133832278189301e-07, "epoch": 1.7705804942243017, "percentage": 88.52, "elapsed_time": "8:52:00", "remaining_time": "1:08:58"} +{"current_steps": 6056, "total_steps": 6840, "loss": 0.502597451210022, "lr": 7.115909856091497e-07, "epoch": 1.7708729346395673, "percentage": 88.54, "elapsed_time": "8:52:06", "remaining_time": "1:08:53"} +{"current_steps": 6057, "total_steps": 6840, "loss": 0.5876599550247192, "lr": 7.098009145085016e-07, "epoch": 1.7711653750548326, "percentage": 88.55, "elapsed_time": "8:52:11", "remaining_time": "1:08:47"} +{"current_steps": 6058, "total_steps": 6840, "loss": 0.5164280533790588, "lr": 7.080130149354048e-07, "epoch": 1.771457815470098, "percentage": 88.57, "elapsed_time": "8:52:17", "remaining_time": "1:08:42"} +{"current_steps": 6059, "total_steps": 6840, "loss": 0.5192137360572815, "lr": 7.062272873077691e-07, "epoch": 1.7717502558853635, "percentage": 88.58, "elapsed_time": "8:52:22", "remaining_time": "1:08:37"} +{"current_steps": 6060, "total_steps": 6840, "loss": 0.5298370122909546, "lr": 7.044437320429987e-07, "epoch": 1.7720426963006286, "percentage": 88.6, "elapsed_time": "8:52:26", "remaining_time": "1:08:31"} +{"current_steps": 6061, "total_steps": 6840, "loss": 0.5099462270736694, "lr": 7.026623495579876e-07, "epoch": 1.7723351367158942, "percentage": 88.61, "elapsed_time": "8:52:32", "remaining_time": "1:08:26"} +{"current_steps": 6062, "total_steps": 6840, "loss": 0.6061269640922546, "lr": 7.00883140269123e-07, "epoch": 1.7726275771311595, "percentage": 88.63, "elapsed_time": "8:52:36", "remaining_time": "1:08:21"} +{"current_steps": 
6063, "total_steps": 6840, "loss": 0.683641254901886, "lr": 6.991061045922854e-07, "epoch": 1.7729200175464248, "percentage": 88.64, "elapsed_time": "8:52:42", "remaining_time": "1:08:16"} +{"current_steps": 6064, "total_steps": 6840, "loss": 0.6294830441474915, "lr": 6.973312429428458e-07, "epoch": 1.7732124579616904, "percentage": 88.65, "elapsed_time": "8:52:45", "remaining_time": "1:08:10"} +{"current_steps": 6065, "total_steps": 6840, "loss": 0.40493613481521606, "lr": 6.95558555735667e-07, "epoch": 1.7735048983769557, "percentage": 88.67, "elapsed_time": "8:52:51", "remaining_time": "1:08:05"} +{"current_steps": 6066, "total_steps": 6840, "loss": 0.501255452632904, "lr": 6.93788043385103e-07, "epoch": 1.773797338792221, "percentage": 88.68, "elapsed_time": "8:52:57", "remaining_time": "1:08:00"} +{"current_steps": 6067, "total_steps": 6840, "loss": 0.6004104614257812, "lr": 6.920197063050038e-07, "epoch": 1.7740897792074866, "percentage": 88.7, "elapsed_time": "8:53:03", "remaining_time": "1:07:55"} +{"current_steps": 6068, "total_steps": 6840, "loss": 0.48683321475982666, "lr": 6.902535449087023e-07, "epoch": 1.7743822196227519, "percentage": 88.71, "elapsed_time": "8:53:09", "remaining_time": "1:07:49"} +{"current_steps": 6069, "total_steps": 6840, "loss": 0.6048111319541931, "lr": 6.884895596090302e-07, "epoch": 1.7746746600380172, "percentage": 88.73, "elapsed_time": "8:53:14", "remaining_time": "1:07:44"} +{"current_steps": 6070, "total_steps": 6840, "loss": 0.5532732009887695, "lr": 6.867277508183101e-07, "epoch": 1.7749671004532828, "percentage": 88.74, "elapsed_time": "8:53:19", "remaining_time": "1:07:39"} +{"current_steps": 6071, "total_steps": 6840, "loss": 0.544552206993103, "lr": 6.849681189483515e-07, "epoch": 1.7752595408685479, "percentage": 88.76, "elapsed_time": "8:53:23", "remaining_time": "1:07:33"} +{"current_steps": 6072, "total_steps": 6840, "loss": 0.5114158391952515, "lr": 6.832106644104586e-07, "epoch": 1.7755519812838134, 
"percentage": 88.77, "elapsed_time": "8:53:28", "remaining_time": "1:07:28"} +{"current_steps": 6073, "total_steps": 6840, "loss": 0.45777493715286255, "lr": 6.814553876154273e-07, "epoch": 1.775844421699079, "percentage": 88.79, "elapsed_time": "8:53:32", "remaining_time": "1:07:23"} +{"current_steps": 6074, "total_steps": 6840, "loss": 0.5449005365371704, "lr": 6.797022889735405e-07, "epoch": 1.776136862114344, "percentage": 88.8, "elapsed_time": "8:53:37", "remaining_time": "1:07:17"} +{"current_steps": 6075, "total_steps": 6840, "loss": 0.6308485865592957, "lr": 6.779513688945749e-07, "epoch": 1.7764293025296096, "percentage": 88.82, "elapsed_time": "8:53:42", "remaining_time": "1:07:12"} +{"current_steps": 6076, "total_steps": 6840, "loss": 0.5904842019081116, "lr": 6.762026277877986e-07, "epoch": 1.776721742944875, "percentage": 88.83, "elapsed_time": "8:53:47", "remaining_time": "1:07:07"} +{"current_steps": 6077, "total_steps": 6840, "loss": 0.6681115627288818, "lr": 6.744560660619681e-07, "epoch": 1.7770141833601403, "percentage": 88.85, "elapsed_time": "8:53:52", "remaining_time": "1:07:01"} +{"current_steps": 6078, "total_steps": 6840, "loss": 0.5084429979324341, "lr": 6.727116841253334e-07, "epoch": 1.7773066237754058, "percentage": 88.86, "elapsed_time": "8:53:57", "remaining_time": "1:06:56"} +{"current_steps": 6079, "total_steps": 6840, "loss": 0.5705291032791138, "lr": 6.709694823856305e-07, "epoch": 1.7775990641906712, "percentage": 88.87, "elapsed_time": "8:54:03", "remaining_time": "1:06:51"} +{"current_steps": 6080, "total_steps": 6840, "loss": 0.6481744050979614, "lr": 6.692294612500894e-07, "epoch": 1.7778915046059365, "percentage": 88.89, "elapsed_time": "8:54:08", "remaining_time": "1:06:46"} +{"current_steps": 6081, "total_steps": 6840, "loss": 0.5236573815345764, "lr": 6.67491621125429e-07, "epoch": 1.778183945021202, "percentage": 88.9, "elapsed_time": "8:54:15", "remaining_time": "1:06:40"} +{"current_steps": 6082, "total_steps": 6840, 
"loss": 0.5169326663017273, "lr": 6.657559624178611e-07, "epoch": 1.7784763854364674, "percentage": 88.92, "elapsed_time": "8:54:19", "remaining_time": "1:06:35"} +{"current_steps": 6083, "total_steps": 6840, "loss": 0.5304254293441772, "lr": 6.640224855330824e-07, "epoch": 1.7787688258517327, "percentage": 88.93, "elapsed_time": "8:54:24", "remaining_time": "1:06:30"} +{"current_steps": 6084, "total_steps": 6840, "loss": 0.457882285118103, "lr": 6.622911908762852e-07, "epoch": 1.7790612662669982, "percentage": 88.95, "elapsed_time": "8:54:29", "remaining_time": "1:06:24"} +{"current_steps": 6085, "total_steps": 6840, "loss": 0.48427796363830566, "lr": 6.605620788521472e-07, "epoch": 1.7793537066822633, "percentage": 88.96, "elapsed_time": "8:54:35", "remaining_time": "1:06:19"} +{"current_steps": 6086, "total_steps": 6840, "loss": 0.598512589931488, "lr": 6.588351498648382e-07, "epoch": 1.7796461470975289, "percentage": 88.98, "elapsed_time": "8:54:41", "remaining_time": "1:06:14"} +{"current_steps": 6087, "total_steps": 6840, "loss": 0.5065094232559204, "lr": 6.571104043180188e-07, "epoch": 1.7799385875127944, "percentage": 88.99, "elapsed_time": "8:54:47", "remaining_time": "1:06:09"} +{"current_steps": 6088, "total_steps": 6840, "loss": 0.5493142008781433, "lr": 6.553878426148364e-07, "epoch": 1.7802310279280595, "percentage": 89.01, "elapsed_time": "8:54:51", "remaining_time": "1:06:03"} +{"current_steps": 6089, "total_steps": 6840, "loss": 0.40520578622817993, "lr": 6.5366746515793e-07, "epoch": 1.780523468343325, "percentage": 89.02, "elapsed_time": "8:54:55", "remaining_time": "1:05:58"} +{"current_steps": 6090, "total_steps": 6840, "loss": 0.5416547656059265, "lr": 6.51949272349427e-07, "epoch": 1.7808159087585904, "percentage": 89.04, "elapsed_time": "8:55:01", "remaining_time": "1:05:53"} +{"current_steps": 6091, "total_steps": 6840, "loss": 0.4531989097595215, "lr": 6.502332645909438e-07, "epoch": 1.7811083491738557, "percentage": 89.05, "elapsed_time": 
"8:55:06", "remaining_time": "1:05:48"} +{"current_steps": 6092, "total_steps": 6840, "loss": 0.6385304927825928, "lr": 6.485194422835872e-07, "epoch": 1.7814007895891213, "percentage": 89.06, "elapsed_time": "8:55:11", "remaining_time": "1:05:42"} +{"current_steps": 6093, "total_steps": 6840, "loss": 0.5503095388412476, "lr": 6.468078058279537e-07, "epoch": 1.7816932300043866, "percentage": 89.08, "elapsed_time": "8:55:17", "remaining_time": "1:05:37"} +{"current_steps": 6094, "total_steps": 6840, "loss": 0.5184366703033447, "lr": 6.450983556241264e-07, "epoch": 1.781985670419652, "percentage": 89.09, "elapsed_time": "8:55:21", "remaining_time": "1:05:32"} +{"current_steps": 6095, "total_steps": 6840, "loss": 0.5211689472198486, "lr": 6.433910920716813e-07, "epoch": 1.7822781108349175, "percentage": 89.11, "elapsed_time": "8:55:25", "remaining_time": "1:05:26"} +{"current_steps": 6096, "total_steps": 6840, "loss": 0.7357909679412842, "lr": 6.416860155696781e-07, "epoch": 1.7825705512501828, "percentage": 89.12, "elapsed_time": "8:55:30", "remaining_time": "1:05:21"} +{"current_steps": 6097, "total_steps": 6840, "loss": 0.6283953189849854, "lr": 6.399831265166689e-07, "epoch": 1.7828629916654481, "percentage": 89.14, "elapsed_time": "8:55:35", "remaining_time": "1:05:16"} +{"current_steps": 6098, "total_steps": 6840, "loss": 0.45040953159332275, "lr": 6.382824253106945e-07, "epoch": 1.7831554320807137, "percentage": 89.15, "elapsed_time": "8:55:40", "remaining_time": "1:05:10"} +{"current_steps": 6099, "total_steps": 6840, "loss": 0.5056609511375427, "lr": 6.365839123492834e-07, "epoch": 1.7834478724959788, "percentage": 89.17, "elapsed_time": "8:55:45", "remaining_time": "1:05:05"} +{"current_steps": 6100, "total_steps": 6840, "loss": 0.4940416216850281, "lr": 6.348875880294536e-07, "epoch": 1.7837403129112444, "percentage": 89.18, "elapsed_time": "8:55:51", "remaining_time": "1:05:00"} +{"current_steps": 6101, "total_steps": 6840, "loss": 0.45796072483062744, 
"lr": 6.33193452747708e-07, "epoch": 1.7840327533265097, "percentage": 89.2, "elapsed_time": "8:56:00", "remaining_time": "1:04:55"} +{"current_steps": 6102, "total_steps": 6840, "loss": 0.4828432500362396, "lr": 6.315015069000408e-07, "epoch": 1.784325193741775, "percentage": 89.21, "elapsed_time": "8:56:06", "remaining_time": "1:04:50"} +{"current_steps": 6103, "total_steps": 6840, "loss": 0.5564515590667725, "lr": 6.298117508819357e-07, "epoch": 1.7846176341570406, "percentage": 89.23, "elapsed_time": "8:56:11", "remaining_time": "1:04:45"} +{"current_steps": 6104, "total_steps": 6840, "loss": 0.5160977840423584, "lr": 6.281241850883624e-07, "epoch": 1.7849100745723059, "percentage": 89.24, "elapsed_time": "8:56:16", "remaining_time": "1:04:39"} +{"current_steps": 6105, "total_steps": 6840, "loss": 0.585543155670166, "lr": 6.264388099137775e-07, "epoch": 1.7852025149875712, "percentage": 89.25, "elapsed_time": "8:56:22", "remaining_time": "1:04:34"} +{"current_steps": 6106, "total_steps": 6840, "loss": 0.5377194881439209, "lr": 6.247556257521303e-07, "epoch": 1.7854949554028368, "percentage": 89.27, "elapsed_time": "8:56:27", "remaining_time": "1:04:29"} +{"current_steps": 6107, "total_steps": 6840, "loss": 0.46788060665130615, "lr": 6.230746329968518e-07, "epoch": 1.785787395818102, "percentage": 89.28, "elapsed_time": "8:56:31", "remaining_time": "1:04:23"} +{"current_steps": 6108, "total_steps": 6840, "loss": 0.511722207069397, "lr": 6.213958320408664e-07, "epoch": 1.7860798362333674, "percentage": 89.3, "elapsed_time": "8:56:37", "remaining_time": "1:04:18"} +{"current_steps": 6109, "total_steps": 6840, "loss": 0.5609079599380493, "lr": 6.197192232765814e-07, "epoch": 1.786372276648633, "percentage": 89.31, "elapsed_time": "8:56:41", "remaining_time": "1:04:13"} +{"current_steps": 6110, "total_steps": 6840, "loss": 0.47641855478286743, "lr": 6.180448070958955e-07, "epoch": 1.786664717063898, "percentage": 89.33, "elapsed_time": "8:56:46", "remaining_time": 
"1:04:07"} +{"current_steps": 6111, "total_steps": 6840, "loss": 0.4209919273853302, "lr": 6.163725838901946e-07, "epoch": 1.7869571574791636, "percentage": 89.34, "elapsed_time": "8:56:51", "remaining_time": "1:04:02"} +{"current_steps": 6112, "total_steps": 6840, "loss": 0.6012829542160034, "lr": 6.147025540503459e-07, "epoch": 1.7872495978944292, "percentage": 89.36, "elapsed_time": "8:56:57", "remaining_time": "1:03:57"} +{"current_steps": 6113, "total_steps": 6840, "loss": 0.6112918853759766, "lr": 6.130347179667129e-07, "epoch": 1.7875420383096943, "percentage": 89.37, "elapsed_time": "8:57:01", "remaining_time": "1:03:52"} +{"current_steps": 6114, "total_steps": 6840, "loss": 0.6370030641555786, "lr": 6.113690760291402e-07, "epoch": 1.7878344787249598, "percentage": 89.39, "elapsed_time": "8:57:06", "remaining_time": "1:03:46"} +{"current_steps": 6115, "total_steps": 6840, "loss": 0.5385129451751709, "lr": 6.097056286269631e-07, "epoch": 1.7881269191402251, "percentage": 89.4, "elapsed_time": "8:57:11", "remaining_time": "1:03:41"} +{"current_steps": 6116, "total_steps": 6840, "loss": 0.4707196354866028, "lr": 6.080443761490007e-07, "epoch": 1.7884193595554905, "percentage": 89.42, "elapsed_time": "8:57:17", "remaining_time": "1:03:36"} +{"current_steps": 6117, "total_steps": 6840, "loss": 0.5361602306365967, "lr": 6.063853189835611e-07, "epoch": 1.788711799970756, "percentage": 89.43, "elapsed_time": "8:57:21", "remaining_time": "1:03:30"} +{"current_steps": 6118, "total_steps": 6840, "loss": 0.48841261863708496, "lr": 6.047284575184398e-07, "epoch": 1.7890042403860213, "percentage": 89.44, "elapsed_time": "8:57:27", "remaining_time": "1:03:25"} +{"current_steps": 6119, "total_steps": 6840, "loss": 0.47491973638534546, "lr": 6.030737921409169e-07, "epoch": 1.7892966808012867, "percentage": 89.46, "elapsed_time": "8:57:32", "remaining_time": "1:03:20"} +{"current_steps": 6120, "total_steps": 6840, "loss": 0.4579542875289917, "lr": 6.014213232377608e-07, 
"epoch": 1.7895891212165522, "percentage": 89.47, "elapsed_time": "8:57:37", "remaining_time": "1:03:14"} +{"current_steps": 6121, "total_steps": 6840, "loss": 0.4517485499382019, "lr": 5.997710511952259e-07, "epoch": 1.7898815616318176, "percentage": 89.49, "elapsed_time": "8:57:42", "remaining_time": "1:03:09"} +{"current_steps": 6122, "total_steps": 6840, "loss": 0.5656695365905762, "lr": 5.981229763990559e-07, "epoch": 1.7901740020470829, "percentage": 89.5, "elapsed_time": "8:57:46", "remaining_time": "1:03:04"} +{"current_steps": 6123, "total_steps": 6840, "loss": 0.5000064373016357, "lr": 5.964770992344737e-07, "epoch": 1.7904664424623484, "percentage": 89.52, "elapsed_time": "8:57:53", "remaining_time": "1:02:59"} +{"current_steps": 6124, "total_steps": 6840, "loss": 0.4823925495147705, "lr": 5.948334200861927e-07, "epoch": 1.7907588828776135, "percentage": 89.53, "elapsed_time": "8:57:58", "remaining_time": "1:02:53"} +{"current_steps": 6125, "total_steps": 6840, "loss": 0.45079779624938965, "lr": 5.931919393384189e-07, "epoch": 1.791051323292879, "percentage": 89.55, "elapsed_time": "8:58:02", "remaining_time": "1:02:48"} +{"current_steps": 6126, "total_steps": 6840, "loss": 0.5887237787246704, "lr": 5.915526573748331e-07, "epoch": 1.7913437637081446, "percentage": 89.56, "elapsed_time": "8:58:08", "remaining_time": "1:02:43"} +{"current_steps": 6127, "total_steps": 6840, "loss": 0.5625102519989014, "lr": 5.8991557457861e-07, "epoch": 1.7916362041234097, "percentage": 89.58, "elapsed_time": "8:58:14", "remaining_time": "1:02:38"} +{"current_steps": 6128, "total_steps": 6840, "loss": 0.5290789604187012, "lr": 5.882806913324079e-07, "epoch": 1.7919286445386753, "percentage": 89.59, "elapsed_time": "8:58:19", "remaining_time": "1:02:32"} +{"current_steps": 6129, "total_steps": 6840, "loss": 0.47694748640060425, "lr": 5.86648008018369e-07, "epoch": 1.7922210849539406, "percentage": 89.61, "elapsed_time": "8:58:24", "remaining_time": "1:02:27"} 
+{"current_steps": 6130, "total_steps": 6840, "loss": 0.6297628879547119, "lr": 5.850175250181244e-07, "epoch": 1.792513525369206, "percentage": 89.62, "elapsed_time": "8:58:29", "remaining_time": "1:02:22"} +{"current_steps": 6131, "total_steps": 6840, "loss": 0.5748087167739868, "lr": 5.833892427127908e-07, "epoch": 1.7928059657844715, "percentage": 89.63, "elapsed_time": "8:58:35", "remaining_time": "1:02:16"} +{"current_steps": 6132, "total_steps": 6840, "loss": 0.552059531211853, "lr": 5.817631614829666e-07, "epoch": 1.7930984061997368, "percentage": 89.65, "elapsed_time": "8:58:41", "remaining_time": "1:02:11"} +{"current_steps": 6133, "total_steps": 6840, "loss": 0.5980287790298462, "lr": 5.801392817087392e-07, "epoch": 1.7933908466150021, "percentage": 89.66, "elapsed_time": "8:58:47", "remaining_time": "1:02:06"} +{"current_steps": 6134, "total_steps": 6840, "loss": 0.5682743191719055, "lr": 5.785176037696815e-07, "epoch": 1.7936832870302677, "percentage": 89.68, "elapsed_time": "8:58:52", "remaining_time": "1:02:01"} +{"current_steps": 6135, "total_steps": 6840, "loss": 0.6907520294189453, "lr": 5.768981280448494e-07, "epoch": 1.793975727445533, "percentage": 89.69, "elapsed_time": "8:58:59", "remaining_time": "1:01:56"} +{"current_steps": 6136, "total_steps": 6840, "loss": 0.5939712524414062, "lr": 5.752808549127875e-07, "epoch": 1.7942681678607983, "percentage": 89.71, "elapsed_time": "8:59:03", "remaining_time": "1:01:50"} +{"current_steps": 6137, "total_steps": 6840, "loss": 0.5169910192489624, "lr": 5.736657847515215e-07, "epoch": 1.794560608276064, "percentage": 89.72, "elapsed_time": "8:59:08", "remaining_time": "1:01:45"} +{"current_steps": 6138, "total_steps": 6840, "loss": 0.5795155167579651, "lr": 5.720529179385659e-07, "epoch": 1.794853048691329, "percentage": 89.74, "elapsed_time": "8:59:12", "remaining_time": "1:01:40"} +{"current_steps": 6139, "total_steps": 6840, "loss": 0.4296284317970276, "lr": 5.704422548509181e-07, "epoch": 
1.7951454891065945, "percentage": 89.75, "elapsed_time": "8:59:18", "remaining_time": "1:01:34"} +{"current_steps": 6140, "total_steps": 6840, "loss": 0.5175303220748901, "lr": 5.688337958650603e-07, "epoch": 1.7954379295218599, "percentage": 89.77, "elapsed_time": "8:59:23", "remaining_time": "1:01:29"} +{"current_steps": 6141, "total_steps": 6840, "loss": 0.49900466203689575, "lr": 5.672275413569605e-07, "epoch": 1.7957303699371252, "percentage": 89.78, "elapsed_time": "8:59:28", "remaining_time": "1:01:24"} +{"current_steps": 6142, "total_steps": 6840, "loss": 0.5047665238380432, "lr": 5.65623491702072e-07, "epoch": 1.7960228103523908, "percentage": 89.8, "elapsed_time": "8:59:34", "remaining_time": "1:01:19"} +{"current_steps": 6143, "total_steps": 6840, "loss": 0.5309686660766602, "lr": 5.64021647275329e-07, "epoch": 1.796315250767656, "percentage": 89.81, "elapsed_time": "8:59:38", "remaining_time": "1:01:13"} +{"current_steps": 6144, "total_steps": 6840, "loss": 0.7270892858505249, "lr": 5.624220084511544e-07, "epoch": 1.7966076911829214, "percentage": 89.82, "elapsed_time": "8:59:45", "remaining_time": "1:01:08"} +{"current_steps": 6145, "total_steps": 6840, "loss": 0.515272319316864, "lr": 5.608245756034536e-07, "epoch": 1.796900131598187, "percentage": 89.84, "elapsed_time": "8:59:51", "remaining_time": "1:01:03"} +{"current_steps": 6146, "total_steps": 6840, "loss": 0.4919237196445465, "lr": 5.592293491056167e-07, "epoch": 1.7971925720134523, "percentage": 89.85, "elapsed_time": "8:59:56", "remaining_time": "1:00:58"} +{"current_steps": 6147, "total_steps": 6840, "loss": 0.5812259316444397, "lr": 5.576363293305187e-07, "epoch": 1.7974850124287176, "percentage": 89.87, "elapsed_time": "9:00:00", "remaining_time": "1:00:52"} +{"current_steps": 6148, "total_steps": 6840, "loss": 0.434345006942749, "lr": 5.560455166505185e-07, "epoch": 1.7977774528439832, "percentage": 89.88, "elapsed_time": "9:00:06", "remaining_time": "1:00:47"} +{"current_steps": 6149, 
"total_steps": 6840, "loss": 0.4670771360397339, "lr": 5.544569114374588e-07, "epoch": 1.7980698932592483, "percentage": 89.9, "elapsed_time": "9:00:09", "remaining_time": "1:00:42"} +{"current_steps": 6150, "total_steps": 6840, "loss": 0.5867526531219482, "lr": 5.528705140626667e-07, "epoch": 1.7983623336745138, "percentage": 89.91, "elapsed_time": "9:00:16", "remaining_time": "1:00:36"} +{"current_steps": 6151, "total_steps": 6840, "loss": 0.5453605651855469, "lr": 5.512863248969513e-07, "epoch": 1.7986547740897794, "percentage": 89.93, "elapsed_time": "9:00:22", "remaining_time": "1:00:31"} +{"current_steps": 6152, "total_steps": 6840, "loss": 0.5535463690757751, "lr": 5.497043443106087e-07, "epoch": 1.7989472145050445, "percentage": 89.94, "elapsed_time": "9:00:27", "remaining_time": "1:00:26"} +{"current_steps": 6153, "total_steps": 6840, "loss": 0.6250847578048706, "lr": 5.481245726734174e-07, "epoch": 1.79923965492031, "percentage": 89.96, "elapsed_time": "9:00:31", "remaining_time": "1:00:21"} +{"current_steps": 6154, "total_steps": 6840, "loss": 0.45504581928253174, "lr": 5.465470103546399e-07, "epoch": 1.7995320953355753, "percentage": 89.97, "elapsed_time": "9:00:36", "remaining_time": "1:00:15"} +{"current_steps": 6155, "total_steps": 6840, "loss": 0.6192604303359985, "lr": 5.449716577230202e-07, "epoch": 1.7998245357508407, "percentage": 89.99, "elapsed_time": "9:00:42", "remaining_time": "1:00:10"} +{"current_steps": 6156, "total_steps": 6840, "loss": 0.5624358654022217, "lr": 5.433985151467869e-07, "epoch": 1.8001169761661062, "percentage": 90.0, "elapsed_time": "9:00:48", "remaining_time": "1:00:05"} +{"current_steps": 6157, "total_steps": 6840, "loss": 0.5759576559066772, "lr": 5.418275829936537e-07, "epoch": 1.8004094165813715, "percentage": 90.01, "elapsed_time": "9:00:52", "remaining_time": "1:00:00"} +{"current_steps": 6158, "total_steps": 6840, "loss": 0.5710508227348328, "lr": 5.402588616308169e-07, "epoch": 1.8007018569966369, "percentage": 
90.03, "elapsed_time": "9:00:57", "remaining_time": "0:59:54"} +{"current_steps": 6159, "total_steps": 6840, "loss": 0.6146141290664673, "lr": 5.386923514249542e-07, "epoch": 1.8009942974119024, "percentage": 90.04, "elapsed_time": "9:01:02", "remaining_time": "0:59:49"} +{"current_steps": 6160, "total_steps": 6840, "loss": 0.425834983587265, "lr": 5.371280527422296e-07, "epoch": 1.8012867378271677, "percentage": 90.06, "elapsed_time": "9:01:07", "remaining_time": "0:59:44"} +{"current_steps": 6161, "total_steps": 6840, "loss": 0.4353194236755371, "lr": 5.35565965948287e-07, "epoch": 1.801579178242433, "percentage": 90.07, "elapsed_time": "9:01:12", "remaining_time": "0:59:38"} +{"current_steps": 6162, "total_steps": 6840, "loss": 0.7202355861663818, "lr": 5.340060914082546e-07, "epoch": 1.8018716186576986, "percentage": 90.09, "elapsed_time": "9:01:17", "remaining_time": "0:59:33"} +{"current_steps": 6163, "total_steps": 6840, "loss": 0.5371845960617065, "lr": 5.324484294867449e-07, "epoch": 1.8021640590729637, "percentage": 90.1, "elapsed_time": "9:01:23", "remaining_time": "0:59:28"} +{"current_steps": 6164, "total_steps": 6840, "loss": 0.4995431900024414, "lr": 5.308929805478513e-07, "epoch": 1.8024564994882293, "percentage": 90.12, "elapsed_time": "9:01:26", "remaining_time": "0:59:22"} +{"current_steps": 6165, "total_steps": 6840, "loss": 0.6503393650054932, "lr": 5.293397449551519e-07, "epoch": 1.8027489399034948, "percentage": 90.13, "elapsed_time": "9:01:30", "remaining_time": "0:59:17"} +{"current_steps": 6166, "total_steps": 6840, "loss": 0.5083032250404358, "lr": 5.277887230717027e-07, "epoch": 1.80304138031876, "percentage": 90.15, "elapsed_time": "9:01:35", "remaining_time": "0:59:12"} +{"current_steps": 6167, "total_steps": 6840, "loss": 0.6067851781845093, "lr": 5.262399152600473e-07, "epoch": 1.8033338207340255, "percentage": 90.16, "elapsed_time": "9:01:40", "remaining_time": "0:59:06"} +{"current_steps": 6168, "total_steps": 6840, "loss": 
0.6446479558944702, "lr": 5.246933218822104e-07, "epoch": 1.8036262611492908, "percentage": 90.18, "elapsed_time": "9:01:45", "remaining_time": "0:59:01"} +{"current_steps": 6169, "total_steps": 6840, "loss": 0.6940749883651733, "lr": 5.231489432996984e-07, "epoch": 1.8039187015645561, "percentage": 90.19, "elapsed_time": "9:01:51", "remaining_time": "0:58:56"} +{"current_steps": 6170, "total_steps": 6840, "loss": 0.558691143989563, "lr": 5.216067798735014e-07, "epoch": 1.8042111419798217, "percentage": 90.2, "elapsed_time": "9:01:57", "remaining_time": "0:58:51"} +{"current_steps": 6171, "total_steps": 6840, "loss": 0.4561213254928589, "lr": 5.2006683196409e-07, "epoch": 1.804503582395087, "percentage": 90.22, "elapsed_time": "9:02:01", "remaining_time": "0:58:45"} +{"current_steps": 6172, "total_steps": 6840, "loss": 0.514278769493103, "lr": 5.185290999314174e-07, "epoch": 1.8047960228103523, "percentage": 90.23, "elapsed_time": "9:02:07", "remaining_time": "0:58:40"} +{"current_steps": 6173, "total_steps": 6840, "loss": 0.41933614015579224, "lr": 5.169935841349194e-07, "epoch": 1.805088463225618, "percentage": 90.25, "elapsed_time": "9:02:11", "remaining_time": "0:58:35"} +{"current_steps": 6174, "total_steps": 6840, "loss": 0.5590407848358154, "lr": 5.154602849335133e-07, "epoch": 1.8053809036408832, "percentage": 90.26, "elapsed_time": "9:02:16", "remaining_time": "0:58:29"} +{"current_steps": 6175, "total_steps": 6840, "loss": 0.49428898096084595, "lr": 5.139292026855991e-07, "epoch": 1.8056733440561485, "percentage": 90.28, "elapsed_time": "9:02:20", "remaining_time": "0:58:24"} +{"current_steps": 6176, "total_steps": 6840, "loss": 0.4737596809864044, "lr": 5.124003377490582e-07, "epoch": 1.805965784471414, "percentage": 90.29, "elapsed_time": "9:02:25", "remaining_time": "0:58:19"} +{"current_steps": 6177, "total_steps": 6840, "loss": 0.5017397403717041, "lr": 5.108736904812517e-07, "epoch": 1.8062582248866792, "percentage": 90.31, "elapsed_time": 
"9:02:31", "remaining_time": "0:58:13"} +{"current_steps": 6178, "total_steps": 6840, "loss": 0.4509057402610779, "lr": 5.09349261239026e-07, "epoch": 1.8065506653019447, "percentage": 90.32, "elapsed_time": "9:02:36", "remaining_time": "0:58:08"} +{"current_steps": 6179, "total_steps": 6840, "loss": 0.4440206289291382, "lr": 5.078270503787053e-07, "epoch": 1.80684310571721, "percentage": 90.34, "elapsed_time": "9:02:41", "remaining_time": "0:58:03"} +{"current_steps": 6180, "total_steps": 6840, "loss": 0.4981609582901001, "lr": 5.063070582560991e-07, "epoch": 1.8071355461324754, "percentage": 90.35, "elapsed_time": "9:02:46", "remaining_time": "0:57:58"} +{"current_steps": 6181, "total_steps": 6840, "loss": 0.4057808518409729, "lr": 5.047892852264946e-07, "epoch": 1.807427986547741, "percentage": 90.37, "elapsed_time": "9:02:52", "remaining_time": "0:57:52"} +{"current_steps": 6182, "total_steps": 6840, "loss": 0.5770435333251953, "lr": 5.032737316446634e-07, "epoch": 1.8077204269630063, "percentage": 90.38, "elapsed_time": "9:02:57", "remaining_time": "0:57:47"} +{"current_steps": 6183, "total_steps": 6840, "loss": 0.5431563258171082, "lr": 5.017603978648567e-07, "epoch": 1.8080128673782716, "percentage": 90.39, "elapsed_time": "9:03:02", "remaining_time": "0:57:42"} +{"current_steps": 6184, "total_steps": 6840, "loss": 0.469868928194046, "lr": 5.002492842408058e-07, "epoch": 1.8083053077935372, "percentage": 90.41, "elapsed_time": "9:03:07", "remaining_time": "0:57:36"} +{"current_steps": 6185, "total_steps": 6840, "loss": 0.4581238925457001, "lr": 4.98740391125726e-07, "epoch": 1.8085977482088025, "percentage": 90.42, "elapsed_time": "9:03:12", "remaining_time": "0:57:31"} +{"current_steps": 6186, "total_steps": 6840, "loss": 0.43255913257598877, "lr": 4.972337188723108e-07, "epoch": 1.8088901886240678, "percentage": 90.44, "elapsed_time": "9:03:18", "remaining_time": "0:57:26"} +{"current_steps": 6187, "total_steps": 6840, "loss": 0.5817975997924805, "lr": 
4.957292678327374e-07, "epoch": 1.8091826290393334, "percentage": 90.45, "elapsed_time": "9:03:23", "remaining_time": "0:57:21"} +{"current_steps": 6188, "total_steps": 6840, "loss": 0.506614089012146, "lr": 4.9422703835866e-07, "epoch": 1.8094750694545985, "percentage": 90.47, "elapsed_time": "9:03:29", "remaining_time": "0:57:15"} +{"current_steps": 6189, "total_steps": 6840, "loss": 0.5245084762573242, "lr": 4.927270308012155e-07, "epoch": 1.809767509869864, "percentage": 90.48, "elapsed_time": "9:03:35", "remaining_time": "0:57:10"} +{"current_steps": 6190, "total_steps": 6840, "loss": 0.48700785636901855, "lr": 4.912292455110235e-07, "epoch": 1.8100599502851296, "percentage": 90.5, "elapsed_time": "9:03:39", "remaining_time": "0:57:05"} +{"current_steps": 6191, "total_steps": 6840, "loss": 0.5512829422950745, "lr": 4.897336828381794e-07, "epoch": 1.8103523907003947, "percentage": 90.51, "elapsed_time": "9:03:44", "remaining_time": "0:57:00"} +{"current_steps": 6192, "total_steps": 6840, "loss": 0.444965660572052, "lr": 4.882403431322647e-07, "epoch": 1.8106448311156602, "percentage": 90.53, "elapsed_time": "9:03:48", "remaining_time": "0:56:54"} +{"current_steps": 6193, "total_steps": 6840, "loss": 0.49120527505874634, "lr": 4.86749226742338e-07, "epoch": 1.8109372715309255, "percentage": 90.54, "elapsed_time": "9:03:52", "remaining_time": "0:56:49"} +{"current_steps": 6194, "total_steps": 6840, "loss": 0.47114405035972595, "lr": 4.852603340169371e-07, "epoch": 1.8112297119461909, "percentage": 90.56, "elapsed_time": "9:03:58", "remaining_time": "0:56:44"} +{"current_steps": 6195, "total_steps": 6840, "loss": 0.41404014825820923, "lr": 4.837736653040825e-07, "epoch": 1.8115221523614564, "percentage": 90.57, "elapsed_time": "9:04:04", "remaining_time": "0:56:38"} +{"current_steps": 6196, "total_steps": 6840, "loss": 0.5773917436599731, "lr": 4.822892209512742e-07, "epoch": 1.8118145927767217, "percentage": 90.58, "elapsed_time": "9:04:10", "remaining_time": 
"0:56:33"} +{"current_steps": 6197, "total_steps": 6840, "loss": 0.5048927068710327, "lr": 4.808070013054911e-07, "epoch": 1.812107033191987, "percentage": 90.6, "elapsed_time": "9:04:16", "remaining_time": "0:56:28"} +{"current_steps": 6198, "total_steps": 6840, "loss": 0.48112595081329346, "lr": 4.793270067131961e-07, "epoch": 1.8123994736072526, "percentage": 90.61, "elapsed_time": "9:04:22", "remaining_time": "0:56:23"} +{"current_steps": 6199, "total_steps": 6840, "loss": 0.465067982673645, "lr": 4.778492375203236e-07, "epoch": 1.812691914022518, "percentage": 90.63, "elapsed_time": "9:04:27", "remaining_time": "0:56:17"} +{"current_steps": 6200, "total_steps": 6840, "loss": 0.5456488132476807, "lr": 4.763736940722985e-07, "epoch": 1.8129843544377833, "percentage": 90.64, "elapsed_time": "9:04:32", "remaining_time": "0:56:12"} +{"current_steps": 6201, "total_steps": 6840, "loss": 0.5078476071357727, "lr": 4.74900376714017e-07, "epoch": 1.8132767948530488, "percentage": 90.66, "elapsed_time": "9:04:41", "remaining_time": "0:56:07"} +{"current_steps": 6202, "total_steps": 6840, "loss": 0.5087896585464478, "lr": 4.7342928578985814e-07, "epoch": 1.813569235268314, "percentage": 90.67, "elapsed_time": "9:04:45", "remaining_time": "0:56:02"} +{"current_steps": 6203, "total_steps": 6840, "loss": 0.5734537243843079, "lr": 4.719604216436824e-07, "epoch": 1.8138616756835795, "percentage": 90.69, "elapsed_time": "9:04:48", "remaining_time": "0:55:56"} +{"current_steps": 6204, "total_steps": 6840, "loss": 0.5163359045982361, "lr": 4.704937846188262e-07, "epoch": 1.814154116098845, "percentage": 90.7, "elapsed_time": "9:04:54", "remaining_time": "0:55:51"} +{"current_steps": 6205, "total_steps": 6840, "loss": 0.5884007811546326, "lr": 4.6902937505810765e-07, "epoch": 1.8144465565141101, "percentage": 90.72, "elapsed_time": "9:04:59", "remaining_time": "0:55:46"} +{"current_steps": 6206, "total_steps": 6840, "loss": 0.454215407371521, "lr": 4.675671933038228e-07, "epoch": 
1.8147389969293757, "percentage": 90.73, "elapsed_time": "9:05:05", "remaining_time": "0:55:41"} +{"current_steps": 6207, "total_steps": 6840, "loss": 0.4380212426185608, "lr": 4.661072396977506e-07, "epoch": 1.815031437344641, "percentage": 90.75, "elapsed_time": "9:05:11", "remaining_time": "0:55:35"} +{"current_steps": 6208, "total_steps": 6840, "loss": 0.6138126850128174, "lr": 4.646495145811425e-07, "epoch": 1.8153238777599063, "percentage": 90.76, "elapsed_time": "9:05:17", "remaining_time": "0:55:30"} +{"current_steps": 6209, "total_steps": 6840, "loss": 0.560515284538269, "lr": 4.6319401829473366e-07, "epoch": 1.8156163181751719, "percentage": 90.77, "elapsed_time": "9:05:23", "remaining_time": "0:55:25"} +{"current_steps": 6210, "total_steps": 6840, "loss": 0.4744090735912323, "lr": 4.6174075117873976e-07, "epoch": 1.8159087585904372, "percentage": 90.79, "elapsed_time": "9:05:28", "remaining_time": "0:55:20"} +{"current_steps": 6211, "total_steps": 6840, "loss": 0.4508114457130432, "lr": 4.6028971357285126e-07, "epoch": 1.8162011990057025, "percentage": 90.8, "elapsed_time": "9:05:34", "remaining_time": "0:55:15"} +{"current_steps": 6212, "total_steps": 6840, "loss": 0.5437598824501038, "lr": 4.5884090581623906e-07, "epoch": 1.816493639420968, "percentage": 90.82, "elapsed_time": "9:05:38", "remaining_time": "0:55:09"} +{"current_steps": 6213, "total_steps": 6840, "loss": 0.608635425567627, "lr": 4.5739432824755456e-07, "epoch": 1.8167860798362334, "percentage": 90.83, "elapsed_time": "9:05:44", "remaining_time": "0:55:04"} +{"current_steps": 6214, "total_steps": 6840, "loss": 0.45614784955978394, "lr": 4.5594998120492505e-07, "epoch": 1.8170785202514987, "percentage": 90.85, "elapsed_time": "9:05:50", "remaining_time": "0:54:59"} +{"current_steps": 6215, "total_steps": 6840, "loss": 0.46722525358200073, "lr": 4.5450786502595933e-07, "epoch": 1.8173709606667643, "percentage": 90.86, "elapsed_time": "9:05:55", "remaining_time": "0:54:53"} 
+{"current_steps": 6216, "total_steps": 6840, "loss": 0.5424127578735352, "lr": 4.5306798004774333e-07, "epoch": 1.8176634010820294, "percentage": 90.88, "elapsed_time": "9:06:00", "remaining_time": "0:54:48"} +{"current_steps": 6217, "total_steps": 6840, "loss": 0.4360300302505493, "lr": 4.5163032660684e-07, "epoch": 1.817955841497295, "percentage": 90.89, "elapsed_time": "9:06:03", "remaining_time": "0:54:43"} +{"current_steps": 6218, "total_steps": 6840, "loss": 0.43406206369400024, "lr": 4.5019490503929395e-07, "epoch": 1.8182482819125603, "percentage": 90.91, "elapsed_time": "9:06:08", "remaining_time": "0:54:37"} +{"current_steps": 6219, "total_steps": 6840, "loss": 0.5435998439788818, "lr": 4.4876171568062346e-07, "epoch": 1.8185407223278256, "percentage": 90.92, "elapsed_time": "9:06:15", "remaining_time": "0:54:32"} +{"current_steps": 6220, "total_steps": 6840, "loss": 0.4555914103984833, "lr": 4.4733075886583043e-07, "epoch": 1.8188331627430911, "percentage": 90.94, "elapsed_time": "9:06:19", "remaining_time": "0:54:27"} +{"current_steps": 6221, "total_steps": 6840, "loss": 0.5246081352233887, "lr": 4.4590203492939076e-07, "epoch": 1.8191256031583565, "percentage": 90.95, "elapsed_time": "9:06:24", "remaining_time": "0:54:22"} +{"current_steps": 6222, "total_steps": 6840, "loss": 0.5093664526939392, "lr": 4.4447554420525954e-07, "epoch": 1.8194180435736218, "percentage": 90.96, "elapsed_time": "9:06:30", "remaining_time": "0:54:16"} +{"current_steps": 6223, "total_steps": 6840, "loss": 0.5759550333023071, "lr": 4.430512870268733e-07, "epoch": 1.8197104839888874, "percentage": 90.98, "elapsed_time": "9:06:35", "remaining_time": "0:54:11"} +{"current_steps": 6224, "total_steps": 6840, "loss": 0.5477207899093628, "lr": 4.416292637271402e-07, "epoch": 1.8200029244041527, "percentage": 90.99, "elapsed_time": "9:06:39", "remaining_time": "0:54:06"} +{"current_steps": 6225, "total_steps": 6840, "loss": 0.5786882638931274, "lr": 4.402094746384511e-07, "epoch": 
1.820295364819418, "percentage": 91.01, "elapsed_time": "9:06:45", "remaining_time": "0:54:01"} +{"current_steps": 6226, "total_steps": 6840, "loss": 0.36909428238868713, "lr": 4.3879192009267266e-07, "epoch": 1.8205878052346836, "percentage": 91.02, "elapsed_time": "9:06:51", "remaining_time": "0:53:55"} +{"current_steps": 6227, "total_steps": 6840, "loss": 0.5471982955932617, "lr": 4.3737660042114993e-07, "epoch": 1.8208802456499487, "percentage": 91.04, "elapsed_time": "9:06:56", "remaining_time": "0:53:50"} +{"current_steps": 6228, "total_steps": 6840, "loss": 0.49737733602523804, "lr": 4.3596351595470596e-07, "epoch": 1.8211726860652142, "percentage": 91.05, "elapsed_time": "9:07:01", "remaining_time": "0:53:45"} +{"current_steps": 6229, "total_steps": 6840, "loss": 0.70830237865448, "lr": 4.3455266702363997e-07, "epoch": 1.8214651264804798, "percentage": 91.07, "elapsed_time": "9:07:06", "remaining_time": "0:53:39"} +{"current_steps": 6230, "total_steps": 6840, "loss": 0.5844424962997437, "lr": 4.331440539577281e-07, "epoch": 1.8217575668957449, "percentage": 91.08, "elapsed_time": "9:07:11", "remaining_time": "0:53:34"} +{"current_steps": 6231, "total_steps": 6840, "loss": 0.42457354068756104, "lr": 4.317376770862269e-07, "epoch": 1.8220500073110104, "percentage": 91.1, "elapsed_time": "9:07:17", "remaining_time": "0:53:29"} +{"current_steps": 6232, "total_steps": 6840, "loss": 0.5154321193695068, "lr": 4.3033353673786695e-07, "epoch": 1.8223424477262757, "percentage": 91.11, "elapsed_time": "9:07:22", "remaining_time": "0:53:24"} +{"current_steps": 6233, "total_steps": 6840, "loss": 0.5896856784820557, "lr": 4.2893163324085886e-07, "epoch": 1.822634888141541, "percentage": 91.13, "elapsed_time": "9:07:28", "remaining_time": "0:53:18"} +{"current_steps": 6234, "total_steps": 6840, "loss": 0.5032835006713867, "lr": 4.2753196692288835e-07, "epoch": 1.8229273285568066, "percentage": 91.14, "elapsed_time": "9:07:33", "remaining_time": "0:53:13"} 
+{"current_steps": 6235, "total_steps": 6840, "loss": 0.4691713750362396, "lr": 4.2613453811111814e-07, "epoch": 1.823219768972072, "percentage": 91.15, "elapsed_time": "9:07:38", "remaining_time": "0:53:08"} +{"current_steps": 6236, "total_steps": 6840, "loss": 0.595095694065094, "lr": 4.2473934713219033e-07, "epoch": 1.8235122093873373, "percentage": 91.17, "elapsed_time": "9:07:46", "remaining_time": "0:53:03"} +{"current_steps": 6237, "total_steps": 6840, "loss": 0.5004895329475403, "lr": 4.233463943122218e-07, "epoch": 1.8238046498026028, "percentage": 91.18, "elapsed_time": "9:07:52", "remaining_time": "0:52:58"} +{"current_steps": 6238, "total_steps": 6840, "loss": 0.4924081563949585, "lr": 4.2195567997680654e-07, "epoch": 1.8240970902178681, "percentage": 91.2, "elapsed_time": "9:07:57", "remaining_time": "0:52:52"} +{"current_steps": 6239, "total_steps": 6840, "loss": 0.5350006818771362, "lr": 4.2056720445101565e-07, "epoch": 1.8243895306331335, "percentage": 91.21, "elapsed_time": "9:08:02", "remaining_time": "0:52:47"} +{"current_steps": 6240, "total_steps": 6840, "loss": 0.5404629707336426, "lr": 4.191809680593961e-07, "epoch": 1.824681971048399, "percentage": 91.23, "elapsed_time": "9:08:07", "remaining_time": "0:52:42"} +{"current_steps": 6241, "total_steps": 6840, "loss": 0.727859377861023, "lr": 4.177969711259744e-07, "epoch": 1.8249744114636641, "percentage": 91.24, "elapsed_time": "9:08:13", "remaining_time": "0:52:37"} +{"current_steps": 6242, "total_steps": 6840, "loss": 0.4805057644844055, "lr": 4.164152139742494e-07, "epoch": 1.8252668518789297, "percentage": 91.26, "elapsed_time": "9:08:19", "remaining_time": "0:52:31"} +{"current_steps": 6243, "total_steps": 6840, "loss": 0.5520761013031006, "lr": 4.1503569692719847e-07, "epoch": 1.8255592922941952, "percentage": 91.27, "elapsed_time": "9:08:25", "remaining_time": "0:52:26"} +{"current_steps": 6244, "total_steps": 6840, "loss": 0.6130107641220093, "lr": 4.1365842030727576e-07, "epoch": 
1.8258517327094603, "percentage": 91.29, "elapsed_time": "9:08:30", "remaining_time": "0:52:21"} +{"current_steps": 6245, "total_steps": 6840, "loss": 0.6048229932785034, "lr": 4.122833844364116e-07, "epoch": 1.8261441731247259, "percentage": 91.3, "elapsed_time": "9:08:35", "remaining_time": "0:52:16"} +{"current_steps": 6246, "total_steps": 6840, "loss": 0.667324960231781, "lr": 4.1091058963601214e-07, "epoch": 1.8264366135399912, "percentage": 91.32, "elapsed_time": "9:08:41", "remaining_time": "0:52:10"} +{"current_steps": 6247, "total_steps": 6840, "loss": 0.45595815777778625, "lr": 4.095400362269597e-07, "epoch": 1.8267290539552565, "percentage": 91.33, "elapsed_time": "9:08:46", "remaining_time": "0:52:05"} +{"current_steps": 6248, "total_steps": 6840, "loss": 0.49015533924102783, "lr": 4.081717245296124e-07, "epoch": 1.827021494370522, "percentage": 91.35, "elapsed_time": "9:08:51", "remaining_time": "0:52:00"} +{"current_steps": 6249, "total_steps": 6840, "loss": 0.5230038166046143, "lr": 4.068056548638055e-07, "epoch": 1.8273139347857874, "percentage": 91.36, "elapsed_time": "9:08:56", "remaining_time": "0:51:54"} +{"current_steps": 6250, "total_steps": 6840, "loss": 0.5025942325592041, "lr": 4.054418275488492e-07, "epoch": 1.8276063752010527, "percentage": 91.37, "elapsed_time": "9:09:02", "remaining_time": "0:51:49"} +{"current_steps": 6251, "total_steps": 6840, "loss": 0.5136677026748657, "lr": 4.0408024290352955e-07, "epoch": 1.8278988156163183, "percentage": 91.39, "elapsed_time": "9:09:07", "remaining_time": "0:51:44"} +{"current_steps": 6252, "total_steps": 6840, "loss": 0.6209211945533752, "lr": 4.0272090124611086e-07, "epoch": 1.8281912560315836, "percentage": 91.4, "elapsed_time": "9:09:13", "remaining_time": "0:51:39"} +{"current_steps": 6253, "total_steps": 6840, "loss": 0.5913738012313843, "lr": 4.0136380289432784e-07, "epoch": 1.828483696446849, "percentage": 91.42, "elapsed_time": "9:09:17", "remaining_time": "0:51:33"} +{"current_steps": 
6254, "total_steps": 6840, "loss": 0.5745095610618591, "lr": 4.000089481653946e-07, "epoch": 1.8287761368621145, "percentage": 91.43, "elapsed_time": "9:09:23", "remaining_time": "0:51:28"} +{"current_steps": 6255, "total_steps": 6840, "loss": 0.4566704034805298, "lr": 3.9865633737600105e-07, "epoch": 1.8290685772773796, "percentage": 91.45, "elapsed_time": "9:09:28", "remaining_time": "0:51:23"} +{"current_steps": 6256, "total_steps": 6840, "loss": 0.49784860014915466, "lr": 3.9730597084231105e-07, "epoch": 1.8293610176926451, "percentage": 91.46, "elapsed_time": "9:09:33", "remaining_time": "0:51:18"} +{"current_steps": 6257, "total_steps": 6840, "loss": 0.4489399790763855, "lr": 3.9595784887996647e-07, "epoch": 1.8296534581079105, "percentage": 91.48, "elapsed_time": "9:09:40", "remaining_time": "0:51:12"} +{"current_steps": 6258, "total_steps": 6840, "loss": 0.6335956454277039, "lr": 3.946119718040797e-07, "epoch": 1.8299458985231758, "percentage": 91.49, "elapsed_time": "9:09:46", "remaining_time": "0:51:07"} +{"current_steps": 6259, "total_steps": 6840, "loss": 0.44865918159484863, "lr": 3.932683399292436e-07, "epoch": 1.8302383389384413, "percentage": 91.51, "elapsed_time": "9:09:53", "remaining_time": "0:51:02"} +{"current_steps": 6260, "total_steps": 6840, "loss": 0.4328421354293823, "lr": 3.919269535695225e-07, "epoch": 1.8305307793537067, "percentage": 91.52, "elapsed_time": "9:09:58", "remaining_time": "0:50:57"} +{"current_steps": 6261, "total_steps": 6840, "loss": 0.463814377784729, "lr": 3.9058781303845886e-07, "epoch": 1.830823219768972, "percentage": 91.54, "elapsed_time": "9:10:02", "remaining_time": "0:50:51"} +{"current_steps": 6262, "total_steps": 6840, "loss": 0.5857536196708679, "lr": 3.892509186490667e-07, "epoch": 1.8311156601842375, "percentage": 91.55, "elapsed_time": "9:10:08", "remaining_time": "0:50:46"} +{"current_steps": 6263, "total_steps": 6840, "loss": 0.4873831272125244, "lr": 3.879162707138395e-07, "epoch": 1.8314081005995029, 
"percentage": 91.56, "elapsed_time": "9:10:14", "remaining_time": "0:50:41"} +{"current_steps": 6264, "total_steps": 6840, "loss": 0.5428040027618408, "lr": 3.8658386954474104e-07, "epoch": 1.8317005410147682, "percentage": 91.58, "elapsed_time": "9:10:18", "remaining_time": "0:50:36"} +{"current_steps": 6265, "total_steps": 6840, "loss": 0.49092623591423035, "lr": 3.852537154532121e-07, "epoch": 1.8319929814300338, "percentage": 91.59, "elapsed_time": "9:10:23", "remaining_time": "0:50:30"} +{"current_steps": 6266, "total_steps": 6840, "loss": 0.5515817999839783, "lr": 3.839258087501685e-07, "epoch": 1.8322854218452989, "percentage": 91.61, "elapsed_time": "9:10:28", "remaining_time": "0:50:25"} +{"current_steps": 6267, "total_steps": 6840, "loss": 0.48080340027809143, "lr": 3.8260014974600077e-07, "epoch": 1.8325778622605644, "percentage": 91.62, "elapsed_time": "9:10:33", "remaining_time": "0:50:20"} +{"current_steps": 6268, "total_steps": 6840, "loss": 0.6129888296127319, "lr": 3.812767387505734e-07, "epoch": 1.83287030267583, "percentage": 91.64, "elapsed_time": "9:10:37", "remaining_time": "0:50:14"} +{"current_steps": 6269, "total_steps": 6840, "loss": 0.5843402147293091, "lr": 3.7995557607322543e-07, "epoch": 1.833162743091095, "percentage": 91.65, "elapsed_time": "9:10:42", "remaining_time": "0:50:09"} +{"current_steps": 6270, "total_steps": 6840, "loss": 0.5573143362998962, "lr": 3.7863666202276996e-07, "epoch": 1.8334551835063606, "percentage": 91.67, "elapsed_time": "9:10:47", "remaining_time": "0:50:04"} +{"current_steps": 6271, "total_steps": 6840, "loss": 0.552756667137146, "lr": 3.773199969074959e-07, "epoch": 1.833747623921626, "percentage": 91.68, "elapsed_time": "9:10:53", "remaining_time": "0:49:59"} +{"current_steps": 6272, "total_steps": 6840, "loss": 0.5559083223342896, "lr": 3.7600558103516706e-07, "epoch": 1.8340400643368913, "percentage": 91.7, "elapsed_time": "9:10:58", "remaining_time": "0:49:53"} +{"current_steps": 6273, "total_steps": 
6840, "loss": 0.5388067364692688, "lr": 3.746934147130177e-07, "epoch": 1.8343325047521568, "percentage": 91.71, "elapsed_time": "9:11:03", "remaining_time": "0:49:48"} +{"current_steps": 6274, "total_steps": 6840, "loss": 0.5816110968589783, "lr": 3.7338349824776133e-07, "epoch": 1.8346249451674221, "percentage": 91.73, "elapsed_time": "9:11:08", "remaining_time": "0:49:43"} +{"current_steps": 6275, "total_steps": 6840, "loss": 0.5720102787017822, "lr": 3.720758319455786e-07, "epoch": 1.8349173855826875, "percentage": 91.74, "elapsed_time": "9:11:14", "remaining_time": "0:49:38"} +{"current_steps": 6276, "total_steps": 6840, "loss": 0.46005699038505554, "lr": 3.707704161121328e-07, "epoch": 1.835209825997953, "percentage": 91.75, "elapsed_time": "9:11:20", "remaining_time": "0:49:32"} +{"current_steps": 6277, "total_steps": 6840, "loss": 0.5602168440818787, "lr": 3.6946725105255656e-07, "epoch": 1.8355022664132183, "percentage": 91.77, "elapsed_time": "9:11:24", "remaining_time": "0:49:27"} +{"current_steps": 6278, "total_steps": 6840, "loss": 0.5390583276748657, "lr": 3.68166337071455e-07, "epoch": 1.8357947068284837, "percentage": 91.78, "elapsed_time": "9:11:30", "remaining_time": "0:49:22"} +{"current_steps": 6279, "total_steps": 6840, "loss": 0.48980700969696045, "lr": 3.668676744729094e-07, "epoch": 1.8360871472437492, "percentage": 91.8, "elapsed_time": "9:11:36", "remaining_time": "0:49:17"} +{"current_steps": 6280, "total_steps": 6840, "loss": 0.6565061807632446, "lr": 3.655712635604747e-07, "epoch": 1.8363795876590143, "percentage": 91.81, "elapsed_time": "9:11:41", "remaining_time": "0:49:11"} +{"current_steps": 6281, "total_steps": 6840, "loss": 0.465609610080719, "lr": 3.642771046371785e-07, "epoch": 1.8366720280742799, "percentage": 91.83, "elapsed_time": "9:11:47", "remaining_time": "0:49:06"} +{"current_steps": 6282, "total_steps": 6840, "loss": 0.5698891282081604, "lr": 3.6298519800552434e-07, "epoch": 1.8369644684895454, "percentage": 91.84, 
"elapsed_time": "9:11:52", "remaining_time": "0:49:01"} +{"current_steps": 6283, "total_steps": 6840, "loss": 0.5885399580001831, "lr": 3.616955439674863e-07, "epoch": 1.8372569089048105, "percentage": 91.86, "elapsed_time": "9:11:57", "remaining_time": "0:48:55"} +{"current_steps": 6284, "total_steps": 6840, "loss": 0.5158063173294067, "lr": 3.60408142824511e-07, "epoch": 1.837549349320076, "percentage": 91.87, "elapsed_time": "9:12:03", "remaining_time": "0:48:50"} +{"current_steps": 6285, "total_steps": 6840, "loss": 0.49203822016716003, "lr": 3.5912299487752434e-07, "epoch": 1.8378417897353414, "percentage": 91.89, "elapsed_time": "9:12:08", "remaining_time": "0:48:45"} +{"current_steps": 6286, "total_steps": 6840, "loss": 0.5756489038467407, "lr": 3.578401004269183e-07, "epoch": 1.8381342301506067, "percentage": 91.9, "elapsed_time": "9:12:14", "remaining_time": "0:48:40"} +{"current_steps": 6287, "total_steps": 6840, "loss": 0.5970584154129028, "lr": 3.565594597725652e-07, "epoch": 1.8384266705658723, "percentage": 91.92, "elapsed_time": "9:12:19", "remaining_time": "0:48:34"} +{"current_steps": 6288, "total_steps": 6840, "loss": 0.48702481389045715, "lr": 3.552810732138046e-07, "epoch": 1.8387191109811376, "percentage": 91.93, "elapsed_time": "9:12:24", "remaining_time": "0:48:29"} +{"current_steps": 6289, "total_steps": 6840, "loss": 0.4818963408470154, "lr": 3.540049410494517e-07, "epoch": 1.839011551396403, "percentage": 91.94, "elapsed_time": "9:12:30", "remaining_time": "0:48:24"} +{"current_steps": 6290, "total_steps": 6840, "loss": 0.389699786901474, "lr": 3.5273106357779585e-07, "epoch": 1.8393039918116685, "percentage": 91.96, "elapsed_time": "9:12:36", "remaining_time": "0:48:19"} +{"current_steps": 6291, "total_steps": 6840, "loss": 0.6438174247741699, "lr": 3.514594410965977e-07, "epoch": 1.8395964322269338, "percentage": 91.97, "elapsed_time": "9:12:41", "remaining_time": "0:48:13"} +{"current_steps": 6292, "total_steps": 6840, "loss": 
0.654021143913269, "lr": 3.501900739030906e-07, "epoch": 1.8398888726421991, "percentage": 91.99, "elapsed_time": "9:12:47", "remaining_time": "0:48:08"} +{"current_steps": 6293, "total_steps": 6840, "loss": 0.748673677444458, "lr": 3.489229622939827e-07, "epoch": 1.8401813130574647, "percentage": 92.0, "elapsed_time": "9:12:53", "remaining_time": "0:48:03"} +{"current_steps": 6294, "total_steps": 6840, "loss": 0.47883105278015137, "lr": 3.476581065654527e-07, "epoch": 1.8404737534727298, "percentage": 92.02, "elapsed_time": "9:12:58", "remaining_time": "0:47:58"} +{"current_steps": 6295, "total_steps": 6840, "loss": 0.5221554040908813, "lr": 3.4639550701315303e-07, "epoch": 1.8407661938879953, "percentage": 92.03, "elapsed_time": "9:13:04", "remaining_time": "0:47:53"} +{"current_steps": 6296, "total_steps": 6840, "loss": 0.482231080532074, "lr": 3.451351639322087e-07, "epoch": 1.8410586343032607, "percentage": 92.05, "elapsed_time": "9:13:09", "remaining_time": "0:47:47"} +{"current_steps": 6297, "total_steps": 6840, "loss": 0.5407366752624512, "lr": 3.4387707761721625e-07, "epoch": 1.841351074718526, "percentage": 92.06, "elapsed_time": "9:13:14", "remaining_time": "0:47:42"} +{"current_steps": 6298, "total_steps": 6840, "loss": 0.626631498336792, "lr": 3.426212483622482e-07, "epoch": 1.8416435151337915, "percentage": 92.08, "elapsed_time": "9:13:19", "remaining_time": "0:47:37"} +{"current_steps": 6299, "total_steps": 6840, "loss": 0.4401513338088989, "lr": 3.4136767646084424e-07, "epoch": 1.8419359555490569, "percentage": 92.09, "elapsed_time": "9:13:25", "remaining_time": "0:47:31"} +{"current_steps": 6300, "total_steps": 6840, "loss": 0.48130229115486145, "lr": 3.4011636220602106e-07, "epoch": 1.8422283959643222, "percentage": 92.11, "elapsed_time": "9:13:30", "remaining_time": "0:47:26"} +{"current_steps": 6301, "total_steps": 6840, "loss": 0.7132935523986816, "lr": 3.3886730589026475e-07, "epoch": 1.8425208363795877, "percentage": 92.12, "elapsed_time": 
"9:13:40", "remaining_time": "0:47:21"} +{"current_steps": 6302, "total_steps": 6840, "loss": 0.6665343642234802, "lr": 3.37620507805535e-07, "epoch": 1.842813276794853, "percentage": 92.13, "elapsed_time": "9:13:45", "remaining_time": "0:47:16"} +{"current_steps": 6303, "total_steps": 6840, "loss": 0.4313231408596039, "lr": 3.3637596824326435e-07, "epoch": 1.8431057172101184, "percentage": 92.15, "elapsed_time": "9:13:50", "remaining_time": "0:47:11"} +{"current_steps": 6304, "total_steps": 6840, "loss": 0.6263744235038757, "lr": 3.3513368749435447e-07, "epoch": 1.843398157625384, "percentage": 92.16, "elapsed_time": "9:13:56", "remaining_time": "0:47:05"} +{"current_steps": 6305, "total_steps": 6840, "loss": 0.6215947866439819, "lr": 3.3389366584918313e-07, "epoch": 1.843690598040649, "percentage": 92.18, "elapsed_time": "9:14:01", "remaining_time": "0:47:00"} +{"current_steps": 6306, "total_steps": 6840, "loss": 0.45956021547317505, "lr": 3.3265590359759517e-07, "epoch": 1.8439830384559146, "percentage": 92.19, "elapsed_time": "9:14:06", "remaining_time": "0:46:55"} +{"current_steps": 6307, "total_steps": 6840, "loss": 0.5363642573356628, "lr": 3.3142040102891126e-07, "epoch": 1.8442754788711802, "percentage": 92.21, "elapsed_time": "9:14:10", "remaining_time": "0:46:50"} +{"current_steps": 6308, "total_steps": 6840, "loss": 0.4574592709541321, "lr": 3.3018715843192273e-07, "epoch": 1.8445679192864453, "percentage": 92.22, "elapsed_time": "9:14:16", "remaining_time": "0:46:44"} +{"current_steps": 6309, "total_steps": 6840, "loss": 0.43236005306243896, "lr": 3.2895617609489337e-07, "epoch": 1.8448603597017108, "percentage": 92.24, "elapsed_time": "9:14:21", "remaining_time": "0:46:39"} +{"current_steps": 6310, "total_steps": 6840, "loss": 0.46349820494651794, "lr": 3.277274543055564e-07, "epoch": 1.8451528001169761, "percentage": 92.25, "elapsed_time": "9:14:27", "remaining_time": "0:46:34"} +{"current_steps": 6311, "total_steps": 6840, "loss": 
0.5233386754989624, "lr": 3.265009933511176e-07, "epoch": 1.8454452405322415, "percentage": 92.27, "elapsed_time": "9:14:31", "remaining_time": "0:46:28"} +{"current_steps": 6312, "total_steps": 6840, "loss": 0.44902727007865906, "lr": 3.252767935182566e-07, "epoch": 1.845737680947507, "percentage": 92.28, "elapsed_time": "9:14:37", "remaining_time": "0:46:23"} +{"current_steps": 6313, "total_steps": 6840, "loss": 0.709855854511261, "lr": 3.240548550931222e-07, "epoch": 1.8460301213627723, "percentage": 92.3, "elapsed_time": "9:14:42", "remaining_time": "0:46:18"} +{"current_steps": 6314, "total_steps": 6840, "loss": 0.5194632411003113, "lr": 3.228351783613348e-07, "epoch": 1.8463225617780377, "percentage": 92.31, "elapsed_time": "9:14:48", "remaining_time": "0:46:13"} +{"current_steps": 6315, "total_steps": 6840, "loss": 0.6027804017066956, "lr": 3.2161776360798535e-07, "epoch": 1.8466150021933032, "percentage": 92.32, "elapsed_time": "9:14:54", "remaining_time": "0:46:07"} +{"current_steps": 6316, "total_steps": 6840, "loss": 0.5047632455825806, "lr": 3.2040261111763946e-07, "epoch": 1.8469074426085685, "percentage": 92.34, "elapsed_time": "9:15:01", "remaining_time": "0:46:02"} +{"current_steps": 6317, "total_steps": 6840, "loss": 0.5763708353042603, "lr": 3.1918972117433e-07, "epoch": 1.8471998830238339, "percentage": 92.35, "elapsed_time": "9:15:07", "remaining_time": "0:45:57"} +{"current_steps": 6318, "total_steps": 6840, "loss": 0.4725028872489929, "lr": 3.1797909406156234e-07, "epoch": 1.8474923234390994, "percentage": 92.37, "elapsed_time": "9:15:11", "remaining_time": "0:45:52"} +{"current_steps": 6319, "total_steps": 6840, "loss": 0.523047924041748, "lr": 3.167707300623135e-07, "epoch": 1.8477847638543645, "percentage": 92.38, "elapsed_time": "9:15:18", "remaining_time": "0:45:47"} +{"current_steps": 6320, "total_steps": 6840, "loss": 0.5100070238113403, "lr": 3.15564629459032e-07, "epoch": 1.84807720426963, "percentage": 92.4, "elapsed_time": 
"9:15:24", "remaining_time": "0:45:41"} +{"current_steps": 6321, "total_steps": 6840, "loss": 0.6019359827041626, "lr": 3.143607925336356e-07, "epoch": 1.8483696446848956, "percentage": 92.41, "elapsed_time": "9:15:30", "remaining_time": "0:45:36"} +{"current_steps": 6322, "total_steps": 6840, "loss": 0.5514570474624634, "lr": 3.1315921956751483e-07, "epoch": 1.8486620851001607, "percentage": 92.43, "elapsed_time": "9:15:34", "remaining_time": "0:45:31"} +{"current_steps": 6323, "total_steps": 6840, "loss": 0.49585646390914917, "lr": 3.1195991084152944e-07, "epoch": 1.8489545255154263, "percentage": 92.44, "elapsed_time": "9:15:40", "remaining_time": "0:45:26"} +{"current_steps": 6324, "total_steps": 6840, "loss": 0.5738509297370911, "lr": 3.1076286663601076e-07, "epoch": 1.8492469659306916, "percentage": 92.46, "elapsed_time": "9:15:45", "remaining_time": "0:45:20"} +{"current_steps": 6325, "total_steps": 6840, "loss": 0.5149112939834595, "lr": 3.095680872307605e-07, "epoch": 1.849539406345957, "percentage": 92.47, "elapsed_time": "9:15:51", "remaining_time": "0:45:15"} +{"current_steps": 6326, "total_steps": 6840, "loss": 0.45808184146881104, "lr": 3.0837557290505083e-07, "epoch": 1.8498318467612225, "percentage": 92.49, "elapsed_time": "9:15:56", "remaining_time": "0:45:10"} +{"current_steps": 6327, "total_steps": 6840, "loss": 0.5173396468162537, "lr": 3.0718532393762435e-07, "epoch": 1.8501242871764878, "percentage": 92.5, "elapsed_time": "9:16:01", "remaining_time": "0:45:04"} +{"current_steps": 6328, "total_steps": 6840, "loss": 0.6229383945465088, "lr": 3.059973406066963e-07, "epoch": 1.8504167275917531, "percentage": 92.51, "elapsed_time": "9:16:07", "remaining_time": "0:44:59"} +{"current_steps": 6329, "total_steps": 6840, "loss": 0.45520371198654175, "lr": 3.0481162318994894e-07, "epoch": 1.8507091680070187, "percentage": 92.53, "elapsed_time": "9:16:12", "remaining_time": "0:44:54"} +{"current_steps": 6330, "total_steps": 6840, "loss": 
0.43216121196746826, "lr": 3.036281719645373e-07, "epoch": 1.851001608422284, "percentage": 92.54, "elapsed_time": "9:16:18", "remaining_time": "0:44:49"} +{"current_steps": 6331, "total_steps": 6840, "loss": 0.5440583825111389, "lr": 3.0244698720708456e-07, "epoch": 1.8512940488375493, "percentage": 92.56, "elapsed_time": "9:16:22", "remaining_time": "0:44:43"} +{"current_steps": 6332, "total_steps": 6840, "loss": 0.5474626421928406, "lr": 3.0126806919368756e-07, "epoch": 1.8515864892528149, "percentage": 92.57, "elapsed_time": "9:16:27", "remaining_time": "0:44:38"} +{"current_steps": 6333, "total_steps": 6840, "loss": 0.5122883915901184, "lr": 3.000914181999093e-07, "epoch": 1.85187892966808, "percentage": 92.59, "elapsed_time": "9:16:33", "remaining_time": "0:44:33"} +{"current_steps": 6334, "total_steps": 6840, "loss": 0.48304370045661926, "lr": 2.989170345007852e-07, "epoch": 1.8521713700833455, "percentage": 92.6, "elapsed_time": "9:16:37", "remaining_time": "0:44:27"} +{"current_steps": 6335, "total_steps": 6840, "loss": 0.566180408000946, "lr": 2.977449183708214e-07, "epoch": 1.8524638104986109, "percentage": 92.62, "elapsed_time": "9:16:43", "remaining_time": "0:44:22"} +{"current_steps": 6336, "total_steps": 6840, "loss": 0.5218988656997681, "lr": 2.96575070083992e-07, "epoch": 1.8527562509138762, "percentage": 92.63, "elapsed_time": "9:16:47", "remaining_time": "0:44:17"} +{"current_steps": 6337, "total_steps": 6840, "loss": 0.49669283628463745, "lr": 2.954074899137427e-07, "epoch": 1.8530486913291417, "percentage": 92.65, "elapsed_time": "9:16:51", "remaining_time": "0:44:12"} +{"current_steps": 6338, "total_steps": 6840, "loss": 0.5505487322807312, "lr": 2.942421781329874e-07, "epoch": 1.853341131744407, "percentage": 92.66, "elapsed_time": "9:16:57", "remaining_time": "0:44:06"} +{"current_steps": 6339, "total_steps": 6840, "loss": 0.5386735200881958, "lr": 2.930791350141116e-07, "epoch": 1.8536335721596724, "percentage": 92.68, "elapsed_time": 
"9:17:02", "remaining_time": "0:44:01"} +{"current_steps": 6340, "total_steps": 6840, "loss": 0.5266523957252502, "lr": 2.919183608289689e-07, "epoch": 1.853926012574938, "percentage": 92.69, "elapsed_time": "9:17:07", "remaining_time": "0:43:56"} +{"current_steps": 6341, "total_steps": 6840, "loss": 0.5335103273391724, "lr": 2.907598558488822e-07, "epoch": 1.8542184529902033, "percentage": 92.7, "elapsed_time": "9:17:12", "remaining_time": "0:43:50"} +{"current_steps": 6342, "total_steps": 6840, "loss": 0.6155405044555664, "lr": 2.896036203446473e-07, "epoch": 1.8545108934054686, "percentage": 92.72, "elapsed_time": "9:17:18", "remaining_time": "0:43:45"} +{"current_steps": 6343, "total_steps": 6840, "loss": 0.5258159041404724, "lr": 2.884496545865245e-07, "epoch": 1.8548033338207341, "percentage": 92.73, "elapsed_time": "9:17:24", "remaining_time": "0:43:40"} +{"current_steps": 6344, "total_steps": 6840, "loss": 0.5428795218467712, "lr": 2.8729795884424927e-07, "epoch": 1.8550957742359993, "percentage": 92.75, "elapsed_time": "9:17:29", "remaining_time": "0:43:35"} +{"current_steps": 6345, "total_steps": 6840, "loss": 0.4876418709754944, "lr": 2.8614853338702066e-07, "epoch": 1.8553882146512648, "percentage": 92.76, "elapsed_time": "9:17:35", "remaining_time": "0:43:30"} +{"current_steps": 6346, "total_steps": 6840, "loss": 0.49640393257141113, "lr": 2.850013784835115e-07, "epoch": 1.8556806550665303, "percentage": 92.78, "elapsed_time": "9:17:41", "remaining_time": "0:43:24"} +{"current_steps": 6347, "total_steps": 6840, "loss": 0.5726122260093689, "lr": 2.838564944018618e-07, "epoch": 1.8559730954817955, "percentage": 92.79, "elapsed_time": "9:17:46", "remaining_time": "0:43:19"} +{"current_steps": 6348, "total_steps": 6840, "loss": 0.5106557011604309, "lr": 2.827138814096819e-07, "epoch": 1.856265535897061, "percentage": 92.81, "elapsed_time": "9:17:50", "remaining_time": "0:43:14"} +{"current_steps": 6349, "total_steps": 6840, "loss": 0.45941129326820374, 
"lr": 2.8157353977405044e-07, "epoch": 1.8565579763123263, "percentage": 92.82, "elapsed_time": "9:17:56", "remaining_time": "0:43:08"} +{"current_steps": 6350, "total_steps": 6840, "loss": 0.488609254360199, "lr": 2.8043546976151414e-07, "epoch": 1.8568504167275917, "percentage": 92.84, "elapsed_time": "9:18:01", "remaining_time": "0:43:03"} +{"current_steps": 6351, "total_steps": 6840, "loss": 0.639745831489563, "lr": 2.7929967163809135e-07, "epoch": 1.8571428571428572, "percentage": 92.85, "elapsed_time": "9:18:06", "remaining_time": "0:42:58"} +{"current_steps": 6352, "total_steps": 6840, "loss": 0.45327228307724, "lr": 2.7816614566926747e-07, "epoch": 1.8574352975581225, "percentage": 92.87, "elapsed_time": "9:18:12", "remaining_time": "0:42:53"} +{"current_steps": 6353, "total_steps": 6840, "loss": 0.5606091022491455, "lr": 2.7703489211999725e-07, "epoch": 1.8577277379733879, "percentage": 92.88, "elapsed_time": "9:18:18", "remaining_time": "0:42:47"} +{"current_steps": 6354, "total_steps": 6840, "loss": 0.5078528523445129, "lr": 2.759059112547047e-07, "epoch": 1.8580201783886534, "percentage": 92.89, "elapsed_time": "9:18:23", "remaining_time": "0:42:42"} +{"current_steps": 6355, "total_steps": 6840, "loss": 0.5558253526687622, "lr": 2.74779203337282e-07, "epoch": 1.8583126188039187, "percentage": 92.91, "elapsed_time": "9:18:27", "remaining_time": "0:42:37"} +{"current_steps": 6356, "total_steps": 6840, "loss": 0.3962102234363556, "lr": 2.7365476863108974e-07, "epoch": 1.858605059219184, "percentage": 92.92, "elapsed_time": "9:18:33", "remaining_time": "0:42:32"} +{"current_steps": 6357, "total_steps": 6840, "loss": 0.4737718105316162, "lr": 2.725326073989587e-07, "epoch": 1.8588974996344496, "percentage": 92.94, "elapsed_time": "9:18:39", "remaining_time": "0:42:26"} +{"current_steps": 6358, "total_steps": 6840, "loss": 0.5389090180397034, "lr": 2.7141271990318576e-07, "epoch": 1.8591899400497147, "percentage": 92.95, "elapsed_time": "9:18:43", 
"remaining_time": "0:42:21"} +{"current_steps": 6359, "total_steps": 6840, "loss": 0.5311479568481445, "lr": 2.7029510640554033e-07, "epoch": 1.8594823804649803, "percentage": 92.97, "elapsed_time": "9:18:48", "remaining_time": "0:42:16"} +{"current_steps": 6360, "total_steps": 6840, "loss": 0.4753482937812805, "lr": 2.691797671672558e-07, "epoch": 1.8597748208802458, "percentage": 92.98, "elapsed_time": "9:18:54", "remaining_time": "0:42:10"} +{"current_steps": 6361, "total_steps": 6840, "loss": 0.5192427635192871, "lr": 2.6806670244903577e-07, "epoch": 1.860067261295511, "percentage": 93.0, "elapsed_time": "9:19:00", "remaining_time": "0:42:05"} +{"current_steps": 6362, "total_steps": 6840, "loss": 0.5910875797271729, "lr": 2.6695591251105214e-07, "epoch": 1.8603597017107765, "percentage": 93.01, "elapsed_time": "9:19:06", "remaining_time": "0:42:00"} +{"current_steps": 6363, "total_steps": 6840, "loss": 0.5465212464332581, "lr": 2.658473976129472e-07, "epoch": 1.8606521421260418, "percentage": 93.03, "elapsed_time": "9:19:11", "remaining_time": "0:41:55"} +{"current_steps": 6364, "total_steps": 6840, "loss": 0.43188267946243286, "lr": 2.647411580138282e-07, "epoch": 1.8609445825413071, "percentage": 93.04, "elapsed_time": "9:19:15", "remaining_time": "0:41:49"} +{"current_steps": 6365, "total_steps": 6840, "loss": 0.5723724365234375, "lr": 2.636371939722715e-07, "epoch": 1.8612370229565727, "percentage": 93.06, "elapsed_time": "9:19:21", "remaining_time": "0:41:44"} +{"current_steps": 6366, "total_steps": 6840, "loss": 0.47383856773376465, "lr": 2.62535505746323e-07, "epoch": 1.861529463371838, "percentage": 93.07, "elapsed_time": "9:19:27", "remaining_time": "0:41:39"} +{"current_steps": 6367, "total_steps": 6840, "loss": 0.502855658531189, "lr": 2.6143609359349566e-07, "epoch": 1.8618219037871033, "percentage": 93.08, "elapsed_time": "9:19:33", "remaining_time": "0:41:34"} +{"current_steps": 6368, "total_steps": 6840, "loss": 0.5934205055236816, "lr": 
2.6033895777077043e-07, "epoch": 1.8621143442023689, "percentage": 93.1, "elapsed_time": "9:19:38", "remaining_time": "0:41:28"} +{"current_steps": 6369, "total_steps": 6840, "loss": 0.4157971143722534, "lr": 2.5924409853459455e-07, "epoch": 1.8624067846176342, "percentage": 93.11, "elapsed_time": "9:19:43", "remaining_time": "0:41:23"} +{"current_steps": 6370, "total_steps": 6840, "loss": 0.5944307446479797, "lr": 2.5815151614088764e-07, "epoch": 1.8626992250328995, "percentage": 93.13, "elapsed_time": "9:19:48", "remaining_time": "0:41:18"} +{"current_steps": 6371, "total_steps": 6840, "loss": 0.5603153705596924, "lr": 2.57061210845031e-07, "epoch": 1.862991665448165, "percentage": 93.14, "elapsed_time": "9:19:54", "remaining_time": "0:41:13"} +{"current_steps": 6372, "total_steps": 6840, "loss": 0.49231380224227905, "lr": 2.559731829018786e-07, "epoch": 1.8632841058634302, "percentage": 93.16, "elapsed_time": "9:19:59", "remaining_time": "0:41:07"} +{"current_steps": 6373, "total_steps": 6840, "loss": 0.46984565258026123, "lr": 2.548874325657502e-07, "epoch": 1.8635765462786957, "percentage": 93.17, "elapsed_time": "9:20:04", "remaining_time": "0:41:02"} +{"current_steps": 6374, "total_steps": 6840, "loss": 0.5088338255882263, "lr": 2.5380396009043297e-07, "epoch": 1.863868986693961, "percentage": 93.19, "elapsed_time": "9:20:08", "remaining_time": "0:40:57"} +{"current_steps": 6375, "total_steps": 6840, "loss": 0.4760589599609375, "lr": 2.52722765729182e-07, "epoch": 1.8641614271092264, "percentage": 93.2, "elapsed_time": "9:20:14", "remaining_time": "0:40:51"} +{"current_steps": 6376, "total_steps": 6840, "loss": 0.44232040643692017, "lr": 2.5164384973471954e-07, "epoch": 1.864453867524492, "percentage": 93.22, "elapsed_time": "9:20:20", "remaining_time": "0:40:46"} +{"current_steps": 6377, "total_steps": 6840, "loss": 0.46714338660240173, "lr": 2.505672123592373e-07, "epoch": 1.8647463079397573, "percentage": 93.23, "elapsed_time": "9:20:25", 
"remaining_time": "0:40:41"} +{"current_steps": 6378, "total_steps": 6840, "loss": 0.5527149438858032, "lr": 2.494928538543917e-07, "epoch": 1.8650387483550226, "percentage": 93.25, "elapsed_time": "9:20:29", "remaining_time": "0:40:36"} +{"current_steps": 6379, "total_steps": 6840, "loss": 0.5006313323974609, "lr": 2.484207744713074e-07, "epoch": 1.8653311887702881, "percentage": 93.26, "elapsed_time": "9:20:36", "remaining_time": "0:40:30"} +{"current_steps": 6380, "total_steps": 6840, "loss": 0.5007860660552979, "lr": 2.473509744605751e-07, "epoch": 1.8656236291855535, "percentage": 93.27, "elapsed_time": "9:20:41", "remaining_time": "0:40:25"} +{"current_steps": 6381, "total_steps": 6840, "loss": 0.4354132413864136, "lr": 2.4628345407225804e-07, "epoch": 1.8659160696008188, "percentage": 93.29, "elapsed_time": "9:20:45", "remaining_time": "0:40:20"} +{"current_steps": 6382, "total_steps": 6840, "loss": 0.5199555158615112, "lr": 2.452182135558789e-07, "epoch": 1.8662085100160843, "percentage": 93.3, "elapsed_time": "9:20:51", "remaining_time": "0:40:14"} +{"current_steps": 6383, "total_steps": 6840, "loss": 0.5117326974868774, "lr": 2.441552531604319e-07, "epoch": 1.8665009504313494, "percentage": 93.32, "elapsed_time": "9:20:56", "remaining_time": "0:40:09"} +{"current_steps": 6384, "total_steps": 6840, "loss": 0.5169814825057983, "lr": 2.43094573134377e-07, "epoch": 1.866793390846615, "percentage": 93.33, "elapsed_time": "9:21:01", "remaining_time": "0:40:04"} +{"current_steps": 6385, "total_steps": 6840, "loss": 0.563339352607727, "lr": 2.420361737256438e-07, "epoch": 1.8670858312618805, "percentage": 93.35, "elapsed_time": "9:21:07", "remaining_time": "0:39:59"} +{"current_steps": 6386, "total_steps": 6840, "loss": 0.710465133190155, "lr": 2.409800551816255e-07, "epoch": 1.8673782716771457, "percentage": 93.36, "elapsed_time": "9:21:12", "remaining_time": "0:39:53"} +{"current_steps": 6387, "total_steps": 6840, "loss": 0.6894562244415283, "lr": 
2.3992621774918343e-07, "epoch": 1.8676707120924112, "percentage": 93.38, "elapsed_time": "9:21:18", "remaining_time": "0:39:48"} +{"current_steps": 6388, "total_steps": 6840, "loss": 0.5105183124542236, "lr": 2.388746616746462e-07, "epoch": 1.8679631525076765, "percentage": 93.39, "elapsed_time": "9:21:23", "remaining_time": "0:39:43"} +{"current_steps": 6389, "total_steps": 6840, "loss": 0.4602908492088318, "lr": 2.3782538720380722e-07, "epoch": 1.8682555929229419, "percentage": 93.41, "elapsed_time": "9:21:28", "remaining_time": "0:39:38"} +{"current_steps": 6390, "total_steps": 6840, "loss": 0.5395161509513855, "lr": 2.3677839458192908e-07, "epoch": 1.8685480333382074, "percentage": 93.42, "elapsed_time": "9:21:32", "remaining_time": "0:39:32"} +{"current_steps": 6391, "total_steps": 6840, "loss": 0.5842725038528442, "lr": 2.3573368405374054e-07, "epoch": 1.8688404737534727, "percentage": 93.44, "elapsed_time": "9:21:38", "remaining_time": "0:39:27"} +{"current_steps": 6392, "total_steps": 6840, "loss": 0.5837947130203247, "lr": 2.346912558634362e-07, "epoch": 1.869132914168738, "percentage": 93.45, "elapsed_time": "9:21:44", "remaining_time": "0:39:22"} +{"current_steps": 6393, "total_steps": 6840, "loss": 0.5255596041679382, "lr": 2.3365111025467568e-07, "epoch": 1.8694253545840036, "percentage": 93.46, "elapsed_time": "9:21:49", "remaining_time": "0:39:16"} +{"current_steps": 6394, "total_steps": 6840, "loss": 0.5614485144615173, "lr": 2.326132474705889e-07, "epoch": 1.869717794999269, "percentage": 93.48, "elapsed_time": "9:21:54", "remaining_time": "0:39:11"} +{"current_steps": 6395, "total_steps": 6840, "loss": 0.5510128736495972, "lr": 2.3157766775376733e-07, "epoch": 1.8700102354145343, "percentage": 93.49, "elapsed_time": "9:22:00", "remaining_time": "0:39:06"} +{"current_steps": 6396, "total_steps": 6840, "loss": 0.690884530544281, "lr": 2.3054437134627406e-07, "epoch": 1.8703026758297998, "percentage": 93.51, "elapsed_time": "9:22:05", 
"remaining_time": "0:39:01"} +{"current_steps": 6397, "total_steps": 6840, "loss": 0.637476921081543, "lr": 2.2951335848963364e-07, "epoch": 1.870595116245065, "percentage": 93.52, "elapsed_time": "9:22:11", "remaining_time": "0:38:55"} +{"current_steps": 6398, "total_steps": 6840, "loss": 0.5254319906234741, "lr": 2.2848462942484108e-07, "epoch": 1.8708875566603305, "percentage": 93.54, "elapsed_time": "9:22:17", "remaining_time": "0:38:50"} +{"current_steps": 6399, "total_steps": 6840, "loss": 0.5038233995437622, "lr": 2.27458184392354e-07, "epoch": 1.871179997075596, "percentage": 93.55, "elapsed_time": "9:22:22", "remaining_time": "0:38:45"} +{"current_steps": 6400, "total_steps": 6840, "loss": 0.43701431155204773, "lr": 2.2643402363209832e-07, "epoch": 1.8714724374908611, "percentage": 93.57, "elapsed_time": "9:22:27", "remaining_time": "0:38:40"} +{"current_steps": 6401, "total_steps": 6840, "loss": 0.5490877628326416, "lr": 2.2541214738346583e-07, "epoch": 1.8717648779061267, "percentage": 93.58, "elapsed_time": "9:22:36", "remaining_time": "0:38:35"} +{"current_steps": 6402, "total_steps": 6840, "loss": 0.48393410444259644, "lr": 2.2439255588531327e-07, "epoch": 1.872057318321392, "percentage": 93.6, "elapsed_time": "9:22:41", "remaining_time": "0:38:29"} +{"current_steps": 6403, "total_steps": 6840, "loss": 0.5439243912696838, "lr": 2.2337524937596444e-07, "epoch": 1.8723497587366573, "percentage": 93.61, "elapsed_time": "9:22:46", "remaining_time": "0:38:24"} +{"current_steps": 6404, "total_steps": 6840, "loss": 0.5272157192230225, "lr": 2.22360228093208e-07, "epoch": 1.8726421991519229, "percentage": 93.63, "elapsed_time": "9:22:51", "remaining_time": "0:38:19"} +{"current_steps": 6405, "total_steps": 6840, "loss": 0.6323473453521729, "lr": 2.2134749227429864e-07, "epoch": 1.8729346395671882, "percentage": 93.64, "elapsed_time": "9:22:57", "remaining_time": "0:38:14"} +{"current_steps": 6406, "total_steps": 6840, "loss": 0.4568995237350464, "lr": 
2.2033704215595808e-07, "epoch": 1.8732270799824535, "percentage": 93.65, "elapsed_time": "9:23:03", "remaining_time": "0:38:08"} +{"current_steps": 6407, "total_steps": 6840, "loss": 0.5817153453826904, "lr": 2.1932887797437296e-07, "epoch": 1.873519520397719, "percentage": 93.67, "elapsed_time": "9:23:09", "remaining_time": "0:38:03"} +{"current_steps": 6408, "total_steps": 6840, "loss": 0.5104260444641113, "lr": 2.183229999651948e-07, "epoch": 1.8738119608129844, "percentage": 93.68, "elapsed_time": "9:23:14", "remaining_time": "0:37:58"} +{"current_steps": 6409, "total_steps": 6840, "loss": 0.44944921135902405, "lr": 2.1731940836354105e-07, "epoch": 1.8741044012282497, "percentage": 93.7, "elapsed_time": "9:23:19", "remaining_time": "0:37:53"} +{"current_steps": 6410, "total_steps": 6840, "loss": 0.6935169696807861, "lr": 2.163181034039974e-07, "epoch": 1.8743968416435153, "percentage": 93.71, "elapsed_time": "9:23:24", "remaining_time": "0:37:47"} +{"current_steps": 6411, "total_steps": 6840, "loss": 0.55609130859375, "lr": 2.1531908532060998e-07, "epoch": 1.8746892820587804, "percentage": 93.73, "elapsed_time": "9:23:29", "remaining_time": "0:37:42"} +{"current_steps": 6412, "total_steps": 6840, "loss": 0.5402215719223022, "lr": 2.143223543468953e-07, "epoch": 1.874981722474046, "percentage": 93.74, "elapsed_time": "9:23:34", "remaining_time": "0:37:37"} +{"current_steps": 6413, "total_steps": 6840, "loss": 0.5669365525245667, "lr": 2.1332791071583258e-07, "epoch": 1.8752741628893113, "percentage": 93.76, "elapsed_time": "9:23:40", "remaining_time": "0:37:31"} +{"current_steps": 6414, "total_steps": 6840, "loss": 0.46257615089416504, "lr": 2.123357546598659e-07, "epoch": 1.8755666033045766, "percentage": 93.77, "elapsed_time": "9:23:44", "remaining_time": "0:37:26"} +{"current_steps": 6415, "total_steps": 6840, "loss": 0.4596136212348938, "lr": 2.1134588641090858e-07, "epoch": 1.8758590437198421, "percentage": 93.79, "elapsed_time": "9:23:49", 
"remaining_time": "0:37:21"} +{"current_steps": 6416, "total_steps": 6840, "loss": 0.5086819529533386, "lr": 2.1035830620033227e-07, "epoch": 1.8761514841351075, "percentage": 93.8, "elapsed_time": "9:23:54", "remaining_time": "0:37:15"} +{"current_steps": 6417, "total_steps": 6840, "loss": 0.6008501052856445, "lr": 2.0937301425898115e-07, "epoch": 1.8764439245503728, "percentage": 93.82, "elapsed_time": "9:24:00", "remaining_time": "0:37:10"} +{"current_steps": 6418, "total_steps": 6840, "loss": 0.5943784713745117, "lr": 2.0839001081715882e-07, "epoch": 1.8767363649656383, "percentage": 93.83, "elapsed_time": "9:24:05", "remaining_time": "0:37:05"} +{"current_steps": 6419, "total_steps": 6840, "loss": 0.5006660223007202, "lr": 2.0740929610463813e-07, "epoch": 1.8770288053809037, "percentage": 93.85, "elapsed_time": "9:24:11", "remaining_time": "0:37:00"} +{"current_steps": 6420, "total_steps": 6840, "loss": 0.5434073805809021, "lr": 2.0643087035065458e-07, "epoch": 1.877321245796169, "percentage": 93.86, "elapsed_time": "9:24:18", "remaining_time": "0:36:55"} +{"current_steps": 6421, "total_steps": 6840, "loss": 0.6426963210105896, "lr": 2.0545473378390858e-07, "epoch": 1.8776136862114345, "percentage": 93.87, "elapsed_time": "9:24:24", "remaining_time": "0:36:49"} +{"current_steps": 6422, "total_steps": 6840, "loss": 0.5190218687057495, "lr": 2.044808866325676e-07, "epoch": 1.8779061266266996, "percentage": 93.89, "elapsed_time": "9:24:30", "remaining_time": "0:36:44"} +{"current_steps": 6423, "total_steps": 6840, "loss": 0.40918534994125366, "lr": 2.035093291242607e-07, "epoch": 1.8781985670419652, "percentage": 93.9, "elapsed_time": "9:24:35", "remaining_time": "0:36:39"} +{"current_steps": 6424, "total_steps": 6840, "loss": 0.5403652191162109, "lr": 2.0254006148608507e-07, "epoch": 1.8784910074572307, "percentage": 93.92, "elapsed_time": "9:24:41", "remaining_time": "0:36:34"} +{"current_steps": 6425, "total_steps": 6840, "loss": 0.49781516194343567, "lr": 
2.0157308394460062e-07, "epoch": 1.8787834478724958, "percentage": 93.93, "elapsed_time": "9:24:47", "remaining_time": "0:36:28"} +{"current_steps": 6426, "total_steps": 6840, "loss": 0.5841303467750549, "lr": 2.006083967258321e-07, "epoch": 1.8790758882877614, "percentage": 93.95, "elapsed_time": "9:24:52", "remaining_time": "0:36:23"} +{"current_steps": 6427, "total_steps": 6840, "loss": 0.5054808855056763, "lr": 1.9964600005527024e-07, "epoch": 1.8793683287030267, "percentage": 93.96, "elapsed_time": "9:24:56", "remaining_time": "0:36:18"} +{"current_steps": 6428, "total_steps": 6840, "loss": 0.4801362454891205, "lr": 1.9868589415786843e-07, "epoch": 1.879660769118292, "percentage": 93.98, "elapsed_time": "9:25:01", "remaining_time": "0:36:12"} +{"current_steps": 6429, "total_steps": 6840, "loss": 0.4709380269050598, "lr": 1.9772807925804494e-07, "epoch": 1.8799532095335576, "percentage": 93.99, "elapsed_time": "9:25:07", "remaining_time": "0:36:07"} +{"current_steps": 6430, "total_steps": 6840, "loss": 0.665968120098114, "lr": 1.9677255557968511e-07, "epoch": 1.880245649948823, "percentage": 94.01, "elapsed_time": "9:25:12", "remaining_time": "0:36:02"} +{"current_steps": 6431, "total_steps": 6840, "loss": 0.515839159488678, "lr": 1.9581932334613585e-07, "epoch": 1.8805380903640883, "percentage": 94.02, "elapsed_time": "9:25:19", "remaining_time": "0:35:57"} +{"current_steps": 6432, "total_steps": 6840, "loss": 0.5399242043495178, "lr": 1.948683827802089e-07, "epoch": 1.8808305307793538, "percentage": 94.04, "elapsed_time": "9:25:24", "remaining_time": "0:35:51"} +{"current_steps": 6433, "total_steps": 6840, "loss": 0.6167087554931641, "lr": 1.9391973410418097e-07, "epoch": 1.8811229711946191, "percentage": 94.05, "elapsed_time": "9:25:29", "remaining_time": "0:35:46"} +{"current_steps": 6434, "total_steps": 6840, "loss": 0.6139745116233826, "lr": 1.9297337753979462e-07, "epoch": 1.8814154116098845, "percentage": 94.06, "elapsed_time": "9:25:35", 
"remaining_time": "0:35:41"} +{"current_steps": 6435, "total_steps": 6840, "loss": 0.7103149890899658, "lr": 1.9202931330825292e-07, "epoch": 1.88170785202515, "percentage": 94.08, "elapsed_time": "9:25:41", "remaining_time": "0:35:36"} +{"current_steps": 6436, "total_steps": 6840, "loss": 0.5958741903305054, "lr": 1.9108754163022602e-07, "epoch": 1.8820002924404151, "percentage": 94.09, "elapsed_time": "9:25:47", "remaining_time": "0:35:30"} +{"current_steps": 6437, "total_steps": 6840, "loss": 0.32660478353500366, "lr": 1.9014806272584673e-07, "epoch": 1.8822927328556807, "percentage": 94.11, "elapsed_time": "9:25:53", "remaining_time": "0:35:25"} +{"current_steps": 6438, "total_steps": 6840, "loss": 0.49485981464385986, "lr": 1.8921087681471272e-07, "epoch": 1.8825851732709462, "percentage": 94.12, "elapsed_time": "9:25:59", "remaining_time": "0:35:20"} +{"current_steps": 6439, "total_steps": 6840, "loss": 0.5106277465820312, "lr": 1.8827598411588544e-07, "epoch": 1.8828776136862113, "percentage": 94.14, "elapsed_time": "9:26:03", "remaining_time": "0:35:15"} +{"current_steps": 6440, "total_steps": 6840, "loss": 0.50006502866745, "lr": 1.8734338484789115e-07, "epoch": 1.8831700541014769, "percentage": 94.15, "elapsed_time": "9:26:10", "remaining_time": "0:35:09"} +{"current_steps": 6441, "total_steps": 6840, "loss": 0.47097745537757874, "lr": 1.8641307922871887e-07, "epoch": 1.8834624945167422, "percentage": 94.17, "elapsed_time": "9:26:15", "remaining_time": "0:35:04"} +{"current_steps": 6442, "total_steps": 6840, "loss": 0.5874402523040771, "lr": 1.854850674758213e-07, "epoch": 1.8837549349320075, "percentage": 94.18, "elapsed_time": "9:26:20", "remaining_time": "0:34:59"} +{"current_steps": 6443, "total_steps": 6840, "loss": 0.45705318450927734, "lr": 1.8455934980611602e-07, "epoch": 1.884047375347273, "percentage": 94.2, "elapsed_time": "9:26:26", "remaining_time": "0:34:54"} +{"current_steps": 6444, "total_steps": 6840, "loss": 0.4949952960014343, "lr": 
1.8363592643598328e-07, "epoch": 1.8843398157625384, "percentage": 94.21, "elapsed_time": "9:26:31", "remaining_time": "0:34:48"} +{"current_steps": 6445, "total_steps": 6840, "loss": 0.5311721563339233, "lr": 1.827147975812693e-07, "epoch": 1.8846322561778037, "percentage": 94.23, "elapsed_time": "9:26:36", "remaining_time": "0:34:43"} +{"current_steps": 6446, "total_steps": 6840, "loss": 0.5652828216552734, "lr": 1.817959634572819e-07, "epoch": 1.8849246965930693, "percentage": 94.24, "elapsed_time": "9:26:42", "remaining_time": "0:34:38"} +{"current_steps": 6447, "total_steps": 6840, "loss": 0.4856044054031372, "lr": 1.8087942427879146e-07, "epoch": 1.8852171370083346, "percentage": 94.25, "elapsed_time": "9:26:47", "remaining_time": "0:34:33"} +{"current_steps": 6448, "total_steps": 6840, "loss": 0.55420982837677, "lr": 1.799651802600344e-07, "epoch": 1.8855095774236, "percentage": 94.27, "elapsed_time": "9:26:52", "remaining_time": "0:34:27"} +{"current_steps": 6449, "total_steps": 6840, "loss": 0.5869326591491699, "lr": 1.7905323161470867e-07, "epoch": 1.8858020178388655, "percentage": 94.28, "elapsed_time": "9:26:56", "remaining_time": "0:34:22"} +{"current_steps": 6450, "total_steps": 6840, "loss": 0.4505504369735718, "lr": 1.781435785559793e-07, "epoch": 1.8860944582541306, "percentage": 94.3, "elapsed_time": "9:27:01", "remaining_time": "0:34:17"} +{"current_steps": 6451, "total_steps": 6840, "loss": 0.5460773706436157, "lr": 1.7723622129646955e-07, "epoch": 1.8863868986693961, "percentage": 94.31, "elapsed_time": "9:27:06", "remaining_time": "0:34:11"} +{"current_steps": 6452, "total_steps": 6840, "loss": 0.6214778423309326, "lr": 1.7633116004826978e-07, "epoch": 1.8866793390846615, "percentage": 94.33, "elapsed_time": "9:27:11", "remaining_time": "0:34:06"} +{"current_steps": 6453, "total_steps": 6840, "loss": 0.4900703430175781, "lr": 1.7542839502293297e-07, "epoch": 1.8869717794999268, "percentage": 94.34, "elapsed_time": "9:27:16", "remaining_time": 
"0:34:01"} +{"current_steps": 6454, "total_steps": 6840, "loss": 0.5177547931671143, "lr": 1.7452792643147364e-07, "epoch": 1.8872642199151923, "percentage": 94.36, "elapsed_time": "9:27:21", "remaining_time": "0:33:55"} +{"current_steps": 6455, "total_steps": 6840, "loss": 0.3914458453655243, "lr": 1.7362975448437236e-07, "epoch": 1.8875566603304577, "percentage": 94.37, "elapsed_time": "9:27:25", "remaining_time": "0:33:50"} +{"current_steps": 6456, "total_steps": 6840, "loss": 0.5222523212432861, "lr": 1.7273387939157116e-07, "epoch": 1.887849100745723, "percentage": 94.39, "elapsed_time": "9:27:31", "remaining_time": "0:33:45"} +{"current_steps": 6457, "total_steps": 6840, "loss": 0.5097587704658508, "lr": 1.7184030136247477e-07, "epoch": 1.8881415411609885, "percentage": 94.4, "elapsed_time": "9:27:35", "remaining_time": "0:33:40"} +{"current_steps": 6458, "total_steps": 6840, "loss": 0.517410397529602, "lr": 1.7094902060595053e-07, "epoch": 1.8884339815762539, "percentage": 94.42, "elapsed_time": "9:27:42", "remaining_time": "0:33:34"} +{"current_steps": 6459, "total_steps": 6840, "loss": 0.4951689839363098, "lr": 1.7006003733033182e-07, "epoch": 1.8887264219915192, "percentage": 94.43, "elapsed_time": "9:27:48", "remaining_time": "0:33:29"} +{"current_steps": 6460, "total_steps": 6840, "loss": 0.5530004501342773, "lr": 1.6917335174341242e-07, "epoch": 1.8890188624067847, "percentage": 94.44, "elapsed_time": "9:27:53", "remaining_time": "0:33:24"} +{"current_steps": 6461, "total_steps": 6840, "loss": 0.5231990814208984, "lr": 1.6828896405244988e-07, "epoch": 1.8893113028220498, "percentage": 94.46, "elapsed_time": "9:28:00", "remaining_time": "0:33:19"} +{"current_steps": 6462, "total_steps": 6840, "loss": 0.5142268538475037, "lr": 1.6740687446416326e-07, "epoch": 1.8896037432373154, "percentage": 94.47, "elapsed_time": "9:28:06", "remaining_time": "0:33:13"} +{"current_steps": 6463, "total_steps": 6840, "loss": 0.4803999364376068, "lr": 
1.6652708318473765e-07, "epoch": 1.889896183652581, "percentage": 94.49, "elapsed_time": "9:28:12", "remaining_time": "0:33:08"} +{"current_steps": 6464, "total_steps": 6840, "loss": 0.38822099566459656, "lr": 1.6564959041981743e-07, "epoch": 1.890188624067846, "percentage": 94.5, "elapsed_time": "9:28:17", "remaining_time": "0:33:03"} +{"current_steps": 6465, "total_steps": 6840, "loss": 0.4778556823730469, "lr": 1.6477439637451186e-07, "epoch": 1.8904810644831116, "percentage": 94.52, "elapsed_time": "9:28:23", "remaining_time": "0:32:58"} +{"current_steps": 6466, "total_steps": 6840, "loss": 0.5083664059638977, "lr": 1.6390150125339178e-07, "epoch": 1.890773504898377, "percentage": 94.53, "elapsed_time": "9:28:29", "remaining_time": "0:32:52"} +{"current_steps": 6467, "total_steps": 6840, "loss": 0.6592142581939697, "lr": 1.6303090526049058e-07, "epoch": 1.8910659453136422, "percentage": 94.55, "elapsed_time": "9:28:33", "remaining_time": "0:32:47"} +{"current_steps": 6468, "total_steps": 6840, "loss": 0.6350588798522949, "lr": 1.6216260859930776e-07, "epoch": 1.8913583857289078, "percentage": 94.56, "elapsed_time": "9:28:39", "remaining_time": "0:32:42"} +{"current_steps": 6469, "total_steps": 6840, "loss": 0.5542852282524109, "lr": 1.6129661147279763e-07, "epoch": 1.8916508261441731, "percentage": 94.58, "elapsed_time": "9:28:44", "remaining_time": "0:32:37"} +{"current_steps": 6470, "total_steps": 6840, "loss": 0.572988748550415, "lr": 1.6043291408338602e-07, "epoch": 1.8919432665594385, "percentage": 94.59, "elapsed_time": "9:28:50", "remaining_time": "0:32:31"} +{"current_steps": 6471, "total_steps": 6840, "loss": 0.4801466763019562, "lr": 1.5957151663295367e-07, "epoch": 1.892235706974704, "percentage": 94.61, "elapsed_time": "9:28:55", "remaining_time": "0:32:26"} +{"current_steps": 6472, "total_steps": 6840, "loss": 0.6286160349845886, "lr": 1.5871241932284953e-07, "epoch": 1.8925281473899693, "percentage": 94.62, "elapsed_time": "9:29:00", 
"remaining_time": "0:32:21"} +{"current_steps": 6473, "total_steps": 6840, "loss": 0.5731645822525024, "lr": 1.5785562235388074e-07, "epoch": 1.8928205878052347, "percentage": 94.63, "elapsed_time": "9:29:06", "remaining_time": "0:32:15"} +{"current_steps": 6474, "total_steps": 6840, "loss": 0.47890836000442505, "lr": 1.5700112592631933e-07, "epoch": 1.8931130282205002, "percentage": 94.65, "elapsed_time": "9:29:10", "remaining_time": "0:32:10"} +{"current_steps": 6475, "total_steps": 6840, "loss": 0.4379703998565674, "lr": 1.5614893023989886e-07, "epoch": 1.8934054686357653, "percentage": 94.66, "elapsed_time": "9:29:15", "remaining_time": "0:32:05"} +{"current_steps": 6476, "total_steps": 6840, "loss": 0.5629044771194458, "lr": 1.5529903549381331e-07, "epoch": 1.8936979090510309, "percentage": 94.68, "elapsed_time": "9:29:19", "remaining_time": "0:32:00"} +{"current_steps": 6477, "total_steps": 6840, "loss": 0.4995439052581787, "lr": 1.5445144188672268e-07, "epoch": 1.8939903494662964, "percentage": 94.69, "elapsed_time": "9:29:25", "remaining_time": "0:31:54"} +{"current_steps": 6478, "total_steps": 6840, "loss": 0.5350549221038818, "lr": 1.5360614961674403e-07, "epoch": 1.8942827898815615, "percentage": 94.71, "elapsed_time": "9:29:31", "remaining_time": "0:31:49"} +{"current_steps": 6479, "total_steps": 6840, "loss": 0.5245925188064575, "lr": 1.5276315888146266e-07, "epoch": 1.894575230296827, "percentage": 94.72, "elapsed_time": "9:29:36", "remaining_time": "0:31:44"} +{"current_steps": 6480, "total_steps": 6840, "loss": 0.5159675478935242, "lr": 1.519224698779198e-07, "epoch": 1.8948676707120924, "percentage": 94.74, "elapsed_time": "9:29:40", "remaining_time": "0:31:38"} +{"current_steps": 6481, "total_steps": 6840, "loss": 0.5046014189720154, "lr": 1.5108408280262276e-07, "epoch": 1.8951601111273577, "percentage": 94.75, "elapsed_time": "9:29:45", "remaining_time": "0:31:33"} +{"current_steps": 6482, "total_steps": 6840, "loss": 0.35977911949157715, "lr": 
1.502479978515381e-07, "epoch": 1.8954525515426233, "percentage": 94.77, "elapsed_time": "9:29:50", "remaining_time": "0:31:28"} +{"current_steps": 6483, "total_steps": 6840, "loss": 0.4689600467681885, "lr": 1.4941421522009725e-07, "epoch": 1.8957449919578886, "percentage": 94.78, "elapsed_time": "9:29:55", "remaining_time": "0:31:23"} +{"current_steps": 6484, "total_steps": 6840, "loss": 0.5729683637619019, "lr": 1.485827351031899e-07, "epoch": 1.896037432373154, "percentage": 94.8, "elapsed_time": "9:30:02", "remaining_time": "0:31:17"} +{"current_steps": 6485, "total_steps": 6840, "loss": 0.5929673314094543, "lr": 1.4775355769517163e-07, "epoch": 1.8963298727884195, "percentage": 94.81, "elapsed_time": "9:30:06", "remaining_time": "0:31:12"} +{"current_steps": 6486, "total_steps": 6840, "loss": 0.43075594305992126, "lr": 1.4692668318985636e-07, "epoch": 1.8966223132036848, "percentage": 94.82, "elapsed_time": "9:30:11", "remaining_time": "0:31:07"} +{"current_steps": 6487, "total_steps": 6840, "loss": 0.5247992277145386, "lr": 1.461021117805217e-07, "epoch": 1.8969147536189501, "percentage": 94.84, "elapsed_time": "9:30:17", "remaining_time": "0:31:02"} +{"current_steps": 6488, "total_steps": 6840, "loss": 0.4930630326271057, "lr": 1.4527984365990455e-07, "epoch": 1.8972071940342157, "percentage": 94.85, "elapsed_time": "9:30:23", "remaining_time": "0:30:56"} +{"current_steps": 6489, "total_steps": 6840, "loss": 0.7183758616447449, "lr": 1.4445987902020676e-07, "epoch": 1.8974996344494808, "percentage": 94.87, "elapsed_time": "9:30:27", "remaining_time": "0:30:51"} +{"current_steps": 6490, "total_steps": 6840, "loss": 0.4766094982624054, "lr": 1.4364221805309052e-07, "epoch": 1.8977920748647463, "percentage": 94.88, "elapsed_time": "9:30:33", "remaining_time": "0:30:46"} +{"current_steps": 6491, "total_steps": 6840, "loss": 0.43594151735305786, "lr": 1.4282686094967747e-07, "epoch": 1.8980845152800117, "percentage": 94.9, "elapsed_time": "9:30:37", 
"remaining_time": "0:30:40"} +{"current_steps": 6492, "total_steps": 6840, "loss": 0.49320366978645325, "lr": 1.4201380790055397e-07, "epoch": 1.898376955695277, "percentage": 94.91, "elapsed_time": "9:30:43", "remaining_time": "0:30:35"} +{"current_steps": 6493, "total_steps": 6840, "loss": 0.600296139717102, "lr": 1.4120305909576359e-07, "epoch": 1.8986693961105425, "percentage": 94.93, "elapsed_time": "9:30:49", "remaining_time": "0:30:30"} +{"current_steps": 6494, "total_steps": 6840, "loss": 0.6692827939987183, "lr": 1.4039461472481696e-07, "epoch": 1.8989618365258079, "percentage": 94.94, "elapsed_time": "9:30:55", "remaining_time": "0:30:25"} +{"current_steps": 6495, "total_steps": 6840, "loss": 0.49206262826919556, "lr": 1.395884749766807e-07, "epoch": 1.8992542769410732, "percentage": 94.96, "elapsed_time": "9:31:00", "remaining_time": "0:30:19"} +{"current_steps": 6496, "total_steps": 6840, "loss": 0.4987361431121826, "lr": 1.3878464003978741e-07, "epoch": 1.8995467173563387, "percentage": 94.97, "elapsed_time": "9:31:06", "remaining_time": "0:30:14"} +{"current_steps": 6497, "total_steps": 6840, "loss": 0.5020350217819214, "lr": 1.3798311010202681e-07, "epoch": 1.899839157771604, "percentage": 94.99, "elapsed_time": "9:31:11", "remaining_time": "0:30:09"} +{"current_steps": 6498, "total_steps": 6840, "loss": 0.5906451344490051, "lr": 1.3718388535075123e-07, "epoch": 1.9001315981868694, "percentage": 95.0, "elapsed_time": "9:31:17", "remaining_time": "0:30:04"} +{"current_steps": 6499, "total_steps": 6840, "loss": 0.5089905858039856, "lr": 1.3638696597277678e-07, "epoch": 1.900424038602135, "percentage": 95.01, "elapsed_time": "9:31:22", "remaining_time": "0:29:58"} +{"current_steps": 6500, "total_steps": 6840, "loss": 0.4633820056915283, "lr": 1.3559235215437672e-07, "epoch": 1.9007164790174, "percentage": 95.03, "elapsed_time": "9:31:27", "remaining_time": "0:29:53"} +{"current_steps": 6501, "total_steps": 6840, "loss": 0.4214053750038147, "lr": 
1.34800044081288e-07, "epoch": 1.9010089194326656, "percentage": 95.04, "elapsed_time": "9:31:36", "remaining_time": "0:29:48"} +{"current_steps": 6502, "total_steps": 6840, "loss": 0.6652689576148987, "lr": 1.3401004193870694e-07, "epoch": 1.9013013598479311, "percentage": 95.06, "elapsed_time": "9:31:40", "remaining_time": "0:29:43"} +{"current_steps": 6503, "total_steps": 6840, "loss": 0.610877275466919, "lr": 1.3322234591129247e-07, "epoch": 1.9015938002631962, "percentage": 95.07, "elapsed_time": "9:31:45", "remaining_time": "0:29:37"} +{"current_steps": 6504, "total_steps": 6840, "loss": 0.5051777958869934, "lr": 1.324369561831651e-07, "epoch": 1.9018862406784618, "percentage": 95.09, "elapsed_time": "9:31:50", "remaining_time": "0:29:32"} +{"current_steps": 6505, "total_steps": 6840, "loss": 0.5004675984382629, "lr": 1.3165387293790133e-07, "epoch": 1.9021786810937271, "percentage": 95.1, "elapsed_time": "9:31:54", "remaining_time": "0:29:27"} +{"current_steps": 6506, "total_steps": 6840, "loss": 0.5778615474700928, "lr": 1.3087309635854583e-07, "epoch": 1.9024711215089924, "percentage": 95.12, "elapsed_time": "9:32:01", "remaining_time": "0:29:21"} +{"current_steps": 6507, "total_steps": 6840, "loss": 0.5282145738601685, "lr": 1.300946266275982e-07, "epoch": 1.902763561924258, "percentage": 95.13, "elapsed_time": "9:32:06", "remaining_time": "0:29:16"} +{"current_steps": 6508, "total_steps": 6840, "loss": 0.5965359807014465, "lr": 1.2931846392702174e-07, "epoch": 1.9030560023395233, "percentage": 95.15, "elapsed_time": "9:32:11", "remaining_time": "0:29:11"} +{"current_steps": 6509, "total_steps": 6840, "loss": 0.5891281366348267, "lr": 1.2854460843823912e-07, "epoch": 1.9033484427547886, "percentage": 95.16, "elapsed_time": "9:32:16", "remaining_time": "0:29:06"} +{"current_steps": 6510, "total_steps": 6840, "loss": 0.516204297542572, "lr": 1.2777306034213677e-07, "epoch": 1.9036408831700542, "percentage": 95.18, "elapsed_time": "9:32:21", 
"remaining_time": "0:29:00"} +{"current_steps": 6511, "total_steps": 6840, "loss": 0.5148355960845947, "lr": 1.2700381981905486e-07, "epoch": 1.9039333235853195, "percentage": 95.19, "elapsed_time": "9:32:27", "remaining_time": "0:28:55"} +{"current_steps": 6512, "total_steps": 6840, "loss": 0.5599791407585144, "lr": 1.2623688704880287e-07, "epoch": 1.9042257640005849, "percentage": 95.2, "elapsed_time": "9:32:31", "remaining_time": "0:28:50"} +{"current_steps": 6513, "total_steps": 6840, "loss": 0.44349417090415955, "lr": 1.2547226221064412e-07, "epoch": 1.9045182044158504, "percentage": 95.22, "elapsed_time": "9:32:37", "remaining_time": "0:28:44"} +{"current_steps": 6514, "total_steps": 6840, "loss": 0.5919830799102783, "lr": 1.2470994548330672e-07, "epoch": 1.9048106448311155, "percentage": 95.23, "elapsed_time": "9:32:41", "remaining_time": "0:28:39"} +{"current_steps": 6515, "total_steps": 6840, "loss": 0.4615499675273895, "lr": 1.2394993704497592e-07, "epoch": 1.905103085246381, "percentage": 95.25, "elapsed_time": "9:32:47", "remaining_time": "0:28:34"} +{"current_steps": 6516, "total_steps": 6840, "loss": 0.5217719674110413, "lr": 1.2319223707330074e-07, "epoch": 1.9053955256616466, "percentage": 95.26, "elapsed_time": "9:32:53", "remaining_time": "0:28:29"} +{"current_steps": 6517, "total_steps": 6840, "loss": 0.510722279548645, "lr": 1.2243684574538838e-07, "epoch": 1.9056879660769117, "percentage": 95.28, "elapsed_time": "9:32:58", "remaining_time": "0:28:23"} +{"current_steps": 6518, "total_steps": 6840, "loss": 0.6744403839111328, "lr": 1.2168376323780652e-07, "epoch": 1.9059804064921773, "percentage": 95.29, "elapsed_time": "9:33:04", "remaining_time": "0:28:18"} +{"current_steps": 6519, "total_steps": 6840, "loss": 0.4991394281387329, "lr": 1.209329897265832e-07, "epoch": 1.9062728469074426, "percentage": 95.31, "elapsed_time": "9:33:08", "remaining_time": "0:28:13"} +{"current_steps": 6520, "total_steps": 6840, "loss": 0.43237754702568054, "lr": 
1.2018452538720805e-07, "epoch": 1.906565287322708, "percentage": 95.32, "elapsed_time": "9:33:15", "remaining_time": "0:28:08"} +{"current_steps": 6521, "total_steps": 6840, "loss": 0.6042662262916565, "lr": 1.1943837039463112e-07, "epoch": 1.9068577277379735, "percentage": 95.34, "elapsed_time": "9:33:21", "remaining_time": "0:28:02"} +{"current_steps": 6522, "total_steps": 6840, "loss": 0.4275910556316376, "lr": 1.186945249232585e-07, "epoch": 1.9071501681532388, "percentage": 95.35, "elapsed_time": "9:33:26", "remaining_time": "0:27:57"} +{"current_steps": 6523, "total_steps": 6840, "loss": 0.4368266463279724, "lr": 1.1795298914696219e-07, "epoch": 1.9074426085685041, "percentage": 95.37, "elapsed_time": "9:33:30", "remaining_time": "0:27:52"} +{"current_steps": 6524, "total_steps": 6840, "loss": 0.49492496252059937, "lr": 1.172137632390713e-07, "epoch": 1.9077350489837697, "percentage": 95.38, "elapsed_time": "9:33:35", "remaining_time": "0:27:46"} +{"current_steps": 6525, "total_steps": 6840, "loss": 0.4296407103538513, "lr": 1.164768473723743e-07, "epoch": 1.908027489399035, "percentage": 95.39, "elapsed_time": "9:33:40", "remaining_time": "0:27:41"} +{"current_steps": 6526, "total_steps": 6840, "loss": 0.4609370231628418, "lr": 1.1574224171912118e-07, "epoch": 1.9083199298143003, "percentage": 95.41, "elapsed_time": "9:33:44", "remaining_time": "0:27:36"} +{"current_steps": 6527, "total_steps": 6840, "loss": 0.5201660394668579, "lr": 1.1500994645102237e-07, "epoch": 1.9086123702295659, "percentage": 95.42, "elapsed_time": "9:33:49", "remaining_time": "0:27:31"} +{"current_steps": 6528, "total_steps": 6840, "loss": 0.49946731328964233, "lr": 1.1427996173924649e-07, "epoch": 1.908904810644831, "percentage": 95.44, "elapsed_time": "9:33:53", "remaining_time": "0:27:25"} +{"current_steps": 6529, "total_steps": 6840, "loss": 0.5479187369346619, "lr": 1.1355228775442262e-07, "epoch": 1.9091972510600965, "percentage": 95.45, "elapsed_time": "9:33:59", 
"remaining_time": "0:27:20"} +{"current_steps": 6530, "total_steps": 6840, "loss": 0.5227243900299072, "lr": 1.1282692466664247e-07, "epoch": 1.9094896914753618, "percentage": 95.47, "elapsed_time": "9:34:04", "remaining_time": "0:27:15"} +{"current_steps": 6531, "total_steps": 6840, "loss": 0.42863208055496216, "lr": 1.1210387264545264e-07, "epoch": 1.9097821318906272, "percentage": 95.48, "elapsed_time": "9:34:08", "remaining_time": "0:27:09"} +{"current_steps": 6532, "total_steps": 6840, "loss": 0.37858498096466064, "lr": 1.113831318598635e-07, "epoch": 1.9100745723058927, "percentage": 95.5, "elapsed_time": "9:34:12", "remaining_time": "0:27:04"} +{"current_steps": 6533, "total_steps": 6840, "loss": 0.6447315216064453, "lr": 1.1066470247834471e-07, "epoch": 1.910367012721158, "percentage": 95.51, "elapsed_time": "9:34:18", "remaining_time": "0:26:59"} +{"current_steps": 6534, "total_steps": 6840, "loss": 0.4159877300262451, "lr": 1.0994858466882197e-07, "epoch": 1.9106594531364234, "percentage": 95.53, "elapsed_time": "9:34:25", "remaining_time": "0:26:54"} +{"current_steps": 6535, "total_steps": 6840, "loss": 0.5042530298233032, "lr": 1.0923477859868581e-07, "epoch": 1.910951893551689, "percentage": 95.54, "elapsed_time": "9:34:29", "remaining_time": "0:26:48"} +{"current_steps": 6536, "total_steps": 6840, "loss": 0.35955798625946045, "lr": 1.0852328443478278e-07, "epoch": 1.9112443339669543, "percentage": 95.56, "elapsed_time": "9:34:33", "remaining_time": "0:26:43"} +{"current_steps": 6537, "total_steps": 6840, "loss": 0.561823308467865, "lr": 1.0781410234342093e-07, "epoch": 1.9115367743822196, "percentage": 95.57, "elapsed_time": "9:34:39", "remaining_time": "0:26:38"} +{"current_steps": 6538, "total_steps": 6840, "loss": 0.44518136978149414, "lr": 1.0710723249036659e-07, "epoch": 1.9118292147974851, "percentage": 95.58, "elapsed_time": "9:34:44", "remaining_time": "0:26:32"} +{"current_steps": 6539, "total_steps": 6840, "loss": 0.5657057166099548, "lr": 
1.0640267504084756e-07, "epoch": 1.9121216552127502, "percentage": 95.6, "elapsed_time": "9:34:49", "remaining_time": "0:26:27"} +{"current_steps": 6540, "total_steps": 6840, "loss": 0.5659947395324707, "lr": 1.0570043015954989e-07, "epoch": 1.9124140956280158, "percentage": 95.61, "elapsed_time": "9:34:54", "remaining_time": "0:26:22"} +{"current_steps": 6541, "total_steps": 6840, "loss": 0.45648419857025146, "lr": 1.0500049801061784e-07, "epoch": 1.9127065360432813, "percentage": 95.63, "elapsed_time": "9:34:58", "remaining_time": "0:26:17"} +{"current_steps": 6542, "total_steps": 6840, "loss": 0.4978141784667969, "lr": 1.0430287875765611e-07, "epoch": 1.9129989764585464, "percentage": 95.64, "elapsed_time": "9:35:03", "remaining_time": "0:26:11"} +{"current_steps": 6543, "total_steps": 6840, "loss": 0.5397627949714661, "lr": 1.0360757256372977e-07, "epoch": 1.913291416873812, "percentage": 95.66, "elapsed_time": "9:35:09", "remaining_time": "0:26:06"} +{"current_steps": 6544, "total_steps": 6840, "loss": 0.6359304189682007, "lr": 1.029145795913633e-07, "epoch": 1.9135838572890773, "percentage": 95.67, "elapsed_time": "9:35:14", "remaining_time": "0:26:01"} +{"current_steps": 6545, "total_steps": 6840, "loss": 0.5023899078369141, "lr": 1.0222390000253824e-07, "epoch": 1.9138762977043426, "percentage": 95.69, "elapsed_time": "9:35:19", "remaining_time": "0:25:55"} +{"current_steps": 6546, "total_steps": 6840, "loss": 0.5231877565383911, "lr": 1.0153553395869654e-07, "epoch": 1.9141687381196082, "percentage": 95.7, "elapsed_time": "9:35:22", "remaining_time": "0:25:50"} +{"current_steps": 6547, "total_steps": 6840, "loss": 0.5925711393356323, "lr": 1.008494816207406e-07, "epoch": 1.9144611785348735, "percentage": 95.72, "elapsed_time": "9:35:28", "remaining_time": "0:25:45"} +{"current_steps": 6548, "total_steps": 6840, "loss": 0.42732810974121094, "lr": 1.0016574314902993e-07, "epoch": 1.9147536189501388, "percentage": 95.73, "elapsed_time": "9:35:33", 
"remaining_time": "0:25:39"} +{"current_steps": 6549, "total_steps": 6840, "loss": 0.5011821985244751, "lr": 9.948431870338559e-08, "epoch": 1.9150460593654044, "percentage": 95.75, "elapsed_time": "9:35:38", "remaining_time": "0:25:34"} +{"current_steps": 6550, "total_steps": 6840, "loss": 0.5112487077713013, "lr": 9.88052084430846e-08, "epoch": 1.9153384997806697, "percentage": 95.76, "elapsed_time": "9:35:43", "remaining_time": "0:25:29"} +{"current_steps": 6551, "total_steps": 6840, "loss": 0.4751431345939636, "lr": 9.812841252686667e-08, "epoch": 1.915630940195935, "percentage": 95.77, "elapsed_time": "9:35:48", "remaining_time": "0:25:24"} +{"current_steps": 6552, "total_steps": 6840, "loss": 0.5343109369277954, "lr": 9.745393111292745e-08, "epoch": 1.9159233806112006, "percentage": 95.79, "elapsed_time": "9:35:52", "remaining_time": "0:25:18"} +{"current_steps": 6553, "total_steps": 6840, "loss": 0.4602724015712738, "lr": 9.678176435892417e-08, "epoch": 1.9162158210264657, "percentage": 95.8, "elapsed_time": "9:35:58", "remaining_time": "0:25:13"} +{"current_steps": 6554, "total_steps": 6840, "loss": 0.4756245017051697, "lr": 9.611191242197005e-08, "epoch": 1.9165082614417313, "percentage": 95.82, "elapsed_time": "9:36:05", "remaining_time": "0:25:08"} +{"current_steps": 6555, "total_steps": 6840, "loss": 0.5291459560394287, "lr": 9.544437545864093e-08, "epoch": 1.9168007018569968, "percentage": 95.83, "elapsed_time": "9:36:09", "remaining_time": "0:25:03"} +{"current_steps": 6556, "total_steps": 6840, "loss": 0.5357412099838257, "lr": 9.47791536249676e-08, "epoch": 1.917093142272262, "percentage": 95.85, "elapsed_time": "9:36:14", "remaining_time": "0:24:57"} +{"current_steps": 6557, "total_steps": 6840, "loss": 0.6298913955688477, "lr": 9.411624707644229e-08, "epoch": 1.9173855826875275, "percentage": 95.86, "elapsed_time": "9:36:19", "remaining_time": "0:24:52"} +{"current_steps": 6558, "total_steps": 6840, "loss": 0.5150517225265503, "lr": 
9.345565596801553e-08, "epoch": 1.9176780231027928, "percentage": 95.88, "elapsed_time": "9:36:26", "remaining_time": "0:24:47"} +{"current_steps": 6559, "total_steps": 6840, "loss": 0.6264858245849609, "lr": 9.279738045409603e-08, "epoch": 1.917970463518058, "percentage": 95.89, "elapsed_time": "9:36:30", "remaining_time": "0:24:41"} +{"current_steps": 6560, "total_steps": 6840, "loss": 0.33123475313186646, "lr": 9.214142068855292e-08, "epoch": 1.9182629039333237, "percentage": 95.91, "elapsed_time": "9:36:36", "remaining_time": "0:24:36"} +{"current_steps": 6561, "total_steps": 6840, "loss": 0.5540212392807007, "lr": 9.148777682471133e-08, "epoch": 1.918555344348589, "percentage": 95.92, "elapsed_time": "9:36:41", "remaining_time": "0:24:31"} +{"current_steps": 6562, "total_steps": 6840, "loss": 0.5633922219276428, "lr": 9.083644901535793e-08, "epoch": 1.9188477847638543, "percentage": 95.94, "elapsed_time": "9:36:46", "remaining_time": "0:24:26"} +{"current_steps": 6563, "total_steps": 6840, "loss": 0.58629310131073, "lr": 9.018743741273428e-08, "epoch": 1.9191402251791199, "percentage": 95.95, "elapsed_time": "9:36:50", "remaining_time": "0:24:20"} +{"current_steps": 6564, "total_steps": 6840, "loss": 0.5985243320465088, "lr": 8.95407421685457e-08, "epoch": 1.9194326655943852, "percentage": 95.96, "elapsed_time": "9:36:55", "remaining_time": "0:24:15"} +{"current_steps": 6565, "total_steps": 6840, "loss": 0.5344138741493225, "lr": 8.889636343395235e-08, "epoch": 1.9197251060096505, "percentage": 95.98, "elapsed_time": "9:37:01", "remaining_time": "0:24:10"} +{"current_steps": 6566, "total_steps": 6840, "loss": 0.6139744520187378, "lr": 8.825430135957381e-08, "epoch": 1.920017546424916, "percentage": 95.99, "elapsed_time": "9:37:07", "remaining_time": "0:24:04"} +{"current_steps": 6567, "total_steps": 6840, "loss": 0.46376854181289673, "lr": 8.761455609548663e-08, "epoch": 1.9203099868401812, "percentage": 96.01, "elapsed_time": "9:37:12", "remaining_time": 
"0:23:59"} +{"current_steps": 6568, "total_steps": 6840, "loss": 0.5053622722625732, "lr": 8.697712779122902e-08, "epoch": 1.9206024272554467, "percentage": 96.02, "elapsed_time": "9:37:18", "remaining_time": "0:23:54"} +{"current_steps": 6569, "total_steps": 6840, "loss": 0.4363771080970764, "lr": 8.634201659579622e-08, "epoch": 1.920894867670712, "percentage": 96.04, "elapsed_time": "9:37:23", "remaining_time": "0:23:49"} +{"current_steps": 6570, "total_steps": 6840, "loss": 0.4167904853820801, "lr": 8.570922265764059e-08, "epoch": 1.9211873080859774, "percentage": 96.05, "elapsed_time": "9:37:29", "remaining_time": "0:23:43"} +{"current_steps": 6571, "total_steps": 6840, "loss": 0.525320291519165, "lr": 8.507874612467382e-08, "epoch": 1.921479748501243, "percentage": 96.07, "elapsed_time": "9:37:35", "remaining_time": "0:23:38"} +{"current_steps": 6572, "total_steps": 6840, "loss": 0.4087376594543457, "lr": 8.445058714426691e-08, "epoch": 1.9217721889165083, "percentage": 96.08, "elapsed_time": "9:37:38", "remaining_time": "0:23:33"} +{"current_steps": 6573, "total_steps": 6840, "loss": 0.471457839012146, "lr": 8.382474586324796e-08, "epoch": 1.9220646293317736, "percentage": 96.1, "elapsed_time": "9:37:44", "remaining_time": "0:23:28"} +{"current_steps": 6574, "total_steps": 6840, "loss": 0.6125116348266602, "lr": 8.32012224279033e-08, "epoch": 1.9223570697470391, "percentage": 96.11, "elapsed_time": "9:37:50", "remaining_time": "0:23:22"} +{"current_steps": 6575, "total_steps": 6840, "loss": 0.3800301253795624, "lr": 8.258001698397744e-08, "epoch": 1.9226495101623045, "percentage": 96.13, "elapsed_time": "9:37:56", "remaining_time": "0:23:17"} +{"current_steps": 6576, "total_steps": 6840, "loss": 0.561034083366394, "lr": 8.196112967667313e-08, "epoch": 1.9229419505775698, "percentage": 96.14, "elapsed_time": "9:38:01", "remaining_time": "0:23:12"} +{"current_steps": 6577, "total_steps": 6840, "loss": 0.5768460631370544, "lr": 8.134456065065354e-08, "epoch": 
1.9232343909928353, "percentage": 96.15, "elapsed_time": "9:38:07", "remaining_time": "0:23:07"} +{"current_steps": 6578, "total_steps": 6840, "loss": 0.47440657019615173, "lr": 8.073031005003562e-08, "epoch": 1.9235268314081004, "percentage": 96.17, "elapsed_time": "9:38:12", "remaining_time": "0:23:01"} +{"current_steps": 6579, "total_steps": 6840, "loss": 0.5315208435058594, "lr": 8.011837801839672e-08, "epoch": 1.923819271823366, "percentage": 96.18, "elapsed_time": "9:38:18", "remaining_time": "0:22:56"} +{"current_steps": 6580, "total_steps": 6840, "loss": 0.4587036371231079, "lr": 7.950876469877467e-08, "epoch": 1.9241117122386315, "percentage": 96.2, "elapsed_time": "9:38:23", "remaining_time": "0:22:51"} +{"current_steps": 6581, "total_steps": 6840, "loss": 0.5356466770172119, "lr": 7.890147023366101e-08, "epoch": 1.9244041526538966, "percentage": 96.21, "elapsed_time": "9:38:28", "remaining_time": "0:22:45"} +{"current_steps": 6582, "total_steps": 6840, "loss": 0.48034095764160156, "lr": 7.829649476500667e-08, "epoch": 1.9246965930691622, "percentage": 96.23, "elapsed_time": "9:38:34", "remaining_time": "0:22:40"} +{"current_steps": 6583, "total_steps": 6840, "loss": 0.502929151058197, "lr": 7.769383843422185e-08, "epoch": 1.9249890334844275, "percentage": 96.24, "elapsed_time": "9:38:38", "remaining_time": "0:22:35"} +{"current_steps": 6584, "total_steps": 6840, "loss": 0.44771361351013184, "lr": 7.709350138217386e-08, "epoch": 1.9252814738996928, "percentage": 96.26, "elapsed_time": "9:38:44", "remaining_time": "0:22:30"} +{"current_steps": 6585, "total_steps": 6840, "loss": 0.462479829788208, "lr": 7.649548374918824e-08, "epoch": 1.9255739143149584, "percentage": 96.27, "elapsed_time": "9:38:49", "remaining_time": "0:22:24"} +{"current_steps": 6586, "total_steps": 6840, "loss": 0.4758496880531311, "lr": 7.589978567504763e-08, "epoch": 1.9258663547302237, "percentage": 96.29, "elapsed_time": "9:38:54", "remaining_time": "0:22:19"} +{"current_steps": 
6587, "total_steps": 6840, "loss": 0.521172285079956, "lr": 7.530640729899174e-08, "epoch": 1.926158795145489, "percentage": 96.3, "elapsed_time": "9:38:58", "remaining_time": "0:22:14"} +{"current_steps": 6588, "total_steps": 6840, "loss": 0.5274392366409302, "lr": 7.471534875971964e-08, "epoch": 1.9264512355607546, "percentage": 96.32, "elapsed_time": "9:39:05", "remaining_time": "0:22:09"} +{"current_steps": 6589, "total_steps": 6840, "loss": 0.4350961446762085, "lr": 7.412661019538858e-08, "epoch": 1.92674367597602, "percentage": 96.33, "elapsed_time": "9:39:10", "remaining_time": "0:22:03"} +{"current_steps": 6590, "total_steps": 6840, "loss": 0.6298524737358093, "lr": 7.354019174361183e-08, "epoch": 1.9270361163912852, "percentage": 96.35, "elapsed_time": "9:39:15", "remaining_time": "0:21:58"} +{"current_steps": 6591, "total_steps": 6840, "loss": 0.5451292395591736, "lr": 7.295609354146194e-08, "epoch": 1.9273285568065508, "percentage": 96.36, "elapsed_time": "9:39:21", "remaining_time": "0:21:53"} +{"current_steps": 6592, "total_steps": 6840, "loss": 0.5371264219284058, "lr": 7.23743157254675e-08, "epoch": 1.927620997221816, "percentage": 96.37, "elapsed_time": "9:39:25", "remaining_time": "0:21:47"} +{"current_steps": 6593, "total_steps": 6840, "loss": 0.5805129408836365, "lr": 7.179485843161526e-08, "epoch": 1.9279134376370815, "percentage": 96.39, "elapsed_time": "9:39:31", "remaining_time": "0:21:42"} +{"current_steps": 6594, "total_steps": 6840, "loss": 0.5542718172073364, "lr": 7.121772179535135e-08, "epoch": 1.928205878052347, "percentage": 96.4, "elapsed_time": "9:39:37", "remaining_time": "0:21:37"} +{"current_steps": 6595, "total_steps": 6840, "loss": 0.5668192505836487, "lr": 7.064290595157675e-08, "epoch": 1.928498318467612, "percentage": 96.42, "elapsed_time": "9:39:41", "remaining_time": "0:21:32"} +{"current_steps": 6596, "total_steps": 6840, "loss": 0.5107895731925964, "lr": 7.007041103465062e-08, "epoch": 1.9287907588828777, "percentage": 
96.43, "elapsed_time": "9:39:45", "remaining_time": "0:21:26"} +{"current_steps": 6597, "total_steps": 6840, "loss": 0.47974276542663574, "lr": 6.950023717839261e-08, "epoch": 1.929083199298143, "percentage": 96.45, "elapsed_time": "9:39:50", "remaining_time": "0:21:21"} +{"current_steps": 6598, "total_steps": 6840, "loss": 0.5641148090362549, "lr": 6.893238451607387e-08, "epoch": 1.9293756397134083, "percentage": 96.46, "elapsed_time": "9:39:56", "remaining_time": "0:21:16"} +{"current_steps": 6599, "total_steps": 6840, "loss": 0.5940253734588623, "lr": 6.836685318042935e-08, "epoch": 1.9296680801286739, "percentage": 96.48, "elapsed_time": "9:40:01", "remaining_time": "0:21:10"} +{"current_steps": 6600, "total_steps": 6840, "loss": 0.46844422817230225, "lr": 6.780364330364775e-08, "epoch": 1.9299605205439392, "percentage": 96.49, "elapsed_time": "9:40:07", "remaining_time": "0:21:05"} +{"current_steps": 6601, "total_steps": 6840, "loss": 0.3933336138725281, "lr": 6.724275501737487e-08, "epoch": 1.9302529609592045, "percentage": 96.51, "elapsed_time": "9:40:16", "remaining_time": "0:21:00"} +{"current_steps": 6602, "total_steps": 6840, "loss": 0.4786602258682251, "lr": 6.668418845271695e-08, "epoch": 1.93054540137447, "percentage": 96.52, "elapsed_time": "9:40:22", "remaining_time": "0:20:55"} +{"current_steps": 6603, "total_steps": 6840, "loss": 0.49695518612861633, "lr": 6.612794374023402e-08, "epoch": 1.9308378417897354, "percentage": 96.54, "elapsed_time": "9:40:27", "remaining_time": "0:20:50"} +{"current_steps": 6604, "total_steps": 6840, "loss": 0.4798729121685028, "lr": 6.557402100994426e-08, "epoch": 1.9311302822050007, "percentage": 96.55, "elapsed_time": "9:40:33", "remaining_time": "0:20:44"} +{"current_steps": 6605, "total_steps": 6840, "loss": 0.4187319278717041, "lr": 6.502242039132634e-08, "epoch": 1.9314227226202663, "percentage": 96.56, "elapsed_time": "9:40:37", "remaining_time": "0:20:39"} +{"current_steps": 6606, "total_steps": 6840, "loss": 
0.4945526719093323, "lr": 6.447314201331156e-08, "epoch": 1.9317151630355314, "percentage": 96.58, "elapsed_time": "9:40:43", "remaining_time": "0:20:34"} +{"current_steps": 6607, "total_steps": 6840, "loss": 0.5721586346626282, "lr": 6.392618600429057e-08, "epoch": 1.932007603450797, "percentage": 96.59, "elapsed_time": "9:40:48", "remaining_time": "0:20:28"} +{"current_steps": 6608, "total_steps": 6840, "loss": 0.45542022585868835, "lr": 6.338155249211109e-08, "epoch": 1.9323000438660622, "percentage": 96.61, "elapsed_time": "9:40:54", "remaining_time": "0:20:23"} +{"current_steps": 6609, "total_steps": 6840, "loss": 0.5627170205116272, "lr": 6.283924160407796e-08, "epoch": 1.9325924842813276, "percentage": 96.62, "elapsed_time": "9:40:58", "remaining_time": "0:20:18"} +{"current_steps": 6610, "total_steps": 6840, "loss": 0.5369620323181152, "lr": 6.22992534669542e-08, "epoch": 1.9328849246965931, "percentage": 96.64, "elapsed_time": "9:41:03", "remaining_time": "0:20:13"} +{"current_steps": 6611, "total_steps": 6840, "loss": 0.5268368124961853, "lr": 6.176158820695665e-08, "epoch": 1.9331773651118584, "percentage": 96.65, "elapsed_time": "9:41:08", "remaining_time": "0:20:07"} +{"current_steps": 6612, "total_steps": 6840, "loss": 0.5734575986862183, "lr": 6.122624594976257e-08, "epoch": 1.9334698055271238, "percentage": 96.67, "elapsed_time": "9:41:13", "remaining_time": "0:20:02"} +{"current_steps": 6613, "total_steps": 6840, "loss": 0.5066978931427002, "lr": 6.069322682050516e-08, "epoch": 1.9337622459423893, "percentage": 96.68, "elapsed_time": "9:41:18", "remaining_time": "0:19:57"} +{"current_steps": 6614, "total_steps": 6840, "loss": 0.5462731719017029, "lr": 6.016253094377366e-08, "epoch": 1.9340546863576547, "percentage": 96.7, "elapsed_time": "9:41:24", "remaining_time": "0:19:51"} +{"current_steps": 6615, "total_steps": 6840, "loss": 0.5407041311264038, "lr": 5.963415844361553e-08, "epoch": 1.93434712677292, "percentage": 96.71, "elapsed_time": 
"9:41:29", "remaining_time": "0:19:46"} +{"current_steps": 6616, "total_steps": 6840, "loss": 0.48977869749069214, "lr": 5.910810944353418e-08, "epoch": 1.9346395671881855, "percentage": 96.73, "elapsed_time": "9:41:34", "remaining_time": "0:19:41"} +{"current_steps": 6617, "total_steps": 6840, "loss": 0.5320937037467957, "lr": 5.858438406649125e-08, "epoch": 1.9349320076034506, "percentage": 96.74, "elapsed_time": "9:41:40", "remaining_time": "0:19:36"} +{"current_steps": 6618, "total_steps": 6840, "loss": 0.5860059261322021, "lr": 5.806298243490327e-08, "epoch": 1.9352244480187162, "percentage": 96.75, "elapsed_time": "9:41:46", "remaining_time": "0:19:30"} +{"current_steps": 6619, "total_steps": 6840, "loss": 0.49517208337783813, "lr": 5.7543904670644965e-08, "epoch": 1.9355168884339817, "percentage": 96.77, "elapsed_time": "9:41:51", "remaining_time": "0:19:25"} +{"current_steps": 6620, "total_steps": 6840, "loss": 0.5060882568359375, "lr": 5.7027150895049286e-08, "epoch": 1.9358093288492468, "percentage": 96.78, "elapsed_time": "9:41:57", "remaining_time": "0:19:20"} +{"current_steps": 6621, "total_steps": 6840, "loss": 0.5887798070907593, "lr": 5.651272122890184e-08, "epoch": 1.9361017692645124, "percentage": 96.8, "elapsed_time": "9:42:02", "remaining_time": "0:19:15"} +{"current_steps": 6622, "total_steps": 6840, "loss": 0.6567577123641968, "lr": 5.600061579244753e-08, "epoch": 1.9363942096797777, "percentage": 96.81, "elapsed_time": "9:42:07", "remaining_time": "0:19:09"} +{"current_steps": 6623, "total_steps": 6840, "loss": 0.5672584176063538, "lr": 5.549083470538952e-08, "epoch": 1.936686650095043, "percentage": 96.83, "elapsed_time": "9:42:11", "remaining_time": "0:19:04"} +{"current_steps": 6624, "total_steps": 6840, "loss": 0.5166369676589966, "lr": 5.4983378086885806e-08, "epoch": 1.9369790905103086, "percentage": 96.84, "elapsed_time": "9:42:17", "remaining_time": "0:18:59"} +{"current_steps": 6625, "total_steps": 6840, "loss": 0.5157661437988281, 
"lr": 5.447824605555041e-08, "epoch": 1.937271530925574, "percentage": 96.86, "elapsed_time": "9:42:22", "remaining_time": "0:18:53"} +{"current_steps": 6626, "total_steps": 6840, "loss": 0.5001711845397949, "lr": 5.397543872945443e-08, "epoch": 1.9375639713408392, "percentage": 96.87, "elapsed_time": "9:42:28", "remaining_time": "0:18:48"} +{"current_steps": 6627, "total_steps": 6840, "loss": 0.48944878578186035, "lr": 5.34749562261272e-08, "epoch": 1.9378564117561048, "percentage": 96.89, "elapsed_time": "9:42:31", "remaining_time": "0:18:43"} +{"current_steps": 6628, "total_steps": 6840, "loss": 0.5400780439376831, "lr": 5.297679866255401e-08, "epoch": 1.9381488521713701, "percentage": 96.9, "elapsed_time": "9:42:37", "remaining_time": "0:18:38"} +{"current_steps": 6629, "total_steps": 6840, "loss": 0.544346809387207, "lr": 5.248096615517395e-08, "epoch": 1.9384412925866354, "percentage": 96.92, "elapsed_time": "9:42:41", "remaining_time": "0:18:32"} +{"current_steps": 6630, "total_steps": 6840, "loss": 0.5283153653144836, "lr": 5.1987458819886535e-08, "epoch": 1.938733733001901, "percentage": 96.93, "elapsed_time": "9:42:47", "remaining_time": "0:18:27"} +{"current_steps": 6631, "total_steps": 6840, "loss": 0.555808424949646, "lr": 5.149627677204616e-08, "epoch": 1.939026173417166, "percentage": 96.94, "elapsed_time": "9:42:52", "remaining_time": "0:18:22"} +{"current_steps": 6632, "total_steps": 6840, "loss": 0.5230466723442078, "lr": 5.10074201264632e-08, "epoch": 1.9393186138324316, "percentage": 96.96, "elapsed_time": "9:42:57", "remaining_time": "0:18:17"} +{"current_steps": 6633, "total_steps": 6840, "loss": 0.4810416102409363, "lr": 5.052088899740515e-08, "epoch": 1.9396110542476972, "percentage": 96.97, "elapsed_time": "9:43:04", "remaining_time": "0:18:11"} +{"current_steps": 6634, "total_steps": 6840, "loss": 0.35233962535858154, "lr": 5.0036683498594365e-08, "epoch": 1.9399034946629623, "percentage": 96.99, "elapsed_time": "9:43:10", 
"remaining_time": "0:18:06"} +{"current_steps": 6635, "total_steps": 6840, "loss": 0.5250035524368286, "lr": 4.955480374321253e-08, "epoch": 1.9401959350782279, "percentage": 97.0, "elapsed_time": "9:43:15", "remaining_time": "0:18:01"} +{"current_steps": 6636, "total_steps": 6840, "loss": 0.5896221399307251, "lr": 4.907524984389622e-08, "epoch": 1.9404883754934932, "percentage": 97.02, "elapsed_time": "9:43:22", "remaining_time": "0:17:56"} +{"current_steps": 6637, "total_steps": 6840, "loss": 0.5410518050193787, "lr": 4.859802191273688e-08, "epoch": 1.9407808159087585, "percentage": 97.03, "elapsed_time": "9:43:26", "remaining_time": "0:17:50"} +{"current_steps": 6638, "total_steps": 6840, "loss": 0.5044152736663818, "lr": 4.812312006128528e-08, "epoch": 1.941073256324024, "percentage": 97.05, "elapsed_time": "9:43:31", "remaining_time": "0:17:45"} +{"current_steps": 6639, "total_steps": 6840, "loss": 0.5388177633285522, "lr": 4.765054440054484e-08, "epoch": 1.9413656967392894, "percentage": 97.06, "elapsed_time": "9:43:36", "remaining_time": "0:17:40"} +{"current_steps": 6640, "total_steps": 6840, "loss": 0.5074491500854492, "lr": 4.718029504097943e-08, "epoch": 1.9416581371545547, "percentage": 97.08, "elapsed_time": "9:43:40", "remaining_time": "0:17:34"} +{"current_steps": 6641, "total_steps": 6840, "loss": 0.47772669792175293, "lr": 4.671237209250557e-08, "epoch": 1.9419505775698203, "percentage": 97.09, "elapsed_time": "9:43:44", "remaining_time": "0:17:29"} +{"current_steps": 6642, "total_steps": 6840, "loss": 0.4682825207710266, "lr": 4.624677566449798e-08, "epoch": 1.9422430179850856, "percentage": 97.11, "elapsed_time": "9:43:49", "remaining_time": "0:17:24"} +{"current_steps": 6643, "total_steps": 6840, "loss": 0.48880642652511597, "lr": 4.578350586578628e-08, "epoch": 1.942535458400351, "percentage": 97.12, "elapsed_time": "9:43:54", "remaining_time": "0:17:18"} +{"current_steps": 6644, "total_steps": 6840, "loss": 0.4590389132499695, "lr": 
4.532256280465719e-08, "epoch": 1.9428278988156165, "percentage": 97.13, "elapsed_time": "9:43:58", "remaining_time": "0:17:13"} +{"current_steps": 6645, "total_steps": 6840, "loss": 0.5893105268478394, "lr": 4.48639465888534e-08, "epoch": 1.9431203392308816, "percentage": 97.15, "elapsed_time": "9:44:04", "remaining_time": "0:17:08"} +{"current_steps": 6646, "total_steps": 6840, "loss": 0.561900794506073, "lr": 4.4407657325574725e-08, "epoch": 1.9434127796461471, "percentage": 97.16, "elapsed_time": "9:44:11", "remaining_time": "0:17:03"} +{"current_steps": 6647, "total_steps": 6840, "loss": 0.4140210747718811, "lr": 4.395369512147474e-08, "epoch": 1.9437052200614124, "percentage": 97.18, "elapsed_time": "9:44:16", "remaining_time": "0:16:57"} +{"current_steps": 6648, "total_steps": 6840, "loss": 0.6220303773880005, "lr": 4.350206008266522e-08, "epoch": 1.9439976604766778, "percentage": 97.19, "elapsed_time": "9:44:21", "remaining_time": "0:16:52"} +{"current_steps": 6649, "total_steps": 6840, "loss": 0.4903472065925598, "lr": 4.3052752314712844e-08, "epoch": 1.9442901008919433, "percentage": 97.21, "elapsed_time": "9:44:26", "remaining_time": "0:16:47"} +{"current_steps": 6650, "total_steps": 6840, "loss": 0.4519340991973877, "lr": 4.260577192263915e-08, "epoch": 1.9445825413072086, "percentage": 97.22, "elapsed_time": "9:44:30", "remaining_time": "0:16:42"} +{"current_steps": 6651, "total_steps": 6840, "loss": 0.49067920446395874, "lr": 4.216111901092501e-08, "epoch": 1.944874981722474, "percentage": 97.24, "elapsed_time": "9:44:35", "remaining_time": "0:16:36"} +{"current_steps": 6652, "total_steps": 6840, "loss": 0.5935854911804199, "lr": 4.1718793683505066e-08, "epoch": 1.9451674221377395, "percentage": 97.25, "elapsed_time": "9:44:40", "remaining_time": "0:16:31"} +{"current_steps": 6653, "total_steps": 6840, "loss": 0.5209576487541199, "lr": 4.127879604376883e-08, "epoch": 1.9454598625530048, "percentage": 97.27, "elapsed_time": "9:44:45", "remaining_time": 
"0:16:26"} +{"current_steps": 6654, "total_steps": 6840, "loss": 0.4454221725463867, "lr": 4.084112619456515e-08, "epoch": 1.9457523029682702, "percentage": 97.28, "elapsed_time": "9:44:49", "remaining_time": "0:16:20"} +{"current_steps": 6655, "total_steps": 6840, "loss": 0.5129591226577759, "lr": 4.0405784238194415e-08, "epoch": 1.9460447433835357, "percentage": 97.3, "elapsed_time": "9:44:56", "remaining_time": "0:16:15"} +{"current_steps": 6656, "total_steps": 6840, "loss": 0.48704665899276733, "lr": 3.997277027641744e-08, "epoch": 1.9463371837988008, "percentage": 97.31, "elapsed_time": "9:45:01", "remaining_time": "0:16:10"} +{"current_steps": 6657, "total_steps": 6840, "loss": 0.4510651230812073, "lr": 3.95420844104466e-08, "epoch": 1.9466296242140664, "percentage": 97.32, "elapsed_time": "9:45:06", "remaining_time": "0:16:05"} +{"current_steps": 6658, "total_steps": 6840, "loss": 0.5116807222366333, "lr": 3.911372674095249e-08, "epoch": 1.946922064629332, "percentage": 97.34, "elapsed_time": "9:45:10", "remaining_time": "0:15:59"} +{"current_steps": 6659, "total_steps": 6840, "loss": 0.592056393623352, "lr": 3.868769736806277e-08, "epoch": 1.947214505044597, "percentage": 97.35, "elapsed_time": "9:45:15", "remaining_time": "0:15:54"} +{"current_steps": 6660, "total_steps": 6840, "loss": 0.579146146774292, "lr": 3.8263996391357805e-08, "epoch": 1.9475069454598626, "percentage": 97.37, "elapsed_time": "9:45:20", "remaining_time": "0:15:49"} +{"current_steps": 6661, "total_steps": 6840, "loss": 0.5253209471702576, "lr": 3.784262390987503e-08, "epoch": 1.947799385875128, "percentage": 97.38, "elapsed_time": "9:45:26", "remaining_time": "0:15:43"} +{"current_steps": 6662, "total_steps": 6840, "loss": 0.5614888072013855, "lr": 3.742358002210789e-08, "epoch": 1.9480918262903932, "percentage": 97.4, "elapsed_time": "9:45:30", "remaining_time": "0:15:38"} +{"current_steps": 6663, "total_steps": 6840, "loss": 0.5630952715873718, "lr": 3.7006864826005796e-08, "epoch": 
1.9483842667056588, "percentage": 97.41, "elapsed_time": "9:45:35", "remaining_time": "0:15:33"} +{"current_steps": 6664, "total_steps": 6840, "loss": 0.5990846157073975, "lr": 3.659247841897306e-08, "epoch": 1.9486767071209241, "percentage": 97.43, "elapsed_time": "9:45:40", "remaining_time": "0:15:28"} +{"current_steps": 6665, "total_steps": 6840, "loss": 0.5290813446044922, "lr": 3.6180420897868886e-08, "epoch": 1.9489691475361894, "percentage": 97.44, "elapsed_time": "9:45:47", "remaining_time": "0:15:22"} +{"current_steps": 6666, "total_steps": 6840, "loss": 0.6710211038589478, "lr": 3.577069235901176e-08, "epoch": 1.949261587951455, "percentage": 97.46, "elapsed_time": "9:45:51", "remaining_time": "0:15:17"} +{"current_steps": 6667, "total_steps": 6840, "loss": 0.4802299737930298, "lr": 3.536329289817064e-08, "epoch": 1.9495540283667203, "percentage": 97.47, "elapsed_time": "9:45:55", "remaining_time": "0:15:12"} +{"current_steps": 6668, "total_steps": 6840, "loss": 0.5432649850845337, "lr": 3.495822261057491e-08, "epoch": 1.9498464687819856, "percentage": 97.49, "elapsed_time": "9:46:00", "remaining_time": "0:15:06"} +{"current_steps": 6669, "total_steps": 6840, "loss": 0.5824951529502869, "lr": 3.4555481590905495e-08, "epoch": 1.9501389091972512, "percentage": 97.5, "elapsed_time": "9:46:06", "remaining_time": "0:15:01"} +{"current_steps": 6670, "total_steps": 6840, "loss": 0.48428961634635925, "lr": 3.4155069933301535e-08, "epoch": 1.9504313496125163, "percentage": 97.51, "elapsed_time": "9:46:11", "remaining_time": "0:14:56"} +{"current_steps": 6671, "total_steps": 6840, "loss": 0.5684780478477478, "lr": 3.375698773135705e-08, "epoch": 1.9507237900277818, "percentage": 97.53, "elapsed_time": "9:46:15", "remaining_time": "0:14:51"} +{"current_steps": 6672, "total_steps": 6840, "loss": 0.5658689737319946, "lr": 3.336123507811983e-08, "epoch": 1.9510162304430474, "percentage": 97.54, "elapsed_time": "9:46:22", "remaining_time": "0:14:45"} +{"current_steps": 
6673, "total_steps": 6840, "loss": 0.6265745162963867, "lr": 3.2967812066097006e-08, "epoch": 1.9513086708583125, "percentage": 97.56, "elapsed_time": "9:46:26", "remaining_time": "0:14:40"} +{"current_steps": 6674, "total_steps": 6840, "loss": 0.5732975006103516, "lr": 3.257671878724722e-08, "epoch": 1.951601111273578, "percentage": 97.57, "elapsed_time": "9:46:32", "remaining_time": "0:14:35"} +{"current_steps": 6675, "total_steps": 6840, "loss": 0.46968942880630493, "lr": 3.218795533298624e-08, "epoch": 1.9518935516888434, "percentage": 97.59, "elapsed_time": "9:46:39", "remaining_time": "0:14:30"} +{"current_steps": 6676, "total_steps": 6840, "loss": 0.5651586055755615, "lr": 3.180152179418472e-08, "epoch": 1.9521859921041087, "percentage": 97.6, "elapsed_time": "9:46:43", "remaining_time": "0:14:24"} +{"current_steps": 6677, "total_steps": 6840, "loss": 0.46789437532424927, "lr": 3.141741826117151e-08, "epoch": 1.9524784325193743, "percentage": 97.62, "elapsed_time": "9:46:47", "remaining_time": "0:14:19"} +{"current_steps": 6678, "total_steps": 6840, "loss": 0.5332610011100769, "lr": 3.1035644823725896e-08, "epoch": 1.9527708729346396, "percentage": 97.63, "elapsed_time": "9:46:54", "remaining_time": "0:14:14"} +{"current_steps": 6679, "total_steps": 6840, "loss": 0.49613600969314575, "lr": 3.06562015710854e-08, "epoch": 1.953063313349905, "percentage": 97.65, "elapsed_time": "9:46:58", "remaining_time": "0:14:08"} +{"current_steps": 6680, "total_steps": 6840, "loss": 0.5498408079147339, "lr": 3.027908859194351e-08, "epoch": 1.9533557537651705, "percentage": 97.66, "elapsed_time": "9:47:02", "remaining_time": "0:14:03"} +{"current_steps": 6681, "total_steps": 6840, "loss": 0.6802657842636108, "lr": 2.99043059744486e-08, "epoch": 1.9536481941804358, "percentage": 97.68, "elapsed_time": "9:47:07", "remaining_time": "0:13:58"} +{"current_steps": 6682, "total_steps": 6840, "loss": 0.5149989724159241, "lr": 2.9531853806201716e-08, "epoch": 1.953940634595701, 
"percentage": 97.69, "elapsed_time": "9:47:12", "remaining_time": "0:13:53"} +{"current_steps": 6683, "total_steps": 6840, "loss": 0.5249730944633484, "lr": 2.9161732174263212e-08, "epoch": 1.9542330750109667, "percentage": 97.7, "elapsed_time": "9:47:16", "remaining_time": "0:13:47"} +{"current_steps": 6684, "total_steps": 6840, "loss": 0.5711483359336853, "lr": 2.8793941165147222e-08, "epoch": 1.9545255154262318, "percentage": 97.72, "elapsed_time": "9:47:22", "remaining_time": "0:13:42"} +{"current_steps": 6685, "total_steps": 6840, "loss": 0.4591020345687866, "lr": 2.842848086482053e-08, "epoch": 1.9548179558414973, "percentage": 97.73, "elapsed_time": "9:47:27", "remaining_time": "0:13:37"} +{"current_steps": 6686, "total_steps": 6840, "loss": 0.575869083404541, "lr": 2.8065351358708136e-08, "epoch": 1.9551103962567626, "percentage": 97.75, "elapsed_time": "9:47:32", "remaining_time": "0:13:31"} +{"current_steps": 6687, "total_steps": 6840, "loss": 0.5664101839065552, "lr": 2.7704552731688816e-08, "epoch": 1.955402836672028, "percentage": 97.76, "elapsed_time": "9:47:37", "remaining_time": "0:13:26"} +{"current_steps": 6688, "total_steps": 6840, "loss": 0.5739811062812805, "lr": 2.7346085068098437e-08, "epoch": 1.9556952770872935, "percentage": 97.78, "elapsed_time": "9:47:44", "remaining_time": "0:13:21"} +{"current_steps": 6689, "total_steps": 6840, "loss": 0.4707348942756653, "lr": 2.6989948451726643e-08, "epoch": 1.9559877175025588, "percentage": 97.79, "elapsed_time": "9:47:48", "remaining_time": "0:13:16"} +{"current_steps": 6690, "total_steps": 6840, "loss": 0.38842523097991943, "lr": 2.6636142965816848e-08, "epoch": 1.9562801579178242, "percentage": 97.81, "elapsed_time": "9:47:54", "remaining_time": "0:13:10"} +{"current_steps": 6691, "total_steps": 6840, "loss": 0.4295673668384552, "lr": 2.628466869306956e-08, "epoch": 1.9565725983330897, "percentage": 97.82, "elapsed_time": "9:47:58", "remaining_time": "0:13:05"} +{"current_steps": 6692, 
"total_steps": 6840, "loss": 0.5358999967575073, "lr": 2.5935525715640176e-08, "epoch": 1.956865038748355, "percentage": 97.84, "elapsed_time": "9:48:02", "remaining_time": "0:13:00"} +{"current_steps": 6693, "total_steps": 6840, "loss": 0.49730730056762695, "lr": 2.5588714115137857e-08, "epoch": 1.9571574791636204, "percentage": 97.85, "elapsed_time": "9:48:08", "remaining_time": "0:12:55"} +{"current_steps": 6694, "total_steps": 6840, "loss": 0.5368232131004333, "lr": 2.5244233972627762e-08, "epoch": 1.957449919578886, "percentage": 97.87, "elapsed_time": "9:48:12", "remaining_time": "0:12:49"} +{"current_steps": 6695, "total_steps": 6840, "loss": 0.48084500432014465, "lr": 2.4902085368632144e-08, "epoch": 1.957742359994151, "percentage": 97.88, "elapsed_time": "9:48:16", "remaining_time": "0:12:44"} +{"current_steps": 6696, "total_steps": 6840, "loss": 0.5197296142578125, "lr": 2.45622683831237e-08, "epoch": 1.9580348004094166, "percentage": 97.89, "elapsed_time": "9:48:22", "remaining_time": "0:12:39"} +{"current_steps": 6697, "total_steps": 6840, "loss": 0.4807678163051605, "lr": 2.4224783095532224e-08, "epoch": 1.9583272408246821, "percentage": 97.91, "elapsed_time": "9:48:28", "remaining_time": "0:12:33"} +{"current_steps": 6698, "total_steps": 6840, "loss": 0.5117641687393188, "lr": 2.388962958474461e-08, "epoch": 1.9586196812399472, "percentage": 97.92, "elapsed_time": "9:48:34", "remaining_time": "0:12:28"} +{"current_steps": 6699, "total_steps": 6840, "loss": 0.5318149328231812, "lr": 2.355680792910153e-08, "epoch": 1.9589121216552128, "percentage": 97.94, "elapsed_time": "9:48:40", "remaining_time": "0:12:23"} +{"current_steps": 6700, "total_steps": 6840, "loss": 0.5590193271636963, "lr": 2.3226318206395206e-08, "epoch": 1.959204562070478, "percentage": 97.95, "elapsed_time": "9:48:46", "remaining_time": "0:12:18"} +{"current_steps": 6701, "total_steps": 6840, "loss": 0.7686688899993896, "lr": 2.2898160493878275e-08, "epoch": 1.9594970024857434, 
"percentage": 97.97, "elapsed_time": "9:48:54", "remaining_time": "0:12:12"} +{"current_steps": 6702, "total_steps": 6840, "loss": 0.5085177421569824, "lr": 2.257233486825383e-08, "epoch": 1.959789442901009, "percentage": 97.98, "elapsed_time": "9:49:00", "remaining_time": "0:12:07"} +{"current_steps": 6703, "total_steps": 6840, "loss": 0.44002413749694824, "lr": 2.2248841405683176e-08, "epoch": 1.9600818833162743, "percentage": 98.0, "elapsed_time": "9:49:05", "remaining_time": "0:12:02"} +{"current_steps": 6704, "total_steps": 6840, "loss": 0.5369126796722412, "lr": 2.1927680181779154e-08, "epoch": 1.9603743237315396, "percentage": 98.01, "elapsed_time": "9:49:11", "remaining_time": "0:11:57"} +{"current_steps": 6705, "total_steps": 6840, "loss": 0.516021728515625, "lr": 2.1608851271612828e-08, "epoch": 1.9606667641468052, "percentage": 98.03, "elapsed_time": "9:49:17", "remaining_time": "0:11:51"} +{"current_steps": 6706, "total_steps": 6840, "loss": 0.5215185284614563, "lr": 2.1292354749707922e-08, "epoch": 1.9609592045620705, "percentage": 98.04, "elapsed_time": "9:49:23", "remaining_time": "0:11:46"} +{"current_steps": 6707, "total_steps": 6840, "loss": 0.6051908731460571, "lr": 2.0978190690043032e-08, "epoch": 1.9612516449773358, "percentage": 98.06, "elapsed_time": "9:49:28", "remaining_time": "0:11:41"} +{"current_steps": 6708, "total_steps": 6840, "loss": 0.5426267385482788, "lr": 2.066635916605386e-08, "epoch": 1.9615440853926014, "percentage": 98.07, "elapsed_time": "9:49:32", "remaining_time": "0:11:36"} +{"current_steps": 6709, "total_steps": 6840, "loss": 0.5888626575469971, "lr": 2.0356860250626554e-08, "epoch": 1.9618365258078665, "percentage": 98.08, "elapsed_time": "9:49:36", "remaining_time": "0:11:30"} +{"current_steps": 6710, "total_steps": 6840, "loss": 0.5225001573562622, "lr": 2.004969401610657e-08, "epoch": 1.962128966223132, "percentage": 98.1, "elapsed_time": "9:49:43", "remaining_time": "0:11:25"} +{"current_steps": 6711, "total_steps": 
6840, "loss": 0.5735136270523071, "lr": 1.974486053429092e-08, "epoch": 1.9624214066383976, "percentage": 98.11, "elapsed_time": "9:49:50", "remaining_time": "0:11:20"} +{"current_steps": 6712, "total_steps": 6840, "loss": 0.5302764177322388, "lr": 1.9442359876433724e-08, "epoch": 1.9627138470536627, "percentage": 98.13, "elapsed_time": "9:49:54", "remaining_time": "0:11:14"} +{"current_steps": 6713, "total_steps": 6840, "loss": 0.5078837871551514, "lr": 1.9142192113241752e-08, "epoch": 1.9630062874689282, "percentage": 98.14, "elapsed_time": "9:50:00", "remaining_time": "0:11:09"} +{"current_steps": 6714, "total_steps": 6840, "loss": 0.5772985219955444, "lr": 1.884435731487888e-08, "epoch": 1.9632987278841936, "percentage": 98.16, "elapsed_time": "9:50:06", "remaining_time": "0:11:04"} +{"current_steps": 6715, "total_steps": 6840, "loss": 0.5974931716918945, "lr": 1.8548855550959423e-08, "epoch": 1.963591168299459, "percentage": 98.17, "elapsed_time": "9:50:11", "remaining_time": "0:10:59"} +{"current_steps": 6716, "total_steps": 6840, "loss": 0.5065072774887085, "lr": 1.8255686890558123e-08, "epoch": 1.9638836087147244, "percentage": 98.19, "elapsed_time": "9:50:16", "remaining_time": "0:10:53"} +{"current_steps": 6717, "total_steps": 6840, "loss": 0.4729428291320801, "lr": 1.7964851402199058e-08, "epoch": 1.9641760491299898, "percentage": 98.2, "elapsed_time": "9:50:22", "remaining_time": "0:10:48"} +{"current_steps": 6718, "total_steps": 6840, "loss": 0.46363723278045654, "lr": 1.7676349153864515e-08, "epoch": 1.964468489545255, "percentage": 98.22, "elapsed_time": "9:50:26", "remaining_time": "0:10:43"} +{"current_steps": 6719, "total_steps": 6840, "loss": 0.5436959266662598, "lr": 1.7390180212990547e-08, "epoch": 1.9647609299605207, "percentage": 98.23, "elapsed_time": "9:50:32", "remaining_time": "0:10:38"} +{"current_steps": 6720, "total_steps": 6840, "loss": 0.7571452856063843, "lr": 1.7106344646465877e-08, "epoch": 1.965053370375786, "percentage": 98.25, 
"elapsed_time": "9:50:37", "remaining_time": "0:10:32"} +{"current_steps": 6721, "total_steps": 6840, "loss": 0.5724680423736572, "lr": 1.682484252063632e-08, "epoch": 1.9653458107910513, "percentage": 98.26, "elapsed_time": "9:50:43", "remaining_time": "0:10:27"} +{"current_steps": 6722, "total_steps": 6840, "loss": 0.46937745809555054, "lr": 1.654567390130146e-08, "epoch": 1.9656382512063169, "percentage": 98.27, "elapsed_time": "9:50:49", "remaining_time": "0:10:22"} +{"current_steps": 6723, "total_steps": 6840, "loss": 0.5764822363853455, "lr": 1.6268838853713552e-08, "epoch": 1.965930691621582, "percentage": 98.29, "elapsed_time": "9:50:54", "remaining_time": "0:10:17"} +{"current_steps": 6724, "total_steps": 6840, "loss": 0.6074192523956299, "lr": 1.5994337442584164e-08, "epoch": 1.9662231320368475, "percentage": 98.3, "elapsed_time": "9:50:58", "remaining_time": "0:10:11"} +{"current_steps": 6725, "total_steps": 6840, "loss": 0.6001715064048767, "lr": 1.572216973207419e-08, "epoch": 1.9665155724521128, "percentage": 98.32, "elapsed_time": "9:51:03", "remaining_time": "0:10:06"} +{"current_steps": 6726, "total_steps": 6840, "loss": 0.5819540619850159, "lr": 1.545233578580163e-08, "epoch": 1.9668080128673782, "percentage": 98.33, "elapsed_time": "9:51:07", "remaining_time": "0:10:01"} +{"current_steps": 6727, "total_steps": 6840, "loss": 0.4745405912399292, "lr": 1.518483566683826e-08, "epoch": 1.9671004532826437, "percentage": 98.35, "elapsed_time": "9:51:13", "remaining_time": "0:09:55"} +{"current_steps": 6728, "total_steps": 6840, "loss": 0.4438042640686035, "lr": 1.4919669437710725e-08, "epoch": 1.967392893697909, "percentage": 98.36, "elapsed_time": "9:51:17", "remaining_time": "0:09:50"} +{"current_steps": 6729, "total_steps": 6840, "loss": 0.45798003673553467, "lr": 1.465683716040056e-08, "epoch": 1.9676853341131744, "percentage": 98.38, "elapsed_time": "9:51:22", "remaining_time": "0:09:45"} +{"current_steps": 6730, "total_steps": 6840, "loss": 
0.3918766379356384, "lr": 1.4396338896341955e-08, "epoch": 1.96797777452844, "percentage": 98.39, "elapsed_time": "9:51:27", "remaining_time": "0:09:40"} +{"current_steps": 6731, "total_steps": 6840, "loss": 0.5266170501708984, "lr": 1.4138174706426199e-08, "epoch": 1.9682702149437052, "percentage": 98.41, "elapsed_time": "9:51:33", "remaining_time": "0:09:34"} +{"current_steps": 6732, "total_steps": 6840, "loss": 0.5166668891906738, "lr": 1.3882344650998359e-08, "epoch": 1.9685626553589706, "percentage": 98.42, "elapsed_time": "9:51:37", "remaining_time": "0:09:29"} +{"current_steps": 6733, "total_steps": 6840, "loss": 0.39324697852134705, "lr": 1.3628848789853932e-08, "epoch": 1.9688550957742361, "percentage": 98.44, "elapsed_time": "9:51:41", "remaining_time": "0:09:24"} +{"current_steps": 6734, "total_steps": 6840, "loss": 0.4915732443332672, "lr": 1.3377687182248855e-08, "epoch": 1.9691475361895012, "percentage": 98.45, "elapsed_time": "9:51:46", "remaining_time": "0:09:18"} +{"current_steps": 6735, "total_steps": 6840, "loss": 0.5416492819786072, "lr": 1.31288598868895e-08, "epoch": 1.9694399766047668, "percentage": 98.46, "elapsed_time": "9:51:50", "remaining_time": "0:09:13"} +{"current_steps": 6736, "total_steps": 6840, "loss": 0.4713748097419739, "lr": 1.288236696193823e-08, "epoch": 1.9697324170200323, "percentage": 98.48, "elapsed_time": "9:51:55", "remaining_time": "0:09:08"} +{"current_steps": 6737, "total_steps": 6840, "loss": 0.44074663519859314, "lr": 1.263820846501118e-08, "epoch": 1.9700248574352974, "percentage": 98.49, "elapsed_time": "9:51:59", "remaining_time": "0:09:03"} +{"current_steps": 6738, "total_steps": 6840, "loss": 0.4694680869579315, "lr": 1.2396384453179366e-08, "epoch": 1.970317297850563, "percentage": 98.51, "elapsed_time": "9:52:04", "remaining_time": "0:08:57"} +{"current_steps": 6739, "total_steps": 6840, "loss": 0.553142786026001, "lr": 1.215689498296535e-08, "epoch": 1.9706097382658283, "percentage": 98.52, "elapsed_time": 
"9:52:11", "remaining_time": "0:08:52"} +{"current_steps": 6740, "total_steps": 6840, "loss": 0.533849835395813, "lr": 1.1919740110351019e-08, "epoch": 1.9709021786810936, "percentage": 98.54, "elapsed_time": "9:52:17", "remaining_time": "0:08:47"} +{"current_steps": 6741, "total_steps": 6840, "loss": 0.5448808670043945, "lr": 1.1684919890768698e-08, "epoch": 1.9711946190963592, "percentage": 98.55, "elapsed_time": "9:52:22", "remaining_time": "0:08:41"} +{"current_steps": 6742, "total_steps": 6840, "loss": 0.46860289573669434, "lr": 1.1452434379106703e-08, "epoch": 1.9714870595116245, "percentage": 98.57, "elapsed_time": "9:52:28", "remaining_time": "0:08:36"} +{"current_steps": 6743, "total_steps": 6840, "loss": 0.5552232265472412, "lr": 1.122228362970712e-08, "epoch": 1.9717794999268898, "percentage": 98.58, "elapsed_time": "9:52:33", "remaining_time": "0:08:31"} +{"current_steps": 6744, "total_steps": 6840, "loss": 0.4639692008495331, "lr": 1.0994467696364698e-08, "epoch": 1.9720719403421554, "percentage": 98.6, "elapsed_time": "9:52:39", "remaining_time": "0:08:26"} +{"current_steps": 6745, "total_steps": 6840, "loss": 0.7129387259483337, "lr": 1.076898663233239e-08, "epoch": 1.9723643807574207, "percentage": 98.61, "elapsed_time": "9:52:43", "remaining_time": "0:08:20"} +{"current_steps": 6746, "total_steps": 6840, "loss": 0.6637833118438721, "lr": 1.0545840490313597e-08, "epoch": 1.972656821172686, "percentage": 98.63, "elapsed_time": "9:52:48", "remaining_time": "0:08:15"} +{"current_steps": 6747, "total_steps": 6840, "loss": 0.6215991973876953, "lr": 1.0325029322467705e-08, "epoch": 1.9729492615879516, "percentage": 98.64, "elapsed_time": "9:52:53", "remaining_time": "0:08:10"} +{"current_steps": 6748, "total_steps": 6840, "loss": 0.48594456911087036, "lr": 1.0106553180407874e-08, "epoch": 1.9732417020032167, "percentage": 98.65, "elapsed_time": "9:52:59", "remaining_time": "0:08:05"} +{"current_steps": 6749, "total_steps": 6840, "loss": 
0.5443629622459412, "lr": 9.890412115202142e-09, "epoch": 1.9735341424184822, "percentage": 98.67, "elapsed_time": "9:53:03", "remaining_time": "0:07:59"} +{"current_steps": 6750, "total_steps": 6840, "loss": 0.643796443939209, "lr": 9.676606177371207e-09, "epoch": 1.9738265828337478, "percentage": 98.68, "elapsed_time": "9:53:08", "remaining_time": "0:07:54"} +{"current_steps": 6751, "total_steps": 6840, "loss": 0.6305385828018188, "lr": 9.465135416891757e-09, "epoch": 1.974119023249013, "percentage": 98.7, "elapsed_time": "9:53:13", "remaining_time": "0:07:49"} +{"current_steps": 6752, "total_steps": 6840, "loss": 0.5120108723640442, "lr": 9.255999883193146e-09, "epoch": 1.9744114636642784, "percentage": 98.71, "elapsed_time": "9:53:17", "remaining_time": "0:07:43"} +{"current_steps": 6753, "total_steps": 6840, "loss": 0.5552967190742493, "lr": 9.0491996251596e-09, "epoch": 1.9747039040795438, "percentage": 98.73, "elapsed_time": "9:53:22", "remaining_time": "0:07:38"} +{"current_steps": 6754, "total_steps": 6840, "loss": 0.6341986656188965, "lr": 8.84473469113023e-09, "epoch": 1.974996344494809, "percentage": 98.74, "elapsed_time": "9:53:27", "remaining_time": "0:07:33"} +{"current_steps": 6755, "total_steps": 6840, "loss": 0.5204262137413025, "lr": 8.642605128896808e-09, "epoch": 1.9752887849100746, "percentage": 98.76, "elapsed_time": "9:53:31", "remaining_time": "0:07:28"} +{"current_steps": 6756, "total_steps": 6840, "loss": 0.4980974793434143, "lr": 8.442810985705984e-09, "epoch": 1.97558122532534, "percentage": 98.77, "elapsed_time": "9:53:36", "remaining_time": "0:07:22"} +{"current_steps": 6757, "total_steps": 6840, "loss": 0.5432465076446533, "lr": 8.245352308258181e-09, "epoch": 1.9758736657406053, "percentage": 98.79, "elapsed_time": "9:53:42", "remaining_time": "0:07:17"} +{"current_steps": 6758, "total_steps": 6840, "loss": 0.813039243221283, "lr": 8.0502291427087e-09, "epoch": 1.9761661061558708, "percentage": 98.8, "elapsed_time": "9:53:47", 
"remaining_time": "0:07:12"} +{"current_steps": 6759, "total_steps": 6840, "loss": 0.5723720788955688, "lr": 7.85744153466661e-09, "epoch": 1.9764585465711362, "percentage": 98.82, "elapsed_time": "9:53:52", "remaining_time": "0:07:07"} +{"current_steps": 6760, "total_steps": 6840, "loss": 0.5562596321105957, "lr": 7.666989529193647e-09, "epoch": 1.9767509869864015, "percentage": 98.83, "elapsed_time": "9:53:56", "remaining_time": "0:07:01"} +{"current_steps": 6761, "total_steps": 6840, "loss": 0.5455175638198853, "lr": 7.478873170807532e-09, "epoch": 1.977043427401667, "percentage": 98.85, "elapsed_time": "9:54:01", "remaining_time": "0:06:56"} +{"current_steps": 6762, "total_steps": 6840, "loss": 0.5753832459449768, "lr": 7.2930925034797595e-09, "epoch": 1.9773358678169322, "percentage": 98.86, "elapsed_time": "9:54:06", "remaining_time": "0:06:51"} +{"current_steps": 6763, "total_steps": 6840, "loss": 0.49962282180786133, "lr": 7.109647570634482e-09, "epoch": 1.9776283082321977, "percentage": 98.87, "elapsed_time": "9:54:12", "remaining_time": "0:06:45"} +{"current_steps": 6764, "total_steps": 6840, "loss": 0.44443345069885254, "lr": 6.9285384151507316e-09, "epoch": 1.977920748647463, "percentage": 98.89, "elapsed_time": "9:54:18", "remaining_time": "0:06:40"} +{"current_steps": 6765, "total_steps": 6840, "loss": 0.3236424922943115, "lr": 6.749765079363535e-09, "epoch": 1.9782131890627284, "percentage": 98.9, "elapsed_time": "9:54:24", "remaining_time": "0:06:35"} +{"current_steps": 6766, "total_steps": 6840, "loss": 0.5246942639350891, "lr": 6.573327605057245e-09, "epoch": 1.978505629477994, "percentage": 98.92, "elapsed_time": "9:54:30", "remaining_time": "0:06:30"} +{"current_steps": 6767, "total_steps": 6840, "loss": 0.6525053381919861, "lr": 6.399226033475536e-09, "epoch": 1.9787980698932592, "percentage": 98.93, "elapsed_time": "9:54:36", "remaining_time": "0:06:24"} +{"current_steps": 6768, "total_steps": 6840, "loss": 0.502121090888977, "lr": 
6.227460405312524e-09, "epoch": 1.9790905103085246, "percentage": 98.95, "elapsed_time": "9:54:42", "remaining_time": "0:06:19"} +{"current_steps": 6769, "total_steps": 6840, "loss": 0.6137609481811523, "lr": 6.058030760718314e-09, "epoch": 1.9793829507237901, "percentage": 98.96, "elapsed_time": "9:54:45", "remaining_time": "0:06:14"} +{"current_steps": 6770, "total_steps": 6840, "loss": 0.6673166751861572, "lr": 5.890937139294561e-09, "epoch": 1.9796753911390554, "percentage": 98.98, "elapsed_time": "9:54:50", "remaining_time": "0:06:09"} +{"current_steps": 6771, "total_steps": 6840, "loss": 0.5888657569885254, "lr": 5.726179580098912e-09, "epoch": 1.9799678315543208, "percentage": 98.99, "elapsed_time": "9:54:55", "remaining_time": "0:06:03"} +{"current_steps": 6772, "total_steps": 6840, "loss": 0.5239546298980713, "lr": 5.563758121642781e-09, "epoch": 1.9802602719695863, "percentage": 99.01, "elapsed_time": "9:55:02", "remaining_time": "0:05:58"} +{"current_steps": 6773, "total_steps": 6840, "loss": 0.5446778535842896, "lr": 5.403672801890247e-09, "epoch": 1.9805527123848514, "percentage": 99.02, "elapsed_time": "9:55:06", "remaining_time": "0:05:53"} +{"current_steps": 6774, "total_steps": 6840, "loss": 0.6198326349258423, "lr": 5.245923658262486e-09, "epoch": 1.980845152800117, "percentage": 99.04, "elapsed_time": "9:55:11", "remaining_time": "0:05:47"} +{"current_steps": 6775, "total_steps": 6840, "loss": 0.586353063583374, "lr": 5.090510727630005e-09, "epoch": 1.9811375932153825, "percentage": 99.05, "elapsed_time": "9:55:17", "remaining_time": "0:05:42"} +{"current_steps": 6776, "total_steps": 6840, "loss": 0.6344239711761475, "lr": 4.93743404632041e-09, "epoch": 1.9814300336306476, "percentage": 99.06, "elapsed_time": "9:55:23", "remaining_time": "0:05:37"} +{"current_steps": 6777, "total_steps": 6840, "loss": 0.529091477394104, "lr": 4.7866936501150816e-09, "epoch": 1.9817224740459132, "percentage": 99.08, "elapsed_time": "9:55:29", "remaining_time": 
"0:05:32"} +{"current_steps": 6778, "total_steps": 6840, "loss": 0.50063157081604, "lr": 4.6382895742491665e-09, "epoch": 1.9820149144611785, "percentage": 99.09, "elapsed_time": "9:55:34", "remaining_time": "0:05:26"} +{"current_steps": 6779, "total_steps": 6840, "loss": 0.48398512601852417, "lr": 4.492221853409362e-09, "epoch": 1.9823073548764438, "percentage": 99.11, "elapsed_time": "9:55:39", "remaining_time": "0:05:21"} +{"current_steps": 6780, "total_steps": 6840, "loss": 0.5330454707145691, "lr": 4.348490521738358e-09, "epoch": 1.9825997952917094, "percentage": 99.12, "elapsed_time": "9:55:43", "remaining_time": "0:05:16"} +{"current_steps": 6781, "total_steps": 6840, "loss": 0.4562032222747803, "lr": 4.207095612833723e-09, "epoch": 1.9828922357069747, "percentage": 99.14, "elapsed_time": "9:55:49", "remaining_time": "0:05:11"} +{"current_steps": 6782, "total_steps": 6840, "loss": 0.47456252574920654, "lr": 4.0680371597456855e-09, "epoch": 1.98318467612224, "percentage": 99.15, "elapsed_time": "9:55:53", "remaining_time": "0:05:05"} +{"current_steps": 6783, "total_steps": 6840, "loss": 0.6283844709396362, "lr": 3.931315194977137e-09, "epoch": 1.9834771165375056, "percentage": 99.17, "elapsed_time": "9:55:58", "remaining_time": "0:05:00"} +{"current_steps": 6784, "total_steps": 6840, "loss": 0.5886485576629639, "lr": 3.7969297504858445e-09, "epoch": 1.983769556952771, "percentage": 99.18, "elapsed_time": "9:56:03", "remaining_time": "0:04:55"} +{"current_steps": 6785, "total_steps": 6840, "loss": 0.4711921811103821, "lr": 3.664880857685571e-09, "epoch": 1.9840619973680362, "percentage": 99.2, "elapsed_time": "9:56:08", "remaining_time": "0:04:49"} +{"current_steps": 6786, "total_steps": 6840, "loss": 0.5372034311294556, "lr": 3.5351685474394048e-09, "epoch": 1.9843544377833018, "percentage": 99.21, "elapsed_time": "9:56:13", "remaining_time": "0:04:44"} +{"current_steps": 6787, "total_steps": 6840, "loss": 0.5314334034919739, "lr": 3.4077928500686473e-09, 
"epoch": 1.9846468781985669, "percentage": 99.23, "elapsed_time": "9:56:18", "remaining_time": "0:04:39"} +{"current_steps": 6788, "total_steps": 6840, "loss": 0.6022863984107971, "lr": 3.2827537953461496e-09, "epoch": 1.9849393186138324, "percentage": 99.24, "elapsed_time": "9:56:24", "remaining_time": "0:04:34"} +{"current_steps": 6789, "total_steps": 6840, "loss": 0.6739746928215027, "lr": 3.160051412499643e-09, "epoch": 1.985231759029098, "percentage": 99.25, "elapsed_time": "9:56:30", "remaining_time": "0:04:28"} +{"current_steps": 6790, "total_steps": 6840, "loss": 0.6454254388809204, "lr": 3.0396857302084082e-09, "epoch": 1.985524199444363, "percentage": 99.27, "elapsed_time": "9:56:36", "remaining_time": "0:04:23"} +{"current_steps": 6791, "total_steps": 6840, "loss": 0.567995011806488, "lr": 2.9216567766088276e-09, "epoch": 1.9858166398596286, "percentage": 99.28, "elapsed_time": "9:56:42", "remaining_time": "0:04:18"} +{"current_steps": 6792, "total_steps": 6840, "loss": 0.568576455116272, "lr": 2.8059645792877233e-09, "epoch": 1.986109080274894, "percentage": 99.3, "elapsed_time": "9:56:48", "remaining_time": "0:04:13"} +{"current_steps": 6793, "total_steps": 6840, "loss": 0.5053816437721252, "lr": 2.6926091652890175e-09, "epoch": 1.9864015206901593, "percentage": 99.31, "elapsed_time": "9:56:54", "remaining_time": "0:04:07"} +{"current_steps": 6794, "total_steps": 6840, "loss": 0.47705504298210144, "lr": 2.5815905611081825e-09, "epoch": 1.9866939611054248, "percentage": 99.33, "elapsed_time": "9:56:59", "remaining_time": "0:04:02"} +{"current_steps": 6795, "total_steps": 6840, "loss": 0.48271438479423523, "lr": 2.472908792695572e-09, "epoch": 1.9869864015206902, "percentage": 99.34, "elapsed_time": "9:57:03", "remaining_time": "0:03:57"} +{"current_steps": 6796, "total_steps": 6840, "loss": 0.5694486498832703, "lr": 2.3665638854541982e-09, "epoch": 1.9872788419359555, "percentage": 99.36, "elapsed_time": "9:57:08", "remaining_time": "0:03:51"} 
+{"current_steps": 6797, "total_steps": 6840, "loss": 0.4940011501312256, "lr": 2.2625558642419553e-09, "epoch": 1.987571282351221, "percentage": 99.37, "elapsed_time": "9:57:13", "remaining_time": "0:03:46"} +{"current_steps": 6798, "total_steps": 6840, "loss": 0.5536549091339111, "lr": 2.160884753370507e-09, "epoch": 1.9878637227664864, "percentage": 99.39, "elapsed_time": "9:57:18", "remaining_time": "0:03:41"} +{"current_steps": 6799, "total_steps": 6840, "loss": 0.5354948043823242, "lr": 2.0615505766041765e-09, "epoch": 1.9881561631817517, "percentage": 99.4, "elapsed_time": "9:57:23", "remaining_time": "0:03:36"} +{"current_steps": 6800, "total_steps": 6840, "loss": 0.6246936321258545, "lr": 1.9645533571610585e-09, "epoch": 1.9884486035970173, "percentage": 99.42, "elapsed_time": "9:57:28", "remaining_time": "0:03:30"} +{"current_steps": 6801, "total_steps": 6840, "loss": 0.6690058708190918, "lr": 1.869893117715238e-09, "epoch": 1.9887410440122824, "percentage": 99.43, "elapsed_time": "9:57:38", "remaining_time": "0:03:25"} +{"current_steps": 6802, "total_steps": 6840, "loss": 0.4022945761680603, "lr": 1.7775698803923491e-09, "epoch": 1.989033484427548, "percentage": 99.44, "elapsed_time": "9:57:44", "remaining_time": "0:03:20"} +{"current_steps": 6803, "total_steps": 6840, "loss": 0.7192882299423218, "lr": 1.6875836667729073e-09, "epoch": 1.9893259248428132, "percentage": 99.46, "elapsed_time": "9:57:49", "remaining_time": "0:03:15"} +{"current_steps": 6804, "total_steps": 6840, "loss": 0.4818531274795532, "lr": 1.5999344978889774e-09, "epoch": 1.9896183652580786, "percentage": 99.47, "elapsed_time": "9:57:54", "remaining_time": "0:03:09"} +{"current_steps": 6805, "total_steps": 6840, "loss": 0.5877143144607544, "lr": 1.5146223942297256e-09, "epoch": 1.989910805673344, "percentage": 99.49, "elapsed_time": "9:58:00", "remaining_time": "0:03:04"} +{"current_steps": 6806, "total_steps": 6840, "loss": 0.5317925810813904, "lr": 1.4316473757347571e-09, "epoch": 
1.9902032460886094, "percentage": 99.5, "elapsed_time": "9:58:05", "remaining_time": "0:02:59"} +{"current_steps": 6807, "total_steps": 6840, "loss": 0.5203319787979126, "lr": 1.3510094618007785e-09, "epoch": 1.9904956865038748, "percentage": 99.52, "elapsed_time": "9:58:11", "remaining_time": "0:02:54"} +{"current_steps": 6808, "total_steps": 6840, "loss": 0.5171575546264648, "lr": 1.2727086712760462e-09, "epoch": 1.9907881269191403, "percentage": 99.53, "elapsed_time": "9:58:18", "remaining_time": "0:02:48"} +{"current_steps": 6809, "total_steps": 6840, "loss": 0.4570615291595459, "lr": 1.1967450224614763e-09, "epoch": 1.9910805673344056, "percentage": 99.55, "elapsed_time": "9:58:24", "remaining_time": "0:02:43"} +{"current_steps": 6810, "total_steps": 6840, "loss": 0.5689741969108582, "lr": 1.123118533113976e-09, "epoch": 1.991373007749671, "percentage": 99.56, "elapsed_time": "9:58:30", "remaining_time": "0:02:38"} +{"current_steps": 6811, "total_steps": 6840, "loss": 0.5029700994491577, "lr": 1.0518292204442226e-09, "epoch": 1.9916654481649365, "percentage": 99.58, "elapsed_time": "9:58:34", "remaining_time": "0:02:32"} +{"current_steps": 6812, "total_steps": 6840, "loss": 0.5461232662200928, "lr": 9.828771011144434e-10, "epoch": 1.9919578885802016, "percentage": 99.59, "elapsed_time": "9:58:39", "remaining_time": "0:02:27"} +{"current_steps": 6813, "total_steps": 6840, "loss": 0.4681328535079956, "lr": 9.162621912417458e-10, "epoch": 1.9922503289954672, "percentage": 99.61, "elapsed_time": "9:58:44", "remaining_time": "0:02:22"} +{"current_steps": 6814, "total_steps": 6840, "loss": 0.6356761455535889, "lr": 8.519845063970078e-10, "epoch": 1.9925427694107327, "percentage": 99.62, "elapsed_time": "9:58:50", "remaining_time": "0:02:17"} +{"current_steps": 6815, "total_steps": 6840, "loss": 0.48491230607032776, "lr": 7.900440616059879e-10, "epoch": 1.9928352098259978, "percentage": 99.63, "elapsed_time": "9:58:54", "remaining_time": "0:02:11"} +{"current_steps": 
6816, "total_steps": 6840, "loss": 0.45563238859176636, "lr": 7.304408713448841e-10, "epoch": 1.9931276502412634, "percentage": 99.65, "elapsed_time": "9:59:00", "remaining_time": "0:02:06"} +{"current_steps": 6817, "total_steps": 6840, "loss": 0.6067036986351013, "lr": 6.731749495481054e-10, "epoch": 1.9934200906565287, "percentage": 99.66, "elapsed_time": "9:59:05", "remaining_time": "0:02:01"} +{"current_steps": 6818, "total_steps": 6840, "loss": 0.6162583827972412, "lr": 6.182463095982805e-10, "epoch": 1.993712531071794, "percentage": 99.68, "elapsed_time": "9:59:09", "remaining_time": "0:01:56"} +{"current_steps": 6819, "total_steps": 6840, "loss": 0.6621623039245605, "lr": 5.656549643373587e-10, "epoch": 1.9940049714870596, "percentage": 99.69, "elapsed_time": "9:59:14", "remaining_time": "0:01:50"} +{"current_steps": 6820, "total_steps": 6840, "loss": 0.5374715328216553, "lr": 5.154009260566195e-10, "epoch": 1.994297411902325, "percentage": 99.71, "elapsed_time": "9:59:20", "remaining_time": "0:01:45"} +{"current_steps": 6821, "total_steps": 6840, "loss": 0.5164921283721924, "lr": 4.674842065033325e-10, "epoch": 1.9945898523175902, "percentage": 99.72, "elapsed_time": "9:59:25", "remaining_time": "0:01:40"} +{"current_steps": 6822, "total_steps": 6840, "loss": 0.4816705584526062, "lr": 4.2190481687631736e-10, "epoch": 1.9948822927328558, "percentage": 99.74, "elapsed_time": "9:59:30", "remaining_time": "0:01:34"} +{"current_steps": 6823, "total_steps": 6840, "loss": 0.5393646955490112, "lr": 3.786627678314947e-10, "epoch": 1.995174733148121, "percentage": 99.75, "elapsed_time": "9:59:34", "remaining_time": "0:01:29"} +{"current_steps": 6824, "total_steps": 6840, "loss": 0.6161901950836182, "lr": 3.377580694763349e-10, "epoch": 1.9954671735633864, "percentage": 99.77, "elapsed_time": "9:59:39", "remaining_time": "0:01:24"} +{"current_steps": 6825, "total_steps": 6840, "loss": 0.45819348096847534, "lr": 2.991907313698583e-10, "epoch": 1.995759613978652, 
"percentage": 99.78, "elapsed_time": "9:59:45", "remaining_time": "0:01:19"} +{"current_steps": 6826, "total_steps": 6840, "loss": 0.4111405611038208, "lr": 2.6296076252929623e-10, "epoch": 1.996052054393917, "percentage": 99.8, "elapsed_time": "9:59:49", "remaining_time": "0:01:13"} +{"current_steps": 6827, "total_steps": 6840, "loss": 0.5351378917694092, "lr": 2.2906817142120952e-10, "epoch": 1.9963444948091826, "percentage": 99.81, "elapsed_time": "9:59:56", "remaining_time": "0:01:08"} +{"current_steps": 6828, "total_steps": 6840, "loss": 0.5349807739257812, "lr": 1.9751296597037007e-10, "epoch": 1.9966369352244482, "percentage": 99.82, "elapsed_time": "10:00:01", "remaining_time": "0:01:03"} +{"current_steps": 6829, "total_steps": 6840, "loss": 0.3669770061969757, "lr": 1.68295153549769e-10, "epoch": 1.9969293756397133, "percentage": 99.84, "elapsed_time": "10:00:06", "remaining_time": "0:00:57"} +{"current_steps": 6830, "total_steps": 6840, "loss": 0.51691073179245, "lr": 1.414147409906086e-10, "epoch": 1.9972218160549788, "percentage": 99.85, "elapsed_time": "10:00:12", "remaining_time": "0:00:52"} +{"current_steps": 6831, "total_steps": 6840, "loss": 0.530505895614624, "lr": 1.1687173457564095e-10, "epoch": 1.9975142564702442, "percentage": 99.87, "elapsed_time": "10:00:18", "remaining_time": "0:00:47"} +{"current_steps": 6832, "total_steps": 6840, "loss": 0.6562793850898743, "lr": 9.466614004138841e-11, "epoch": 1.9978066968855095, "percentage": 99.88, "elapsed_time": "10:00:22", "remaining_time": "0:00:42"} +{"current_steps": 6833, "total_steps": 6840, "loss": 0.5174558758735657, "lr": 7.479796257925387e-11, "epoch": 1.998099137300775, "percentage": 99.9, "elapsed_time": "10:00:27", "remaining_time": "0:00:36"} +{"current_steps": 6834, "total_steps": 6840, "loss": 0.5514833331108093, "lr": 5.726720683219e-11, "epoch": 1.9983915777160404, "percentage": 99.91, "elapsed_time": "10:00:31", "remaining_time": "0:00:31"} +{"current_steps": 6835, "total_steps": 
6840, "loss": 0.4652816653251648, "lr": 4.207387689803e-11, "epoch": 1.9986840181313057, "percentage": 99.93, "elapsed_time": "10:00:37", "remaining_time": "0:00:26"} +{"current_steps": 6836, "total_steps": 6840, "loss": 0.420850932598114, "lr": 2.9217976328377305e-11, "epoch": 1.9989764585465712, "percentage": 99.94, "elapsed_time": "10:00:44", "remaining_time": "0:00:21"} +{"current_steps": 6837, "total_steps": 6840, "loss": 0.5394539833068848, "lr": 1.8699508128605658e-11, "epoch": 1.9992688989618366, "percentage": 99.96, "elapsed_time": "10:00:49", "remaining_time": "0:00:15"} +{"current_steps": 6838, "total_steps": 6840, "loss": 0.458107590675354, "lr": 1.051847475674883e-11, "epoch": 1.999561339377102, "percentage": 99.97, "elapsed_time": "10:00:55", "remaining_time": "0:00:10"} +{"current_steps": 6839, "total_steps": 6840, "loss": 0.552463173866272, "lr": 4.6748781246108706e-12, "epoch": 1.9998537797923674, "percentage": 99.99, "elapsed_time": "10:01:00", "remaining_time": "0:00:05"} +{"current_steps": 6840, "total_steps": 6840, "loss": 0.4656301736831665, "lr": 1.1687195999865453e-12, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "10:01:01", "remaining_time": "0:00:00"} +{"current_steps": 6840, "total_steps": 6840, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "10:01:06", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac8ed18b42722bb76b5643aac1962f2866140f9 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,47923 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 6840, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00029244041526538966, + "grad_norm": 1.376689135449382, + "learning_rate": 0.0, + "loss": 1.2599382400512695, + "step": 1 + }, + { + "epoch": 
0.0005848808305307793, + "grad_norm": 1.234681838317607, + "learning_rate": 5.847953216374269e-08, + "loss": 0.9314937591552734, + "step": 2 + }, + { + "epoch": 0.000877321245796169, + "grad_norm": 1.3874138849382744, + "learning_rate": 1.1695906432748539e-07, + "loss": 1.1433629989624023, + "step": 3 + }, + { + "epoch": 0.0011697616610615586, + "grad_norm": 1.4618979511530414, + "learning_rate": 1.7543859649122808e-07, + "loss": 1.2224640846252441, + "step": 4 + }, + { + "epoch": 0.0014622020763269484, + "grad_norm": 1.236340065064986, + "learning_rate": 2.3391812865497077e-07, + "loss": 1.0468370914459229, + "step": 5 + }, + { + "epoch": 0.001754642491592338, + "grad_norm": 1.358651453520776, + "learning_rate": 2.9239766081871344e-07, + "loss": 1.1314436197280884, + "step": 6 + }, + { + "epoch": 0.0020470829068577278, + "grad_norm": 1.3850033876300505, + "learning_rate": 3.5087719298245616e-07, + "loss": 0.9903597831726074, + "step": 7 + }, + { + "epoch": 0.0023395233221231173, + "grad_norm": 1.326993456005612, + "learning_rate": 4.093567251461988e-07, + "loss": 1.1988611221313477, + "step": 8 + }, + { + "epoch": 0.002631963737388507, + "grad_norm": 1.3313234883955534, + "learning_rate": 4.6783625730994155e-07, + "loss": 1.1209533214569092, + "step": 9 + }, + { + "epoch": 0.0029244041526538967, + "grad_norm": 1.3471142230235869, + "learning_rate": 5.263157894736843e-07, + "loss": 1.1582586765289307, + "step": 10 + }, + { + "epoch": 0.0032168445679192866, + "grad_norm": 1.3073172655293792, + "learning_rate": 5.847953216374269e-07, + "loss": 1.2469007968902588, + "step": 11 + }, + { + "epoch": 0.003509284983184676, + "grad_norm": 1.500493931988472, + "learning_rate": 6.432748538011696e-07, + "loss": 1.115494728088379, + "step": 12 + }, + { + "epoch": 0.0038017253984500656, + "grad_norm": 1.4157975190751417, + "learning_rate": 7.017543859649123e-07, + "loss": 1.1927871704101562, + "step": 13 + }, + { + "epoch": 0.0040941658137154556, + "grad_norm": 
1.4273551735693608, + "learning_rate": 7.60233918128655e-07, + "loss": 1.1014869213104248, + "step": 14 + }, + { + "epoch": 0.004386606228980845, + "grad_norm": 1.214320734942881, + "learning_rate": 8.187134502923977e-07, + "loss": 1.1055865287780762, + "step": 15 + }, + { + "epoch": 0.0046790466442462346, + "grad_norm": 1.2962699407775686, + "learning_rate": 8.771929824561404e-07, + "loss": 1.1071349382400513, + "step": 16 + }, + { + "epoch": 0.004971487059511625, + "grad_norm": 1.2885224717964352, + "learning_rate": 9.356725146198831e-07, + "loss": 1.1737473011016846, + "step": 17 + }, + { + "epoch": 0.005263927474777014, + "grad_norm": 1.407390623938155, + "learning_rate": 9.941520467836258e-07, + "loss": 1.283717155456543, + "step": 18 + }, + { + "epoch": 0.005556367890042404, + "grad_norm": 1.4470139877184414, + "learning_rate": 1.0526315789473685e-06, + "loss": 1.2509160041809082, + "step": 19 + }, + { + "epoch": 0.005848808305307793, + "grad_norm": 1.3242663031296102, + "learning_rate": 1.111111111111111e-06, + "loss": 0.9722317457199097, + "step": 20 + }, + { + "epoch": 0.006141248720573183, + "grad_norm": 1.7221218211796423, + "learning_rate": 1.1695906432748538e-06, + "loss": 1.1927049160003662, + "step": 21 + }, + { + "epoch": 0.006433689135838573, + "grad_norm": 1.4346324267765085, + "learning_rate": 1.2280701754385965e-06, + "loss": 1.2133033275604248, + "step": 22 + }, + { + "epoch": 0.006726129551103963, + "grad_norm": 1.449278395489955, + "learning_rate": 1.2865497076023392e-06, + "loss": 1.2373273372650146, + "step": 23 + }, + { + "epoch": 0.007018569966369352, + "grad_norm": 1.6650860096596214, + "learning_rate": 1.345029239766082e-06, + "loss": 0.9476668834686279, + "step": 24 + }, + { + "epoch": 0.007311010381634742, + "grad_norm": 1.2748998150534738, + "learning_rate": 1.4035087719298246e-06, + "loss": 1.1171324253082275, + "step": 25 + }, + { + "epoch": 0.007603450796900131, + "grad_norm": 1.4396688825039674, + "learning_rate": 
1.4619883040935674e-06, + "loss": 1.1276075839996338, + "step": 26 + }, + { + "epoch": 0.007895891212165522, + "grad_norm": 1.4009443443291978, + "learning_rate": 1.52046783625731e-06, + "loss": 1.190751314163208, + "step": 27 + }, + { + "epoch": 0.008188331627430911, + "grad_norm": 1.3912141798418658, + "learning_rate": 1.5789473684210526e-06, + "loss": 1.2171813249588013, + "step": 28 + }, + { + "epoch": 0.0084807720426963, + "grad_norm": 1.3073224250652524, + "learning_rate": 1.6374269005847953e-06, + "loss": 0.8595987558364868, + "step": 29 + }, + { + "epoch": 0.00877321245796169, + "grad_norm": 1.2671914308960317, + "learning_rate": 1.695906432748538e-06, + "loss": 1.0270106792449951, + "step": 30 + }, + { + "epoch": 0.00906565287322708, + "grad_norm": 1.5005896829818803, + "learning_rate": 1.7543859649122807e-06, + "loss": 1.068537712097168, + "step": 31 + }, + { + "epoch": 0.009358093288492469, + "grad_norm": 1.2766478202995049, + "learning_rate": 1.8128654970760235e-06, + "loss": 1.1307867765426636, + "step": 32 + }, + { + "epoch": 0.009650533703757859, + "grad_norm": 1.5582616996952416, + "learning_rate": 1.8713450292397662e-06, + "loss": 1.0837950706481934, + "step": 33 + }, + { + "epoch": 0.00994297411902325, + "grad_norm": 1.4304945053464713, + "learning_rate": 1.929824561403509e-06, + "loss": 1.1506178379058838, + "step": 34 + }, + { + "epoch": 0.01023541453428864, + "grad_norm": 1.4722243618391941, + "learning_rate": 1.9883040935672516e-06, + "loss": 0.9450151324272156, + "step": 35 + }, + { + "epoch": 0.010527854949554029, + "grad_norm": 1.4847744449229108, + "learning_rate": 2.0467836257309943e-06, + "loss": 1.2040901184082031, + "step": 36 + }, + { + "epoch": 0.010820295364819418, + "grad_norm": 1.4600954284408973, + "learning_rate": 2.105263157894737e-06, + "loss": 1.2316429615020752, + "step": 37 + }, + { + "epoch": 0.011112735780084808, + "grad_norm": 1.479845514016971, + "learning_rate": 2.1637426900584798e-06, + "loss": 1.2119100093841553, + 
"step": 38 + }, + { + "epoch": 0.011405176195350197, + "grad_norm": 1.353351745720387, + "learning_rate": 2.222222222222222e-06, + "loss": 1.276926875114441, + "step": 39 + }, + { + "epoch": 0.011697616610615587, + "grad_norm": 1.256680621146734, + "learning_rate": 2.280701754385965e-06, + "loss": 0.9357824921607971, + "step": 40 + }, + { + "epoch": 0.011990057025880976, + "grad_norm": 1.3348703609284243, + "learning_rate": 2.3391812865497075e-06, + "loss": 1.1861131191253662, + "step": 41 + }, + { + "epoch": 0.012282497441146366, + "grad_norm": 1.3287978940598948, + "learning_rate": 2.3976608187134502e-06, + "loss": 1.1745539903640747, + "step": 42 + }, + { + "epoch": 0.012574937856411755, + "grad_norm": 1.1561631937443322, + "learning_rate": 2.456140350877193e-06, + "loss": 1.0291770696640015, + "step": 43 + }, + { + "epoch": 0.012867378271677147, + "grad_norm": 1.2176771446345134, + "learning_rate": 2.5146198830409357e-06, + "loss": 1.2361294031143188, + "step": 44 + }, + { + "epoch": 0.013159818686942536, + "grad_norm": 1.3295063710563702, + "learning_rate": 2.5730994152046784e-06, + "loss": 1.1909143924713135, + "step": 45 + }, + { + "epoch": 0.013452259102207926, + "grad_norm": 1.2650643173778968, + "learning_rate": 2.631578947368421e-06, + "loss": 1.1998133659362793, + "step": 46 + }, + { + "epoch": 0.013744699517473315, + "grad_norm": 1.1278701463292995, + "learning_rate": 2.690058479532164e-06, + "loss": 1.0011268854141235, + "step": 47 + }, + { + "epoch": 0.014037139932738705, + "grad_norm": 1.4726969666937608, + "learning_rate": 2.7485380116959066e-06, + "loss": 1.0552136898040771, + "step": 48 + }, + { + "epoch": 0.014329580348004094, + "grad_norm": 1.0797124442917296, + "learning_rate": 2.8070175438596493e-06, + "loss": 0.9727921485900879, + "step": 49 + }, + { + "epoch": 0.014622020763269484, + "grad_norm": 1.1798592697113668, + "learning_rate": 2.865497076023392e-06, + "loss": 0.9361351728439331, + "step": 50 + }, + { + "epoch": 0.014914461178534873, 
+ "grad_norm": 1.1254749584923542, + "learning_rate": 2.9239766081871347e-06, + "loss": 1.140329360961914, + "step": 51 + }, + { + "epoch": 0.015206901593800263, + "grad_norm": 1.1050662639156084, + "learning_rate": 2.9824561403508774e-06, + "loss": 0.991325855255127, + "step": 52 + }, + { + "epoch": 0.015499342009065652, + "grad_norm": 1.364923415701691, + "learning_rate": 3.04093567251462e-06, + "loss": 1.3082914352416992, + "step": 53 + }, + { + "epoch": 0.015791782424331043, + "grad_norm": 1.1357483626397489, + "learning_rate": 3.0994152046783624e-06, + "loss": 0.9767723083496094, + "step": 54 + }, + { + "epoch": 0.016084222839596433, + "grad_norm": 1.1338887919712684, + "learning_rate": 3.157894736842105e-06, + "loss": 1.193568229675293, + "step": 55 + }, + { + "epoch": 0.016376663254861822, + "grad_norm": 1.176328275981774, + "learning_rate": 3.216374269005848e-06, + "loss": 0.9767440557479858, + "step": 56 + }, + { + "epoch": 0.016669103670127212, + "grad_norm": 1.0263265896491178, + "learning_rate": 3.2748538011695906e-06, + "loss": 0.8888605833053589, + "step": 57 + }, + { + "epoch": 0.0169615440853926, + "grad_norm": 1.0668435517314094, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.087357997894287, + "step": 58 + }, + { + "epoch": 0.01725398450065799, + "grad_norm": 1.1952584851106463, + "learning_rate": 3.391812865497076e-06, + "loss": 1.0217459201812744, + "step": 59 + }, + { + "epoch": 0.01754642491592338, + "grad_norm": 1.1279843674972485, + "learning_rate": 3.4502923976608188e-06, + "loss": 1.0783777236938477, + "step": 60 + }, + { + "epoch": 0.01783886533118877, + "grad_norm": 0.9080265579264722, + "learning_rate": 3.5087719298245615e-06, + "loss": 0.85099196434021, + "step": 61 + }, + { + "epoch": 0.01813130574645416, + "grad_norm": 1.0228765689803359, + "learning_rate": 3.567251461988304e-06, + "loss": 0.9322569966316223, + "step": 62 + }, + { + "epoch": 0.01842374616171955, + "grad_norm": 0.991842254830473, + "learning_rate": 
3.625730994152047e-06, + "loss": 0.8749685287475586, + "step": 63 + }, + { + "epoch": 0.018716186576984938, + "grad_norm": 0.9789077968505817, + "learning_rate": 3.6842105263157896e-06, + "loss": 0.857900857925415, + "step": 64 + }, + { + "epoch": 0.019008626992250328, + "grad_norm": 0.8092242526335478, + "learning_rate": 3.7426900584795324e-06, + "loss": 0.8891770243644714, + "step": 65 + }, + { + "epoch": 0.019301067407515717, + "grad_norm": 1.0526332302181824, + "learning_rate": 3.801169590643275e-06, + "loss": 1.0730159282684326, + "step": 66 + }, + { + "epoch": 0.019593507822781107, + "grad_norm": 1.124329301516788, + "learning_rate": 3.859649122807018e-06, + "loss": 1.108138084411621, + "step": 67 + }, + { + "epoch": 0.0198859482380465, + "grad_norm": 1.3581659451048562, + "learning_rate": 3.9181286549707605e-06, + "loss": 1.2126305103302002, + "step": 68 + }, + { + "epoch": 0.02017838865331189, + "grad_norm": 1.1108109420327934, + "learning_rate": 3.976608187134503e-06, + "loss": 0.9527193307876587, + "step": 69 + }, + { + "epoch": 0.02047082906857728, + "grad_norm": 0.9965971604796123, + "learning_rate": 4.035087719298246e-06, + "loss": 1.0454832315444946, + "step": 70 + }, + { + "epoch": 0.020763269483842668, + "grad_norm": 0.821178202034714, + "learning_rate": 4.093567251461989e-06, + "loss": 0.7075237035751343, + "step": 71 + }, + { + "epoch": 0.021055709899108058, + "grad_norm": 1.2413273222740282, + "learning_rate": 4.152046783625731e-06, + "loss": 1.0972111225128174, + "step": 72 + }, + { + "epoch": 0.021348150314373447, + "grad_norm": 0.9838475362870381, + "learning_rate": 4.210526315789474e-06, + "loss": 1.0400984287261963, + "step": 73 + }, + { + "epoch": 0.021640590729638837, + "grad_norm": 0.8577987626348056, + "learning_rate": 4.269005847953217e-06, + "loss": 0.7712557315826416, + "step": 74 + }, + { + "epoch": 0.021933031144904226, + "grad_norm": 1.0937426764383058, + "learning_rate": 4.3274853801169596e-06, + "loss": 1.1733636856079102, + 
"step": 75 + }, + { + "epoch": 0.022225471560169616, + "grad_norm": 0.9896291906902066, + "learning_rate": 4.385964912280702e-06, + "loss": 0.8653621673583984, + "step": 76 + }, + { + "epoch": 0.022517911975435005, + "grad_norm": 0.9059062097735997, + "learning_rate": 4.444444444444444e-06, + "loss": 0.8797299861907959, + "step": 77 + }, + { + "epoch": 0.022810352390700395, + "grad_norm": 1.0128235878781693, + "learning_rate": 4.502923976608187e-06, + "loss": 0.8357750177383423, + "step": 78 + }, + { + "epoch": 0.023102792805965784, + "grad_norm": 1.241636412088512, + "learning_rate": 4.56140350877193e-06, + "loss": 1.1249456405639648, + "step": 79 + }, + { + "epoch": 0.023395233221231174, + "grad_norm": 1.2743547410748093, + "learning_rate": 4.619883040935672e-06, + "loss": 0.9920758008956909, + "step": 80 + }, + { + "epoch": 0.023687673636496563, + "grad_norm": 1.0290847197991744, + "learning_rate": 4.678362573099415e-06, + "loss": 0.8115094900131226, + "step": 81 + }, + { + "epoch": 0.023980114051761953, + "grad_norm": 0.9339898981913745, + "learning_rate": 4.736842105263158e-06, + "loss": 1.060575246810913, + "step": 82 + }, + { + "epoch": 0.024272554467027342, + "grad_norm": 1.1898301512766587, + "learning_rate": 4.7953216374269005e-06, + "loss": 1.028218150138855, + "step": 83 + }, + { + "epoch": 0.02456499488229273, + "grad_norm": 0.9840324243241313, + "learning_rate": 4.853801169590643e-06, + "loss": 1.090872049331665, + "step": 84 + }, + { + "epoch": 0.02485743529755812, + "grad_norm": 1.110956193223445, + "learning_rate": 4.912280701754386e-06, + "loss": 1.0069574117660522, + "step": 85 + }, + { + "epoch": 0.02514987571282351, + "grad_norm": 1.0134868000559825, + "learning_rate": 4.970760233918129e-06, + "loss": 0.9391698837280273, + "step": 86 + }, + { + "epoch": 0.025442316128088904, + "grad_norm": 1.0912235029106665, + "learning_rate": 5.029239766081871e-06, + "loss": 0.881995677947998, + "step": 87 + }, + { + "epoch": 0.025734756543354293, + 
"grad_norm": 1.0399116507679627, + "learning_rate": 5.087719298245615e-06, + "loss": 0.87871253490448, + "step": 88 + }, + { + "epoch": 0.026027196958619683, + "grad_norm": 1.0265015868708693, + "learning_rate": 5.146198830409357e-06, + "loss": 1.005904197692871, + "step": 89 + }, + { + "epoch": 0.026319637373885072, + "grad_norm": 1.0161210383553128, + "learning_rate": 5.2046783625731e-06, + "loss": 0.8624223470687866, + "step": 90 + }, + { + "epoch": 0.02661207778915046, + "grad_norm": 1.0154040401745301, + "learning_rate": 5.263157894736842e-06, + "loss": 0.9976427555084229, + "step": 91 + }, + { + "epoch": 0.02690451820441585, + "grad_norm": 1.157266795240935, + "learning_rate": 5.321637426900586e-06, + "loss": 0.7743148803710938, + "step": 92 + }, + { + "epoch": 0.02719695861968124, + "grad_norm": 1.0027983307117943, + "learning_rate": 5.380116959064328e-06, + "loss": 0.8541792631149292, + "step": 93 + }, + { + "epoch": 0.02748939903494663, + "grad_norm": 1.0195872536359372, + "learning_rate": 5.438596491228071e-06, + "loss": 0.9141846895217896, + "step": 94 + }, + { + "epoch": 0.02778183945021202, + "grad_norm": 0.9964676811589505, + "learning_rate": 5.497076023391813e-06, + "loss": 0.9762974977493286, + "step": 95 + }, + { + "epoch": 0.02807427986547741, + "grad_norm": 1.086834377136063, + "learning_rate": 5.555555555555557e-06, + "loss": 0.8039775490760803, + "step": 96 + }, + { + "epoch": 0.0283667202807428, + "grad_norm": 1.0288673358640383, + "learning_rate": 5.6140350877192985e-06, + "loss": 0.9464477300643921, + "step": 97 + }, + { + "epoch": 0.028659160696008188, + "grad_norm": 0.9989091266376411, + "learning_rate": 5.672514619883041e-06, + "loss": 0.8264896869659424, + "step": 98 + }, + { + "epoch": 0.028951601111273578, + "grad_norm": 1.239452647422259, + "learning_rate": 5.730994152046784e-06, + "loss": 0.8347363471984863, + "step": 99 + }, + { + "epoch": 0.029244041526538967, + "grad_norm": 1.1482101557047766, + "learning_rate": 
5.789473684210527e-06, + "loss": 0.7974327802658081, + "step": 100 + }, + { + "epoch": 0.029536481941804357, + "grad_norm": 1.040746567320999, + "learning_rate": 5.847953216374269e-06, + "loss": 0.7953752875328064, + "step": 101 + }, + { + "epoch": 0.029828922357069746, + "grad_norm": 1.0186289029859024, + "learning_rate": 5.906432748538012e-06, + "loss": 0.8652607798576355, + "step": 102 + }, + { + "epoch": 0.030121362772335136, + "grad_norm": 1.0719829766550855, + "learning_rate": 5.964912280701755e-06, + "loss": 0.973792552947998, + "step": 103 + }, + { + "epoch": 0.030413803187600525, + "grad_norm": 0.9226382056883017, + "learning_rate": 6.023391812865498e-06, + "loss": 0.8093612194061279, + "step": 104 + }, + { + "epoch": 0.030706243602865915, + "grad_norm": 0.9154711374479992, + "learning_rate": 6.08187134502924e-06, + "loss": 0.8463394045829773, + "step": 105 + }, + { + "epoch": 0.030998684018131304, + "grad_norm": 1.2769916053670627, + "learning_rate": 6.140350877192983e-06, + "loss": 0.7898350358009338, + "step": 106 + }, + { + "epoch": 0.0312911244333967, + "grad_norm": 1.298220618549192, + "learning_rate": 6.198830409356725e-06, + "loss": 0.9750698804855347, + "step": 107 + }, + { + "epoch": 0.031583564848662087, + "grad_norm": 1.000315516155276, + "learning_rate": 6.2573099415204685e-06, + "loss": 0.8137387633323669, + "step": 108 + }, + { + "epoch": 0.031876005263927476, + "grad_norm": 1.082436003075408, + "learning_rate": 6.31578947368421e-06, + "loss": 1.0641593933105469, + "step": 109 + }, + { + "epoch": 0.032168445679192866, + "grad_norm": 1.0363310086535433, + "learning_rate": 6.374269005847954e-06, + "loss": 0.9647193551063538, + "step": 110 + }, + { + "epoch": 0.032460886094458255, + "grad_norm": 1.1062097211432278, + "learning_rate": 6.432748538011696e-06, + "loss": 0.9693200588226318, + "step": 111 + }, + { + "epoch": 0.032753326509723645, + "grad_norm": 1.145031857661525, + "learning_rate": 6.491228070175439e-06, + "loss": 0.9600590467453003, 
+ "step": 112 + }, + { + "epoch": 0.033045766924989034, + "grad_norm": 1.0203404188427831, + "learning_rate": 6.549707602339181e-06, + "loss": 0.8908880949020386, + "step": 113 + }, + { + "epoch": 0.033338207340254424, + "grad_norm": 1.2162435709165451, + "learning_rate": 6.608187134502925e-06, + "loss": 0.9803124666213989, + "step": 114 + }, + { + "epoch": 0.03363064775551981, + "grad_norm": 1.1738875143751093, + "learning_rate": 6.666666666666667e-06, + "loss": 0.8288271427154541, + "step": 115 + }, + { + "epoch": 0.0339230881707852, + "grad_norm": 0.9490473067752526, + "learning_rate": 6.72514619883041e-06, + "loss": 0.7203798890113831, + "step": 116 + }, + { + "epoch": 0.03421552858605059, + "grad_norm": 1.0046253156347025, + "learning_rate": 6.783625730994152e-06, + "loss": 0.7670629024505615, + "step": 117 + }, + { + "epoch": 0.03450796900131598, + "grad_norm": 1.0563125407630551, + "learning_rate": 6.842105263157896e-06, + "loss": 0.8487929105758667, + "step": 118 + }, + { + "epoch": 0.03480040941658137, + "grad_norm": 1.1292147521599132, + "learning_rate": 6.9005847953216375e-06, + "loss": 0.8332704305648804, + "step": 119 + }, + { + "epoch": 0.03509284983184676, + "grad_norm": 1.2138847310663696, + "learning_rate": 6.959064327485381e-06, + "loss": 0.9984017610549927, + "step": 120 + }, + { + "epoch": 0.03538529024711215, + "grad_norm": 1.126543099330432, + "learning_rate": 7.017543859649123e-06, + "loss": 0.788459062576294, + "step": 121 + }, + { + "epoch": 0.03567773066237754, + "grad_norm": 1.5166585395762038, + "learning_rate": 7.0760233918128665e-06, + "loss": 1.0288443565368652, + "step": 122 + }, + { + "epoch": 0.03597017107764293, + "grad_norm": 1.0086777607738802, + "learning_rate": 7.134502923976608e-06, + "loss": 0.7939552664756775, + "step": 123 + }, + { + "epoch": 0.03626261149290832, + "grad_norm": 1.0254521267017753, + "learning_rate": 7.192982456140352e-06, + "loss": 0.8816506862640381, + "step": 124 + }, + { + "epoch": 0.03655505190817371, 
+ "grad_norm": 1.0223917066157164, + "learning_rate": 7.251461988304094e-06, + "loss": 0.8864353895187378, + "step": 125 + }, + { + "epoch": 0.0368474923234391, + "grad_norm": 1.2363556273996017, + "learning_rate": 7.309941520467837e-06, + "loss": 0.9817954897880554, + "step": 126 + }, + { + "epoch": 0.03713993273870449, + "grad_norm": 1.0757650534793346, + "learning_rate": 7.368421052631579e-06, + "loss": 0.8423842787742615, + "step": 127 + }, + { + "epoch": 0.037432373153969876, + "grad_norm": 1.1636915661730252, + "learning_rate": 7.426900584795322e-06, + "loss": 0.8375135660171509, + "step": 128 + }, + { + "epoch": 0.037724813569235266, + "grad_norm": 1.2215328884976426, + "learning_rate": 7.485380116959065e-06, + "loss": 0.9105685949325562, + "step": 129 + }, + { + "epoch": 0.038017253984500655, + "grad_norm": 1.1346801425180852, + "learning_rate": 7.5438596491228074e-06, + "loss": 0.8784557580947876, + "step": 130 + }, + { + "epoch": 0.038309694399766045, + "grad_norm": 1.0071578019284073, + "learning_rate": 7.60233918128655e-06, + "loss": 0.7557879686355591, + "step": 131 + }, + { + "epoch": 0.038602134815031434, + "grad_norm": 1.228942961434803, + "learning_rate": 7.660818713450294e-06, + "loss": 0.8966819047927856, + "step": 132 + }, + { + "epoch": 0.038894575230296824, + "grad_norm": 1.0961114842309465, + "learning_rate": 7.719298245614036e-06, + "loss": 0.7642185091972351, + "step": 133 + }, + { + "epoch": 0.03918701564556221, + "grad_norm": 1.062961529950125, + "learning_rate": 7.77777777777778e-06, + "loss": 0.8313230276107788, + "step": 134 + }, + { + "epoch": 0.0394794560608276, + "grad_norm": 1.3350623914867434, + "learning_rate": 7.836257309941521e-06, + "loss": 0.8388677835464478, + "step": 135 + }, + { + "epoch": 0.039771896476093, + "grad_norm": 1.2027686314521255, + "learning_rate": 7.894736842105265e-06, + "loss": 0.9065952301025391, + "step": 136 + }, + { + "epoch": 0.04006433689135839, + "grad_norm": 1.123144368922916, + "learning_rate": 
7.953216374269006e-06, + "loss": 0.8153767585754395, + "step": 137 + }, + { + "epoch": 0.04035677730662378, + "grad_norm": 1.163761684167935, + "learning_rate": 8.01169590643275e-06, + "loss": 0.8976421356201172, + "step": 138 + }, + { + "epoch": 0.04064921772188917, + "grad_norm": 1.1354333989669174, + "learning_rate": 8.070175438596492e-06, + "loss": 0.7360264658927917, + "step": 139 + }, + { + "epoch": 0.04094165813715456, + "grad_norm": 1.1009203930924998, + "learning_rate": 8.128654970760235e-06, + "loss": 0.8442148566246033, + "step": 140 + }, + { + "epoch": 0.04123409855241995, + "grad_norm": 1.0872796831159965, + "learning_rate": 8.187134502923977e-06, + "loss": 0.6541435718536377, + "step": 141 + }, + { + "epoch": 0.041526538967685336, + "grad_norm": 1.2792221696979318, + "learning_rate": 8.24561403508772e-06, + "loss": 0.7492353916168213, + "step": 142 + }, + { + "epoch": 0.041818979382950726, + "grad_norm": 1.0406728730985955, + "learning_rate": 8.304093567251463e-06, + "loss": 0.6681893467903137, + "step": 143 + }, + { + "epoch": 0.042111419798216115, + "grad_norm": 1.2507905783247102, + "learning_rate": 8.362573099415205e-06, + "loss": 0.8384866714477539, + "step": 144 + }, + { + "epoch": 0.042403860213481505, + "grad_norm": 1.125680624680095, + "learning_rate": 8.421052631578948e-06, + "loss": 0.8338214159011841, + "step": 145 + }, + { + "epoch": 0.042696300628746894, + "grad_norm": 1.3441065562284606, + "learning_rate": 8.47953216374269e-06, + "loss": 0.8549021482467651, + "step": 146 + }, + { + "epoch": 0.042988741044012284, + "grad_norm": 1.0226139512096055, + "learning_rate": 8.538011695906434e-06, + "loss": 0.8324464559555054, + "step": 147 + }, + { + "epoch": 0.04328118145927767, + "grad_norm": 1.3742681865566602, + "learning_rate": 8.596491228070176e-06, + "loss": 0.9247474670410156, + "step": 148 + }, + { + "epoch": 0.04357362187454306, + "grad_norm": 1.3295257009133983, + "learning_rate": 8.654970760233919e-06, + "loss": 0.8488880395889282, + 
"step": 149 + }, + { + "epoch": 0.04386606228980845, + "grad_norm": 1.244174459745273, + "learning_rate": 8.713450292397661e-06, + "loss": 0.7844473123550415, + "step": 150 + }, + { + "epoch": 0.04415850270507384, + "grad_norm": 1.3605735346558072, + "learning_rate": 8.771929824561405e-06, + "loss": 1.0540976524353027, + "step": 151 + }, + { + "epoch": 0.04445094312033923, + "grad_norm": 1.096092225329518, + "learning_rate": 8.830409356725146e-06, + "loss": 0.7919446229934692, + "step": 152 + }, + { + "epoch": 0.04474338353560462, + "grad_norm": 1.1577837223865697, + "learning_rate": 8.888888888888888e-06, + "loss": 0.818670928478241, + "step": 153 + }, + { + "epoch": 0.04503582395087001, + "grad_norm": 1.4320201209257988, + "learning_rate": 8.947368421052632e-06, + "loss": 0.8491114377975464, + "step": 154 + }, + { + "epoch": 0.0453282643661354, + "grad_norm": 1.8326606844764444, + "learning_rate": 9.005847953216374e-06, + "loss": 0.660563588142395, + "step": 155 + }, + { + "epoch": 0.04562070478140079, + "grad_norm": 1.1838649114458772, + "learning_rate": 9.064327485380117e-06, + "loss": 0.8559159636497498, + "step": 156 + }, + { + "epoch": 0.04591314519666618, + "grad_norm": 1.0968958293675206, + "learning_rate": 9.12280701754386e-06, + "loss": 0.8478386402130127, + "step": 157 + }, + { + "epoch": 0.04620558561193157, + "grad_norm": 1.1272218094040445, + "learning_rate": 9.181286549707603e-06, + "loss": 0.758915901184082, + "step": 158 + }, + { + "epoch": 0.04649802602719696, + "grad_norm": 1.3159367769875163, + "learning_rate": 9.239766081871345e-06, + "loss": 0.773307204246521, + "step": 159 + }, + { + "epoch": 0.04679046644246235, + "grad_norm": 1.29739510285095, + "learning_rate": 9.298245614035088e-06, + "loss": 0.8948490023612976, + "step": 160 + }, + { + "epoch": 0.04708290685772774, + "grad_norm": 1.2170406448830853, + "learning_rate": 9.35672514619883e-06, + "loss": 0.83086097240448, + "step": 161 + }, + { + "epoch": 0.047375347272993126, + "grad_norm": 
1.474814122834776, + "learning_rate": 9.415204678362574e-06, + "loss": 0.7683168649673462, + "step": 162 + }, + { + "epoch": 0.047667787688258516, + "grad_norm": 1.2546637555360107, + "learning_rate": 9.473684210526315e-06, + "loss": 0.9267748594284058, + "step": 163 + }, + { + "epoch": 0.047960228103523905, + "grad_norm": 1.1945733924353639, + "learning_rate": 9.532163742690059e-06, + "loss": 0.9243365526199341, + "step": 164 + }, + { + "epoch": 0.048252668518789295, + "grad_norm": 1.1508961292698372, + "learning_rate": 9.590643274853801e-06, + "loss": 0.7841176986694336, + "step": 165 + }, + { + "epoch": 0.048545108934054684, + "grad_norm": 1.1853174404309834, + "learning_rate": 9.649122807017545e-06, + "loss": 0.8318643569946289, + "step": 166 + }, + { + "epoch": 0.048837549349320074, + "grad_norm": 1.3089312801161905, + "learning_rate": 9.707602339181286e-06, + "loss": 0.866286039352417, + "step": 167 + }, + { + "epoch": 0.04912998976458546, + "grad_norm": 1.32215003396801, + "learning_rate": 9.76608187134503e-06, + "loss": 0.8232241868972778, + "step": 168 + }, + { + "epoch": 0.04942243017985085, + "grad_norm": 1.4759162272800292, + "learning_rate": 9.824561403508772e-06, + "loss": 0.874968945980072, + "step": 169 + }, + { + "epoch": 0.04971487059511624, + "grad_norm": 1.3247540509223557, + "learning_rate": 9.883040935672515e-06, + "loss": 0.9048999547958374, + "step": 170 + }, + { + "epoch": 0.05000731101038163, + "grad_norm": 1.4647995646715117, + "learning_rate": 9.941520467836257e-06, + "loss": 0.9220215082168579, + "step": 171 + }, + { + "epoch": 0.05029975142564702, + "grad_norm": 1.3290504006044366, + "learning_rate": 1e-05, + "loss": 0.8326996564865112, + "step": 172 + }, + { + "epoch": 0.05059219184091241, + "grad_norm": 1.0687285940591045, + "learning_rate": 1.0058479532163743e-05, + "loss": 0.8023662567138672, + "step": 173 + }, + { + "epoch": 0.05088463225617781, + "grad_norm": 1.4370267362244613, + "learning_rate": 1.0116959064327488e-05, + 
"loss": 0.9172271490097046, + "step": 174 + }, + { + "epoch": 0.0511770726714432, + "grad_norm": 1.2538172153184461, + "learning_rate": 1.017543859649123e-05, + "loss": 0.8016377687454224, + "step": 175 + }, + { + "epoch": 0.051469513086708586, + "grad_norm": 1.1436252675754246, + "learning_rate": 1.0233918128654972e-05, + "loss": 0.7656369805335999, + "step": 176 + }, + { + "epoch": 0.051761953501973976, + "grad_norm": 1.1951944941269466, + "learning_rate": 1.0292397660818714e-05, + "loss": 0.7769640684127808, + "step": 177 + }, + { + "epoch": 0.052054393917239365, + "grad_norm": 1.3791114600068226, + "learning_rate": 1.0350877192982459e-05, + "loss": 0.9830589294433594, + "step": 178 + }, + { + "epoch": 0.052346834332504755, + "grad_norm": 1.1501081025808126, + "learning_rate": 1.04093567251462e-05, + "loss": 0.8002523183822632, + "step": 179 + }, + { + "epoch": 0.052639274747770144, + "grad_norm": 1.3726838653365003, + "learning_rate": 1.0467836257309943e-05, + "loss": 0.879243016242981, + "step": 180 + }, + { + "epoch": 0.052931715163035534, + "grad_norm": 1.2863425151805854, + "learning_rate": 1.0526315789473684e-05, + "loss": 0.7266525030136108, + "step": 181 + }, + { + "epoch": 0.05322415557830092, + "grad_norm": 1.350994010752117, + "learning_rate": 1.0584795321637428e-05, + "loss": 0.784702479839325, + "step": 182 + }, + { + "epoch": 0.05351659599356631, + "grad_norm": 1.415897619399055, + "learning_rate": 1.0643274853801172e-05, + "loss": 0.8419734239578247, + "step": 183 + }, + { + "epoch": 0.0538090364088317, + "grad_norm": 1.201782404599289, + "learning_rate": 1.0701754385964913e-05, + "loss": 0.8462855815887451, + "step": 184 + }, + { + "epoch": 0.05410147682409709, + "grad_norm": 1.361501494219251, + "learning_rate": 1.0760233918128655e-05, + "loss": 0.8888737559318542, + "step": 185 + }, + { + "epoch": 0.05439391723936248, + "grad_norm": 1.3305576553150047, + "learning_rate": 1.0818713450292399e-05, + "loss": 0.8063781261444092, + "step": 186 + }, + 
{ + "epoch": 0.05468635765462787, + "grad_norm": 1.2109684966022718, + "learning_rate": 1.0877192982456142e-05, + "loss": 0.7981499433517456, + "step": 187 + }, + { + "epoch": 0.05497879806989326, + "grad_norm": 1.5415785509759563, + "learning_rate": 1.0935672514619884e-05, + "loss": 0.8474490642547607, + "step": 188 + }, + { + "epoch": 0.05527123848515865, + "grad_norm": 1.300197838887535, + "learning_rate": 1.0994152046783626e-05, + "loss": 0.818732500076294, + "step": 189 + }, + { + "epoch": 0.05556367890042404, + "grad_norm": 1.3192619521811115, + "learning_rate": 1.105263157894737e-05, + "loss": 0.7660291194915771, + "step": 190 + }, + { + "epoch": 0.05585611931568943, + "grad_norm": 1.2626389127660034, + "learning_rate": 1.1111111111111113e-05, + "loss": 0.8240147233009338, + "step": 191 + }, + { + "epoch": 0.05614855973095482, + "grad_norm": 1.340830231936402, + "learning_rate": 1.1169590643274855e-05, + "loss": 0.9377203583717346, + "step": 192 + }, + { + "epoch": 0.05644100014622021, + "grad_norm": 1.416661564809907, + "learning_rate": 1.1228070175438597e-05, + "loss": 0.8662704229354858, + "step": 193 + }, + { + "epoch": 0.0567334405614856, + "grad_norm": 1.3274611257173192, + "learning_rate": 1.128654970760234e-05, + "loss": 0.717308759689331, + "step": 194 + }, + { + "epoch": 0.05702588097675099, + "grad_norm": 1.1942152308113003, + "learning_rate": 1.1345029239766083e-05, + "loss": 0.8538037538528442, + "step": 195 + }, + { + "epoch": 0.057318321392016376, + "grad_norm": 1.4411136610170212, + "learning_rate": 1.1403508771929826e-05, + "loss": 0.9016960859298706, + "step": 196 + }, + { + "epoch": 0.057610761807281766, + "grad_norm": 1.4664426354083508, + "learning_rate": 1.1461988304093568e-05, + "loss": 0.9313502311706543, + "step": 197 + }, + { + "epoch": 0.057903202222547155, + "grad_norm": 1.2885330427126278, + "learning_rate": 1.1520467836257312e-05, + "loss": 0.7330124974250793, + "step": 198 + }, + { + "epoch": 0.058195642637812545, + 
"grad_norm": 1.272277327326545, + "learning_rate": 1.1578947368421053e-05, + "loss": 0.8904056549072266, + "step": 199 + }, + { + "epoch": 0.058488083053077934, + "grad_norm": 1.4761275028472136, + "learning_rate": 1.1637426900584797e-05, + "loss": 0.7816377878189087, + "step": 200 + }, + { + "epoch": 0.058780523468343324, + "grad_norm": 1.3244130760300052, + "learning_rate": 1.1695906432748539e-05, + "loss": 0.7109910249710083, + "step": 201 + }, + { + "epoch": 0.05907296388360871, + "grad_norm": 1.499082853070359, + "learning_rate": 1.1754385964912282e-05, + "loss": 0.7657924890518188, + "step": 202 + }, + { + "epoch": 0.0593654042988741, + "grad_norm": 1.5632309821036996, + "learning_rate": 1.1812865497076024e-05, + "loss": 0.8521978259086609, + "step": 203 + }, + { + "epoch": 0.05965784471413949, + "grad_norm": 1.3625729366507646, + "learning_rate": 1.1871345029239766e-05, + "loss": 0.7558364868164062, + "step": 204 + }, + { + "epoch": 0.05995028512940488, + "grad_norm": 1.3362044158661328, + "learning_rate": 1.192982456140351e-05, + "loss": 0.8488497734069824, + "step": 205 + }, + { + "epoch": 0.06024272554467027, + "grad_norm": 1.5823695803446844, + "learning_rate": 1.1988304093567253e-05, + "loss": 0.7905591726303101, + "step": 206 + }, + { + "epoch": 0.06053516595993566, + "grad_norm": 1.324069880941127, + "learning_rate": 1.2046783625730995e-05, + "loss": 0.747936487197876, + "step": 207 + }, + { + "epoch": 0.06082760637520105, + "grad_norm": 1.3370127883002023, + "learning_rate": 1.2105263157894737e-05, + "loss": 0.8653486967086792, + "step": 208 + }, + { + "epoch": 0.06112004679046644, + "grad_norm": 1.295171295812896, + "learning_rate": 1.216374269005848e-05, + "loss": 0.8662437200546265, + "step": 209 + }, + { + "epoch": 0.06141248720573183, + "grad_norm": 1.6369328366726996, + "learning_rate": 1.2222222222222224e-05, + "loss": 0.9567133188247681, + "step": 210 + }, + { + "epoch": 0.06170492762099722, + "grad_norm": 1.4011109813275144, + 
"learning_rate": 1.2280701754385966e-05, + "loss": 0.8994660377502441, + "step": 211 + }, + { + "epoch": 0.06199736803626261, + "grad_norm": 1.2989562892904951, + "learning_rate": 1.2339181286549708e-05, + "loss": 0.7889316082000732, + "step": 212 + }, + { + "epoch": 0.062289808451528005, + "grad_norm": 1.2266327731037636, + "learning_rate": 1.239766081871345e-05, + "loss": 0.883985161781311, + "step": 213 + }, + { + "epoch": 0.0625822488667934, + "grad_norm": 1.2190679056716556, + "learning_rate": 1.2456140350877195e-05, + "loss": 0.7780495882034302, + "step": 214 + }, + { + "epoch": 0.06287468928205878, + "grad_norm": 1.3596314866008754, + "learning_rate": 1.2514619883040937e-05, + "loss": 0.6514906883239746, + "step": 215 + }, + { + "epoch": 0.06316712969732417, + "grad_norm": 1.3008367711622892, + "learning_rate": 1.2573099415204679e-05, + "loss": 0.750559389591217, + "step": 216 + }, + { + "epoch": 0.06345957011258956, + "grad_norm": 1.4761536100726258, + "learning_rate": 1.263157894736842e-05, + "loss": 0.8330573439598083, + "step": 217 + }, + { + "epoch": 0.06375201052785495, + "grad_norm": 1.4144186396910836, + "learning_rate": 1.2690058479532166e-05, + "loss": 0.8075361847877502, + "step": 218 + }, + { + "epoch": 0.06404445094312033, + "grad_norm": 1.2867265784947997, + "learning_rate": 1.2748538011695908e-05, + "loss": 0.7636772394180298, + "step": 219 + }, + { + "epoch": 0.06433689135838573, + "grad_norm": 1.1905704140813884, + "learning_rate": 1.280701754385965e-05, + "loss": 0.8241903185844421, + "step": 220 + }, + { + "epoch": 0.06462933177365111, + "grad_norm": 1.261461662230418, + "learning_rate": 1.2865497076023392e-05, + "loss": 0.6582514047622681, + "step": 221 + }, + { + "epoch": 0.06492177218891651, + "grad_norm": 1.461492259499335, + "learning_rate": 1.2923976608187137e-05, + "loss": 0.6363992691040039, + "step": 222 + }, + { + "epoch": 0.06521421260418189, + "grad_norm": 1.5776709499534403, + "learning_rate": 1.2982456140350879e-05, + "loss": 
0.8093860149383545, + "step": 223 + }, + { + "epoch": 0.06550665301944729, + "grad_norm": 1.5281675606912017, + "learning_rate": 1.304093567251462e-05, + "loss": 0.7719511985778809, + "step": 224 + }, + { + "epoch": 0.06579909343471267, + "grad_norm": 1.4484434101459598, + "learning_rate": 1.3099415204678362e-05, + "loss": 0.8314809799194336, + "step": 225 + }, + { + "epoch": 0.06609153384997807, + "grad_norm": 1.3751378156667435, + "learning_rate": 1.3157894736842108e-05, + "loss": 0.8752902746200562, + "step": 226 + }, + { + "epoch": 0.06638397426524345, + "grad_norm": 1.4660956062146326, + "learning_rate": 1.321637426900585e-05, + "loss": 0.7564839124679565, + "step": 227 + }, + { + "epoch": 0.06667641468050885, + "grad_norm": 1.6744274403459947, + "learning_rate": 1.3274853801169591e-05, + "loss": 0.7377971410751343, + "step": 228 + }, + { + "epoch": 0.06696885509577423, + "grad_norm": 1.3046915227989528, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.7298087477684021, + "step": 229 + }, + { + "epoch": 0.06726129551103963, + "grad_norm": 1.4026797729918719, + "learning_rate": 1.3391812865497079e-05, + "loss": 0.7291176915168762, + "step": 230 + }, + { + "epoch": 0.06755373592630501, + "grad_norm": 1.3421785664914363, + "learning_rate": 1.345029239766082e-05, + "loss": 0.8226944208145142, + "step": 231 + }, + { + "epoch": 0.0678461763415704, + "grad_norm": 1.4277073905518047, + "learning_rate": 1.3508771929824562e-05, + "loss": 0.7185185551643372, + "step": 232 + }, + { + "epoch": 0.0681386167568358, + "grad_norm": 1.2950151686673683, + "learning_rate": 1.3567251461988304e-05, + "loss": 0.7028212547302246, + "step": 233 + }, + { + "epoch": 0.06843105717210118, + "grad_norm": 1.6157016450339874, + "learning_rate": 1.362573099415205e-05, + "loss": 0.8809897899627686, + "step": 234 + }, + { + "epoch": 0.06872349758736658, + "grad_norm": 1.388536739112073, + "learning_rate": 1.3684210526315791e-05, + "loss": 0.7779085040092468, + "step": 235 + }, + { + 
"epoch": 0.06901593800263196, + "grad_norm": 1.5070530641919806, + "learning_rate": 1.3742690058479533e-05, + "loss": 0.731019139289856, + "step": 236 + }, + { + "epoch": 0.06930837841789736, + "grad_norm": 1.4005389899518954, + "learning_rate": 1.3801169590643275e-05, + "loss": 0.7495850920677185, + "step": 237 + }, + { + "epoch": 0.06960081883316274, + "grad_norm": 1.2241508662035476, + "learning_rate": 1.385964912280702e-05, + "loss": 0.7018189430236816, + "step": 238 + }, + { + "epoch": 0.06989325924842814, + "grad_norm": 1.2596692368793962, + "learning_rate": 1.3918128654970762e-05, + "loss": 0.7072417736053467, + "step": 239 + }, + { + "epoch": 0.07018569966369352, + "grad_norm": 1.3606864903220994, + "learning_rate": 1.3976608187134504e-05, + "loss": 0.8125720620155334, + "step": 240 + }, + { + "epoch": 0.07047814007895892, + "grad_norm": 1.442924901417446, + "learning_rate": 1.4035087719298246e-05, + "loss": 0.6101655960083008, + "step": 241 + }, + { + "epoch": 0.0707705804942243, + "grad_norm": 1.3725413795436465, + "learning_rate": 1.409356725146199e-05, + "loss": 0.9005568623542786, + "step": 242 + }, + { + "epoch": 0.0710630209094897, + "grad_norm": 1.4215646059439664, + "learning_rate": 1.4152046783625733e-05, + "loss": 0.7678338289260864, + "step": 243 + }, + { + "epoch": 0.07135546132475508, + "grad_norm": 1.4745728838056915, + "learning_rate": 1.4210526315789475e-05, + "loss": 0.7563410997390747, + "step": 244 + }, + { + "epoch": 0.07164790174002048, + "grad_norm": 1.3043448641122064, + "learning_rate": 1.4269005847953217e-05, + "loss": 0.7497583627700806, + "step": 245 + }, + { + "epoch": 0.07194034215528586, + "grad_norm": 1.8237088246729396, + "learning_rate": 1.432748538011696e-05, + "loss": 0.8913442492485046, + "step": 246 + }, + { + "epoch": 0.07223278257055125, + "grad_norm": 1.446976759622428, + "learning_rate": 1.4385964912280704e-05, + "loss": 0.7714704871177673, + "step": 247 + }, + { + "epoch": 0.07252522298581664, + "grad_norm": 
1.4721214924941617, + "learning_rate": 1.4444444444444446e-05, + "loss": 0.6752789616584778, + "step": 248 + }, + { + "epoch": 0.07281766340108203, + "grad_norm": 1.4015875441769006, + "learning_rate": 1.4502923976608188e-05, + "loss": 0.6092795133590698, + "step": 249 + }, + { + "epoch": 0.07311010381634742, + "grad_norm": 1.4602535650914903, + "learning_rate": 1.4561403508771931e-05, + "loss": 0.9300343990325928, + "step": 250 + }, + { + "epoch": 0.07340254423161281, + "grad_norm": 1.3884630911660603, + "learning_rate": 1.4619883040935675e-05, + "loss": 0.8005613088607788, + "step": 251 + }, + { + "epoch": 0.0736949846468782, + "grad_norm": 1.2918508056771596, + "learning_rate": 1.4678362573099417e-05, + "loss": 0.7188931703567505, + "step": 252 + }, + { + "epoch": 0.07398742506214359, + "grad_norm": 1.3258314938186555, + "learning_rate": 1.4736842105263159e-05, + "loss": 0.6967242956161499, + "step": 253 + }, + { + "epoch": 0.07427986547740897, + "grad_norm": 1.300875000270566, + "learning_rate": 1.4795321637426902e-05, + "loss": 0.6921653747558594, + "step": 254 + }, + { + "epoch": 0.07457230589267437, + "grad_norm": 1.4258732788152875, + "learning_rate": 1.4853801169590644e-05, + "loss": 0.8498743772506714, + "step": 255 + }, + { + "epoch": 0.07486474630793975, + "grad_norm": 1.4311730434285577, + "learning_rate": 1.4912280701754388e-05, + "loss": 0.6420027017593384, + "step": 256 + }, + { + "epoch": 0.07515718672320515, + "grad_norm": 1.3747073212413874, + "learning_rate": 1.497076023391813e-05, + "loss": 0.7101434469223022, + "step": 257 + }, + { + "epoch": 0.07544962713847053, + "grad_norm": 1.562801712624193, + "learning_rate": 1.5029239766081873e-05, + "loss": 0.740740180015564, + "step": 258 + }, + { + "epoch": 0.07574206755373593, + "grad_norm": 1.726645998674187, + "learning_rate": 1.5087719298245615e-05, + "loss": 0.891905665397644, + "step": 259 + }, + { + "epoch": 0.07603450796900131, + "grad_norm": 1.5486677390214905, + "learning_rate": 
1.5146198830409358e-05, + "loss": 0.867740273475647, + "step": 260 + }, + { + "epoch": 0.07632694838426671, + "grad_norm": 1.5072500165891534, + "learning_rate": 1.52046783625731e-05, + "loss": 0.7895220518112183, + "step": 261 + }, + { + "epoch": 0.07661938879953209, + "grad_norm": 1.5579945503860015, + "learning_rate": 1.5263157894736846e-05, + "loss": 0.7987008094787598, + "step": 262 + }, + { + "epoch": 0.07691182921479749, + "grad_norm": 1.4014455476427317, + "learning_rate": 1.5321637426900587e-05, + "loss": 0.7780282497406006, + "step": 263 + }, + { + "epoch": 0.07720426963006287, + "grad_norm": 1.2290290646079385, + "learning_rate": 1.538011695906433e-05, + "loss": 0.6265891194343567, + "step": 264 + }, + { + "epoch": 0.07749671004532827, + "grad_norm": 1.4917276843875658, + "learning_rate": 1.543859649122807e-05, + "loss": 0.6559646129608154, + "step": 265 + }, + { + "epoch": 0.07778915046059365, + "grad_norm": 1.4406503206723986, + "learning_rate": 1.5497076023391816e-05, + "loss": 0.8362047672271729, + "step": 266 + }, + { + "epoch": 0.07808159087585904, + "grad_norm": 1.481487764499426, + "learning_rate": 1.555555555555556e-05, + "loss": 0.707663357257843, + "step": 267 + }, + { + "epoch": 0.07837403129112443, + "grad_norm": 1.398507930714671, + "learning_rate": 1.56140350877193e-05, + "loss": 0.67903071641922, + "step": 268 + }, + { + "epoch": 0.07866647170638982, + "grad_norm": 1.3187056037490035, + "learning_rate": 1.5672514619883042e-05, + "loss": 0.7634894251823425, + "step": 269 + }, + { + "epoch": 0.0789589121216552, + "grad_norm": 1.3791372975152867, + "learning_rate": 1.5730994152046787e-05, + "loss": 0.6395117044448853, + "step": 270 + }, + { + "epoch": 0.0792513525369206, + "grad_norm": 1.4273746235266698, + "learning_rate": 1.578947368421053e-05, + "loss": 0.6948165893554688, + "step": 271 + }, + { + "epoch": 0.079543792952186, + "grad_norm": 1.342718294320327, + "learning_rate": 1.584795321637427e-05, + "loss": 0.9288383722305298, + "step": 
272 + }, + { + "epoch": 0.07983623336745138, + "grad_norm": 1.4727633207578312, + "learning_rate": 1.5906432748538013e-05, + "loss": 0.9291346073150635, + "step": 273 + }, + { + "epoch": 0.08012867378271678, + "grad_norm": 1.3613936763496384, + "learning_rate": 1.5964912280701755e-05, + "loss": 0.7399512529373169, + "step": 274 + }, + { + "epoch": 0.08042111419798216, + "grad_norm": 1.5856072060707183, + "learning_rate": 1.60233918128655e-05, + "loss": 0.6890764236450195, + "step": 275 + }, + { + "epoch": 0.08071355461324756, + "grad_norm": 1.1844012071470522, + "learning_rate": 1.6081871345029242e-05, + "loss": 0.6520324349403381, + "step": 276 + }, + { + "epoch": 0.08100599502851294, + "grad_norm": 1.4161353486782806, + "learning_rate": 1.6140350877192984e-05, + "loss": 0.6726658344268799, + "step": 277 + }, + { + "epoch": 0.08129843544377834, + "grad_norm": 1.5076627116667636, + "learning_rate": 1.6198830409356726e-05, + "loss": 0.7453294992446899, + "step": 278 + }, + { + "epoch": 0.08159087585904372, + "grad_norm": 1.6796077609043067, + "learning_rate": 1.625730994152047e-05, + "loss": 0.755578875541687, + "step": 279 + }, + { + "epoch": 0.08188331627430911, + "grad_norm": 1.576837195920435, + "learning_rate": 1.6315789473684213e-05, + "loss": 0.713086724281311, + "step": 280 + }, + { + "epoch": 0.0821757566895745, + "grad_norm": 1.5223162841340931, + "learning_rate": 1.6374269005847955e-05, + "loss": 0.8714310526847839, + "step": 281 + }, + { + "epoch": 0.0824681971048399, + "grad_norm": 1.4999918578300349, + "learning_rate": 1.6432748538011697e-05, + "loss": 0.6827348470687866, + "step": 282 + }, + { + "epoch": 0.08276063752010528, + "grad_norm": 1.5263417760460645, + "learning_rate": 1.649122807017544e-05, + "loss": 0.8613482713699341, + "step": 283 + }, + { + "epoch": 0.08305307793537067, + "grad_norm": 1.3847261162959308, + "learning_rate": 1.6549707602339184e-05, + "loss": 0.7442763447761536, + "step": 284 + }, + { + "epoch": 0.08334551835063606, + 
"grad_norm": 1.3784508201309091, + "learning_rate": 1.6608187134502926e-05, + "loss": 0.7505494356155396, + "step": 285 + }, + { + "epoch": 0.08363795876590145, + "grad_norm": 1.3042392110114591, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.7720779776573181, + "step": 286 + }, + { + "epoch": 0.08393039918116683, + "grad_norm": 1.5516828033558783, + "learning_rate": 1.672514619883041e-05, + "loss": 0.7746216654777527, + "step": 287 + }, + { + "epoch": 0.08422283959643223, + "grad_norm": 1.4429865955911445, + "learning_rate": 1.6783625730994155e-05, + "loss": 0.8471436500549316, + "step": 288 + }, + { + "epoch": 0.08451528001169761, + "grad_norm": 1.4116704654777366, + "learning_rate": 1.6842105263157896e-05, + "loss": 0.7117248773574829, + "step": 289 + }, + { + "epoch": 0.08480772042696301, + "grad_norm": 1.4428575448924124, + "learning_rate": 1.690058479532164e-05, + "loss": 0.758680522441864, + "step": 290 + }, + { + "epoch": 0.08510016084222839, + "grad_norm": 1.4632326474117294, + "learning_rate": 1.695906432748538e-05, + "loss": 0.9083560705184937, + "step": 291 + }, + { + "epoch": 0.08539260125749379, + "grad_norm": 1.3444847997489586, + "learning_rate": 1.7017543859649125e-05, + "loss": 0.7457551956176758, + "step": 292 + }, + { + "epoch": 0.08568504167275917, + "grad_norm": 1.423532632485526, + "learning_rate": 1.7076023391812867e-05, + "loss": 0.7463638782501221, + "step": 293 + }, + { + "epoch": 0.08597748208802457, + "grad_norm": 1.4584931442713187, + "learning_rate": 1.713450292397661e-05, + "loss": 0.6983559131622314, + "step": 294 + }, + { + "epoch": 0.08626992250328995, + "grad_norm": 1.3612667828489424, + "learning_rate": 1.719298245614035e-05, + "loss": 0.8043842911720276, + "step": 295 + }, + { + "epoch": 0.08656236291855535, + "grad_norm": 1.5042924331122234, + "learning_rate": 1.7251461988304093e-05, + "loss": 0.7150747776031494, + "step": 296 + }, + { + "epoch": 0.08685480333382073, + "grad_norm": 2.0308017082996326, + 
"learning_rate": 1.7309941520467838e-05, + "loss": 0.7805558443069458, + "step": 297 + }, + { + "epoch": 0.08714724374908613, + "grad_norm": 1.4326584270734728, + "learning_rate": 1.736842105263158e-05, + "loss": 0.7158486843109131, + "step": 298 + }, + { + "epoch": 0.08743968416435151, + "grad_norm": 1.2329719748746066, + "learning_rate": 1.7426900584795322e-05, + "loss": 0.6496458053588867, + "step": 299 + }, + { + "epoch": 0.0877321245796169, + "grad_norm": 1.3255444740397837, + "learning_rate": 1.7485380116959064e-05, + "loss": 0.7488506436347961, + "step": 300 + }, + { + "epoch": 0.08802456499488229, + "grad_norm": 1.5658056782887144, + "learning_rate": 1.754385964912281e-05, + "loss": 0.8370999097824097, + "step": 301 + }, + { + "epoch": 0.08831700541014768, + "grad_norm": 1.3342670844496862, + "learning_rate": 1.760233918128655e-05, + "loss": 0.6624353528022766, + "step": 302 + }, + { + "epoch": 0.08860944582541307, + "grad_norm": 1.4627534576360353, + "learning_rate": 1.7660818713450293e-05, + "loss": 0.6861047148704529, + "step": 303 + }, + { + "epoch": 0.08890188624067846, + "grad_norm": 1.6532053166188327, + "learning_rate": 1.7719298245614035e-05, + "loss": 0.746711015701294, + "step": 304 + }, + { + "epoch": 0.08919432665594385, + "grad_norm": 1.554160121250669, + "learning_rate": 1.7777777777777777e-05, + "loss": 0.7794955968856812, + "step": 305 + }, + { + "epoch": 0.08948676707120924, + "grad_norm": 1.7649976265227958, + "learning_rate": 1.7836257309941522e-05, + "loss": 0.7202489972114563, + "step": 306 + }, + { + "epoch": 0.08977920748647462, + "grad_norm": 1.6262384567896693, + "learning_rate": 1.7894736842105264e-05, + "loss": 0.7252119183540344, + "step": 307 + }, + { + "epoch": 0.09007164790174002, + "grad_norm": 1.5452508352574224, + "learning_rate": 1.7953216374269006e-05, + "loss": 0.9168737530708313, + "step": 308 + }, + { + "epoch": 0.0903640883170054, + "grad_norm": 1.487069935429652, + "learning_rate": 1.8011695906432747e-05, + "loss": 
0.7647944688796997, + "step": 309 + }, + { + "epoch": 0.0906565287322708, + "grad_norm": 1.7447386842901849, + "learning_rate": 1.8070175438596493e-05, + "loss": 0.7836136817932129, + "step": 310 + }, + { + "epoch": 0.0909489691475362, + "grad_norm": 1.2604562921756688, + "learning_rate": 1.8128654970760235e-05, + "loss": 0.6495587825775146, + "step": 311 + }, + { + "epoch": 0.09124140956280158, + "grad_norm": 1.5613577023920442, + "learning_rate": 1.8187134502923976e-05, + "loss": 0.7266290187835693, + "step": 312 + }, + { + "epoch": 0.09153384997806698, + "grad_norm": 1.9984801625992445, + "learning_rate": 1.824561403508772e-05, + "loss": 0.8417587876319885, + "step": 313 + }, + { + "epoch": 0.09182629039333236, + "grad_norm": 1.5767499272635297, + "learning_rate": 1.8304093567251464e-05, + "loss": 0.8431564569473267, + "step": 314 + }, + { + "epoch": 0.09211873080859775, + "grad_norm": 1.4390326104450535, + "learning_rate": 1.8362573099415205e-05, + "loss": 0.7724050283432007, + "step": 315 + }, + { + "epoch": 0.09241117122386314, + "grad_norm": 1.4145032164176374, + "learning_rate": 1.8421052631578947e-05, + "loss": 0.6687352657318115, + "step": 316 + }, + { + "epoch": 0.09270361163912853, + "grad_norm": 1.3696816256616517, + "learning_rate": 1.847953216374269e-05, + "loss": 0.7465454339981079, + "step": 317 + }, + { + "epoch": 0.09299605205439392, + "grad_norm": 1.507661205433782, + "learning_rate": 1.8538011695906434e-05, + "loss": 0.6944088935852051, + "step": 318 + }, + { + "epoch": 0.09328849246965931, + "grad_norm": 1.2922205760098913, + "learning_rate": 1.8596491228070176e-05, + "loss": 0.6692598462104797, + "step": 319 + }, + { + "epoch": 0.0935809328849247, + "grad_norm": 1.4345621362788812, + "learning_rate": 1.8654970760233918e-05, + "loss": 0.7287981510162354, + "step": 320 + }, + { + "epoch": 0.09387337330019009, + "grad_norm": 1.426362426046858, + "learning_rate": 1.871345029239766e-05, + "loss": 0.704437255859375, + "step": 321 + }, + { + 
"epoch": 0.09416581371545547, + "grad_norm": 1.2757141813139592, + "learning_rate": 1.8771929824561405e-05, + "loss": 0.6425009965896606, + "step": 322 + }, + { + "epoch": 0.09445825413072087, + "grad_norm": 1.4929466314279891, + "learning_rate": 1.8830409356725147e-05, + "loss": 0.765799880027771, + "step": 323 + }, + { + "epoch": 0.09475069454598625, + "grad_norm": 1.482293870539422, + "learning_rate": 1.888888888888889e-05, + "loss": 0.9151520133018494, + "step": 324 + }, + { + "epoch": 0.09504313496125165, + "grad_norm": 1.5087468194478204, + "learning_rate": 1.894736842105263e-05, + "loss": 0.8753486275672913, + "step": 325 + }, + { + "epoch": 0.09533557537651703, + "grad_norm": 1.649363404228967, + "learning_rate": 1.9005847953216376e-05, + "loss": 0.7652826309204102, + "step": 326 + }, + { + "epoch": 0.09562801579178243, + "grad_norm": 1.405975419146797, + "learning_rate": 1.9064327485380118e-05, + "loss": 0.7309015393257141, + "step": 327 + }, + { + "epoch": 0.09592045620704781, + "grad_norm": 1.6766609888433524, + "learning_rate": 1.912280701754386e-05, + "loss": 0.7656553983688354, + "step": 328 + }, + { + "epoch": 0.09621289662231321, + "grad_norm": 1.4942542074310006, + "learning_rate": 1.9181286549707602e-05, + "loss": 0.7400631904602051, + "step": 329 + }, + { + "epoch": 0.09650533703757859, + "grad_norm": 1.4740815055784118, + "learning_rate": 1.9239766081871347e-05, + "loss": 0.6812465190887451, + "step": 330 + }, + { + "epoch": 0.09679777745284399, + "grad_norm": 1.4394939888427052, + "learning_rate": 1.929824561403509e-05, + "loss": 0.6820628046989441, + "step": 331 + }, + { + "epoch": 0.09709021786810937, + "grad_norm": 1.9824484648298863, + "learning_rate": 1.935672514619883e-05, + "loss": 0.7437758445739746, + "step": 332 + }, + { + "epoch": 0.09738265828337477, + "grad_norm": 1.4755288186056683, + "learning_rate": 1.9415204678362573e-05, + "loss": 0.8011504411697388, + "step": 333 + }, + { + "epoch": 0.09767509869864015, + "grad_norm": 
1.3829561395962537, + "learning_rate": 1.9473684210526318e-05, + "loss": 0.7437810301780701, + "step": 334 + }, + { + "epoch": 0.09796753911390554, + "grad_norm": 1.328838303483977, + "learning_rate": 1.953216374269006e-05, + "loss": 0.7419568300247192, + "step": 335 + }, + { + "epoch": 0.09825997952917093, + "grad_norm": 1.4291436246188844, + "learning_rate": 1.9590643274853802e-05, + "loss": 0.7805042266845703, + "step": 336 + }, + { + "epoch": 0.09855241994443632, + "grad_norm": 1.3104711543583085, + "learning_rate": 1.9649122807017544e-05, + "loss": 0.6952530145645142, + "step": 337 + }, + { + "epoch": 0.0988448603597017, + "grad_norm": 1.313224719465845, + "learning_rate": 1.970760233918129e-05, + "loss": 0.7669289112091064, + "step": 338 + }, + { + "epoch": 0.0991373007749671, + "grad_norm": 1.4101609769639065, + "learning_rate": 1.976608187134503e-05, + "loss": 0.8033919930458069, + "step": 339 + }, + { + "epoch": 0.09942974119023248, + "grad_norm": 1.2883543538345825, + "learning_rate": 1.9824561403508773e-05, + "loss": 0.6523177623748779, + "step": 340 + }, + { + "epoch": 0.09972218160549788, + "grad_norm": 1.3960808628411998, + "learning_rate": 1.9883040935672515e-05, + "loss": 0.7221896648406982, + "step": 341 + }, + { + "epoch": 0.10001462202076326, + "grad_norm": 1.2255647850534943, + "learning_rate": 1.994152046783626e-05, + "loss": 0.6054700016975403, + "step": 342 + }, + { + "epoch": 0.10030706243602866, + "grad_norm": 1.6303566611100393, + "learning_rate": 2e-05, + "loss": 0.8368290662765503, + "step": 343 + }, + { + "epoch": 0.10059950285129404, + "grad_norm": 1.4276425594743465, + "learning_rate": 1.99999988312804e-05, + "loss": 0.9075677990913391, + "step": 344 + }, + { + "epoch": 0.10089194326655944, + "grad_norm": 1.4517524210925274, + "learning_rate": 1.999999532512188e-05, + "loss": 0.7202495336532593, + "step": 345 + }, + { + "epoch": 0.10118438368182482, + "grad_norm": 1.5340311782896001, + "learning_rate": 1.9999989481525245e-05, + 
"loss": 0.7373536229133606, + "step": 346 + }, + { + "epoch": 0.10147682409709022, + "grad_norm": 1.3128585037330316, + "learning_rate": 1.9999981300491873e-05, + "loss": 0.7292035222053528, + "step": 347 + }, + { + "epoch": 0.10176926451235561, + "grad_norm": 1.2681362139682877, + "learning_rate": 1.9999970782023673e-05, + "loss": 0.8970675468444824, + "step": 348 + }, + { + "epoch": 0.102061704927621, + "grad_norm": 1.384714606589521, + "learning_rate": 1.9999957926123104e-05, + "loss": 0.7909846305847168, + "step": 349 + }, + { + "epoch": 0.1023541453428864, + "grad_norm": 1.3537270396362884, + "learning_rate": 1.999994273279317e-05, + "loss": 0.7784097790718079, + "step": 350 + }, + { + "epoch": 0.10264658575815178, + "grad_norm": 1.4008631296209513, + "learning_rate": 1.9999925202037422e-05, + "loss": 0.7129874229431152, + "step": 351 + }, + { + "epoch": 0.10293902617341717, + "grad_norm": 1.3322666039831734, + "learning_rate": 1.999990533385996e-05, + "loss": 0.7185519337654114, + "step": 352 + }, + { + "epoch": 0.10323146658868255, + "grad_norm": 1.379111892126872, + "learning_rate": 1.9999883128265428e-05, + "loss": 0.812228798866272, + "step": 353 + }, + { + "epoch": 0.10352390700394795, + "grad_norm": 1.2831139743741589, + "learning_rate": 1.999985858525901e-05, + "loss": 0.7187886238098145, + "step": 354 + }, + { + "epoch": 0.10381634741921333, + "grad_norm": 1.133776070922858, + "learning_rate": 1.9999831704846452e-05, + "loss": 0.6618789434432983, + "step": 355 + }, + { + "epoch": 0.10410878783447873, + "grad_norm": 1.5601168208020613, + "learning_rate": 1.999980248703403e-05, + "loss": 0.9226458072662354, + "step": 356 + }, + { + "epoch": 0.10440122824974411, + "grad_norm": 1.3702611517072447, + "learning_rate": 1.9999770931828578e-05, + "loss": 0.7326352596282959, + "step": 357 + }, + { + "epoch": 0.10469366866500951, + "grad_norm": 1.4755549813416367, + "learning_rate": 1.9999737039237472e-05, + "loss": 0.719240128993988, + "step": 358 + }, + { + 
"epoch": 0.10498610908027489, + "grad_norm": 1.2914576093532248, + "learning_rate": 1.999970080926863e-05, + "loss": 0.7380290031433105, + "step": 359 + }, + { + "epoch": 0.10527854949554029, + "grad_norm": 1.6255135036531254, + "learning_rate": 1.9999662241930523e-05, + "loss": 0.736219048500061, + "step": 360 + }, + { + "epoch": 0.10557098991080567, + "grad_norm": 1.381933387611508, + "learning_rate": 1.999962133723217e-05, + "loss": 0.8160735368728638, + "step": 361 + }, + { + "epoch": 0.10586343032607107, + "grad_norm": 1.4607575491849774, + "learning_rate": 1.9999578095183126e-05, + "loss": 0.6679781675338745, + "step": 362 + }, + { + "epoch": 0.10615587074133645, + "grad_norm": 1.551414308388604, + "learning_rate": 1.9999532515793498e-05, + "loss": 0.7670542001724243, + "step": 363 + }, + { + "epoch": 0.10644831115660185, + "grad_norm": 1.2802491712211252, + "learning_rate": 1.9999484599073945e-05, + "loss": 0.6395057439804077, + "step": 364 + }, + { + "epoch": 0.10674075157186723, + "grad_norm": 1.571289013739176, + "learning_rate": 1.9999434345035666e-05, + "loss": 0.7226368188858032, + "step": 365 + }, + { + "epoch": 0.10703319198713263, + "grad_norm": 1.4755023089198305, + "learning_rate": 1.9999381753690403e-05, + "loss": 0.6236128211021423, + "step": 366 + }, + { + "epoch": 0.10732563240239801, + "grad_norm": 1.2507526885979663, + "learning_rate": 1.9999326825050455e-05, + "loss": 0.5937299132347107, + "step": 367 + }, + { + "epoch": 0.1076180728176634, + "grad_norm": 1.294239826855842, + "learning_rate": 1.999926955912866e-05, + "loss": 0.6014857292175293, + "step": 368 + }, + { + "epoch": 0.10791051323292879, + "grad_norm": 1.1031323946933334, + "learning_rate": 1.9999209955938394e-05, + "loss": 0.5898704528808594, + "step": 369 + }, + { + "epoch": 0.10820295364819418, + "grad_norm": 1.475520460275832, + "learning_rate": 1.9999148015493602e-05, + "loss": 0.6879048943519592, + "step": 370 + }, + { + "epoch": 0.10849539406345957, + "grad_norm": 
1.5235484717330832, + "learning_rate": 1.999908373780876e-05, + "loss": 0.781298041343689, + "step": 371 + }, + { + "epoch": 0.10878783447872496, + "grad_norm": 1.2913472995661532, + "learning_rate": 1.9999017122898886e-05, + "loss": 0.6997531652450562, + "step": 372 + }, + { + "epoch": 0.10908027489399034, + "grad_norm": 1.2104967688689228, + "learning_rate": 1.9998948170779556e-05, + "loss": 0.6979694366455078, + "step": 373 + }, + { + "epoch": 0.10937271530925574, + "grad_norm": 1.6154905149339498, + "learning_rate": 1.999887688146689e-05, + "loss": 0.8069214820861816, + "step": 374 + }, + { + "epoch": 0.10966515572452112, + "grad_norm": 1.4534879205249425, + "learning_rate": 1.9998803254977538e-05, + "loss": 0.875137448310852, + "step": 375 + }, + { + "epoch": 0.10995759613978652, + "grad_norm": 1.4252221781216903, + "learning_rate": 1.9998727291328725e-05, + "loss": 0.8267173767089844, + "step": 376 + }, + { + "epoch": 0.1102500365550519, + "grad_norm": 1.3704709368430794, + "learning_rate": 1.99986489905382e-05, + "loss": 0.7589337825775146, + "step": 377 + }, + { + "epoch": 0.1105424769703173, + "grad_norm": 1.7248131297126135, + "learning_rate": 1.999856835262427e-05, + "loss": 0.7479992508888245, + "step": 378 + }, + { + "epoch": 0.11083491738558268, + "grad_norm": 1.2827951417341936, + "learning_rate": 1.999848537760577e-05, + "loss": 0.7315084934234619, + "step": 379 + }, + { + "epoch": 0.11112735780084808, + "grad_norm": 1.2954297558049002, + "learning_rate": 1.9998400065502113e-05, + "loss": 0.6256793737411499, + "step": 380 + }, + { + "epoch": 0.11141979821611346, + "grad_norm": 1.3569633064170001, + "learning_rate": 1.999831241633323e-05, + "loss": 0.7521710395812988, + "step": 381 + }, + { + "epoch": 0.11171223863137886, + "grad_norm": 1.0851029845548303, + "learning_rate": 1.999822243011961e-05, + "loss": 0.6824651956558228, + "step": 382 + }, + { + "epoch": 0.11200467904664424, + "grad_norm": 1.4206429861314096, + "learning_rate": 
1.9998130106882286e-05, + "loss": 0.7254977226257324, + "step": 383 + }, + { + "epoch": 0.11229711946190964, + "grad_norm": 1.4795080730717471, + "learning_rate": 1.999803544664284e-05, + "loss": 0.8263741731643677, + "step": 384 + }, + { + "epoch": 0.11258955987717502, + "grad_norm": 1.3096519492267191, + "learning_rate": 1.9997938449423397e-05, + "loss": 0.6829507350921631, + "step": 385 + }, + { + "epoch": 0.11288200029244042, + "grad_norm": 1.2970935037264724, + "learning_rate": 1.9997839115246632e-05, + "loss": 0.7452428340911865, + "step": 386 + }, + { + "epoch": 0.11317444070770581, + "grad_norm": 1.322513824449788, + "learning_rate": 1.999773744413576e-05, + "loss": 0.7900702953338623, + "step": 387 + }, + { + "epoch": 0.1134668811229712, + "grad_norm": 1.288312120065537, + "learning_rate": 1.9997633436114547e-05, + "loss": 0.6215303540229797, + "step": 388 + }, + { + "epoch": 0.11375932153823659, + "grad_norm": 1.3132613017546322, + "learning_rate": 1.999752709120731e-05, + "loss": 0.798041820526123, + "step": 389 + }, + { + "epoch": 0.11405176195350197, + "grad_norm": 1.1590478323977431, + "learning_rate": 1.9997418409438893e-05, + "loss": 0.6033064126968384, + "step": 390 + }, + { + "epoch": 0.11434420236876737, + "grad_norm": 1.0686988063553795, + "learning_rate": 1.9997307390834712e-05, + "loss": 0.6358453631401062, + "step": 391 + }, + { + "epoch": 0.11463664278403275, + "grad_norm": 1.2775095189945147, + "learning_rate": 1.999719403542071e-05, + "loss": 0.6544308662414551, + "step": 392 + }, + { + "epoch": 0.11492908319929815, + "grad_norm": 1.3305771925144483, + "learning_rate": 1.9997078343223393e-05, + "loss": 0.73077392578125, + "step": 393 + }, + { + "epoch": 0.11522152361456353, + "grad_norm": 1.1914838503287841, + "learning_rate": 1.9996960314269792e-05, + "loss": 0.5874192118644714, + "step": 394 + }, + { + "epoch": 0.11551396402982893, + "grad_norm": 1.420658082184349, + "learning_rate": 1.9996839948587503e-05, + "loss": 0.8242438435554504, 
+ "step": 395 + }, + { + "epoch": 0.11580640444509431, + "grad_norm": 1.705790457884444, + "learning_rate": 1.9996717246204655e-05, + "loss": 0.9496668577194214, + "step": 396 + }, + { + "epoch": 0.1160988448603597, + "grad_norm": 1.2258839048083405, + "learning_rate": 1.9996592207149933e-05, + "loss": 0.6940287351608276, + "step": 397 + }, + { + "epoch": 0.11639128527562509, + "grad_norm": 1.4226760671412086, + "learning_rate": 1.999646483145256e-05, + "loss": 0.7403827905654907, + "step": 398 + }, + { + "epoch": 0.11668372569089049, + "grad_norm": 1.441557495225195, + "learning_rate": 1.9996335119142315e-05, + "loss": 0.7493172287940979, + "step": 399 + }, + { + "epoch": 0.11697616610615587, + "grad_norm": 1.1233068749163333, + "learning_rate": 1.9996203070249516e-05, + "loss": 0.6048015356063843, + "step": 400 + }, + { + "epoch": 0.11726860652142126, + "grad_norm": 1.218449987518831, + "learning_rate": 1.9996068684805025e-05, + "loss": 0.7220426797866821, + "step": 401 + }, + { + "epoch": 0.11756104693668665, + "grad_norm": 1.4820269559236292, + "learning_rate": 1.9995931962840255e-05, + "loss": 0.7294620275497437, + "step": 402 + }, + { + "epoch": 0.11785348735195204, + "grad_norm": 1.2693334480850886, + "learning_rate": 1.999579290438717e-05, + "loss": 0.7075647115707397, + "step": 403 + }, + { + "epoch": 0.11814592776721743, + "grad_norm": 1.4353448940274405, + "learning_rate": 1.9995651509478264e-05, + "loss": 0.7396657466888428, + "step": 404 + }, + { + "epoch": 0.11843836818248282, + "grad_norm": 1.5214596029668779, + "learning_rate": 1.999550777814659e-05, + "loss": 0.8240506649017334, + "step": 405 + }, + { + "epoch": 0.1187308085977482, + "grad_norm": 1.3463253886040645, + "learning_rate": 1.9995361710425752e-05, + "loss": 0.7518147826194763, + "step": 406 + }, + { + "epoch": 0.1190232490130136, + "grad_norm": 1.3938258800517485, + "learning_rate": 1.9995213306349886e-05, + "loss": 0.6998933553695679, + "step": 407 + }, + { + "epoch": 
0.11931568942827898, + "grad_norm": 2.8811625928277134, + "learning_rate": 1.999506256595368e-05, + "loss": 0.659205973148346, + "step": 408 + }, + { + "epoch": 0.11960812984354438, + "grad_norm": 1.6815673603725616, + "learning_rate": 1.9994909489272372e-05, + "loss": 0.7826964259147644, + "step": 409 + }, + { + "epoch": 0.11990057025880976, + "grad_norm": 1.4225942370637599, + "learning_rate": 1.999475407634174e-05, + "loss": 0.770768404006958, + "step": 410 + }, + { + "epoch": 0.12019301067407516, + "grad_norm": 1.4031411556955713, + "learning_rate": 1.9994596327198113e-05, + "loss": 0.7390692234039307, + "step": 411 + }, + { + "epoch": 0.12048545108934054, + "grad_norm": 1.238945633280151, + "learning_rate": 1.999443624187836e-05, + "loss": 0.7092628479003906, + "step": 412 + }, + { + "epoch": 0.12077789150460594, + "grad_norm": 1.2795019723948553, + "learning_rate": 1.9994273820419903e-05, + "loss": 0.5252765417098999, + "step": 413 + }, + { + "epoch": 0.12107033191987132, + "grad_norm": 1.389583747663469, + "learning_rate": 1.9994109062860707e-05, + "loss": 0.8131704330444336, + "step": 414 + }, + { + "epoch": 0.12136277233513672, + "grad_norm": 1.490804798338551, + "learning_rate": 1.9993941969239284e-05, + "loss": 0.8257562518119812, + "step": 415 + }, + { + "epoch": 0.1216552127504021, + "grad_norm": 1.5541597255876767, + "learning_rate": 1.999377253959469e-05, + "loss": 0.7163048982620239, + "step": 416 + }, + { + "epoch": 0.1219476531656675, + "grad_norm": 1.590877283394053, + "learning_rate": 1.9993600773966528e-05, + "loss": 0.7216504812240601, + "step": 417 + }, + { + "epoch": 0.12224009358093288, + "grad_norm": 1.6748981575800963, + "learning_rate": 1.9993426672394945e-05, + "loss": 0.7831340432167053, + "step": 418 + }, + { + "epoch": 0.12253253399619828, + "grad_norm": 1.3976993960000088, + "learning_rate": 1.9993250234920638e-05, + "loss": 0.7675709128379822, + "step": 419 + }, + { + "epoch": 0.12282497441146366, + "grad_norm": 1.454911379398845, 
+ "learning_rate": 1.999307146158485e-05, + "loss": 0.8085238337516785, + "step": 420 + }, + { + "epoch": 0.12311741482672905, + "grad_norm": 1.2979608734451222, + "learning_rate": 1.9992890352429368e-05, + "loss": 0.735150933265686, + "step": 421 + }, + { + "epoch": 0.12340985524199444, + "grad_norm": 1.2046206432187132, + "learning_rate": 1.9992706907496523e-05, + "loss": 0.612186074256897, + "step": 422 + }, + { + "epoch": 0.12370229565725983, + "grad_norm": 1.364838486847665, + "learning_rate": 1.9992521126829194e-05, + "loss": 0.6636590957641602, + "step": 423 + }, + { + "epoch": 0.12399473607252522, + "grad_norm": 1.4068215451581474, + "learning_rate": 1.9992333010470806e-05, + "loss": 0.6814526319503784, + "step": 424 + }, + { + "epoch": 0.12428717648779061, + "grad_norm": 1.3620595505436823, + "learning_rate": 1.9992142558465335e-05, + "loss": 0.6940894722938538, + "step": 425 + }, + { + "epoch": 0.12457961690305601, + "grad_norm": 1.3427645949787534, + "learning_rate": 1.9991949770857294e-05, + "loss": 0.7485121488571167, + "step": 426 + }, + { + "epoch": 0.12487205731832139, + "grad_norm": 1.266832638558228, + "learning_rate": 1.9991754647691744e-05, + "loss": 0.5315885543823242, + "step": 427 + }, + { + "epoch": 0.1251644977335868, + "grad_norm": 1.2511757429133081, + "learning_rate": 1.9991557189014297e-05, + "loss": 0.7416529655456543, + "step": 428 + }, + { + "epoch": 0.12545693814885217, + "grad_norm": 1.4031357379707678, + "learning_rate": 1.9991357394871106e-05, + "loss": 0.7937026023864746, + "step": 429 + }, + { + "epoch": 0.12574937856411755, + "grad_norm": 1.3448962462478107, + "learning_rate": 1.9991155265308872e-05, + "loss": 0.7009662389755249, + "step": 430 + }, + { + "epoch": 0.12604181897938296, + "grad_norm": 1.3042132277590721, + "learning_rate": 1.999095080037484e-05, + "loss": 0.6577681303024292, + "step": 431 + }, + { + "epoch": 0.12633425939464835, + "grad_norm": 1.4036627734956777, + "learning_rate": 1.9990744000116808e-05, + 
"loss": 0.7372399568557739, + "step": 432 + }, + { + "epoch": 0.12662669980991373, + "grad_norm": 1.3819832545517663, + "learning_rate": 1.999053486458311e-05, + "loss": 0.5959814190864563, + "step": 433 + }, + { + "epoch": 0.1269191402251791, + "grad_norm": 1.424207998116027, + "learning_rate": 1.999032339382263e-05, + "loss": 0.6684107780456543, + "step": 434 + }, + { + "epoch": 0.12721158064044452, + "grad_norm": 1.7048493578408517, + "learning_rate": 1.99901095878848e-05, + "loss": 0.8837687373161316, + "step": 435 + }, + { + "epoch": 0.1275040210557099, + "grad_norm": 3.7468635382669717, + "learning_rate": 1.9989893446819594e-05, + "loss": 0.7128579616546631, + "step": 436 + }, + { + "epoch": 0.1277964614709753, + "grad_norm": 1.2617709714670788, + "learning_rate": 1.9989674970677533e-05, + "loss": 0.6634687185287476, + "step": 437 + }, + { + "epoch": 0.12808890188624067, + "grad_norm": 1.626814629507008, + "learning_rate": 1.998945415950969e-05, + "loss": 0.7866299152374268, + "step": 438 + }, + { + "epoch": 0.12838134230150608, + "grad_norm": 1.6912246432889755, + "learning_rate": 1.998923101336767e-05, + "loss": 0.8104820251464844, + "step": 439 + }, + { + "epoch": 0.12867378271677146, + "grad_norm": 1.3163679319076276, + "learning_rate": 1.9989005532303637e-05, + "loss": 0.6643097400665283, + "step": 440 + }, + { + "epoch": 0.12896622313203684, + "grad_norm": 1.304280975921877, + "learning_rate": 1.9988777716370293e-05, + "loss": 0.7663843631744385, + "step": 441 + }, + { + "epoch": 0.12925866354730223, + "grad_norm": 1.4275530439491644, + "learning_rate": 1.9988547565620896e-05, + "loss": 0.8831629753112793, + "step": 442 + }, + { + "epoch": 0.12955110396256764, + "grad_norm": 1.2581390355141424, + "learning_rate": 1.9988315080109233e-05, + "loss": 0.6889798045158386, + "step": 443 + }, + { + "epoch": 0.12984354437783302, + "grad_norm": 1.2589816711321935, + "learning_rate": 1.9988080259889652e-05, + "loss": 0.8173589706420898, + "step": 444 + }, + { + 
"epoch": 0.1301359847930984, + "grad_norm": 1.437216407920067, + "learning_rate": 1.998784310501704e-05, + "loss": 0.7444369196891785, + "step": 445 + }, + { + "epoch": 0.13042842520836379, + "grad_norm": 1.2527388287385341, + "learning_rate": 1.998760361554682e-05, + "loss": 0.6728573441505432, + "step": 446 + }, + { + "epoch": 0.1307208656236292, + "grad_norm": 1.4620149588082576, + "learning_rate": 1.998736179153499e-05, + "loss": 0.6398168802261353, + "step": 447 + }, + { + "epoch": 0.13101330603889458, + "grad_norm": 1.3925962417611275, + "learning_rate": 1.9987117633038063e-05, + "loss": 0.7367146015167236, + "step": 448 + }, + { + "epoch": 0.13130574645415996, + "grad_norm": 1.3497781950543108, + "learning_rate": 1.998687114011311e-05, + "loss": 0.7072159051895142, + "step": 449 + }, + { + "epoch": 0.13159818686942534, + "grad_norm": 1.402234544131691, + "learning_rate": 1.998662231281775e-05, + "loss": 0.7899993062019348, + "step": 450 + }, + { + "epoch": 0.13189062728469075, + "grad_norm": 1.4376114251018388, + "learning_rate": 1.9986371151210146e-05, + "loss": 0.7668592929840088, + "step": 451 + }, + { + "epoch": 0.13218306769995614, + "grad_norm": 1.3943197925338484, + "learning_rate": 1.9986117655349003e-05, + "loss": 0.7222825288772583, + "step": 452 + }, + { + "epoch": 0.13247550811522152, + "grad_norm": 1.2939952744587226, + "learning_rate": 1.9985861825293577e-05, + "loss": 0.7301540374755859, + "step": 453 + }, + { + "epoch": 0.1327679485304869, + "grad_norm": 1.174339392511722, + "learning_rate": 1.998560366110366e-05, + "loss": 0.6517907381057739, + "step": 454 + }, + { + "epoch": 0.1330603889457523, + "grad_norm": 1.5763167634786863, + "learning_rate": 1.99853431628396e-05, + "loss": 0.6889342069625854, + "step": 455 + }, + { + "epoch": 0.1333528293610177, + "grad_norm": 1.525770213874127, + "learning_rate": 1.9985080330562293e-05, + "loss": 0.6804303526878357, + "step": 456 + }, + { + "epoch": 0.13364526977628308, + "grad_norm": 
1.3944930335298842, + "learning_rate": 1.9984815164333163e-05, + "loss": 0.7699184417724609, + "step": 457 + }, + { + "epoch": 0.13393771019154846, + "grad_norm": 1.4886205672815649, + "learning_rate": 1.99845476642142e-05, + "loss": 0.7470533847808838, + "step": 458 + }, + { + "epoch": 0.13423015060681387, + "grad_norm": 1.251305257809984, + "learning_rate": 1.9984277830267927e-05, + "loss": 0.6689419746398926, + "step": 459 + }, + { + "epoch": 0.13452259102207925, + "grad_norm": 1.5088252817247363, + "learning_rate": 1.998400566255742e-05, + "loss": 0.6395387649536133, + "step": 460 + }, + { + "epoch": 0.13481503143734463, + "grad_norm": 1.3414013526988133, + "learning_rate": 1.9983731161146288e-05, + "loss": 0.7785208225250244, + "step": 461 + }, + { + "epoch": 0.13510747185261002, + "grad_norm": 1.2995640327613904, + "learning_rate": 1.9983454326098703e-05, + "loss": 0.6864018440246582, + "step": 462 + }, + { + "epoch": 0.13539991226787543, + "grad_norm": 1.424075352019454, + "learning_rate": 1.9983175157479366e-05, + "loss": 0.7201317548751831, + "step": 463 + }, + { + "epoch": 0.1356923526831408, + "grad_norm": 1.4977322356937255, + "learning_rate": 1.9982893655353534e-05, + "loss": 0.7128555774688721, + "step": 464 + }, + { + "epoch": 0.1359847930984062, + "grad_norm": 1.2421635772982216, + "learning_rate": 1.998260981978701e-05, + "loss": 0.7252457141876221, + "step": 465 + }, + { + "epoch": 0.1362772335136716, + "grad_norm": 1.472555101507684, + "learning_rate": 1.9982323650846137e-05, + "loss": 0.7453348636627197, + "step": 466 + }, + { + "epoch": 0.13656967392893699, + "grad_norm": 1.153602031844393, + "learning_rate": 1.9982035148597804e-05, + "loss": 0.6643078923225403, + "step": 467 + }, + { + "epoch": 0.13686211434420237, + "grad_norm": 1.280273878296217, + "learning_rate": 1.9981744313109445e-05, + "loss": 0.7249360084533691, + "step": 468 + }, + { + "epoch": 0.13715455475946775, + "grad_norm": 1.2363385614561972, + "learning_rate": 
1.9981451144449042e-05, + "loss": 0.8179303407669067, + "step": 469 + }, + { + "epoch": 0.13744699517473316, + "grad_norm": 1.1335812448130365, + "learning_rate": 1.9981155642685125e-05, + "loss": 0.6763637661933899, + "step": 470 + }, + { + "epoch": 0.13773943558999854, + "grad_norm": 1.4603088026603306, + "learning_rate": 1.998085780788676e-05, + "loss": 0.6684300303459167, + "step": 471 + }, + { + "epoch": 0.13803187600526393, + "grad_norm": 1.2670786265894947, + "learning_rate": 1.9980557640123566e-05, + "loss": 0.7251675128936768, + "step": 472 + }, + { + "epoch": 0.1383243164205293, + "grad_norm": 1.5269819113708596, + "learning_rate": 1.998025513946571e-05, + "loss": 0.7146456241607666, + "step": 473 + }, + { + "epoch": 0.13861675683579472, + "grad_norm": 1.2263952606430522, + "learning_rate": 1.9979950305983895e-05, + "loss": 0.7067978382110596, + "step": 474 + }, + { + "epoch": 0.1389091972510601, + "grad_norm": 1.2396761565289731, + "learning_rate": 1.9979643139749373e-05, + "loss": 0.7017637491226196, + "step": 475 + }, + { + "epoch": 0.13920163766632548, + "grad_norm": 1.397663972134979, + "learning_rate": 1.9979333640833947e-05, + "loss": 0.7511367201805115, + "step": 476 + }, + { + "epoch": 0.13949407808159087, + "grad_norm": 1.5675722536579784, + "learning_rate": 1.997902180930996e-05, + "loss": 0.8129127025604248, + "step": 477 + }, + { + "epoch": 0.13978651849685628, + "grad_norm": 1.3801608404871573, + "learning_rate": 1.9978707645250293e-05, + "loss": 0.7760868072509766, + "step": 478 + }, + { + "epoch": 0.14007895891212166, + "grad_norm": 1.2722362515735255, + "learning_rate": 1.9978391148728388e-05, + "loss": 0.5190733671188354, + "step": 479 + }, + { + "epoch": 0.14037139932738704, + "grad_norm": 1.4267690174722667, + "learning_rate": 1.9978072319818222e-05, + "loss": 0.759798526763916, + "step": 480 + }, + { + "epoch": 0.14066383974265242, + "grad_norm": 1.3594087764036291, + "learning_rate": 1.997775115859432e-05, + "loss": 
0.5750235319137573, + "step": 481 + }, + { + "epoch": 0.14095628015791783, + "grad_norm": 1.5288357817907694, + "learning_rate": 1.9977427665131748e-05, + "loss": 0.6837687492370605, + "step": 482 + }, + { + "epoch": 0.14124872057318322, + "grad_norm": 1.4085455647433316, + "learning_rate": 1.9977101839506123e-05, + "loss": 0.8774302005767822, + "step": 483 + }, + { + "epoch": 0.1415411609884486, + "grad_norm": 1.3951237263634118, + "learning_rate": 1.9976773681793605e-05, + "loss": 0.6447024345397949, + "step": 484 + }, + { + "epoch": 0.14183360140371398, + "grad_norm": 1.3077152366881364, + "learning_rate": 1.99764431920709e-05, + "loss": 0.6212965250015259, + "step": 485 + }, + { + "epoch": 0.1421260418189794, + "grad_norm": 1.7246179492768339, + "learning_rate": 1.9976110370415257e-05, + "loss": 0.7606823444366455, + "step": 486 + }, + { + "epoch": 0.14241848223424478, + "grad_norm": 1.6009360634049956, + "learning_rate": 1.9975775216904468e-05, + "loss": 0.792106032371521, + "step": 487 + }, + { + "epoch": 0.14271092264951016, + "grad_norm": 1.526072177508378, + "learning_rate": 1.997543773161688e-05, + "loss": 0.828373372554779, + "step": 488 + }, + { + "epoch": 0.14300336306477554, + "grad_norm": 1.2193329399673667, + "learning_rate": 1.997509791463137e-05, + "loss": 0.7148743867874146, + "step": 489 + }, + { + "epoch": 0.14329580348004095, + "grad_norm": 1.617921839516307, + "learning_rate": 1.9974755766027372e-05, + "loss": 0.6566554307937622, + "step": 490 + }, + { + "epoch": 0.14358824389530633, + "grad_norm": 1.2041404679997165, + "learning_rate": 1.9974411285884865e-05, + "loss": 0.7833706140518188, + "step": 491 + }, + { + "epoch": 0.14388068431057172, + "grad_norm": 1.3715764541616051, + "learning_rate": 1.997406447428436e-05, + "loss": 0.7661226987838745, + "step": 492 + }, + { + "epoch": 0.1441731247258371, + "grad_norm": 1.2510873907811162, + "learning_rate": 1.9973715331306935e-05, + "loss": 0.5403884649276733, + "step": 493 + }, + { + "epoch": 
0.1444655651411025, + "grad_norm": 1.417853529635827, + "learning_rate": 1.9973363857034183e-05, + "loss": 0.7744722366333008, + "step": 494 + }, + { + "epoch": 0.1447580055563679, + "grad_norm": 1.7245567814035911, + "learning_rate": 1.9973010051548274e-05, + "loss": 0.9036808013916016, + "step": 495 + }, + { + "epoch": 0.14505044597163327, + "grad_norm": 1.2752769917707012, + "learning_rate": 1.9972653914931902e-05, + "loss": 0.6952388286590576, + "step": 496 + }, + { + "epoch": 0.14534288638689866, + "grad_norm": 1.5454177465030166, + "learning_rate": 1.9972295447268312e-05, + "loss": 0.7818677425384521, + "step": 497 + }, + { + "epoch": 0.14563532680216407, + "grad_norm": 1.2104336195623258, + "learning_rate": 1.9971934648641294e-05, + "loss": 0.8197327256202698, + "step": 498 + }, + { + "epoch": 0.14592776721742945, + "grad_norm": 1.1376920899270277, + "learning_rate": 1.997157151913518e-05, + "loss": 0.5898807644844055, + "step": 499 + }, + { + "epoch": 0.14622020763269483, + "grad_norm": 1.6480348319290024, + "learning_rate": 1.9971206058834857e-05, + "loss": 0.7980005741119385, + "step": 500 + }, + { + "epoch": 0.14651264804796021, + "grad_norm": 1.2480430258500308, + "learning_rate": 1.997083826782574e-05, + "loss": 0.7161837816238403, + "step": 501 + }, + { + "epoch": 0.14680508846322562, + "grad_norm": 1.436852590534495, + "learning_rate": 1.99704681461938e-05, + "loss": 0.7657293081283569, + "step": 502 + }, + { + "epoch": 0.147097528878491, + "grad_norm": 1.256627894457605, + "learning_rate": 1.9970095694025553e-05, + "loss": 0.6638028621673584, + "step": 503 + }, + { + "epoch": 0.1473899692937564, + "grad_norm": 1.344090583049545, + "learning_rate": 1.996972091140806e-05, + "loss": 0.8759262561798096, + "step": 504 + }, + { + "epoch": 0.1476824097090218, + "grad_norm": 1.1099770302505587, + "learning_rate": 1.9969343798428916e-05, + "loss": 0.6686065196990967, + "step": 505 + }, + { + "epoch": 0.14797485012428718, + "grad_norm": 1.5672815870081807, + 
"learning_rate": 1.9968964355176276e-05, + "loss": 0.7900313138961792, + "step": 506 + }, + { + "epoch": 0.14826729053955257, + "grad_norm": 1.3116088800480374, + "learning_rate": 1.996858258173883e-05, + "loss": 0.699286937713623, + "step": 507 + }, + { + "epoch": 0.14855973095481795, + "grad_norm": 1.149004701425465, + "learning_rate": 1.9968198478205817e-05, + "loss": 0.6613560914993286, + "step": 508 + }, + { + "epoch": 0.14885217137008336, + "grad_norm": 1.471579106109443, + "learning_rate": 1.9967812044667014e-05, + "loss": 0.8586459755897522, + "step": 509 + }, + { + "epoch": 0.14914461178534874, + "grad_norm": 1.5307049334622256, + "learning_rate": 1.9967423281212754e-05, + "loss": 0.6620850563049316, + "step": 510 + }, + { + "epoch": 0.14943705220061412, + "grad_norm": 1.6192191406380994, + "learning_rate": 1.9967032187933905e-05, + "loss": 0.7991048097610474, + "step": 511 + }, + { + "epoch": 0.1497294926158795, + "grad_norm": 1.2792732447271702, + "learning_rate": 1.9966638764921882e-05, + "loss": 0.7301167845726013, + "step": 512 + }, + { + "epoch": 0.15002193303114492, + "grad_norm": 1.244527824938295, + "learning_rate": 1.9966243012268645e-05, + "loss": 0.6470698118209839, + "step": 513 + }, + { + "epoch": 0.1503143734464103, + "grad_norm": 1.3436689137677134, + "learning_rate": 1.99658449300667e-05, + "loss": 0.5766996145248413, + "step": 514 + }, + { + "epoch": 0.15060681386167568, + "grad_norm": 1.2104018154852028, + "learning_rate": 1.9965444518409098e-05, + "loss": 0.6365845203399658, + "step": 515 + }, + { + "epoch": 0.15089925427694106, + "grad_norm": 1.6995742833660814, + "learning_rate": 1.9965041777389426e-05, + "loss": 0.6945745944976807, + "step": 516 + }, + { + "epoch": 0.15119169469220647, + "grad_norm": 1.6841525179657149, + "learning_rate": 1.996463670710183e-05, + "loss": 0.802032470703125, + "step": 517 + }, + { + "epoch": 0.15148413510747186, + "grad_norm": 1.4666130226044234, + "learning_rate": 1.996422930764099e-05, + "loss": 
0.7429964542388916, + "step": 518 + }, + { + "epoch": 0.15177657552273724, + "grad_norm": 1.5508181233008433, + "learning_rate": 1.9963819579102134e-05, + "loss": 0.6462180614471436, + "step": 519 + }, + { + "epoch": 0.15206901593800262, + "grad_norm": 1.3226128228565077, + "learning_rate": 1.996340752158103e-05, + "loss": 0.888412594795227, + "step": 520 + }, + { + "epoch": 0.15236145635326803, + "grad_norm": 1.386680099002057, + "learning_rate": 1.9962993135173996e-05, + "loss": 0.6734700798988342, + "step": 521 + }, + { + "epoch": 0.15265389676853341, + "grad_norm": 1.385050142293082, + "learning_rate": 1.9962576419977894e-05, + "loss": 0.6951336860656738, + "step": 522 + }, + { + "epoch": 0.1529463371837988, + "grad_norm": 1.26022036147928, + "learning_rate": 1.9962157376090126e-05, + "loss": 0.7130852341651917, + "step": 523 + }, + { + "epoch": 0.15323877759906418, + "grad_norm": 1.4353500802059385, + "learning_rate": 1.9961736003608646e-05, + "loss": 0.8322055339813232, + "step": 524 + }, + { + "epoch": 0.1535312180143296, + "grad_norm": 1.2563635075596429, + "learning_rate": 1.996131230263194e-05, + "loss": 0.7031791806221008, + "step": 525 + }, + { + "epoch": 0.15382365842959497, + "grad_norm": 1.3606474846075662, + "learning_rate": 1.9960886273259052e-05, + "loss": 0.8268769979476929, + "step": 526 + }, + { + "epoch": 0.15411609884486036, + "grad_norm": 1.048782156231717, + "learning_rate": 1.9960457915589557e-05, + "loss": 0.6843237280845642, + "step": 527 + }, + { + "epoch": 0.15440853926012574, + "grad_norm": 1.29845256190474, + "learning_rate": 1.9960027229723585e-05, + "loss": 0.8267906904220581, + "step": 528 + }, + { + "epoch": 0.15470097967539115, + "grad_norm": 1.502232175088585, + "learning_rate": 1.9959594215761807e-05, + "loss": 0.8259629011154175, + "step": 529 + }, + { + "epoch": 0.15499342009065653, + "grad_norm": 1.3618507954167858, + "learning_rate": 1.9959158873805435e-05, + "loss": 0.654765248298645, + "step": 530 + }, + { + "epoch": 
0.1552858605059219, + "grad_norm": 1.3762650099604372, + "learning_rate": 1.9958721203956233e-05, + "loss": 0.7841149568557739, + "step": 531 + }, + { + "epoch": 0.1555783009211873, + "grad_norm": 1.131527995151024, + "learning_rate": 1.9958281206316497e-05, + "loss": 0.7364583015441895, + "step": 532 + }, + { + "epoch": 0.1558707413364527, + "grad_norm": 1.2428392866727909, + "learning_rate": 1.9957838880989076e-05, + "loss": 0.7985796928405762, + "step": 533 + }, + { + "epoch": 0.1561631817517181, + "grad_norm": 1.7674168807742325, + "learning_rate": 1.9957394228077363e-05, + "loss": 0.8432350754737854, + "step": 534 + }, + { + "epoch": 0.15645562216698347, + "grad_norm": 1.409652061557183, + "learning_rate": 1.995694724768529e-05, + "loss": 0.713615894317627, + "step": 535 + }, + { + "epoch": 0.15674806258224885, + "grad_norm": 1.3406073565001748, + "learning_rate": 1.9956497939917336e-05, + "loss": 0.6472936868667603, + "step": 536 + }, + { + "epoch": 0.15704050299751426, + "grad_norm": 1.4828550722777096, + "learning_rate": 1.9956046304878528e-05, + "loss": 0.7963594198226929, + "step": 537 + }, + { + "epoch": 0.15733294341277965, + "grad_norm": 1.3875627998599316, + "learning_rate": 1.9955592342674427e-05, + "loss": 0.8043302893638611, + "step": 538 + }, + { + "epoch": 0.15762538382804503, + "grad_norm": 1.3187786308741334, + "learning_rate": 1.995513605341115e-05, + "loss": 0.6277294754981995, + "step": 539 + }, + { + "epoch": 0.1579178242433104, + "grad_norm": 1.2577326193858611, + "learning_rate": 1.9954677437195345e-05, + "loss": 0.569086492061615, + "step": 540 + }, + { + "epoch": 0.15821026465857582, + "grad_norm": 1.4002846512494251, + "learning_rate": 1.9954216494134217e-05, + "loss": 0.7694308757781982, + "step": 541 + }, + { + "epoch": 0.1585027050738412, + "grad_norm": 1.2602961243105442, + "learning_rate": 1.9953753224335504e-05, + "loss": 0.7782721519470215, + "step": 542 + }, + { + "epoch": 0.1587951454891066, + "grad_norm": 1.4115021596903525, 
+ "learning_rate": 1.9953287627907498e-05, + "loss": 0.6231539249420166, + "step": 543 + }, + { + "epoch": 0.159087585904372, + "grad_norm": 1.6469299670076099, + "learning_rate": 1.9952819704959022e-05, + "loss": 0.6431725025177002, + "step": 544 + }, + { + "epoch": 0.15938002631963738, + "grad_norm": 1.3041234892791729, + "learning_rate": 1.9952349455599455e-05, + "loss": 0.7062366008758545, + "step": 545 + }, + { + "epoch": 0.15967246673490276, + "grad_norm": 1.3521393089140767, + "learning_rate": 1.9951876879938716e-05, + "loss": 0.5376520156860352, + "step": 546 + }, + { + "epoch": 0.15996490715016815, + "grad_norm": 1.385911158215194, + "learning_rate": 1.9951401978087267e-05, + "loss": 0.7693386077880859, + "step": 547 + }, + { + "epoch": 0.16025734756543356, + "grad_norm": 1.7168051396485104, + "learning_rate": 1.9950924750156107e-05, + "loss": 0.6735765337944031, + "step": 548 + }, + { + "epoch": 0.16054978798069894, + "grad_norm": 1.1186135901816567, + "learning_rate": 1.995044519625679e-05, + "loss": 0.5333552360534668, + "step": 549 + }, + { + "epoch": 0.16084222839596432, + "grad_norm": 1.2149038323564916, + "learning_rate": 1.994996331650141e-05, + "loss": 0.6694493293762207, + "step": 550 + }, + { + "epoch": 0.1611346688112297, + "grad_norm": 1.3895443962170193, + "learning_rate": 1.9949479111002596e-05, + "loss": 0.6056857109069824, + "step": 551 + }, + { + "epoch": 0.1614271092264951, + "grad_norm": 1.4990214566868623, + "learning_rate": 1.9948992579873538e-05, + "loss": 0.7174896001815796, + "step": 552 + }, + { + "epoch": 0.1617195496417605, + "grad_norm": 1.4417886999069138, + "learning_rate": 1.9948503723227954e-05, + "loss": 0.9150595664978027, + "step": 553 + }, + { + "epoch": 0.16201199005702588, + "grad_norm": 1.475120009674046, + "learning_rate": 1.9948012541180116e-05, + "loss": 0.7418098449707031, + "step": 554 + }, + { + "epoch": 0.16230443047229126, + "grad_norm": 1.3802668140870205, + "learning_rate": 1.9947519033844828e-05, + "loss": 
0.6937648057937622, + "step": 555 + }, + { + "epoch": 0.16259687088755667, + "grad_norm": 1.198267913228467, + "learning_rate": 1.9947023201337448e-05, + "loss": 0.628747820854187, + "step": 556 + }, + { + "epoch": 0.16288931130282205, + "grad_norm": 1.3166666693196283, + "learning_rate": 1.9946525043773875e-05, + "loss": 0.6252326965332031, + "step": 557 + }, + { + "epoch": 0.16318175171808744, + "grad_norm": 1.4085830915284543, + "learning_rate": 1.9946024561270547e-05, + "loss": 0.6243278980255127, + "step": 558 + }, + { + "epoch": 0.16347419213335282, + "grad_norm": 1.515326552036181, + "learning_rate": 1.994552175394445e-05, + "loss": 0.7613602876663208, + "step": 559 + }, + { + "epoch": 0.16376663254861823, + "grad_norm": 1.4167210376939137, + "learning_rate": 1.9945016621913115e-05, + "loss": 0.7680152654647827, + "step": 560 + }, + { + "epoch": 0.1640590729638836, + "grad_norm": 1.4413485992010024, + "learning_rate": 1.9944509165294614e-05, + "loss": 0.6926383972167969, + "step": 561 + }, + { + "epoch": 0.164351513379149, + "grad_norm": 1.3901402403092062, + "learning_rate": 1.9943999384207556e-05, + "loss": 0.6822172403335571, + "step": 562 + }, + { + "epoch": 0.16464395379441438, + "grad_norm": 1.1253426305557543, + "learning_rate": 1.99434872787711e-05, + "loss": 0.6533722281455994, + "step": 563 + }, + { + "epoch": 0.1649363942096798, + "grad_norm": 1.3434183662540475, + "learning_rate": 1.9942972849104955e-05, + "loss": 0.6754113435745239, + "step": 564 + }, + { + "epoch": 0.16522883462494517, + "grad_norm": 1.3906070154993262, + "learning_rate": 1.9942456095329357e-05, + "loss": 0.5585163235664368, + "step": 565 + }, + { + "epoch": 0.16552127504021055, + "grad_norm": 1.18702583603665, + "learning_rate": 1.99419370175651e-05, + "loss": 0.6268453598022461, + "step": 566 + }, + { + "epoch": 0.16581371545547594, + "grad_norm": 1.3564219134919553, + "learning_rate": 1.994141561593351e-05, + "loss": 0.6508245468139648, + "step": 567 + }, + { + "epoch": 
0.16610615587074135, + "grad_norm": 1.353057425024783, + "learning_rate": 1.9940891890556468e-05, + "loss": 0.7337379455566406, + "step": 568 + }, + { + "epoch": 0.16639859628600673, + "grad_norm": 1.3764723902611744, + "learning_rate": 1.9940365841556385e-05, + "loss": 0.7888853549957275, + "step": 569 + }, + { + "epoch": 0.1666910367012721, + "grad_norm": 1.5384301744775797, + "learning_rate": 1.993983746905623e-05, + "loss": 0.777199923992157, + "step": 570 + }, + { + "epoch": 0.1669834771165375, + "grad_norm": 1.5194907821323576, + "learning_rate": 1.9939306773179498e-05, + "loss": 0.761531412601471, + "step": 571 + }, + { + "epoch": 0.1672759175318029, + "grad_norm": 1.5063040441270878, + "learning_rate": 1.993877375405024e-05, + "loss": 0.7060664296150208, + "step": 572 + }, + { + "epoch": 0.16756835794706829, + "grad_norm": 1.472994627130685, + "learning_rate": 1.9938238411793045e-05, + "loss": 0.6797431707382202, + "step": 573 + }, + { + "epoch": 0.16786079836233367, + "grad_norm": 1.3131930617818641, + "learning_rate": 1.9937700746533048e-05, + "loss": 0.7202910780906677, + "step": 574 + }, + { + "epoch": 0.16815323877759905, + "grad_norm": 1.198711592546953, + "learning_rate": 1.9937160758395923e-05, + "loss": 0.7241546511650085, + "step": 575 + }, + { + "epoch": 0.16844567919286446, + "grad_norm": 1.3694786109804489, + "learning_rate": 1.993661844750789e-05, + "loss": 0.7055338621139526, + "step": 576 + }, + { + "epoch": 0.16873811960812984, + "grad_norm": 1.4237978283864139, + "learning_rate": 1.993607381399571e-05, + "loss": 0.6973986625671387, + "step": 577 + }, + { + "epoch": 0.16903056002339523, + "grad_norm": 1.1715457050926792, + "learning_rate": 1.993552685798669e-05, + "loss": 0.693436861038208, + "step": 578 + }, + { + "epoch": 0.1693230004386606, + "grad_norm": 1.5585764488361307, + "learning_rate": 1.9934977579608676e-05, + "loss": 0.6687765121459961, + "step": 579 + }, + { + "epoch": 0.16961544085392602, + "grad_norm": 1.3798925262407884, + 
"learning_rate": 1.9934425978990057e-05, + "loss": 0.7776578068733215, + "step": 580 + }, + { + "epoch": 0.1699078812691914, + "grad_norm": 1.3168335454892666, + "learning_rate": 1.9933872056259768e-05, + "loss": 0.6914045810699463, + "step": 581 + }, + { + "epoch": 0.17020032168445678, + "grad_norm": 1.4649859185166105, + "learning_rate": 1.9933315811547283e-05, + "loss": 0.8005306720733643, + "step": 582 + }, + { + "epoch": 0.1704927620997222, + "grad_norm": 1.3952257625848015, + "learning_rate": 1.9932757244982625e-05, + "loss": 0.6936507225036621, + "step": 583 + }, + { + "epoch": 0.17078520251498758, + "grad_norm": 1.157795409448355, + "learning_rate": 1.9932196356696353e-05, + "loss": 0.6915504932403564, + "step": 584 + }, + { + "epoch": 0.17107764293025296, + "grad_norm": 1.4153568154846778, + "learning_rate": 1.9931633146819573e-05, + "loss": 0.7583723664283752, + "step": 585 + }, + { + "epoch": 0.17137008334551834, + "grad_norm": 1.2959976429359619, + "learning_rate": 1.9931067615483927e-05, + "loss": 0.7097266912460327, + "step": 586 + }, + { + "epoch": 0.17166252376078375, + "grad_norm": 1.5238633829769868, + "learning_rate": 1.9930499762821608e-05, + "loss": 0.7586667537689209, + "step": 587 + }, + { + "epoch": 0.17195496417604914, + "grad_norm": 1.3505202775838374, + "learning_rate": 1.9929929588965352e-05, + "loss": 0.7043411731719971, + "step": 588 + }, + { + "epoch": 0.17224740459131452, + "grad_norm": 1.3150009626714483, + "learning_rate": 1.9929357094048425e-05, + "loss": 0.8502261638641357, + "step": 589 + }, + { + "epoch": 0.1725398450065799, + "grad_norm": 1.3901300269374877, + "learning_rate": 1.992878227820465e-05, + "loss": 0.7196993827819824, + "step": 590 + }, + { + "epoch": 0.1728322854218453, + "grad_norm": 1.5475395216492736, + "learning_rate": 1.9928205141568388e-05, + "loss": 0.6783720850944519, + "step": 591 + }, + { + "epoch": 0.1731247258371107, + "grad_norm": 1.1911883688546063, + "learning_rate": 1.9927625684274534e-05, + "loss": 
0.7128307819366455, + "step": 592 + }, + { + "epoch": 0.17341716625237608, + "grad_norm": 1.226507853409212, + "learning_rate": 1.9927043906458538e-05, + "loss": 0.7289423942565918, + "step": 593 + }, + { + "epoch": 0.17370960666764146, + "grad_norm": 1.298942183876381, + "learning_rate": 1.992645980825639e-05, + "loss": 0.6306120157241821, + "step": 594 + }, + { + "epoch": 0.17400204708290687, + "grad_norm": 1.2456494719411173, + "learning_rate": 1.9925873389804614e-05, + "loss": 0.7910655736923218, + "step": 595 + }, + { + "epoch": 0.17429448749817225, + "grad_norm": 1.267940212117298, + "learning_rate": 1.9925284651240282e-05, + "loss": 0.6075282096862793, + "step": 596 + }, + { + "epoch": 0.17458692791343763, + "grad_norm": 1.251937615037275, + "learning_rate": 1.992469359270101e-05, + "loss": 0.6270443201065063, + "step": 597 + }, + { + "epoch": 0.17487936832870302, + "grad_norm": 1.3200413033724028, + "learning_rate": 1.9924100214324955e-05, + "loss": 0.6487830877304077, + "step": 598 + }, + { + "epoch": 0.17517180874396843, + "grad_norm": 1.45237431858529, + "learning_rate": 1.9923504516250814e-05, + "loss": 0.5986843705177307, + "step": 599 + }, + { + "epoch": 0.1754642491592338, + "grad_norm": 1.2191897136056242, + "learning_rate": 1.992290649861783e-05, + "loss": 0.7734183073043823, + "step": 600 + }, + { + "epoch": 0.1757566895744992, + "grad_norm": 1.167414919229407, + "learning_rate": 1.9922306161565782e-05, + "loss": 0.5784964561462402, + "step": 601 + }, + { + "epoch": 0.17604912998976457, + "grad_norm": 1.501564665297397, + "learning_rate": 1.9921703505234995e-05, + "loss": 0.8034321069717407, + "step": 602 + }, + { + "epoch": 0.17634157040502998, + "grad_norm": 1.314622713247698, + "learning_rate": 1.992109852976634e-05, + "loss": 0.8153722882270813, + "step": 603 + }, + { + "epoch": 0.17663401082029537, + "grad_norm": 1.877065501880657, + "learning_rate": 1.992049123530123e-05, + "loss": 0.7293002605438232, + "step": 604 + }, + { + "epoch": 
0.17692645123556075, + "grad_norm": 1.514670729590329, + "learning_rate": 1.9919881621981606e-05, + "loss": 0.7108439207077026, + "step": 605 + }, + { + "epoch": 0.17721889165082613, + "grad_norm": 1.4748189889445555, + "learning_rate": 1.9919269689949968e-05, + "loss": 0.7581946849822998, + "step": 606 + }, + { + "epoch": 0.17751133206609154, + "grad_norm": 1.2337358872247315, + "learning_rate": 1.991865543934935e-05, + "loss": 0.6821258068084717, + "step": 607 + }, + { + "epoch": 0.17780377248135693, + "grad_norm": 1.2791852908008183, + "learning_rate": 1.991803887032333e-05, + "loss": 0.7116109728813171, + "step": 608 + }, + { + "epoch": 0.1780962128966223, + "grad_norm": 1.2208883706731903, + "learning_rate": 1.9917419983016025e-05, + "loss": 0.6680186986923218, + "step": 609 + }, + { + "epoch": 0.1783886533118877, + "grad_norm": 1.3494621179320938, + "learning_rate": 1.99167987775721e-05, + "loss": 0.6763704419136047, + "step": 610 + }, + { + "epoch": 0.1786810937271531, + "grad_norm": 1.4133729383070797, + "learning_rate": 1.9916175254136755e-05, + "loss": 0.756158709526062, + "step": 611 + }, + { + "epoch": 0.17897353414241848, + "grad_norm": 1.4652489049885558, + "learning_rate": 1.9915549412855734e-05, + "loss": 0.600861132144928, + "step": 612 + }, + { + "epoch": 0.17926597455768387, + "grad_norm": 1.4731466609399737, + "learning_rate": 1.991492125387533e-05, + "loss": 0.6927047967910767, + "step": 613 + }, + { + "epoch": 0.17955841497294925, + "grad_norm": 1.6937006516406405, + "learning_rate": 1.9914290777342362e-05, + "loss": 0.6908516883850098, + "step": 614 + }, + { + "epoch": 0.17985085538821466, + "grad_norm": 1.4155029526585772, + "learning_rate": 1.9913657983404206e-05, + "loss": 0.7968926429748535, + "step": 615 + }, + { + "epoch": 0.18014329580348004, + "grad_norm": 1.1016955037712495, + "learning_rate": 1.9913022872208773e-05, + "loss": 0.6035164594650269, + "step": 616 + }, + { + "epoch": 0.18043573621874542, + "grad_norm": 
1.4061380717551752, + "learning_rate": 1.9912385443904518e-05, + "loss": 0.6733090877532959, + "step": 617 + }, + { + "epoch": 0.1807281766340108, + "grad_norm": 2.2181842231696645, + "learning_rate": 1.9911745698640426e-05, + "loss": 0.6968391537666321, + "step": 618 + }, + { + "epoch": 0.18102061704927622, + "grad_norm": 1.2136657361400474, + "learning_rate": 1.991110363656605e-05, + "loss": 0.7126309871673584, + "step": 619 + }, + { + "epoch": 0.1813130574645416, + "grad_norm": 1.5461052617008268, + "learning_rate": 1.9910459257831455e-05, + "loss": 0.8604997396469116, + "step": 620 + }, + { + "epoch": 0.18160549787980698, + "grad_norm": 1.4378853015325992, + "learning_rate": 1.9909812562587266e-05, + "loss": 0.674797534942627, + "step": 621 + }, + { + "epoch": 0.1818979382950724, + "grad_norm": 1.4538548213207452, + "learning_rate": 1.9909163550984644e-05, + "loss": 0.7439107894897461, + "step": 622 + }, + { + "epoch": 0.18219037871033777, + "grad_norm": 1.4410118469577065, + "learning_rate": 1.9908512223175293e-05, + "loss": 0.7137601971626282, + "step": 623 + }, + { + "epoch": 0.18248281912560316, + "grad_norm": 1.286772355171783, + "learning_rate": 1.9907858579311448e-05, + "loss": 0.6395502090454102, + "step": 624 + }, + { + "epoch": 0.18277525954086854, + "grad_norm": 1.7411485569290241, + "learning_rate": 1.9907202619545905e-05, + "loss": 0.6747852563858032, + "step": 625 + }, + { + "epoch": 0.18306769995613395, + "grad_norm": 1.3891342500470065, + "learning_rate": 1.9906544344031986e-05, + "loss": 0.6995632648468018, + "step": 626 + }, + { + "epoch": 0.18336014037139933, + "grad_norm": 1.3916150531596103, + "learning_rate": 1.9905883752923557e-05, + "loss": 0.7006711363792419, + "step": 627 + }, + { + "epoch": 0.18365258078666472, + "grad_norm": 1.189158109720048, + "learning_rate": 1.990522084637503e-05, + "loss": 0.660778820514679, + "step": 628 + }, + { + "epoch": 0.1839450212019301, + "grad_norm": 1.258003733155152, + "learning_rate": 
1.9904555624541362e-05, + "loss": 0.5826665163040161, + "step": 629 + }, + { + "epoch": 0.1842374616171955, + "grad_norm": 1.5565251427155322, + "learning_rate": 1.990388808757803e-05, + "loss": 0.8064266443252563, + "step": 630 + }, + { + "epoch": 0.1845299020324609, + "grad_norm": 1.3066621609893527, + "learning_rate": 1.9903218235641078e-05, + "loss": 0.6856451034545898, + "step": 631 + }, + { + "epoch": 0.18482234244772627, + "grad_norm": 1.325447510265949, + "learning_rate": 1.9902546068887076e-05, + "loss": 0.6423801183700562, + "step": 632 + }, + { + "epoch": 0.18511478286299166, + "grad_norm": 1.252931011950935, + "learning_rate": 1.9901871587473135e-05, + "loss": 0.6903005242347717, + "step": 633 + }, + { + "epoch": 0.18540722327825707, + "grad_norm": 1.2981623515351661, + "learning_rate": 1.9901194791556916e-05, + "loss": 0.636742115020752, + "step": 634 + }, + { + "epoch": 0.18569966369352245, + "grad_norm": 1.154196245030106, + "learning_rate": 1.9900515681296614e-05, + "loss": 0.6541105508804321, + "step": 635 + }, + { + "epoch": 0.18599210410878783, + "grad_norm": 1.2463484642096474, + "learning_rate": 1.9899834256850973e-05, + "loss": 0.7026485204696655, + "step": 636 + }, + { + "epoch": 0.1862845445240532, + "grad_norm": 1.2626549460002545, + "learning_rate": 1.989915051837926e-05, + "loss": 0.6232702732086182, + "step": 637 + }, + { + "epoch": 0.18657698493931862, + "grad_norm": 1.222405284140282, + "learning_rate": 1.9898464466041306e-05, + "loss": 0.5971217155456543, + "step": 638 + }, + { + "epoch": 0.186869425354584, + "grad_norm": 1.228365693552395, + "learning_rate": 1.9897776099997463e-05, + "loss": 0.7942230701446533, + "step": 639 + }, + { + "epoch": 0.1871618657698494, + "grad_norm": 1.4547764939553913, + "learning_rate": 1.9897085420408637e-05, + "loss": 0.6578072309494019, + "step": 640 + }, + { + "epoch": 0.18745430618511477, + "grad_norm": 1.3118111344764942, + "learning_rate": 1.989639242743627e-05, + "loss": 0.6928422451019287, + 
"step": 641 + }, + { + "epoch": 0.18774674660038018, + "grad_norm": 1.4232777703090678, + "learning_rate": 1.9895697121242346e-05, + "loss": 0.7656213641166687, + "step": 642 + }, + { + "epoch": 0.18803918701564556, + "grad_norm": 1.3841907158773847, + "learning_rate": 1.9894999501989383e-05, + "loss": 0.6540038585662842, + "step": 643 + }, + { + "epoch": 0.18833162743091095, + "grad_norm": 1.5637672668766274, + "learning_rate": 1.989429956984045e-05, + "loss": 0.707741379737854, + "step": 644 + }, + { + "epoch": 0.18862406784617633, + "grad_norm": 1.2389494128425964, + "learning_rate": 1.9893597324959156e-05, + "loss": 0.6191326379776001, + "step": 645 + }, + { + "epoch": 0.18891650826144174, + "grad_norm": 1.2174290538744046, + "learning_rate": 1.9892892767509634e-05, + "loss": 0.616736114025116, + "step": 646 + }, + { + "epoch": 0.18920894867670712, + "grad_norm": 1.4366227278982104, + "learning_rate": 1.989218589765658e-05, + "loss": 0.803301215171814, + "step": 647 + }, + { + "epoch": 0.1895013890919725, + "grad_norm": 1.2775653707157333, + "learning_rate": 1.989147671556522e-05, + "loss": 0.6528021097183228, + "step": 648 + }, + { + "epoch": 0.1897938295072379, + "grad_norm": 1.5463247112798635, + "learning_rate": 1.9890765221401314e-05, + "loss": 0.6966919898986816, + "step": 649 + }, + { + "epoch": 0.1900862699225033, + "grad_norm": 1.2768484224289256, + "learning_rate": 1.9890051415331178e-05, + "loss": 0.7223595380783081, + "step": 650 + }, + { + "epoch": 0.19037871033776868, + "grad_norm": 1.404271714764208, + "learning_rate": 1.9889335297521656e-05, + "loss": 0.6727452278137207, + "step": 651 + }, + { + "epoch": 0.19067115075303406, + "grad_norm": 1.5662163632688932, + "learning_rate": 1.988861686814014e-05, + "loss": 0.7008258104324341, + "step": 652 + }, + { + "epoch": 0.19096359116829945, + "grad_norm": 1.3756400508505757, + "learning_rate": 1.988789612735455e-05, + "loss": 0.7624703049659729, + "step": 653 + }, + { + "epoch": 0.19125603158356486, + 
"grad_norm": 1.4133612106119275, + "learning_rate": 1.988717307533336e-05, + "loss": 0.6813088655471802, + "step": 654 + }, + { + "epoch": 0.19154847199883024, + "grad_norm": 1.1919173127519105, + "learning_rate": 1.988644771224558e-05, + "loss": 0.5401284694671631, + "step": 655 + }, + { + "epoch": 0.19184091241409562, + "grad_norm": 1.4613018451006843, + "learning_rate": 1.9885720038260756e-05, + "loss": 0.6805379986763, + "step": 656 + }, + { + "epoch": 0.19213335282936103, + "grad_norm": 1.5412845974712732, + "learning_rate": 1.9884990053548982e-05, + "loss": 0.6449974775314331, + "step": 657 + }, + { + "epoch": 0.19242579324462641, + "grad_norm": 1.3481077932409014, + "learning_rate": 1.988425775828088e-05, + "loss": 0.6940032839775085, + "step": 658 + }, + { + "epoch": 0.1927182336598918, + "grad_norm": 1.3088210596354761, + "learning_rate": 1.9883523152627626e-05, + "loss": 0.7089565396308899, + "step": 659 + }, + { + "epoch": 0.19301067407515718, + "grad_norm": 1.3865316758332553, + "learning_rate": 1.9882786236760932e-05, + "loss": 0.7508438229560852, + "step": 660 + }, + { + "epoch": 0.1933031144904226, + "grad_norm": 1.6156320166139564, + "learning_rate": 1.988204701085304e-05, + "loss": 0.6828616261482239, + "step": 661 + }, + { + "epoch": 0.19359555490568797, + "grad_norm": 1.2372815991073003, + "learning_rate": 1.9881305475076744e-05, + "loss": 0.6652963161468506, + "step": 662 + }, + { + "epoch": 0.19388799532095335, + "grad_norm": 1.2410743539313074, + "learning_rate": 1.988056162960537e-05, + "loss": 0.6859447360038757, + "step": 663 + }, + { + "epoch": 0.19418043573621874, + "grad_norm": 1.4440746421071415, + "learning_rate": 1.9879815474612794e-05, + "loss": 0.693805992603302, + "step": 664 + }, + { + "epoch": 0.19447287615148415, + "grad_norm": 1.359257774367856, + "learning_rate": 1.987906701027342e-05, + "loss": 0.7028747200965881, + "step": 665 + }, + { + "epoch": 0.19476531656674953, + "grad_norm": 1.2833261279779522, + "learning_rate": 
1.9878316236762195e-05, + "loss": 0.7492112517356873, + "step": 666 + }, + { + "epoch": 0.1950577569820149, + "grad_norm": 1.2065346249489062, + "learning_rate": 1.9877563154254613e-05, + "loss": 0.5394963026046753, + "step": 667 + }, + { + "epoch": 0.1953501973972803, + "grad_norm": 1.1848542596539768, + "learning_rate": 1.98768077629267e-05, + "loss": 0.5185493230819702, + "step": 668 + }, + { + "epoch": 0.1956426378125457, + "grad_norm": 1.2600065416138704, + "learning_rate": 1.9876050062955027e-05, + "loss": 0.7279829382896423, + "step": 669 + }, + { + "epoch": 0.1959350782278111, + "grad_norm": 1.3533145550923509, + "learning_rate": 1.9875290054516692e-05, + "loss": 0.7437206506729126, + "step": 670 + }, + { + "epoch": 0.19622751864307647, + "grad_norm": 1.6022192807514979, + "learning_rate": 1.9874527737789358e-05, + "loss": 0.7294617891311646, + "step": 671 + }, + { + "epoch": 0.19651995905834185, + "grad_norm": 1.3433918645025815, + "learning_rate": 1.9873763112951198e-05, + "loss": 0.7710307240486145, + "step": 672 + }, + { + "epoch": 0.19681239947360726, + "grad_norm": 1.3797998364213817, + "learning_rate": 1.9872996180180947e-05, + "loss": 0.690025806427002, + "step": 673 + }, + { + "epoch": 0.19710483988887265, + "grad_norm": 1.2826936342217614, + "learning_rate": 1.9872226939657867e-05, + "loss": 0.6690589189529419, + "step": 674 + }, + { + "epoch": 0.19739728030413803, + "grad_norm": 1.31971712284742, + "learning_rate": 1.9871455391561764e-05, + "loss": 0.7587239742279053, + "step": 675 + }, + { + "epoch": 0.1976897207194034, + "grad_norm": 1.2583882254944232, + "learning_rate": 1.987068153607298e-05, + "loss": 0.8048006296157837, + "step": 676 + }, + { + "epoch": 0.19798216113466882, + "grad_norm": 1.4904938665104162, + "learning_rate": 1.9869905373372402e-05, + "loss": 0.721023678779602, + "step": 677 + }, + { + "epoch": 0.1982746015499342, + "grad_norm": 1.2975987405043754, + "learning_rate": 1.9869126903641457e-05, + "loss": 0.646798849105835, + 
"step": 678 + }, + { + "epoch": 0.1985670419651996, + "grad_norm": 1.2591898865565592, + "learning_rate": 1.9868346127062098e-05, + "loss": 0.597393274307251, + "step": 679 + }, + { + "epoch": 0.19885948238046497, + "grad_norm": 1.2773189541737207, + "learning_rate": 1.9867563043816836e-05, + "loss": 0.8619129657745361, + "step": 680 + }, + { + "epoch": 0.19915192279573038, + "grad_norm": 1.2343587826225086, + "learning_rate": 1.986677765408871e-05, + "loss": 0.5391764640808105, + "step": 681 + }, + { + "epoch": 0.19944436321099576, + "grad_norm": 1.360221019641669, + "learning_rate": 1.9865989958061297e-05, + "loss": 0.8185729384422302, + "step": 682 + }, + { + "epoch": 0.19973680362626114, + "grad_norm": 1.3798220626145994, + "learning_rate": 1.9865199955918712e-05, + "loss": 0.6629397869110107, + "step": 683 + }, + { + "epoch": 0.20002924404152653, + "grad_norm": 1.2700323386046573, + "learning_rate": 1.9864407647845626e-05, + "loss": 0.6752325296401978, + "step": 684 + }, + { + "epoch": 0.20032168445679194, + "grad_norm": 1.4583632577866723, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.8509782552719116, + "step": 685 + }, + { + "epoch": 0.20061412487205732, + "grad_norm": 1.2832087066986109, + "learning_rate": 1.986281611464925e-05, + "loss": 0.5573478937149048, + "step": 686 + }, + { + "epoch": 0.2009065652873227, + "grad_norm": 1.4672386586086157, + "learning_rate": 1.9862016889897976e-05, + "loss": 0.8152032494544983, + "step": 687 + }, + { + "epoch": 0.20119900570258809, + "grad_norm": 1.2878245307564982, + "learning_rate": 1.9861215359960217e-05, + "loss": 0.6346902847290039, + "step": 688 + }, + { + "epoch": 0.2014914461178535, + "grad_norm": 1.3877152633732261, + "learning_rate": 1.986041152502332e-05, + "loss": 0.6608721017837524, + "step": 689 + }, + { + "epoch": 0.20178388653311888, + "grad_norm": 1.5061562575575014, + "learning_rate": 1.9859605385275188e-05, + "loss": 0.7753713130950928, + "step": 690 + }, + { + "epoch": 
0.20207632694838426, + "grad_norm": 1.2917361787707549, + "learning_rate": 1.9858796940904238e-05, + "loss": 0.6747434139251709, + "step": 691 + }, + { + "epoch": 0.20236876736364964, + "grad_norm": 1.4853341728710303, + "learning_rate": 1.9857986192099446e-05, + "loss": 0.7263737320899963, + "step": 692 + }, + { + "epoch": 0.20266120777891505, + "grad_norm": 1.2072706917482865, + "learning_rate": 1.9857173139050324e-05, + "loss": 0.7910827994346619, + "step": 693 + }, + { + "epoch": 0.20295364819418044, + "grad_norm": 1.479189890111576, + "learning_rate": 1.9856357781946913e-05, + "loss": 0.7245683670043945, + "step": 694 + }, + { + "epoch": 0.20324608860944582, + "grad_norm": 1.146324196354459, + "learning_rate": 1.9855540120979794e-05, + "loss": 0.7440140247344971, + "step": 695 + }, + { + "epoch": 0.20353852902471123, + "grad_norm": 1.823699641073059, + "learning_rate": 1.9854720156340096e-05, + "loss": 0.7485358715057373, + "step": 696 + }, + { + "epoch": 0.2038309694399766, + "grad_norm": 1.3927934028554216, + "learning_rate": 1.985389788821948e-05, + "loss": 0.7658560872077942, + "step": 697 + }, + { + "epoch": 0.204123409855242, + "grad_norm": 1.5269096149843602, + "learning_rate": 1.9853073316810144e-05, + "loss": 0.7366135120391846, + "step": 698 + }, + { + "epoch": 0.20441585027050738, + "grad_norm": 1.2008198015347107, + "learning_rate": 1.985224644230483e-05, + "loss": 0.622355580329895, + "step": 699 + }, + { + "epoch": 0.2047082906857728, + "grad_norm": 1.1924050316279482, + "learning_rate": 1.985141726489681e-05, + "loss": 0.6123125553131104, + "step": 700 + }, + { + "epoch": 0.20500073110103817, + "grad_norm": 1.3537888634275872, + "learning_rate": 1.9850585784779907e-05, + "loss": 0.6768301725387573, + "step": 701 + }, + { + "epoch": 0.20529317151630355, + "grad_norm": 1.2390814549745153, + "learning_rate": 1.9849752002148465e-05, + "loss": 0.6562466621398926, + "step": 702 + }, + { + "epoch": 0.20558561193156893, + "grad_norm": 
1.5562868949340583, + "learning_rate": 1.984891591719738e-05, + "loss": 0.7818280458450317, + "step": 703 + }, + { + "epoch": 0.20587805234683434, + "grad_norm": 1.3407102317592055, + "learning_rate": 1.9848077530122083e-05, + "loss": 0.7144001722335815, + "step": 704 + }, + { + "epoch": 0.20617049276209973, + "grad_norm": 1.1671039191657233, + "learning_rate": 1.9847236841118537e-05, + "loss": 0.700564980506897, + "step": 705 + }, + { + "epoch": 0.2064629331773651, + "grad_norm": 1.3051666135645792, + "learning_rate": 1.984639385038326e-05, + "loss": 0.5933517217636108, + "step": 706 + }, + { + "epoch": 0.2067553735926305, + "grad_norm": 1.2749925819283578, + "learning_rate": 1.9845548558113278e-05, + "loss": 0.6174886226654053, + "step": 707 + }, + { + "epoch": 0.2070478140078959, + "grad_norm": 1.3159599421199524, + "learning_rate": 1.9844700964506188e-05, + "loss": 0.7241572141647339, + "step": 708 + }, + { + "epoch": 0.20734025442316129, + "grad_norm": 1.227834334214839, + "learning_rate": 1.9843851069760103e-05, + "loss": 0.6620675325393677, + "step": 709 + }, + { + "epoch": 0.20763269483842667, + "grad_norm": 1.3263327729601424, + "learning_rate": 1.9842998874073682e-05, + "loss": 0.6115273237228394, + "step": 710 + }, + { + "epoch": 0.20792513525369205, + "grad_norm": 1.2961824988419117, + "learning_rate": 1.984214437764612e-05, + "loss": 0.6871848106384277, + "step": 711 + }, + { + "epoch": 0.20821757566895746, + "grad_norm": 1.3134080639211354, + "learning_rate": 1.9841287580677152e-05, + "loss": 0.6887271404266357, + "step": 712 + }, + { + "epoch": 0.20851001608422284, + "grad_norm": 1.4994035488495783, + "learning_rate": 1.9840428483367046e-05, + "loss": 0.8519056439399719, + "step": 713 + }, + { + "epoch": 0.20880245649948823, + "grad_norm": 1.1754556134484295, + "learning_rate": 1.9839567085916617e-05, + "loss": 0.8168978691101074, + "step": 714 + }, + { + "epoch": 0.2090948969147536, + "grad_norm": 1.3651960767502735, + "learning_rate": 
1.98387033885272e-05, + "loss": 0.6565415859222412, + "step": 715 + }, + { + "epoch": 0.20938733733001902, + "grad_norm": 1.3008644261492222, + "learning_rate": 1.9837837391400697e-05, + "loss": 0.7305471897125244, + "step": 716 + }, + { + "epoch": 0.2096797777452844, + "grad_norm": 1.4799180289336367, + "learning_rate": 1.9836969094739512e-05, + "loss": 0.7676819562911987, + "step": 717 + }, + { + "epoch": 0.20997221816054978, + "grad_norm": 1.8463650009400876, + "learning_rate": 1.983609849874661e-05, + "loss": 0.6519052982330322, + "step": 718 + }, + { + "epoch": 0.21026465857581517, + "grad_norm": 1.2876599445155823, + "learning_rate": 1.9835225603625488e-05, + "loss": 0.6298089623451233, + "step": 719 + }, + { + "epoch": 0.21055709899108058, + "grad_norm": 1.3906710149258825, + "learning_rate": 1.9834350409580184e-05, + "loss": 0.6384454369544983, + "step": 720 + }, + { + "epoch": 0.21084953940634596, + "grad_norm": 1.1568343654967514, + "learning_rate": 1.9833472916815264e-05, + "loss": 0.6335986852645874, + "step": 721 + }, + { + "epoch": 0.21114197982161134, + "grad_norm": 1.3831022749264381, + "learning_rate": 1.983259312553584e-05, + "loss": 0.6587867736816406, + "step": 722 + }, + { + "epoch": 0.21143442023687672, + "grad_norm": 1.4202837808347009, + "learning_rate": 1.9831711035947552e-05, + "loss": 0.6884294748306274, + "step": 723 + }, + { + "epoch": 0.21172686065214213, + "grad_norm": 1.3257507653834097, + "learning_rate": 1.983082664825659e-05, + "loss": 0.7094298601150513, + "step": 724 + }, + { + "epoch": 0.21201930106740752, + "grad_norm": 1.2528953355997736, + "learning_rate": 1.982993996266967e-05, + "loss": 0.736876368522644, + "step": 725 + }, + { + "epoch": 0.2123117414826729, + "grad_norm": 1.3690939580337487, + "learning_rate": 1.9829050979394052e-05, + "loss": 0.7802199125289917, + "step": 726 + }, + { + "epoch": 0.21260418189793828, + "grad_norm": 1.1986325257536081, + "learning_rate": 1.9828159698637527e-05, + "loss": 0.602590799331665, 
+ "step": 727 + }, + { + "epoch": 0.2128966223132037, + "grad_norm": 1.2705657575851783, + "learning_rate": 1.982726612060843e-05, + "loss": 0.6855295896530151, + "step": 728 + }, + { + "epoch": 0.21318906272846908, + "grad_norm": 1.3075577627317818, + "learning_rate": 1.982637024551563e-05, + "loss": 0.7174949645996094, + "step": 729 + }, + { + "epoch": 0.21348150314373446, + "grad_norm": 1.404568014095412, + "learning_rate": 1.9825472073568527e-05, + "loss": 0.7002695798873901, + "step": 730 + }, + { + "epoch": 0.21377394355899984, + "grad_norm": 1.3606210741478622, + "learning_rate": 1.982457160497707e-05, + "loss": 0.7256268262863159, + "step": 731 + }, + { + "epoch": 0.21406638397426525, + "grad_norm": 1.6598974008247112, + "learning_rate": 1.9823668839951732e-05, + "loss": 0.8223557472229004, + "step": 732 + }, + { + "epoch": 0.21435882438953063, + "grad_norm": 1.361285088499868, + "learning_rate": 1.982276377870353e-05, + "loss": 0.760543942451477, + "step": 733 + }, + { + "epoch": 0.21465126480479602, + "grad_norm": 1.1189262427603888, + "learning_rate": 1.982185642144402e-05, + "loss": 0.5587141513824463, + "step": 734 + }, + { + "epoch": 0.21494370522006143, + "grad_norm": 1.5077440828298982, + "learning_rate": 1.9820946768385295e-05, + "loss": 0.5775829553604126, + "step": 735 + }, + { + "epoch": 0.2152361456353268, + "grad_norm": 1.2761529870001347, + "learning_rate": 1.982003481973997e-05, + "loss": 0.6654443144798279, + "step": 736 + }, + { + "epoch": 0.2155285860505922, + "grad_norm": 1.5826837327135188, + "learning_rate": 1.9819120575721212e-05, + "loss": 0.7963466048240662, + "step": 737 + }, + { + "epoch": 0.21582102646585757, + "grad_norm": 1.3788031698645051, + "learning_rate": 1.981820403654272e-05, + "loss": 0.6748678684234619, + "step": 738 + }, + { + "epoch": 0.21611346688112298, + "grad_norm": 1.4155297807006182, + "learning_rate": 1.9817285202418733e-05, + "loss": 0.7041783928871155, + "step": 739 + }, + { + "epoch": 0.21640590729638837, + 
"grad_norm": 1.5390789301713295, + "learning_rate": 1.981636407356402e-05, + "loss": 0.8008041381835938, + "step": 740 + }, + { + "epoch": 0.21669834771165375, + "grad_norm": 1.4349473190399622, + "learning_rate": 1.9815440650193887e-05, + "loss": 0.6873682141304016, + "step": 741 + }, + { + "epoch": 0.21699078812691913, + "grad_norm": 1.4041288075629241, + "learning_rate": 1.981451493252418e-05, + "loss": 0.6316831111907959, + "step": 742 + }, + { + "epoch": 0.21728322854218454, + "grad_norm": 1.3377112960270812, + "learning_rate": 1.9813586920771283e-05, + "loss": 0.6481543779373169, + "step": 743 + }, + { + "epoch": 0.21757566895744992, + "grad_norm": 1.2613104485847573, + "learning_rate": 1.9812656615152112e-05, + "loss": 0.6642731428146362, + "step": 744 + }, + { + "epoch": 0.2178681093727153, + "grad_norm": 1.4870873028073741, + "learning_rate": 1.9811724015884115e-05, + "loss": 0.6769483089447021, + "step": 745 + }, + { + "epoch": 0.2181605497879807, + "grad_norm": 1.4050593471281791, + "learning_rate": 1.981078912318529e-05, + "loss": 0.6397525072097778, + "step": 746 + }, + { + "epoch": 0.2184529902032461, + "grad_norm": 1.170420294448055, + "learning_rate": 1.9809851937274154e-05, + "loss": 0.4963756203651428, + "step": 747 + }, + { + "epoch": 0.21874543061851148, + "grad_norm": 1.6049508757911466, + "learning_rate": 1.9808912458369774e-05, + "loss": 0.7352936267852783, + "step": 748 + }, + { + "epoch": 0.21903787103377687, + "grad_norm": 1.3947943752325116, + "learning_rate": 1.980797068669175e-05, + "loss": 0.7177609205245972, + "step": 749 + }, + { + "epoch": 0.21933031144904225, + "grad_norm": 1.2819324457206713, + "learning_rate": 1.980702662246021e-05, + "loss": 0.76703941822052, + "step": 750 + }, + { + "epoch": 0.21962275186430766, + "grad_norm": 1.4885423867402507, + "learning_rate": 1.980608026589582e-05, + "loss": 0.8591324090957642, + "step": 751 + }, + { + "epoch": 0.21991519227957304, + "grad_norm": 1.1920075550965599, + "learning_rate": 
1.9805131617219792e-05, + "loss": 0.6216185092926025, + "step": 752 + }, + { + "epoch": 0.22020763269483842, + "grad_norm": 1.359972752643247, + "learning_rate": 1.9804180676653867e-05, + "loss": 0.6067323684692383, + "step": 753 + }, + { + "epoch": 0.2205000731101038, + "grad_norm": 1.329886038437426, + "learning_rate": 1.9803227444420316e-05, + "loss": 0.5832521319389343, + "step": 754 + }, + { + "epoch": 0.22079251352536922, + "grad_norm": 1.3701144460168073, + "learning_rate": 1.9802271920741957e-05, + "loss": 0.6181083917617798, + "step": 755 + }, + { + "epoch": 0.2210849539406346, + "grad_norm": 1.6323941211416428, + "learning_rate": 1.9801314105842135e-05, + "loss": 0.614393949508667, + "step": 756 + }, + { + "epoch": 0.22137739435589998, + "grad_norm": 1.4783150089736257, + "learning_rate": 1.980035399994473e-05, + "loss": 0.7598476409912109, + "step": 757 + }, + { + "epoch": 0.22166983477116536, + "grad_norm": 1.3445249209174277, + "learning_rate": 1.979939160327417e-05, + "loss": 0.7185830473899841, + "step": 758 + }, + { + "epoch": 0.22196227518643077, + "grad_norm": 1.2604381133839313, + "learning_rate": 1.9798426916055403e-05, + "loss": 0.6672362089157104, + "step": 759 + }, + { + "epoch": 0.22225471560169616, + "grad_norm": 1.323605486489286, + "learning_rate": 1.9797459938513918e-05, + "loss": 0.60948646068573, + "step": 760 + }, + { + "epoch": 0.22254715601696154, + "grad_norm": 1.376081699980774, + "learning_rate": 1.979649067087574e-05, + "loss": 0.6073893308639526, + "step": 761 + }, + { + "epoch": 0.22283959643222692, + "grad_norm": 2.11374968768554, + "learning_rate": 1.9795519113367434e-05, + "loss": 0.7521525025367737, + "step": 762 + }, + { + "epoch": 0.22313203684749233, + "grad_norm": 1.3631196959673009, + "learning_rate": 1.979454526621609e-05, + "loss": 0.7281486988067627, + "step": 763 + }, + { + "epoch": 0.22342447726275771, + "grad_norm": 1.3466801989985047, + "learning_rate": 1.9793569129649345e-05, + "loss": 0.5628652572631836, + 
"step": 764 + }, + { + "epoch": 0.2237169176780231, + "grad_norm": 1.7030188389110175, + "learning_rate": 1.9792590703895364e-05, + "loss": 0.9115084409713745, + "step": 765 + }, + { + "epoch": 0.22400935809328848, + "grad_norm": 1.1906430527809846, + "learning_rate": 1.9791609989182843e-05, + "loss": 0.5793902277946472, + "step": 766 + }, + { + "epoch": 0.2243017985085539, + "grad_norm": 1.319680929079464, + "learning_rate": 1.979062698574102e-05, + "loss": 0.5811150074005127, + "step": 767 + }, + { + "epoch": 0.22459423892381927, + "grad_norm": 1.8337754364313175, + "learning_rate": 1.978964169379967e-05, + "loss": 0.7450643181800842, + "step": 768 + }, + { + "epoch": 0.22488667933908466, + "grad_norm": 1.2696945630714354, + "learning_rate": 1.9788654113589093e-05, + "loss": 0.6617515087127686, + "step": 769 + }, + { + "epoch": 0.22517911975435004, + "grad_norm": 1.1685310150494228, + "learning_rate": 1.9787664245340137e-05, + "loss": 0.6240406036376953, + "step": 770 + }, + { + "epoch": 0.22547156016961545, + "grad_norm": 1.450209328719988, + "learning_rate": 1.978667208928417e-05, + "loss": 0.694688081741333, + "step": 771 + }, + { + "epoch": 0.22576400058488083, + "grad_norm": 1.274649499261431, + "learning_rate": 1.9785677645653107e-05, + "loss": 0.6855190396308899, + "step": 772 + }, + { + "epoch": 0.2260564410001462, + "grad_norm": 1.5531275718881066, + "learning_rate": 1.978468091467939e-05, + "loss": 0.8132567405700684, + "step": 773 + }, + { + "epoch": 0.22634888141541162, + "grad_norm": 1.2819374084058084, + "learning_rate": 1.9783681896596006e-05, + "loss": 0.7011039853096008, + "step": 774 + }, + { + "epoch": 0.226641321830677, + "grad_norm": 1.2317633693628418, + "learning_rate": 1.9782680591636462e-05, + "loss": 0.5754199028015137, + "step": 775 + }, + { + "epoch": 0.2269337622459424, + "grad_norm": 1.3342396229289735, + "learning_rate": 1.9781677000034807e-05, + "loss": 0.7518784403800964, + "step": 776 + }, + { + "epoch": 0.22722620266120777, + 
"grad_norm": 1.4619385156109748, + "learning_rate": 1.978067112202563e-05, + "loss": 0.6802738904953003, + "step": 777 + }, + { + "epoch": 0.22751864307647318, + "grad_norm": 1.2836639966818497, + "learning_rate": 1.9779662957844046e-05, + "loss": 0.7667055726051331, + "step": 778 + }, + { + "epoch": 0.22781108349173856, + "grad_norm": 1.3402387686228199, + "learning_rate": 1.9778652507725704e-05, + "loss": 0.7590975165367126, + "step": 779 + }, + { + "epoch": 0.22810352390700395, + "grad_norm": 1.5322182562597366, + "learning_rate": 1.9777639771906795e-05, + "loss": 0.8009685277938843, + "step": 780 + }, + { + "epoch": 0.22839596432226933, + "grad_norm": 1.2184372022517955, + "learning_rate": 1.977662475062404e-05, + "loss": 0.6094385981559753, + "step": 781 + }, + { + "epoch": 0.22868840473753474, + "grad_norm": 1.2258891813878965, + "learning_rate": 1.977560744411469e-05, + "loss": 0.5919946432113647, + "step": 782 + }, + { + "epoch": 0.22898084515280012, + "grad_norm": 1.3994922066796667, + "learning_rate": 1.9774587852616537e-05, + "loss": 0.7616838216781616, + "step": 783 + }, + { + "epoch": 0.2292732855680655, + "grad_norm": 1.0864449553171927, + "learning_rate": 1.9773565976367903e-05, + "loss": 0.5107603073120117, + "step": 784 + }, + { + "epoch": 0.2295657259833309, + "grad_norm": 1.3785741559157736, + "learning_rate": 1.9772541815607645e-05, + "loss": 0.6819792985916138, + "step": 785 + }, + { + "epoch": 0.2298581663985963, + "grad_norm": 1.3095462010721952, + "learning_rate": 1.977151537057516e-05, + "loss": 0.748264729976654, + "step": 786 + }, + { + "epoch": 0.23015060681386168, + "grad_norm": 1.511078591377817, + "learning_rate": 1.977048664151037e-05, + "loss": 0.7341534495353699, + "step": 787 + }, + { + "epoch": 0.23044304722912706, + "grad_norm": 1.3481462417331131, + "learning_rate": 1.976945562865373e-05, + "loss": 0.569247841835022, + "step": 788 + }, + { + "epoch": 0.23073548764439245, + "grad_norm": 1.4792545387125078, + "learning_rate": 
1.9768422332246233e-05, + "loss": 0.7003188133239746, + "step": 789 + }, + { + "epoch": 0.23102792805965786, + "grad_norm": 1.222254549739519, + "learning_rate": 1.9767386752529415e-05, + "loss": 0.6484041810035706, + "step": 790 + }, + { + "epoch": 0.23132036847492324, + "grad_norm": 1.2921197831934208, + "learning_rate": 1.9766348889745324e-05, + "loss": 0.6635721921920776, + "step": 791 + }, + { + "epoch": 0.23161280889018862, + "grad_norm": 1.3606759597173597, + "learning_rate": 1.9765308744136568e-05, + "loss": 0.5855914354324341, + "step": 792 + }, + { + "epoch": 0.231905249305454, + "grad_norm": 1.3590534475124305, + "learning_rate": 1.976426631594626e-05, + "loss": 0.7606059312820435, + "step": 793 + }, + { + "epoch": 0.2321976897207194, + "grad_norm": 1.399907486961256, + "learning_rate": 1.976322160541807e-05, + "loss": 0.7080718278884888, + "step": 794 + }, + { + "epoch": 0.2324901301359848, + "grad_norm": 1.6372996876909576, + "learning_rate": 1.9762174612796195e-05, + "loss": 0.8838162422180176, + "step": 795 + }, + { + "epoch": 0.23278257055125018, + "grad_norm": 1.1906217629409164, + "learning_rate": 1.9761125338325357e-05, + "loss": 0.5776950120925903, + "step": 796 + }, + { + "epoch": 0.23307501096651556, + "grad_norm": 1.4075761903811832, + "learning_rate": 1.9760073782250817e-05, + "loss": 0.7455854415893555, + "step": 797 + }, + { + "epoch": 0.23336745138178097, + "grad_norm": 1.4778525028622385, + "learning_rate": 1.9759019944818375e-05, + "loss": 0.7160001993179321, + "step": 798 + }, + { + "epoch": 0.23365989179704635, + "grad_norm": 1.2680712563874137, + "learning_rate": 1.9757963826274357e-05, + "loss": 0.6282311081886292, + "step": 799 + }, + { + "epoch": 0.23395233221231174, + "grad_norm": 1.3617325718771658, + "learning_rate": 1.9756905426865626e-05, + "loss": 0.6479916572570801, + "step": 800 + }, + { + "epoch": 0.23424477262757712, + "grad_norm": 1.2789508587545713, + "learning_rate": 1.9755844746839573e-05, + "loss": 
0.6519639492034912, + "step": 801 + }, + { + "epoch": 0.23453721304284253, + "grad_norm": 1.4374021901805083, + "learning_rate": 1.9754781786444122e-05, + "loss": 0.5591464638710022, + "step": 802 + }, + { + "epoch": 0.2348296534581079, + "grad_norm": 1.6094479116430809, + "learning_rate": 1.9753716545927745e-05, + "loss": 0.6378511190414429, + "step": 803 + }, + { + "epoch": 0.2351220938733733, + "grad_norm": 1.593476138868701, + "learning_rate": 1.9752649025539424e-05, + "loss": 0.7932485342025757, + "step": 804 + }, + { + "epoch": 0.23541453428863868, + "grad_norm": 1.327032855057245, + "learning_rate": 1.9751579225528694e-05, + "loss": 0.7344592809677124, + "step": 805 + }, + { + "epoch": 0.2357069747039041, + "grad_norm": 1.5060138108990804, + "learning_rate": 1.975050714614561e-05, + "loss": 0.7879096269607544, + "step": 806 + }, + { + "epoch": 0.23599941511916947, + "grad_norm": 1.31391427286964, + "learning_rate": 1.9749432787640764e-05, + "loss": 0.6428436040878296, + "step": 807 + }, + { + "epoch": 0.23629185553443485, + "grad_norm": 1.1924129057081494, + "learning_rate": 1.9748356150265283e-05, + "loss": 0.7018194198608398, + "step": 808 + }, + { + "epoch": 0.23658429594970024, + "grad_norm": 1.3487665777693398, + "learning_rate": 1.974727723427082e-05, + "loss": 0.7696131467819214, + "step": 809 + }, + { + "epoch": 0.23687673636496565, + "grad_norm": 1.2806200429683234, + "learning_rate": 1.974619603990957e-05, + "loss": 0.6429424285888672, + "step": 810 + }, + { + "epoch": 0.23716917678023103, + "grad_norm": 1.4197164517856635, + "learning_rate": 1.9745112567434254e-05, + "loss": 0.7205626964569092, + "step": 811 + }, + { + "epoch": 0.2374616171954964, + "grad_norm": 1.206628595880062, + "learning_rate": 1.9744026817098122e-05, + "loss": 0.7018989324569702, + "step": 812 + }, + { + "epoch": 0.23775405761076182, + "grad_norm": 1.4562632106002198, + "learning_rate": 1.974293878915497e-05, + "loss": 0.6861958503723145, + "step": 813 + }, + { + "epoch": 
0.2380464980260272, + "grad_norm": 1.8277672251442496, + "learning_rate": 1.9741848483859117e-05, + "loss": 0.687503457069397, + "step": 814 + }, + { + "epoch": 0.23833893844129259, + "grad_norm": 1.6702364448324796, + "learning_rate": 1.9740755901465408e-05, + "loss": 0.7808526754379272, + "step": 815 + }, + { + "epoch": 0.23863137885655797, + "grad_norm": 1.4777579354772585, + "learning_rate": 1.973966104222923e-05, + "loss": 0.7387286424636841, + "step": 816 + }, + { + "epoch": 0.23892381927182338, + "grad_norm": 1.2761337726208828, + "learning_rate": 1.9738563906406508e-05, + "loss": 0.6262110471725464, + "step": 817 + }, + { + "epoch": 0.23921625968708876, + "grad_norm": 1.2308979686961945, + "learning_rate": 1.973746449425368e-05, + "loss": 0.6618830561637878, + "step": 818 + }, + { + "epoch": 0.23950870010235414, + "grad_norm": 1.3525742869997646, + "learning_rate": 1.9736362806027732e-05, + "loss": 0.5866184234619141, + "step": 819 + }, + { + "epoch": 0.23980114051761953, + "grad_norm": 1.1916120410649227, + "learning_rate": 1.9735258841986175e-05, + "loss": 0.6413314342498779, + "step": 820 + }, + { + "epoch": 0.24009358093288494, + "grad_norm": 1.3855684564301443, + "learning_rate": 1.9734152602387054e-05, + "loss": 0.6125906109809875, + "step": 821 + }, + { + "epoch": 0.24038602134815032, + "grad_norm": 1.3708182915073268, + "learning_rate": 1.973304408748895e-05, + "loss": 0.6128122806549072, + "step": 822 + }, + { + "epoch": 0.2406784617634157, + "grad_norm": 1.4552398411515748, + "learning_rate": 1.973193329755097e-05, + "loss": 0.7763051986694336, + "step": 823 + }, + { + "epoch": 0.24097090217868108, + "grad_norm": 1.406068384249821, + "learning_rate": 1.9730820232832747e-05, + "loss": 0.7187550067901611, + "step": 824 + }, + { + "epoch": 0.2412633425939465, + "grad_norm": 1.4089612736012989, + "learning_rate": 1.972970489359446e-05, + "loss": 0.6564748287200928, + "step": 825 + }, + { + "epoch": 0.24155578300921188, + "grad_norm": 
1.2962838731212396, + "learning_rate": 1.9728587280096815e-05, + "loss": 0.6573271751403809, + "step": 826 + }, + { + "epoch": 0.24184822342447726, + "grad_norm": 1.606482466732529, + "learning_rate": 1.9727467392601042e-05, + "loss": 0.8032153844833374, + "step": 827 + }, + { + "epoch": 0.24214066383974264, + "grad_norm": 1.344534982986645, + "learning_rate": 1.972634523136891e-05, + "loss": 0.6781449913978577, + "step": 828 + }, + { + "epoch": 0.24243310425500805, + "grad_norm": 1.3970734980370678, + "learning_rate": 1.972522079666272e-05, + "loss": 0.580757737159729, + "step": 829 + }, + { + "epoch": 0.24272554467027344, + "grad_norm": 1.4569992070347761, + "learning_rate": 1.97240940887453e-05, + "loss": 0.626894474029541, + "step": 830 + }, + { + "epoch": 0.24301798508553882, + "grad_norm": 1.4885978649776115, + "learning_rate": 1.9722965107880005e-05, + "loss": 0.8188163042068481, + "step": 831 + }, + { + "epoch": 0.2433104255008042, + "grad_norm": 1.4514623765445114, + "learning_rate": 1.9721833854330734e-05, + "loss": 0.6943579912185669, + "step": 832 + }, + { + "epoch": 0.2436028659160696, + "grad_norm": 1.3452906489662066, + "learning_rate": 1.972070032836191e-05, + "loss": 0.6177504658699036, + "step": 833 + }, + { + "epoch": 0.243895306331335, + "grad_norm": 1.3249219466208975, + "learning_rate": 1.971956453023849e-05, + "loss": 0.683998703956604, + "step": 834 + }, + { + "epoch": 0.24418774674660038, + "grad_norm": 1.3523687150823345, + "learning_rate": 1.9718426460225952e-05, + "loss": 0.77602219581604, + "step": 835 + }, + { + "epoch": 0.24448018716186576, + "grad_norm": 1.0190390519787025, + "learning_rate": 1.971728611859032e-05, + "loss": 0.4930742383003235, + "step": 836 + }, + { + "epoch": 0.24477262757713117, + "grad_norm": 1.057766741950331, + "learning_rate": 1.971614350559814e-05, + "loss": 0.634628415107727, + "step": 837 + }, + { + "epoch": 0.24506506799239655, + "grad_norm": 1.4273024070967653, + "learning_rate": 1.971499862151649e-05, + 
"loss": 0.6439167857170105, + "step": 838 + }, + { + "epoch": 0.24535750840766193, + "grad_norm": 1.1385728991135244, + "learning_rate": 1.9713851466612982e-05, + "loss": 0.701258659362793, + "step": 839 + }, + { + "epoch": 0.24564994882292732, + "grad_norm": 1.4590112387376561, + "learning_rate": 1.9712702041155753e-05, + "loss": 0.6488544344902039, + "step": 840 + }, + { + "epoch": 0.24594238923819273, + "grad_norm": 1.3405708553224296, + "learning_rate": 1.9711550345413476e-05, + "loss": 0.6962910890579224, + "step": 841 + }, + { + "epoch": 0.2462348296534581, + "grad_norm": 1.1939053963741824, + "learning_rate": 1.9710396379655355e-05, + "loss": 0.6617723703384399, + "step": 842 + }, + { + "epoch": 0.2465272700687235, + "grad_norm": 1.2279058278823862, + "learning_rate": 1.970924014415112e-05, + "loss": 0.7152801752090454, + "step": 843 + }, + { + "epoch": 0.24681971048398887, + "grad_norm": 1.2796222731345095, + "learning_rate": 1.9708081639171035e-05, + "loss": 0.6712393760681152, + "step": 844 + }, + { + "epoch": 0.24711215089925428, + "grad_norm": 1.3941735155074029, + "learning_rate": 1.970692086498589e-05, + "loss": 0.8413758277893066, + "step": 845 + }, + { + "epoch": 0.24740459131451967, + "grad_norm": 1.423836225011119, + "learning_rate": 1.9705757821867015e-05, + "loss": 0.6460679769515991, + "step": 846 + }, + { + "epoch": 0.24769703172978505, + "grad_norm": 1.3704721229511874, + "learning_rate": 1.970459251008626e-05, + "loss": 0.759244441986084, + "step": 847 + }, + { + "epoch": 0.24798947214505043, + "grad_norm": 1.2356631241001201, + "learning_rate": 1.970342492991601e-05, + "loss": 0.8148110508918762, + "step": 848 + }, + { + "epoch": 0.24828191256031584, + "grad_norm": 1.2587770996787473, + "learning_rate": 1.970225508162918e-05, + "loss": 0.6620084047317505, + "step": 849 + }, + { + "epoch": 0.24857435297558123, + "grad_norm": 1.451838551232366, + "learning_rate": 1.9701082965499217e-05, + "loss": 0.7090305089950562, + "step": 850 + }, + { + 
"epoch": 0.2488667933908466, + "grad_norm": 1.2074340737341804, + "learning_rate": 1.9699908581800094e-05, + "loss": 0.6846730709075928, + "step": 851 + }, + { + "epoch": 0.24915923380611202, + "grad_norm": 1.0752757256209107, + "learning_rate": 1.9698731930806315e-05, + "loss": 0.5183212757110596, + "step": 852 + }, + { + "epoch": 0.2494516742213774, + "grad_norm": 1.4176078828661092, + "learning_rate": 1.9697553012792915e-05, + "loss": 0.6913097500801086, + "step": 853 + }, + { + "epoch": 0.24974411463664278, + "grad_norm": 1.4996885245263052, + "learning_rate": 1.9696371828035466e-05, + "loss": 0.7896280884742737, + "step": 854 + }, + { + "epoch": 0.2500365550519082, + "grad_norm": 1.4718644942105623, + "learning_rate": 1.9695188376810055e-05, + "loss": 0.947577714920044, + "step": 855 + }, + { + "epoch": 0.2503289954671736, + "grad_norm": 1.3825164821538705, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.7772419452667236, + "step": 856 + }, + { + "epoch": 0.25062143588243896, + "grad_norm": 1.3624521016930335, + "learning_rate": 1.9692814676062376e-05, + "loss": 0.6255912780761719, + "step": 857 + }, + { + "epoch": 0.25091387629770434, + "grad_norm": 1.3319834146029552, + "learning_rate": 1.969162442709495e-05, + "loss": 0.6572105884552002, + "step": 858 + }, + { + "epoch": 0.2512063167129697, + "grad_norm": 1.3718275193420901, + "learning_rate": 1.969043191276924e-05, + "loss": 0.6387436389923096, + "step": 859 + }, + { + "epoch": 0.2514987571282351, + "grad_norm": 1.1976239787141296, + "learning_rate": 1.968923713336399e-05, + "loss": 0.9180483222007751, + "step": 860 + }, + { + "epoch": 0.2517911975435005, + "grad_norm": 1.211847411431562, + "learning_rate": 1.9688040089158473e-05, + "loss": 0.6830536127090454, + "step": 861 + }, + { + "epoch": 0.2520836379587659, + "grad_norm": 1.6904119232689327, + "learning_rate": 1.9686840780432487e-05, + "loss": 0.9061588644981384, + "step": 862 + }, + { + "epoch": 0.2523760783740313, + "grad_norm": 
1.157670921080695, + "learning_rate": 1.9685639207466365e-05, + "loss": 0.558010458946228, + "step": 863 + }, + { + "epoch": 0.2526685187892967, + "grad_norm": 1.1825470022948923, + "learning_rate": 1.968443537054097e-05, + "loss": 0.6788249611854553, + "step": 864 + }, + { + "epoch": 0.2529609592045621, + "grad_norm": 1.2105730438992965, + "learning_rate": 1.968322926993769e-05, + "loss": 0.576469898223877, + "step": 865 + }, + { + "epoch": 0.25325339961982746, + "grad_norm": 1.2982512656817862, + "learning_rate": 1.9682020905938438e-05, + "loss": 0.6994123458862305, + "step": 866 + }, + { + "epoch": 0.25354584003509284, + "grad_norm": 1.206872992638966, + "learning_rate": 1.9680810278825672e-05, + "loss": 0.6929521560668945, + "step": 867 + }, + { + "epoch": 0.2538382804503582, + "grad_norm": 1.273656030058159, + "learning_rate": 1.9679597388882363e-05, + "loss": 0.7596743106842041, + "step": 868 + }, + { + "epoch": 0.2541307208656236, + "grad_norm": 1.4805809886864818, + "learning_rate": 1.9678382236392013e-05, + "loss": 0.7925904989242554, + "step": 869 + }, + { + "epoch": 0.25442316128088904, + "grad_norm": 1.3335550122348163, + "learning_rate": 1.9677164821638666e-05, + "loss": 0.722467839717865, + "step": 870 + }, + { + "epoch": 0.2547156016961544, + "grad_norm": 1.3131624182400288, + "learning_rate": 1.9675945144906882e-05, + "loss": 0.7165451049804688, + "step": 871 + }, + { + "epoch": 0.2550080421114198, + "grad_norm": 1.1797512350865442, + "learning_rate": 1.9674723206481746e-05, + "loss": 0.5897061824798584, + "step": 872 + }, + { + "epoch": 0.2553004825266852, + "grad_norm": 1.2365962649439657, + "learning_rate": 1.9673499006648885e-05, + "loss": 0.6634531021118164, + "step": 873 + }, + { + "epoch": 0.2555929229419506, + "grad_norm": 1.3214235822507945, + "learning_rate": 1.9672272545694445e-05, + "loss": 0.7237584590911865, + "step": 874 + }, + { + "epoch": 0.25588536335721596, + "grad_norm": 1.4848759223566366, + "learning_rate": 
1.967104382390511e-05, + "loss": 0.6382388472557068, + "step": 875 + }, + { + "epoch": 0.25617780377248134, + "grad_norm": 1.31447030866248, + "learning_rate": 1.966981284156808e-05, + "loss": 0.6788768768310547, + "step": 876 + }, + { + "epoch": 0.2564702441877467, + "grad_norm": 1.3072783419197107, + "learning_rate": 1.966857959897109e-05, + "loss": 0.6347095966339111, + "step": 877 + }, + { + "epoch": 0.25676268460301216, + "grad_norm": 1.4344629064681063, + "learning_rate": 1.9667344096402406e-05, + "loss": 0.8896903991699219, + "step": 878 + }, + { + "epoch": 0.25705512501827754, + "grad_norm": 1.3876445939749689, + "learning_rate": 1.966610633415082e-05, + "loss": 0.71473228931427, + "step": 879 + }, + { + "epoch": 0.2573475654335429, + "grad_norm": 1.3302375445053003, + "learning_rate": 1.9664866312505646e-05, + "loss": 0.7311601638793945, + "step": 880 + }, + { + "epoch": 0.2576400058488083, + "grad_norm": 1.2472942559074918, + "learning_rate": 1.9663624031756737e-05, + "loss": 0.6186199188232422, + "step": 881 + }, + { + "epoch": 0.2579324462640737, + "grad_norm": 1.4896774549089442, + "learning_rate": 1.9662379492194467e-05, + "loss": 0.8059204816818237, + "step": 882 + }, + { + "epoch": 0.25822488667933907, + "grad_norm": 1.4468929069066396, + "learning_rate": 1.9661132694109736e-05, + "loss": 0.6065236330032349, + "step": 883 + }, + { + "epoch": 0.25851732709460445, + "grad_norm": 1.182060018600662, + "learning_rate": 1.965988363779398e-05, + "loss": 0.6491106152534485, + "step": 884 + }, + { + "epoch": 0.25880976750986984, + "grad_norm": 1.197300798410388, + "learning_rate": 1.9658632323539158e-05, + "loss": 0.526267945766449, + "step": 885 + }, + { + "epoch": 0.2591022079251353, + "grad_norm": 1.5008074138248908, + "learning_rate": 1.9657378751637755e-05, + "loss": 0.812760591506958, + "step": 886 + }, + { + "epoch": 0.25939464834040066, + "grad_norm": 1.807239371921464, + "learning_rate": 1.9656122922382786e-05, + "loss": 0.7957908511161804, + 
"step": 887 + }, + { + "epoch": 0.25968708875566604, + "grad_norm": 1.3552357306732934, + "learning_rate": 1.9654864836067796e-05, + "loss": 0.7426323890686035, + "step": 888 + }, + { + "epoch": 0.2599795291709314, + "grad_norm": 1.3206271267013228, + "learning_rate": 1.9653604492986852e-05, + "loss": 0.602961540222168, + "step": 889 + }, + { + "epoch": 0.2602719695861968, + "grad_norm": 1.7789592821205134, + "learning_rate": 1.965234189343455e-05, + "loss": 0.8706510066986084, + "step": 890 + }, + { + "epoch": 0.2605644100014622, + "grad_norm": 1.3042391493572836, + "learning_rate": 1.965107703770602e-05, + "loss": 0.6245810985565186, + "step": 891 + }, + { + "epoch": 0.26085685041672757, + "grad_norm": 1.3389608750174764, + "learning_rate": 1.964980992609691e-05, + "loss": 0.7455421686172485, + "step": 892 + }, + { + "epoch": 0.261149290831993, + "grad_norm": 1.3769047718413097, + "learning_rate": 1.9648540558903404e-05, + "loss": 0.6917043328285217, + "step": 893 + }, + { + "epoch": 0.2614417312472584, + "grad_norm": 1.5543661242785587, + "learning_rate": 1.9647268936422204e-05, + "loss": 0.6488040685653687, + "step": 894 + }, + { + "epoch": 0.2617341716625238, + "grad_norm": 1.4168880936407573, + "learning_rate": 1.964599505895055e-05, + "loss": 0.7416148781776428, + "step": 895 + }, + { + "epoch": 0.26202661207778916, + "grad_norm": 1.2398123962846468, + "learning_rate": 1.9644718926786196e-05, + "loss": 0.7012773156166077, + "step": 896 + }, + { + "epoch": 0.26231905249305454, + "grad_norm": 1.4024640685787384, + "learning_rate": 1.9643440540227438e-05, + "loss": 0.8644432425498962, + "step": 897 + }, + { + "epoch": 0.2626114929083199, + "grad_norm": 1.2155057674795815, + "learning_rate": 1.9642159899573084e-05, + "loss": 0.614842414855957, + "step": 898 + }, + { + "epoch": 0.2629039333235853, + "grad_norm": 1.406064497865486, + "learning_rate": 1.964087700512248e-05, + "loss": 0.7794508337974548, + "step": 899 + }, + { + "epoch": 0.2631963737388507, + 
"grad_norm": 1.3041032890013364, + "learning_rate": 1.9639591857175492e-05, + "loss": 0.49217259883880615, + "step": 900 + }, + { + "epoch": 0.2634888141541161, + "grad_norm": 2.5300379427879656, + "learning_rate": 1.9638304456032516e-05, + "loss": 0.6319605708122253, + "step": 901 + }, + { + "epoch": 0.2637812545693815, + "grad_norm": 1.2937854520821135, + "learning_rate": 1.9637014801994478e-05, + "loss": 0.6066744327545166, + "step": 902 + }, + { + "epoch": 0.2640736949846469, + "grad_norm": 1.3364560601793205, + "learning_rate": 1.9635722895362824e-05, + "loss": 0.7529127597808838, + "step": 903 + }, + { + "epoch": 0.26436613539991227, + "grad_norm": 1.1766314649269587, + "learning_rate": 1.9634428736439524e-05, + "loss": 0.6026389598846436, + "step": 904 + }, + { + "epoch": 0.26465857581517765, + "grad_norm": 1.1341480559887087, + "learning_rate": 1.9633132325527092e-05, + "loss": 0.6227229237556458, + "step": 905 + }, + { + "epoch": 0.26495101623044304, + "grad_norm": 1.0934147682033295, + "learning_rate": 1.9631833662928548e-05, + "loss": 0.5959285497665405, + "step": 906 + }, + { + "epoch": 0.2652434566457084, + "grad_norm": 1.5332323248713289, + "learning_rate": 1.9630532748947445e-05, + "loss": 0.8104684352874756, + "step": 907 + }, + { + "epoch": 0.2655358970609738, + "grad_norm": 1.4286964634802555, + "learning_rate": 1.962922958388787e-05, + "loss": 0.6722325682640076, + "step": 908 + }, + { + "epoch": 0.26582833747623924, + "grad_norm": 1.3146328085881052, + "learning_rate": 1.962792416805442e-05, + "loss": 0.5996029376983643, + "step": 909 + }, + { + "epoch": 0.2661207778915046, + "grad_norm": 1.2576705371159294, + "learning_rate": 1.962661650175224e-05, + "loss": 0.7214776873588562, + "step": 910 + }, + { + "epoch": 0.26641321830677, + "grad_norm": 1.3644451050997106, + "learning_rate": 1.9625306585286986e-05, + "loss": 0.6833420991897583, + "step": 911 + }, + { + "epoch": 0.2667056587220354, + "grad_norm": 1.3539788924921423, + "learning_rate": 
1.9623994418964834e-05, + "loss": 0.5571368336677551, + "step": 912 + }, + { + "epoch": 0.26699809913730077, + "grad_norm": 1.3710487138213245, + "learning_rate": 1.9622680003092503e-05, + "loss": 0.6748533248901367, + "step": 913 + }, + { + "epoch": 0.26729053955256615, + "grad_norm": 1.3715994474814863, + "learning_rate": 1.9621363337977232e-05, + "loss": 0.6681679487228394, + "step": 914 + }, + { + "epoch": 0.26758297996783154, + "grad_norm": 1.482670676536411, + "learning_rate": 1.9620044423926775e-05, + "loss": 0.6839786767959595, + "step": 915 + }, + { + "epoch": 0.2678754203830969, + "grad_norm": 1.4250296018843953, + "learning_rate": 1.961872326124943e-05, + "loss": 0.7481753826141357, + "step": 916 + }, + { + "epoch": 0.26816786079836236, + "grad_norm": 1.2167024955211783, + "learning_rate": 1.9617399850254e-05, + "loss": 0.6044093370437622, + "step": 917 + }, + { + "epoch": 0.26846030121362774, + "grad_norm": 1.284073365031053, + "learning_rate": 1.9616074191249833e-05, + "loss": 0.6399786472320557, + "step": 918 + }, + { + "epoch": 0.2687527416288931, + "grad_norm": 1.4810486497659208, + "learning_rate": 1.961474628454679e-05, + "loss": 0.6769053339958191, + "step": 919 + }, + { + "epoch": 0.2690451820441585, + "grad_norm": 1.3650368498715015, + "learning_rate": 1.961341613045526e-05, + "loss": 0.7508189678192139, + "step": 920 + }, + { + "epoch": 0.2693376224594239, + "grad_norm": 1.3260194970823536, + "learning_rate": 1.9612083729286164e-05, + "loss": 0.728675365447998, + "step": 921 + }, + { + "epoch": 0.26963006287468927, + "grad_norm": 1.241243201070507, + "learning_rate": 1.9610749081350934e-05, + "loss": 0.6886277794837952, + "step": 922 + }, + { + "epoch": 0.26992250328995465, + "grad_norm": 1.272552251820391, + "learning_rate": 1.9609412186961542e-05, + "loss": 0.6756877899169922, + "step": 923 + }, + { + "epoch": 0.27021494370522003, + "grad_norm": 1.3464083414999921, + "learning_rate": 1.960807304643048e-05, + "loss": 0.6761744022369385, + 
"step": 924 + }, + { + "epoch": 0.2705073841204855, + "grad_norm": 1.3141872927798783, + "learning_rate": 1.9606731660070758e-05, + "loss": 0.6475736498832703, + "step": 925 + }, + { + "epoch": 0.27079982453575085, + "grad_norm": 1.2576667239396297, + "learning_rate": 1.9605388028195922e-05, + "loss": 0.6169984936714172, + "step": 926 + }, + { + "epoch": 0.27109226495101624, + "grad_norm": 1.36667119537221, + "learning_rate": 1.9604042151120035e-05, + "loss": 0.6411685943603516, + "step": 927 + }, + { + "epoch": 0.2713847053662816, + "grad_norm": 1.203794827188605, + "learning_rate": 1.960269402915769e-05, + "loss": 0.6802625060081482, + "step": 928 + }, + { + "epoch": 0.271677145781547, + "grad_norm": 1.1204382547238934, + "learning_rate": 1.9601343662624e-05, + "loss": 0.6321320533752441, + "step": 929 + }, + { + "epoch": 0.2719695861968124, + "grad_norm": 1.1836254946940896, + "learning_rate": 1.959999105183461e-05, + "loss": 0.6242578029632568, + "step": 930 + }, + { + "epoch": 0.27226202661207777, + "grad_norm": 1.3574626937776866, + "learning_rate": 1.9598636197105672e-05, + "loss": 0.8106271624565125, + "step": 931 + }, + { + "epoch": 0.2725544670273432, + "grad_norm": 1.3336233570386715, + "learning_rate": 1.9597279098753893e-05, + "loss": 0.6810879707336426, + "step": 932 + }, + { + "epoch": 0.2728469074426086, + "grad_norm": 1.4182604377271, + "learning_rate": 1.959591975709647e-05, + "loss": 0.6121781468391418, + "step": 933 + }, + { + "epoch": 0.27313934785787397, + "grad_norm": 1.3855646528211634, + "learning_rate": 1.9594558172451153e-05, + "loss": 0.7347930669784546, + "step": 934 + }, + { + "epoch": 0.27343178827313935, + "grad_norm": 1.7726573891466724, + "learning_rate": 1.9593194345136196e-05, + "loss": 0.8280940651893616, + "step": 935 + }, + { + "epoch": 0.27372422868840474, + "grad_norm": 1.7069126445705718, + "learning_rate": 1.959182827547039e-05, + "loss": 0.8171218633651733, + "step": 936 + }, + { + "epoch": 0.2740166691036701, + 
"grad_norm": 1.5519639216005559, + "learning_rate": 1.9590459963773043e-05, + "loss": 0.7350337505340576, + "step": 937 + }, + { + "epoch": 0.2743091095189355, + "grad_norm": 1.2380635233009907, + "learning_rate": 1.9589089410363992e-05, + "loss": 0.5648026466369629, + "step": 938 + }, + { + "epoch": 0.2746015499342009, + "grad_norm": 1.2184482229154892, + "learning_rate": 1.9587716615563592e-05, + "loss": 0.630626916885376, + "step": 939 + }, + { + "epoch": 0.2748939903494663, + "grad_norm": 1.247434869071023, + "learning_rate": 1.9586341579692728e-05, + "loss": 0.658649206161499, + "step": 940 + }, + { + "epoch": 0.2751864307647317, + "grad_norm": 1.3583264773002954, + "learning_rate": 1.9584964303072804e-05, + "loss": 0.6938339471817017, + "step": 941 + }, + { + "epoch": 0.2754788711799971, + "grad_norm": 1.2844871691004516, + "learning_rate": 1.9583584786025755e-05, + "loss": 0.7124238014221191, + "step": 942 + }, + { + "epoch": 0.27577131159526247, + "grad_norm": 1.295461976555009, + "learning_rate": 1.9582203028874027e-05, + "loss": 0.5879669189453125, + "step": 943 + }, + { + "epoch": 0.27606375201052785, + "grad_norm": 1.3092326597229536, + "learning_rate": 1.9580819031940605e-05, + "loss": 0.6169895529747009, + "step": 944 + }, + { + "epoch": 0.27635619242579323, + "grad_norm": 1.3408083006486937, + "learning_rate": 1.9579432795548986e-05, + "loss": 0.6367429494857788, + "step": 945 + }, + { + "epoch": 0.2766486328410586, + "grad_norm": 1.294470969807804, + "learning_rate": 1.9578044320023195e-05, + "loss": 0.6198331117630005, + "step": 946 + }, + { + "epoch": 0.276941073256324, + "grad_norm": 1.2934388501492589, + "learning_rate": 1.9576653605687782e-05, + "loss": 0.6731230616569519, + "step": 947 + }, + { + "epoch": 0.27723351367158944, + "grad_norm": 1.3743119206413423, + "learning_rate": 1.957526065286781e-05, + "loss": 0.7185516953468323, + "step": 948 + }, + { + "epoch": 0.2775259540868548, + "grad_norm": 1.5124791251983178, + "learning_rate": 
1.9573865461888882e-05, + "loss": 0.7362357378005981, + "step": 949 + }, + { + "epoch": 0.2778183945021202, + "grad_norm": 1.481999625276378, + "learning_rate": 1.9572468033077113e-05, + "loss": 0.7051525712013245, + "step": 950 + }, + { + "epoch": 0.2781108349173856, + "grad_norm": 1.3167000079730038, + "learning_rate": 1.9571068366759143e-05, + "loss": 0.6267420053482056, + "step": 951 + }, + { + "epoch": 0.27840327533265097, + "grad_norm": 1.4667668035632615, + "learning_rate": 1.9569666463262136e-05, + "loss": 0.649080753326416, + "step": 952 + }, + { + "epoch": 0.27869571574791635, + "grad_norm": 1.1940294879505342, + "learning_rate": 1.9568262322913777e-05, + "loss": 0.5700061321258545, + "step": 953 + }, + { + "epoch": 0.27898815616318173, + "grad_norm": 1.21562106075719, + "learning_rate": 1.9566855946042274e-05, + "loss": 0.6121870875358582, + "step": 954 + }, + { + "epoch": 0.2792805965784471, + "grad_norm": 1.3828404656512372, + "learning_rate": 1.9565447332976362e-05, + "loss": 0.8294541239738464, + "step": 955 + }, + { + "epoch": 0.27957303699371255, + "grad_norm": 1.2953263908127255, + "learning_rate": 1.9564036484045295e-05, + "loss": 0.6979323625564575, + "step": 956 + }, + { + "epoch": 0.27986547740897794, + "grad_norm": 1.4787353970640398, + "learning_rate": 1.9562623399578853e-05, + "loss": 0.6847009658813477, + "step": 957 + }, + { + "epoch": 0.2801579178242433, + "grad_norm": 1.174633661295302, + "learning_rate": 1.956120807990733e-05, + "loss": 0.6821733713150024, + "step": 958 + }, + { + "epoch": 0.2804503582395087, + "grad_norm": 1.2766608312969014, + "learning_rate": 1.955979052536155e-05, + "loss": 0.6943963766098022, + "step": 959 + }, + { + "epoch": 0.2807427986547741, + "grad_norm": 1.6283703947702834, + "learning_rate": 1.955837073627286e-05, + "loss": 0.5841893553733826, + "step": 960 + }, + { + "epoch": 0.28103523907003947, + "grad_norm": 1.4526296199919857, + "learning_rate": 1.955694871297313e-05, + "loss": 0.7196778059005737, + 
"step": 961 + }, + { + "epoch": 0.28132767948530485, + "grad_norm": 1.3568922084457422, + "learning_rate": 1.9555524455794743e-05, + "loss": 0.697501540184021, + "step": 962 + }, + { + "epoch": 0.28162011990057023, + "grad_norm": 1.3269336256780513, + "learning_rate": 1.9554097965070612e-05, + "loss": 0.7265810966491699, + "step": 963 + }, + { + "epoch": 0.28191256031583567, + "grad_norm": 1.1794879937673313, + "learning_rate": 1.955266924113417e-05, + "loss": 0.5766021013259888, + "step": 964 + }, + { + "epoch": 0.28220500073110105, + "grad_norm": 1.1486001787824904, + "learning_rate": 1.955123828431938e-05, + "loss": 0.6885402202606201, + "step": 965 + }, + { + "epoch": 0.28249744114636643, + "grad_norm": 1.4093622546586522, + "learning_rate": 1.954980509496071e-05, + "loss": 0.719329297542572, + "step": 966 + }, + { + "epoch": 0.2827898815616318, + "grad_norm": 1.1657877260705576, + "learning_rate": 1.954836967339316e-05, + "loss": 0.5621368885040283, + "step": 967 + }, + { + "epoch": 0.2830823219768972, + "grad_norm": 1.4684107409650433, + "learning_rate": 1.954693201995226e-05, + "loss": 0.6323715448379517, + "step": 968 + }, + { + "epoch": 0.2833747623921626, + "grad_norm": 1.1727530946898588, + "learning_rate": 1.954549213497404e-05, + "loss": 0.6265028119087219, + "step": 969 + }, + { + "epoch": 0.28366720280742797, + "grad_norm": 1.2740242277637046, + "learning_rate": 1.9544050018795076e-05, + "loss": 0.6234713792800903, + "step": 970 + }, + { + "epoch": 0.2839596432226934, + "grad_norm": 1.2342517719802, + "learning_rate": 1.9542605671752447e-05, + "loss": 0.6505804657936096, + "step": 971 + }, + { + "epoch": 0.2842520836379588, + "grad_norm": 1.408353713096739, + "learning_rate": 1.954115909418376e-05, + "loss": 0.7756558656692505, + "step": 972 + }, + { + "epoch": 0.28454452405322417, + "grad_norm": 1.4275947350210108, + "learning_rate": 1.953971028642715e-05, + "loss": 0.767257034778595, + "step": 973 + }, + { + "epoch": 0.28483696446848955, + 
"grad_norm": 1.5164327383088176, + "learning_rate": 1.9538259248821265e-05, + "loss": 0.6702018976211548, + "step": 974 + }, + { + "epoch": 0.28512940488375493, + "grad_norm": 1.5385088670888984, + "learning_rate": 1.953680598170527e-05, + "loss": 0.7072827816009521, + "step": 975 + }, + { + "epoch": 0.2854218452990203, + "grad_norm": 1.4449259987675327, + "learning_rate": 1.953535048541886e-05, + "loss": 0.6343571543693542, + "step": 976 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 1.2668558478543779, + "learning_rate": 1.953389276030225e-05, + "loss": 0.6361520290374756, + "step": 977 + }, + { + "epoch": 0.2860067261295511, + "grad_norm": 1.144363699587152, + "learning_rate": 1.9532432806696178e-05, + "loss": 0.6757364273071289, + "step": 978 + }, + { + "epoch": 0.2862991665448165, + "grad_norm": 1.2373799950730142, + "learning_rate": 1.9530970624941896e-05, + "loss": 0.6311759948730469, + "step": 979 + }, + { + "epoch": 0.2865916069600819, + "grad_norm": 1.3327233434420644, + "learning_rate": 1.9529506215381176e-05, + "loss": 0.6207036972045898, + "step": 980 + }, + { + "epoch": 0.2868840473753473, + "grad_norm": 1.182706201187961, + "learning_rate": 1.952803957835632e-05, + "loss": 0.5154495239257812, + "step": 981 + }, + { + "epoch": 0.28717648779061267, + "grad_norm": 1.4885508278374788, + "learning_rate": 1.9526570714210146e-05, + "loss": 0.797666072845459, + "step": 982 + }, + { + "epoch": 0.28746892820587805, + "grad_norm": 1.5013519512468485, + "learning_rate": 1.9525099623285983e-05, + "loss": 0.659400224685669, + "step": 983 + }, + { + "epoch": 0.28776136862114343, + "grad_norm": 1.565667149921291, + "learning_rate": 1.9523626305927706e-05, + "loss": 0.7638698816299438, + "step": 984 + }, + { + "epoch": 0.2880538090364088, + "grad_norm": 1.282540952352899, + "learning_rate": 1.952215076247968e-05, + "loss": 0.6656497120857239, + "step": 985 + }, + { + "epoch": 0.2883462494516742, + "grad_norm": 1.6004320535828411, + "learning_rate": 
1.9520672993286807e-05, + "loss": 0.7701614499092102, + "step": 986 + }, + { + "epoch": 0.28863868986693964, + "grad_norm": 1.4907110687279852, + "learning_rate": 1.951919299869451e-05, + "loss": 0.6710221767425537, + "step": 987 + }, + { + "epoch": 0.288931130282205, + "grad_norm": 1.3912460639172692, + "learning_rate": 1.951771077904873e-05, + "loss": 0.6307191848754883, + "step": 988 + }, + { + "epoch": 0.2892235706974704, + "grad_norm": 1.5585350101159294, + "learning_rate": 1.951622633469592e-05, + "loss": 0.8226636648178101, + "step": 989 + }, + { + "epoch": 0.2895160111127358, + "grad_norm": 1.3925257650330547, + "learning_rate": 1.9514739665983065e-05, + "loss": 0.6286089420318604, + "step": 990 + }, + { + "epoch": 0.28980845152800117, + "grad_norm": 1.3766260212895336, + "learning_rate": 1.9513250773257667e-05, + "loss": 0.8167316317558289, + "step": 991 + }, + { + "epoch": 0.29010089194326655, + "grad_norm": 1.3082034964893225, + "learning_rate": 1.9511759656867738e-05, + "loss": 0.6840806603431702, + "step": 992 + }, + { + "epoch": 0.29039333235853193, + "grad_norm": 4.707433700267527, + "learning_rate": 1.9510266317161823e-05, + "loss": 0.5731699466705322, + "step": 993 + }, + { + "epoch": 0.2906857727737973, + "grad_norm": 1.179743170686313, + "learning_rate": 1.950877075448898e-05, + "loss": 0.696578860282898, + "step": 994 + }, + { + "epoch": 0.29097821318906275, + "grad_norm": 1.28092562469002, + "learning_rate": 1.9507272969198787e-05, + "loss": 0.7194398641586304, + "step": 995 + }, + { + "epoch": 0.29127065360432813, + "grad_norm": 1.7406610068492592, + "learning_rate": 1.9505772961641342e-05, + "loss": 0.7041016817092896, + "step": 996 + }, + { + "epoch": 0.2915630940195935, + "grad_norm": 1.2586308004321554, + "learning_rate": 1.9504270732167267e-05, + "loss": 0.7073841691017151, + "step": 997 + }, + { + "epoch": 0.2918555344348589, + "grad_norm": 1.204085782896564, + "learning_rate": 1.9502766281127693e-05, + "loss": 0.5097789764404297, + 
"step": 998 + }, + { + "epoch": 0.2921479748501243, + "grad_norm": 1.1340482101200409, + "learning_rate": 1.9501259608874276e-05, + "loss": 0.6522337198257446, + "step": 999 + }, + { + "epoch": 0.29244041526538966, + "grad_norm": 1.2639457143948831, + "learning_rate": 1.9499750715759197e-05, + "loss": 0.8276036381721497, + "step": 1000 + }, + { + "epoch": 0.29273285568065505, + "grad_norm": 1.3336888124261281, + "learning_rate": 1.9498239602135145e-05, + "loss": 0.7701225876808167, + "step": 1001 + }, + { + "epoch": 0.29302529609592043, + "grad_norm": 1.4216994028606598, + "learning_rate": 1.949672626835534e-05, + "loss": 0.6112316846847534, + "step": 1002 + }, + { + "epoch": 0.29331773651118587, + "grad_norm": 1.5055133598944146, + "learning_rate": 1.9495210714773506e-05, + "loss": 0.7196093201637268, + "step": 1003 + }, + { + "epoch": 0.29361017692645125, + "grad_norm": 1.3102459642638802, + "learning_rate": 1.9493692941743903e-05, + "loss": 0.708210825920105, + "step": 1004 + }, + { + "epoch": 0.29390261734171663, + "grad_norm": 1.1576562552023075, + "learning_rate": 1.9492172949621298e-05, + "loss": 0.6156430244445801, + "step": 1005 + }, + { + "epoch": 0.294195057756982, + "grad_norm": 1.3177580817558727, + "learning_rate": 1.9490650738760977e-05, + "loss": 0.6125216484069824, + "step": 1006 + }, + { + "epoch": 0.2944874981722474, + "grad_norm": 1.5792615772910776, + "learning_rate": 1.9489126309518752e-05, + "loss": 0.5691695213317871, + "step": 1007 + }, + { + "epoch": 0.2947799385875128, + "grad_norm": 1.2458453862912673, + "learning_rate": 1.9487599662250945e-05, + "loss": 0.6733062267303467, + "step": 1008 + }, + { + "epoch": 0.29507237900277816, + "grad_norm": 1.3579820847813902, + "learning_rate": 1.94860707973144e-05, + "loss": 0.6069025993347168, + "step": 1009 + }, + { + "epoch": 0.2953648194180436, + "grad_norm": 1.3771790647505693, + "learning_rate": 1.9484539715066488e-05, + "loss": 0.6191028356552124, + "step": 1010 + }, + { + "epoch": 
0.295657259833309, + "grad_norm": 1.3927395620788336, + "learning_rate": 1.9483006415865082e-05, + "loss": 0.7423045635223389, + "step": 1011 + }, + { + "epoch": 0.29594970024857437, + "grad_norm": 1.584259935283413, + "learning_rate": 1.9481470900068585e-05, + "loss": 0.854878306388855, + "step": 1012 + }, + { + "epoch": 0.29624214066383975, + "grad_norm": 1.3274147652805814, + "learning_rate": 1.9479933168035914e-05, + "loss": 0.6950500011444092, + "step": 1013 + }, + { + "epoch": 0.29653458107910513, + "grad_norm": 1.2664754529699496, + "learning_rate": 1.9478393220126503e-05, + "loss": 0.6944484710693359, + "step": 1014 + }, + { + "epoch": 0.2968270214943705, + "grad_norm": 1.3385070796010239, + "learning_rate": 1.9476851056700303e-05, + "loss": 0.7120212316513062, + "step": 1015 + }, + { + "epoch": 0.2971194619096359, + "grad_norm": 1.2818173555684258, + "learning_rate": 1.9475306678117792e-05, + "loss": 0.6271052956581116, + "step": 1016 + }, + { + "epoch": 0.2974119023249013, + "grad_norm": 1.386949235285712, + "learning_rate": 1.9473760084739958e-05, + "loss": 0.6398453712463379, + "step": 1017 + }, + { + "epoch": 0.2977043427401667, + "grad_norm": 1.440440679973054, + "learning_rate": 1.94722112769283e-05, + "loss": 0.5563585758209229, + "step": 1018 + }, + { + "epoch": 0.2979967831554321, + "grad_norm": 1.2637928746894573, + "learning_rate": 1.947066025504485e-05, + "loss": 0.7895959615707397, + "step": 1019 + }, + { + "epoch": 0.2982892235706975, + "grad_norm": 1.2684661754258477, + "learning_rate": 1.9469107019452148e-05, + "loss": 0.6304349303245544, + "step": 1020 + }, + { + "epoch": 0.29858166398596286, + "grad_norm": 1.4493096125993807, + "learning_rate": 1.9467551570513257e-05, + "loss": 0.6915549039840698, + "step": 1021 + }, + { + "epoch": 0.29887410440122825, + "grad_norm": 1.2593652754748748, + "learning_rate": 1.9465993908591748e-05, + "loss": 0.6257511377334595, + "step": 1022 + }, + { + "epoch": 0.29916654481649363, + "grad_norm": 
1.4075585450481771, + "learning_rate": 1.9464434034051716e-05, + "loss": 0.6409085988998413, + "step": 1023 + }, + { + "epoch": 0.299458985231759, + "grad_norm": 1.358442522813864, + "learning_rate": 1.9462871947257772e-05, + "loss": 0.7281351089477539, + "step": 1024 + }, + { + "epoch": 0.2997514256470244, + "grad_norm": 1.441690145181621, + "learning_rate": 1.9461307648575047e-05, + "loss": 0.8016781806945801, + "step": 1025 + }, + { + "epoch": 0.30004386606228983, + "grad_norm": 1.2844064559637345, + "learning_rate": 1.9459741138369186e-05, + "loss": 0.5883209705352783, + "step": 1026 + }, + { + "epoch": 0.3003363064775552, + "grad_norm": 1.674320224055934, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.6414197683334351, + "step": 1027 + }, + { + "epoch": 0.3006287468928206, + "grad_norm": 1.465437904752509, + "learning_rate": 1.9456601484853218e-05, + "loss": 0.7076515555381775, + "step": 1028 + }, + { + "epoch": 0.300921187308086, + "grad_norm": 1.4091861442316225, + "learning_rate": 1.9455028342276984e-05, + "loss": 0.8102637529373169, + "step": 1029 + }, + { + "epoch": 0.30121362772335136, + "grad_norm": 1.3935099692215975, + "learning_rate": 1.9453452989645362e-05, + "loss": 0.6954574584960938, + "step": 1030 + }, + { + "epoch": 0.30150606813861675, + "grad_norm": 1.1912974865854908, + "learning_rate": 1.9451875427326585e-05, + "loss": 0.6647125482559204, + "step": 1031 + }, + { + "epoch": 0.3017985085538821, + "grad_norm": 1.2637381593470247, + "learning_rate": 1.9450295655689392e-05, + "loss": 0.5501933097839355, + "step": 1032 + }, + { + "epoch": 0.3020909489691475, + "grad_norm": 1.1642394496276798, + "learning_rate": 1.944871367510305e-05, + "loss": 0.6561415195465088, + "step": 1033 + }, + { + "epoch": 0.30238338938441295, + "grad_norm": 1.2818557575199787, + "learning_rate": 1.9447129485937335e-05, + "loss": 0.6768229007720947, + "step": 1034 + }, + { + "epoch": 0.30267582979967833, + "grad_norm": 1.229414584528048, + "learning_rate": 
1.9445543088562543e-05, + "loss": 0.5693868398666382, + "step": 1035 + }, + { + "epoch": 0.3029682702149437, + "grad_norm": 1.197937800783061, + "learning_rate": 1.9443954483349485e-05, + "loss": 0.6165708303451538, + "step": 1036 + }, + { + "epoch": 0.3032607106302091, + "grad_norm": 1.0808504567320436, + "learning_rate": 1.944236367066948e-05, + "loss": 0.6116082668304443, + "step": 1037 + }, + { + "epoch": 0.3035531510454745, + "grad_norm": 1.2481100676234638, + "learning_rate": 1.9440770650894384e-05, + "loss": 0.7027714848518372, + "step": 1038 + }, + { + "epoch": 0.30384559146073986, + "grad_norm": 1.3613368127158991, + "learning_rate": 1.943917542439655e-05, + "loss": 0.7339189052581787, + "step": 1039 + }, + { + "epoch": 0.30413803187600524, + "grad_norm": 1.322856585416547, + "learning_rate": 1.943757799154885e-05, + "loss": 0.7975895404815674, + "step": 1040 + }, + { + "epoch": 0.3044304722912706, + "grad_norm": 1.2603507441667385, + "learning_rate": 1.9435978352724673e-05, + "loss": 0.6421841382980347, + "step": 1041 + }, + { + "epoch": 0.30472291270653606, + "grad_norm": 1.3017046883641064, + "learning_rate": 1.943437650829793e-05, + "loss": 0.6731791496276855, + "step": 1042 + }, + { + "epoch": 0.30501535312180145, + "grad_norm": 1.224211690521448, + "learning_rate": 1.943277245864304e-05, + "loss": 0.7008551359176636, + "step": 1043 + }, + { + "epoch": 0.30530779353706683, + "grad_norm": 1.2549197569852149, + "learning_rate": 1.943116620413494e-05, + "loss": 0.6777141094207764, + "step": 1044 + }, + { + "epoch": 0.3056002339523322, + "grad_norm": 1.258074600817151, + "learning_rate": 1.9429557745149084e-05, + "loss": 0.7649033069610596, + "step": 1045 + }, + { + "epoch": 0.3058926743675976, + "grad_norm": 1.2626508350830759, + "learning_rate": 1.9427947082061432e-05, + "loss": 0.6460477709770203, + "step": 1046 + }, + { + "epoch": 0.306185114782863, + "grad_norm": 1.3748035809258794, + "learning_rate": 1.942633421524848e-05, + "loss": 
0.5939697623252869, + "step": 1047 + }, + { + "epoch": 0.30647755519812836, + "grad_norm": 1.3696807292374817, + "learning_rate": 1.9424719145087216e-05, + "loss": 0.606407880783081, + "step": 1048 + }, + { + "epoch": 0.3067699956133938, + "grad_norm": 1.2114201905625201, + "learning_rate": 1.9423101871955153e-05, + "loss": 0.5515298843383789, + "step": 1049 + }, + { + "epoch": 0.3070624360286592, + "grad_norm": 1.4449996700249255, + "learning_rate": 1.942148239623032e-05, + "loss": 0.7397217154502869, + "step": 1050 + }, + { + "epoch": 0.30735487644392456, + "grad_norm": 1.708533630902304, + "learning_rate": 1.9419860718291265e-05, + "loss": 0.6397782564163208, + "step": 1051 + }, + { + "epoch": 0.30764731685918995, + "grad_norm": 1.1946031757535738, + "learning_rate": 1.9418236838517036e-05, + "loss": 0.589732825756073, + "step": 1052 + }, + { + "epoch": 0.30793975727445533, + "grad_norm": 1.4196894685331136, + "learning_rate": 1.941661075728721e-05, + "loss": 0.7968351244926453, + "step": 1053 + }, + { + "epoch": 0.3082321976897207, + "grad_norm": 1.35500416476017, + "learning_rate": 1.9414982474981877e-05, + "loss": 0.5740514397621155, + "step": 1054 + }, + { + "epoch": 0.3085246381049861, + "grad_norm": 1.314001411398827, + "learning_rate": 1.9413351991981632e-05, + "loss": 0.656599760055542, + "step": 1055 + }, + { + "epoch": 0.3088170785202515, + "grad_norm": 1.2592244001939052, + "learning_rate": 1.9411719308667593e-05, + "loss": 0.5638262033462524, + "step": 1056 + }, + { + "epoch": 0.3091095189355169, + "grad_norm": 1.3510783569743914, + "learning_rate": 1.9410084425421392e-05, + "loss": 0.6391294002532959, + "step": 1057 + }, + { + "epoch": 0.3094019593507823, + "grad_norm": 1.300451628146748, + "learning_rate": 1.9408447342625167e-05, + "loss": 0.7109906077384949, + "step": 1058 + }, + { + "epoch": 0.3096943997660477, + "grad_norm": 1.35271058872007, + "learning_rate": 1.9406808060661583e-05, + "loss": 0.6922626495361328, + "step": 1059 + }, + { + 
"epoch": 0.30998684018131306, + "grad_norm": 1.3729160813047252, + "learning_rate": 1.9405166579913808e-05, + "loss": 0.6708151698112488, + "step": 1060 + }, + { + "epoch": 0.31027928059657844, + "grad_norm": 1.3049592711968918, + "learning_rate": 1.940352290076553e-05, + "loss": 0.6259905099868774, + "step": 1061 + }, + { + "epoch": 0.3105717210118438, + "grad_norm": 1.3047971530530311, + "learning_rate": 1.940187702360095e-05, + "loss": 0.6590703725814819, + "step": 1062 + }, + { + "epoch": 0.3108641614271092, + "grad_norm": 1.5136066296614852, + "learning_rate": 1.9400228948804777e-05, + "loss": 0.7371482849121094, + "step": 1063 + }, + { + "epoch": 0.3111566018423746, + "grad_norm": 1.3637094061000257, + "learning_rate": 1.9398578676762243e-05, + "loss": 0.6954984664916992, + "step": 1064 + }, + { + "epoch": 0.31144904225764003, + "grad_norm": 1.197618668709007, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.604501485824585, + "step": 1065 + }, + { + "epoch": 0.3117414826729054, + "grad_norm": 1.4637648544146704, + "learning_rate": 1.939527154248156e-05, + "loss": 0.7580305337905884, + "step": 1066 + }, + { + "epoch": 0.3120339230881708, + "grad_norm": 1.2774221611024956, + "learning_rate": 1.9393614681016443e-05, + "loss": 0.5996969938278198, + "step": 1067 + }, + { + "epoch": 0.3123263635034362, + "grad_norm": 1.2247945329694363, + "learning_rate": 1.9391955623851e-05, + "loss": 0.5939687490463257, + "step": 1068 + }, + { + "epoch": 0.31261880391870156, + "grad_norm": 1.2833481425507127, + "learning_rate": 1.939029437137304e-05, + "loss": 0.6194947957992554, + "step": 1069 + }, + { + "epoch": 0.31291124433396694, + "grad_norm": 1.406800587144287, + "learning_rate": 1.9388630923970862e-05, + "loss": 0.7419420480728149, + "step": 1070 + }, + { + "epoch": 0.3132036847492323, + "grad_norm": 1.4290715744520364, + "learning_rate": 1.938696528203329e-05, + "loss": 0.6950613856315613, + "step": 1071 + }, + { + "epoch": 0.3134961251644977, + "grad_norm": 
1.542135386244918, + "learning_rate": 1.9385297445949657e-05, + "loss": 0.7376282215118408, + "step": 1072 + }, + { + "epoch": 0.31378856557976315, + "grad_norm": 1.4197281288148755, + "learning_rate": 1.938362741610981e-05, + "loss": 0.800892174243927, + "step": 1073 + }, + { + "epoch": 0.31408100599502853, + "grad_norm": 1.3238571566647774, + "learning_rate": 1.938195519290411e-05, + "loss": 0.5747013688087463, + "step": 1074 + }, + { + "epoch": 0.3143734464102939, + "grad_norm": 1.3986418419585354, + "learning_rate": 1.9380280776723422e-05, + "loss": 0.7341697216033936, + "step": 1075 + }, + { + "epoch": 0.3146658868255593, + "grad_norm": 1.2703636347468634, + "learning_rate": 1.9378604167959138e-05, + "loss": 0.6229791641235352, + "step": 1076 + }, + { + "epoch": 0.3149583272408247, + "grad_norm": 1.2978515497126824, + "learning_rate": 1.937692536700315e-05, + "loss": 0.7266645431518555, + "step": 1077 + }, + { + "epoch": 0.31525076765609006, + "grad_norm": 1.1885775086050685, + "learning_rate": 1.937524437424787e-05, + "loss": 0.6163127422332764, + "step": 1078 + }, + { + "epoch": 0.31554320807135544, + "grad_norm": 1.3182925237610392, + "learning_rate": 1.9373561190086225e-05, + "loss": 0.6609925031661987, + "step": 1079 + }, + { + "epoch": 0.3158356484866208, + "grad_norm": 1.305005533703013, + "learning_rate": 1.937187581491164e-05, + "loss": 0.7157741785049438, + "step": 1080 + }, + { + "epoch": 0.31612808890188626, + "grad_norm": 1.286980397276467, + "learning_rate": 1.937018824911807e-05, + "loss": 0.6486212015151978, + "step": 1081 + }, + { + "epoch": 0.31642052931715164, + "grad_norm": 1.3383942599839993, + "learning_rate": 1.9368498493099963e-05, + "loss": 0.6931928396224976, + "step": 1082 + }, + { + "epoch": 0.316712969732417, + "grad_norm": 1.3241410530363713, + "learning_rate": 1.9366806547252295e-05, + "loss": 0.9291354417800903, + "step": 1083 + }, + { + "epoch": 0.3170054101476824, + "grad_norm": 1.247174604159187, + "learning_rate": 
1.936511241197055e-05, + "loss": 0.6276642084121704, + "step": 1084 + }, + { + "epoch": 0.3172978505629478, + "grad_norm": 1.5622111951472988, + "learning_rate": 1.936341608765072e-05, + "loss": 0.7836581468582153, + "step": 1085 + }, + { + "epoch": 0.3175902909782132, + "grad_norm": 1.3508415072117352, + "learning_rate": 1.9361717574689308e-05, + "loss": 0.6785084009170532, + "step": 1086 + }, + { + "epoch": 0.31788273139347856, + "grad_norm": 1.2099617708073434, + "learning_rate": 1.936001687348333e-05, + "loss": 0.5715218782424927, + "step": 1087 + }, + { + "epoch": 0.318175171808744, + "grad_norm": 1.4697249093694587, + "learning_rate": 1.9358313984430324e-05, + "loss": 0.8417775630950928, + "step": 1088 + }, + { + "epoch": 0.3184676122240094, + "grad_norm": 1.2670961493626953, + "learning_rate": 1.935660890792832e-05, + "loss": 0.598076343536377, + "step": 1089 + }, + { + "epoch": 0.31876005263927476, + "grad_norm": 1.1923598198798329, + "learning_rate": 1.9354901644375876e-05, + "loss": 0.5830154418945312, + "step": 1090 + }, + { + "epoch": 0.31905249305454014, + "grad_norm": 1.307163759376097, + "learning_rate": 1.935319219417205e-05, + "loss": 0.5746437311172485, + "step": 1091 + }, + { + "epoch": 0.3193449334698055, + "grad_norm": 1.1091668518622428, + "learning_rate": 1.9351480557716414e-05, + "loss": 0.5520191788673401, + "step": 1092 + }, + { + "epoch": 0.3196373738850709, + "grad_norm": 1.218411442512637, + "learning_rate": 1.9349766735409058e-05, + "loss": 0.5847123861312866, + "step": 1093 + }, + { + "epoch": 0.3199298143003363, + "grad_norm": 1.568693203893066, + "learning_rate": 1.9348050727650577e-05, + "loss": 0.7390924692153931, + "step": 1094 + }, + { + "epoch": 0.3202222547156017, + "grad_norm": 1.4427842898109178, + "learning_rate": 1.9346332534842074e-05, + "loss": 0.5812145471572876, + "step": 1095 + }, + { + "epoch": 0.3205146951308671, + "grad_norm": 1.615769461575852, + "learning_rate": 1.9344612157385166e-05, + "loss": 
0.6958816647529602, + "step": 1096 + }, + { + "epoch": 0.3208071355461325, + "grad_norm": 1.3481467288956208, + "learning_rate": 1.9342889595681986e-05, + "loss": 0.5618177652359009, + "step": 1097 + }, + { + "epoch": 0.3210995759613979, + "grad_norm": 1.2846515235734224, + "learning_rate": 1.9341164850135163e-05, + "loss": 0.6099411845207214, + "step": 1098 + }, + { + "epoch": 0.32139201637666326, + "grad_norm": 1.1242331249756639, + "learning_rate": 1.9339437921147854e-05, + "loss": 0.6772094964981079, + "step": 1099 + }, + { + "epoch": 0.32168445679192864, + "grad_norm": 1.4006184046576602, + "learning_rate": 1.9337708809123718e-05, + "loss": 0.6916643381118774, + "step": 1100 + }, + { + "epoch": 0.321976897207194, + "grad_norm": 1.199381767960838, + "learning_rate": 1.933597751446692e-05, + "loss": 0.5716762542724609, + "step": 1101 + }, + { + "epoch": 0.3222693376224594, + "grad_norm": 1.511781401125701, + "learning_rate": 1.9334244037582143e-05, + "loss": 0.68224036693573, + "step": 1102 + }, + { + "epoch": 0.3225617780377248, + "grad_norm": 1.3199204633429549, + "learning_rate": 1.933250837887457e-05, + "loss": 0.6888231635093689, + "step": 1103 + }, + { + "epoch": 0.3228542184529902, + "grad_norm": 1.4809797608653643, + "learning_rate": 1.933077053874991e-05, + "loss": 0.6469036340713501, + "step": 1104 + }, + { + "epoch": 0.3231466588682556, + "grad_norm": 1.5099365665086963, + "learning_rate": 1.932903051761437e-05, + "loss": 0.6202501058578491, + "step": 1105 + }, + { + "epoch": 0.323439099283521, + "grad_norm": 1.454362918518285, + "learning_rate": 1.932728831587467e-05, + "loss": 0.6041314601898193, + "step": 1106 + }, + { + "epoch": 0.3237315396987864, + "grad_norm": 1.3479422917529533, + "learning_rate": 1.9325543933938034e-05, + "loss": 0.7081667184829712, + "step": 1107 + }, + { + "epoch": 0.32402398011405176, + "grad_norm": 1.477559211803618, + "learning_rate": 1.9323797372212204e-05, + "loss": 0.7743494510650635, + "step": 1108 + }, + { + 
"epoch": 0.32431642052931714, + "grad_norm": 1.3188148010775738, + "learning_rate": 1.9322048631105428e-05, + "loss": 0.6122584342956543, + "step": 1109 + }, + { + "epoch": 0.3246088609445825, + "grad_norm": 1.279178726850882, + "learning_rate": 1.932029771102646e-05, + "loss": 0.6106122732162476, + "step": 1110 + }, + { + "epoch": 0.3249013013598479, + "grad_norm": 1.1897376224269591, + "learning_rate": 1.9318544612384572e-05, + "loss": 0.5082784295082092, + "step": 1111 + }, + { + "epoch": 0.32519374177511334, + "grad_norm": 1.3081590787355515, + "learning_rate": 1.9316789335589542e-05, + "loss": 0.6845188140869141, + "step": 1112 + }, + { + "epoch": 0.3254861821903787, + "grad_norm": 1.343292960468675, + "learning_rate": 1.9315031881051653e-05, + "loss": 0.5972481966018677, + "step": 1113 + }, + { + "epoch": 0.3257786226056441, + "grad_norm": 1.372744387816622, + "learning_rate": 1.931327224918169e-05, + "loss": 0.6312427520751953, + "step": 1114 + }, + { + "epoch": 0.3260710630209095, + "grad_norm": 1.6334469145871557, + "learning_rate": 1.9311510440390973e-05, + "loss": 0.7904551029205322, + "step": 1115 + }, + { + "epoch": 0.3263635034361749, + "grad_norm": 1.4496533611968336, + "learning_rate": 1.9309746455091302e-05, + "loss": 0.6513646841049194, + "step": 1116 + }, + { + "epoch": 0.32665594385144026, + "grad_norm": 1.2559107839078971, + "learning_rate": 1.9307980293694997e-05, + "loss": 0.5349715948104858, + "step": 1117 + }, + { + "epoch": 0.32694838426670564, + "grad_norm": 1.122130050588245, + "learning_rate": 1.93062119566149e-05, + "loss": 0.5815087556838989, + "step": 1118 + }, + { + "epoch": 0.327240824681971, + "grad_norm": 1.3638823451289013, + "learning_rate": 1.9304441444264335e-05, + "loss": 0.6380286812782288, + "step": 1119 + }, + { + "epoch": 0.32753326509723646, + "grad_norm": 1.2646470578382853, + "learning_rate": 1.9302668757057157e-05, + "loss": 0.7222728729248047, + "step": 1120 + }, + { + "epoch": 0.32782570551250184, + "grad_norm": 
1.1611022114208025, + "learning_rate": 1.9300893895407715e-05, + "loss": 0.6262868642807007, + "step": 1121 + }, + { + "epoch": 0.3281181459277672, + "grad_norm": 1.522127361747668, + "learning_rate": 1.929911685973088e-05, + "loss": 0.6387197971343994, + "step": 1122 + }, + { + "epoch": 0.3284105863430326, + "grad_norm": 1.3879760662124887, + "learning_rate": 1.9297337650442015e-05, + "loss": 0.77378249168396, + "step": 1123 + }, + { + "epoch": 0.328703026758298, + "grad_norm": 1.3651260322738243, + "learning_rate": 1.9295556267957004e-05, + "loss": 0.7589142322540283, + "step": 1124 + }, + { + "epoch": 0.32899546717356337, + "grad_norm": 1.481283036614999, + "learning_rate": 1.9293772712692233e-05, + "loss": 0.7153090238571167, + "step": 1125 + }, + { + "epoch": 0.32928790758882875, + "grad_norm": 1.2572705841763243, + "learning_rate": 1.9291986985064595e-05, + "loss": 0.5738104581832886, + "step": 1126 + }, + { + "epoch": 0.3295803480040942, + "grad_norm": 1.2803221849130417, + "learning_rate": 1.92901990854915e-05, + "loss": 0.6530819535255432, + "step": 1127 + }, + { + "epoch": 0.3298727884193596, + "grad_norm": 1.221270033991816, + "learning_rate": 1.9288409014390854e-05, + "loss": 0.59107506275177, + "step": 1128 + }, + { + "epoch": 0.33016522883462496, + "grad_norm": 1.4658806562930384, + "learning_rate": 1.9286616772181072e-05, + "loss": 0.5798863172531128, + "step": 1129 + }, + { + "epoch": 0.33045766924989034, + "grad_norm": 1.689951660615568, + "learning_rate": 1.9284822359281085e-05, + "loss": 0.6957223415374756, + "step": 1130 + }, + { + "epoch": 0.3307501096651557, + "grad_norm": 1.3614959188818774, + "learning_rate": 1.9283025776110326e-05, + "loss": 0.6933379173278809, + "step": 1131 + }, + { + "epoch": 0.3310425500804211, + "grad_norm": 1.2712490538707164, + "learning_rate": 1.928122702308873e-05, + "loss": 0.527482271194458, + "step": 1132 + }, + { + "epoch": 0.3313349904956865, + "grad_norm": 1.2166131933862214, + "learning_rate": 
1.927942610063675e-05, + "loss": 0.7244399785995483, + "step": 1133 + }, + { + "epoch": 0.33162743091095187, + "grad_norm": 1.4636848406157517, + "learning_rate": 1.9277623009175338e-05, + "loss": 0.7881563901901245, + "step": 1134 + }, + { + "epoch": 0.3319198713262173, + "grad_norm": 1.415089568819196, + "learning_rate": 1.9275817749125956e-05, + "loss": 0.7523232698440552, + "step": 1135 + }, + { + "epoch": 0.3322123117414827, + "grad_norm": 1.5537538186729503, + "learning_rate": 1.9274010320910575e-05, + "loss": 0.7226657867431641, + "step": 1136 + }, + { + "epoch": 0.3325047521567481, + "grad_norm": 1.4230293062648038, + "learning_rate": 1.9272200724951666e-05, + "loss": 0.6461686491966248, + "step": 1137 + }, + { + "epoch": 0.33279719257201346, + "grad_norm": 1.1785466753796996, + "learning_rate": 1.9270388961672214e-05, + "loss": 0.6343599557876587, + "step": 1138 + }, + { + "epoch": 0.33308963298727884, + "grad_norm": 1.2762072218920462, + "learning_rate": 1.926857503149571e-05, + "loss": 0.5510993599891663, + "step": 1139 + }, + { + "epoch": 0.3333820734025442, + "grad_norm": 1.3887196408907312, + "learning_rate": 1.9266758934846142e-05, + "loss": 0.6022439002990723, + "step": 1140 + }, + { + "epoch": 0.3336745138178096, + "grad_norm": 1.2716403438701216, + "learning_rate": 1.9264940672148018e-05, + "loss": 0.708207368850708, + "step": 1141 + }, + { + "epoch": 0.333966954233075, + "grad_norm": 1.3137035916667523, + "learning_rate": 1.9263120243826345e-05, + "loss": 0.566935122013092, + "step": 1142 + }, + { + "epoch": 0.3342593946483404, + "grad_norm": 1.634118861015607, + "learning_rate": 1.9261297650306635e-05, + "loss": 0.6848355531692505, + "step": 1143 + }, + { + "epoch": 0.3345518350636058, + "grad_norm": 1.3058715943169161, + "learning_rate": 1.9259472892014907e-05, + "loss": 0.7335090637207031, + "step": 1144 + }, + { + "epoch": 0.3348442754788712, + "grad_norm": 1.425387946547354, + "learning_rate": 1.925764596937769e-05, + "loss": 
0.7323876023292542, + "step": 1145 + }, + { + "epoch": 0.33513671589413657, + "grad_norm": 1.377083094919456, + "learning_rate": 1.9255816882822017e-05, + "loss": 0.5564731955528259, + "step": 1146 + }, + { + "epoch": 0.33542915630940195, + "grad_norm": 1.5323554162589257, + "learning_rate": 1.925398563277542e-05, + "loss": 0.7699049711227417, + "step": 1147 + }, + { + "epoch": 0.33572159672466734, + "grad_norm": 1.4580663324783634, + "learning_rate": 1.925215221966595e-05, + "loss": 0.688602089881897, + "step": 1148 + }, + { + "epoch": 0.3360140371399327, + "grad_norm": 1.4706838569192882, + "learning_rate": 1.9250316643922153e-05, + "loss": 0.7103208899497986, + "step": 1149 + }, + { + "epoch": 0.3363064775551981, + "grad_norm": 1.2723502109555263, + "learning_rate": 1.9248478905973078e-05, + "loss": 0.6313603520393372, + "step": 1150 + }, + { + "epoch": 0.33659891797046354, + "grad_norm": 1.4985289931464978, + "learning_rate": 1.9246639006248294e-05, + "loss": 0.8420118093490601, + "step": 1151 + }, + { + "epoch": 0.3368913583857289, + "grad_norm": 1.4358130705661303, + "learning_rate": 1.9244796945177864e-05, + "loss": 0.6566640734672546, + "step": 1152 + }, + { + "epoch": 0.3371837988009943, + "grad_norm": 1.3408154011751006, + "learning_rate": 1.9242952723192357e-05, + "loss": 0.6455206274986267, + "step": 1153 + }, + { + "epoch": 0.3374762392162597, + "grad_norm": 1.3469873034007918, + "learning_rate": 1.924110634072285e-05, + "loss": 0.7348071336746216, + "step": 1154 + }, + { + "epoch": 0.33776867963152507, + "grad_norm": 1.7471975705727423, + "learning_rate": 1.9239257798200918e-05, + "loss": 0.7187973260879517, + "step": 1155 + }, + { + "epoch": 0.33806112004679045, + "grad_norm": 1.2712100505239146, + "learning_rate": 1.9237407096058655e-05, + "loss": 0.683181643486023, + "step": 1156 + }, + { + "epoch": 0.33835356046205584, + "grad_norm": 1.2445760635583791, + "learning_rate": 1.9235554234728646e-05, + "loss": 0.7296931743621826, + "step": 1157 + }, + 
{ + "epoch": 0.3386460008773212, + "grad_norm": 1.1890040509691011, + "learning_rate": 1.923369921464399e-05, + "loss": 0.6656480431556702, + "step": 1158 + }, + { + "epoch": 0.33893844129258666, + "grad_norm": 1.3248976863888173, + "learning_rate": 1.923184203623828e-05, + "loss": 0.6284874677658081, + "step": 1159 + }, + { + "epoch": 0.33923088170785204, + "grad_norm": 1.29677623825286, + "learning_rate": 1.922998269994563e-05, + "loss": 0.7065030336380005, + "step": 1160 + }, + { + "epoch": 0.3395233221231174, + "grad_norm": 1.3728212504218815, + "learning_rate": 1.9228121206200637e-05, + "loss": 0.7077580690383911, + "step": 1161 + }, + { + "epoch": 0.3398157625383828, + "grad_norm": 1.4855746260471363, + "learning_rate": 1.9226257555438428e-05, + "loss": 0.6012637615203857, + "step": 1162 + }, + { + "epoch": 0.3401082029536482, + "grad_norm": 1.304745585816947, + "learning_rate": 1.9224391748094607e-05, + "loss": 0.8166115283966064, + "step": 1163 + }, + { + "epoch": 0.34040064336891357, + "grad_norm": 1.502407347484804, + "learning_rate": 1.92225237846053e-05, + "loss": 0.6066576242446899, + "step": 1164 + }, + { + "epoch": 0.34069308378417895, + "grad_norm": 1.2739635711375565, + "learning_rate": 1.922065366540713e-05, + "loss": 0.7226361632347107, + "step": 1165 + }, + { + "epoch": 0.3409855241994444, + "grad_norm": 1.6597939275709621, + "learning_rate": 1.9218781390937233e-05, + "loss": 0.7786005139350891, + "step": 1166 + }, + { + "epoch": 0.3412779646147098, + "grad_norm": 1.4812475412632635, + "learning_rate": 1.9216906961633234e-05, + "loss": 0.6534268856048584, + "step": 1167 + }, + { + "epoch": 0.34157040502997515, + "grad_norm": 1.2208380860350694, + "learning_rate": 1.9215030377933274e-05, + "loss": 0.6048434376716614, + "step": 1168 + }, + { + "epoch": 0.34186284544524054, + "grad_norm": 1.356773734579803, + "learning_rate": 1.921315164027599e-05, + "loss": 0.8321201205253601, + "step": 1169 + }, + { + "epoch": 0.3421552858605059, + "grad_norm": 
1.5327102242092, + "learning_rate": 1.9211270749100527e-05, + "loss": 0.7142379283905029, + "step": 1170 + }, + { + "epoch": 0.3424477262757713, + "grad_norm": 1.205344060714777, + "learning_rate": 1.9209387704846535e-05, + "loss": 0.6262812614440918, + "step": 1171 + }, + { + "epoch": 0.3427401666910367, + "grad_norm": 1.4345003452190939, + "learning_rate": 1.920750250795416e-05, + "loss": 0.7242386341094971, + "step": 1172 + }, + { + "epoch": 0.34303260710630207, + "grad_norm": 1.3077522067723235, + "learning_rate": 1.9205615158864054e-05, + "loss": 0.6064128875732422, + "step": 1173 + }, + { + "epoch": 0.3433250475215675, + "grad_norm": 1.3868970600691566, + "learning_rate": 1.9203725658017374e-05, + "loss": 0.6720623970031738, + "step": 1174 + }, + { + "epoch": 0.3436174879368329, + "grad_norm": 1.3281016407079367, + "learning_rate": 1.9201834005855785e-05, + "loss": 0.745712161064148, + "step": 1175 + }, + { + "epoch": 0.34390992835209827, + "grad_norm": 1.3336156391355163, + "learning_rate": 1.9199940202821445e-05, + "loss": 0.6387969255447388, + "step": 1176 + }, + { + "epoch": 0.34420236876736365, + "grad_norm": 1.3104807608053473, + "learning_rate": 1.9198044249357018e-05, + "loss": 0.6634984612464905, + "step": 1177 + }, + { + "epoch": 0.34449480918262904, + "grad_norm": 1.0968916991502757, + "learning_rate": 1.919614614590567e-05, + "loss": 0.4732145667076111, + "step": 1178 + }, + { + "epoch": 0.3447872495978944, + "grad_norm": 1.4315145497505135, + "learning_rate": 1.9194245892911077e-05, + "loss": 0.6621897220611572, + "step": 1179 + }, + { + "epoch": 0.3450796900131598, + "grad_norm": 1.3492357768294603, + "learning_rate": 1.9192343490817412e-05, + "loss": 0.5691112279891968, + "step": 1180 + }, + { + "epoch": 0.3453721304284252, + "grad_norm": 1.5009979829344267, + "learning_rate": 1.919043894006934e-05, + "loss": 0.6326683759689331, + "step": 1181 + }, + { + "epoch": 0.3456645708436906, + "grad_norm": 1.3965991672121214, + "learning_rate": 
1.9188532241112047e-05, + "loss": 0.6068567037582397, + "step": 1182 + }, + { + "epoch": 0.345957011258956, + "grad_norm": 1.5425955582670972, + "learning_rate": 1.918662339439121e-05, + "loss": 0.707065761089325, + "step": 1183 + }, + { + "epoch": 0.3462494516742214, + "grad_norm": 1.4506511105853803, + "learning_rate": 1.9184712400353008e-05, + "loss": 0.7821887135505676, + "step": 1184 + }, + { + "epoch": 0.34654189208948677, + "grad_norm": 1.3192596730278041, + "learning_rate": 1.918279925944413e-05, + "loss": 0.6759425401687622, + "step": 1185 + }, + { + "epoch": 0.34683433250475215, + "grad_norm": 1.2819153702423505, + "learning_rate": 1.9180883972111756e-05, + "loss": 0.5660048127174377, + "step": 1186 + }, + { + "epoch": 0.34712677292001753, + "grad_norm": 1.392117573401842, + "learning_rate": 1.9178966538803574e-05, + "loss": 0.708798885345459, + "step": 1187 + }, + { + "epoch": 0.3474192133352829, + "grad_norm": 1.0828895012382165, + "learning_rate": 1.9177046959967774e-05, + "loss": 0.603208065032959, + "step": 1188 + }, + { + "epoch": 0.3477116537505483, + "grad_norm": 1.2856052178527815, + "learning_rate": 1.9175125236053043e-05, + "loss": 0.8259323835372925, + "step": 1189 + }, + { + "epoch": 0.34800409416581374, + "grad_norm": 1.2349901090123199, + "learning_rate": 1.9173201367508572e-05, + "loss": 0.573014497756958, + "step": 1190 + }, + { + "epoch": 0.3482965345810791, + "grad_norm": 1.49130421629148, + "learning_rate": 1.9171275354784062e-05, + "loss": 0.8202974200248718, + "step": 1191 + }, + { + "epoch": 0.3485889749963445, + "grad_norm": 1.313328733803151, + "learning_rate": 1.9169347198329693e-05, + "loss": 0.5352192521095276, + "step": 1192 + }, + { + "epoch": 0.3488814154116099, + "grad_norm": 1.4707600848748155, + "learning_rate": 1.916741689859617e-05, + "loss": 0.7303881645202637, + "step": 1193 + }, + { + "epoch": 0.34917385582687527, + "grad_norm": 1.136402601726834, + "learning_rate": 1.9165484456034683e-05, + "loss": 
0.670224666595459, + "step": 1194 + }, + { + "epoch": 0.34946629624214065, + "grad_norm": 1.213410956274994, + "learning_rate": 1.9163549871096934e-05, + "loss": 0.7311158776283264, + "step": 1195 + }, + { + "epoch": 0.34975873665740603, + "grad_norm": 1.3163563045896416, + "learning_rate": 1.9161613144235117e-05, + "loss": 0.6346032619476318, + "step": 1196 + }, + { + "epoch": 0.3500511770726714, + "grad_norm": 1.3538502473866518, + "learning_rate": 1.9159674275901932e-05, + "loss": 0.66914302110672, + "step": 1197 + }, + { + "epoch": 0.35034361748793685, + "grad_norm": 1.408804907617288, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.6775194406509399, + "step": 1198 + }, + { + "epoch": 0.35063605790320224, + "grad_norm": 1.3219370751555166, + "learning_rate": 1.915579011663475e-05, + "loss": 0.6887085437774658, + "step": 1199 + }, + { + "epoch": 0.3509284983184676, + "grad_norm": 1.4120877262018603, + "learning_rate": 1.9153844826608652e-05, + "loss": 0.7474929690361023, + "step": 1200 + }, + { + "epoch": 0.351220938733733, + "grad_norm": 1.3551417524104399, + "learning_rate": 1.915189739692698e-05, + "loss": 0.5665907859802246, + "step": 1201 + }, + { + "epoch": 0.3515133791489984, + "grad_norm": 1.4582334765772325, + "learning_rate": 1.9149947828044938e-05, + "loss": 0.6044580340385437, + "step": 1202 + }, + { + "epoch": 0.35180581956426377, + "grad_norm": 1.1481279810019642, + "learning_rate": 1.914799612041822e-05, + "loss": 0.6590601205825806, + "step": 1203 + }, + { + "epoch": 0.35209825997952915, + "grad_norm": 1.1796025597233206, + "learning_rate": 1.9146042274503033e-05, + "loss": 0.5204451084136963, + "step": 1204 + }, + { + "epoch": 0.3523907003947946, + "grad_norm": 1.3267878452954167, + "learning_rate": 1.9144086290756077e-05, + "loss": 0.6036473512649536, + "step": 1205 + }, + { + "epoch": 0.35268314081005997, + "grad_norm": 1.438922587418907, + "learning_rate": 1.914212816963454e-05, + "loss": 0.5652757883071899, + "step": 1206 + }, + { + 
"epoch": 0.35297558122532535, + "grad_norm": 1.3041918712359999, + "learning_rate": 1.9140167911596133e-05, + "loss": 0.707310676574707, + "step": 1207 + }, + { + "epoch": 0.35326802164059073, + "grad_norm": 1.4881761799215045, + "learning_rate": 1.9138205517099048e-05, + "loss": 0.8539729714393616, + "step": 1208 + }, + { + "epoch": 0.3535604620558561, + "grad_norm": 1.6882139973772572, + "learning_rate": 1.9136240986601986e-05, + "loss": 0.6502546072006226, + "step": 1209 + }, + { + "epoch": 0.3538529024711215, + "grad_norm": 1.3852727639404194, + "learning_rate": 1.9134274320564145e-05, + "loss": 0.7279889583587646, + "step": 1210 + }, + { + "epoch": 0.3541453428863869, + "grad_norm": 1.380784482123245, + "learning_rate": 1.9132305519445215e-05, + "loss": 0.6916895508766174, + "step": 1211 + }, + { + "epoch": 0.35443778330165226, + "grad_norm": 1.3426787464995344, + "learning_rate": 1.9130334583705395e-05, + "loss": 0.6941961050033569, + "step": 1212 + }, + { + "epoch": 0.3547302237169177, + "grad_norm": 1.211958694677935, + "learning_rate": 1.912836151380538e-05, + "loss": 0.6686822175979614, + "step": 1213 + }, + { + "epoch": 0.3550226641321831, + "grad_norm": 1.4335891633323221, + "learning_rate": 1.912638631020636e-05, + "loss": 0.818913459777832, + "step": 1214 + }, + { + "epoch": 0.35531510454744847, + "grad_norm": 1.3902796641746433, + "learning_rate": 1.9124408973370034e-05, + "loss": 0.6461240649223328, + "step": 1215 + }, + { + "epoch": 0.35560754496271385, + "grad_norm": 1.3521177557458626, + "learning_rate": 1.9122429503758586e-05, + "loss": 0.6982225179672241, + "step": 1216 + }, + { + "epoch": 0.35589998537797923, + "grad_norm": 1.2726818955529642, + "learning_rate": 1.9120447901834708e-05, + "loss": 0.6319124698638916, + "step": 1217 + }, + { + "epoch": 0.3561924257932446, + "grad_norm": 1.4379853975185637, + "learning_rate": 1.9118464168061584e-05, + "loss": 0.7092441320419312, + "step": 1218 + }, + { + "epoch": 0.35648486620851, + "grad_norm": 
1.4989677994022448, + "learning_rate": 1.9116478302902904e-05, + "loss": 0.7696874141693115, + "step": 1219 + }, + { + "epoch": 0.3567773066237754, + "grad_norm": 1.348418923049424, + "learning_rate": 1.9114490306822846e-05, + "loss": 0.6944275498390198, + "step": 1220 + }, + { + "epoch": 0.3570697470390408, + "grad_norm": 1.4736146352332777, + "learning_rate": 1.9112500180286098e-05, + "loss": 0.6179015636444092, + "step": 1221 + }, + { + "epoch": 0.3573621874543062, + "grad_norm": 1.4832162039625727, + "learning_rate": 1.911050792375784e-05, + "loss": 0.6964149475097656, + "step": 1222 + }, + { + "epoch": 0.3576546278695716, + "grad_norm": 1.314680516503926, + "learning_rate": 1.9108513537703746e-05, + "loss": 0.6923096776008606, + "step": 1223 + }, + { + "epoch": 0.35794706828483697, + "grad_norm": 1.3108509564109556, + "learning_rate": 1.9106517022589993e-05, + "loss": 0.5205660462379456, + "step": 1224 + }, + { + "epoch": 0.35823950870010235, + "grad_norm": 1.604738205722927, + "learning_rate": 1.910451837888325e-05, + "loss": 0.7488006353378296, + "step": 1225 + }, + { + "epoch": 0.35853194911536773, + "grad_norm": 1.1847506052614252, + "learning_rate": 1.91025176070507e-05, + "loss": 0.5414390563964844, + "step": 1226 + }, + { + "epoch": 0.3588243895306331, + "grad_norm": 1.2745914596652235, + "learning_rate": 1.910051470756e-05, + "loss": 0.6891577839851379, + "step": 1227 + }, + { + "epoch": 0.3591168299458985, + "grad_norm": 1.3018823092824294, + "learning_rate": 1.9098509680879318e-05, + "loss": 0.6496376991271973, + "step": 1228 + }, + { + "epoch": 0.35940927036116394, + "grad_norm": 1.386313672695145, + "learning_rate": 1.909650252747732e-05, + "loss": 0.758609414100647, + "step": 1229 + }, + { + "epoch": 0.3597017107764293, + "grad_norm": 1.544442120518355, + "learning_rate": 1.9094493247823164e-05, + "loss": 0.7509145736694336, + "step": 1230 + }, + { + "epoch": 0.3599941511916947, + "grad_norm": 1.2125512669659357, + "learning_rate": 
1.9092481842386506e-05, + "loss": 0.7432405352592468, + "step": 1231 + }, + { + "epoch": 0.3602865916069601, + "grad_norm": 1.4492900887661606, + "learning_rate": 1.90904683116375e-05, + "loss": 0.7208698391914368, + "step": 1232 + }, + { + "epoch": 0.36057903202222547, + "grad_norm": 1.4245050002638069, + "learning_rate": 1.9088452656046798e-05, + "loss": 0.638593852519989, + "step": 1233 + }, + { + "epoch": 0.36087147243749085, + "grad_norm": 1.4428449097608804, + "learning_rate": 1.9086434876085548e-05, + "loss": 0.6663007736206055, + "step": 1234 + }, + { + "epoch": 0.36116391285275623, + "grad_norm": 1.4112526680406456, + "learning_rate": 1.908441497222539e-05, + "loss": 0.7132781744003296, + "step": 1235 + }, + { + "epoch": 0.3614563532680216, + "grad_norm": 1.3024532647304885, + "learning_rate": 1.9082392944938467e-05, + "loss": 0.6545308828353882, + "step": 1236 + }, + { + "epoch": 0.36174879368328705, + "grad_norm": 1.1385624733680002, + "learning_rate": 1.908036879469741e-05, + "loss": 0.7525626420974731, + "step": 1237 + }, + { + "epoch": 0.36204123409855243, + "grad_norm": 1.4286424106237192, + "learning_rate": 1.9078342521975365e-05, + "loss": 0.7336804866790771, + "step": 1238 + }, + { + "epoch": 0.3623336745138178, + "grad_norm": 1.3025101490885231, + "learning_rate": 1.907631412724595e-05, + "loss": 0.5822359323501587, + "step": 1239 + }, + { + "epoch": 0.3626261149290832, + "grad_norm": 1.1928464678887247, + "learning_rate": 1.907428361098329e-05, + "loss": 0.6110040545463562, + "step": 1240 + }, + { + "epoch": 0.3629185553443486, + "grad_norm": 1.3329812952112776, + "learning_rate": 1.9072250973662008e-05, + "loss": 0.5363205671310425, + "step": 1241 + }, + { + "epoch": 0.36321099575961396, + "grad_norm": 1.298737392722519, + "learning_rate": 1.9070216215757225e-05, + "loss": 0.6804911494255066, + "step": 1242 + }, + { + "epoch": 0.36350343617487935, + "grad_norm": 1.306574555012534, + "learning_rate": 1.906817933774455e-05, + "loss": 
0.5670056343078613, + "step": 1243 + }, + { + "epoch": 0.3637958765901448, + "grad_norm": 1.6342501045897717, + "learning_rate": 1.9066140340100086e-05, + "loss": 0.6839423775672913, + "step": 1244 + }, + { + "epoch": 0.36408831700541017, + "grad_norm": 1.3102468000864722, + "learning_rate": 1.906409922330044e-05, + "loss": 0.6512447595596313, + "step": 1245 + }, + { + "epoch": 0.36438075742067555, + "grad_norm": 1.3767881480650324, + "learning_rate": 1.9062055987822713e-05, + "loss": 0.6602088212966919, + "step": 1246 + }, + { + "epoch": 0.36467319783594093, + "grad_norm": 1.3684046563228518, + "learning_rate": 1.9060010634144502e-05, + "loss": 0.6859074831008911, + "step": 1247 + }, + { + "epoch": 0.3649656382512063, + "grad_norm": 1.4029132597681886, + "learning_rate": 1.9057963162743888e-05, + "loss": 0.6871531009674072, + "step": 1248 + }, + { + "epoch": 0.3652580786664717, + "grad_norm": 1.2778457575589584, + "learning_rate": 1.9055913574099454e-05, + "loss": 0.7396048307418823, + "step": 1249 + }, + { + "epoch": 0.3655505190817371, + "grad_norm": 2.313748947770577, + "learning_rate": 1.9053861868690283e-05, + "loss": 0.7013602256774902, + "step": 1250 + }, + { + "epoch": 0.36584295949700246, + "grad_norm": 1.2854553849472183, + "learning_rate": 1.905180804699595e-05, + "loss": 0.6355527639389038, + "step": 1251 + }, + { + "epoch": 0.3661353999122679, + "grad_norm": 1.1923686434429392, + "learning_rate": 1.9049752109496526e-05, + "loss": 0.6869304180145264, + "step": 1252 + }, + { + "epoch": 0.3664278403275333, + "grad_norm": 1.2404032301108463, + "learning_rate": 1.9047694056672566e-05, + "loss": 0.5267671346664429, + "step": 1253 + }, + { + "epoch": 0.36672028074279867, + "grad_norm": 1.2479293372256655, + "learning_rate": 1.9045633889005134e-05, + "loss": 0.6586635112762451, + "step": 1254 + }, + { + "epoch": 0.36701272115806405, + "grad_norm": 1.2783901733768512, + "learning_rate": 1.9043571606975776e-05, + "loss": 0.6743361949920654, + "step": 1255 + }, 
+ { + "epoch": 0.36730516157332943, + "grad_norm": 1.267912865737822, + "learning_rate": 1.9041507211066543e-05, + "loss": 0.5779668688774109, + "step": 1256 + }, + { + "epoch": 0.3675976019885948, + "grad_norm": 1.240910914837657, + "learning_rate": 1.9039440701759972e-05, + "loss": 0.693313479423523, + "step": 1257 + }, + { + "epoch": 0.3678900424038602, + "grad_norm": 1.2581810913293596, + "learning_rate": 1.9037372079539096e-05, + "loss": 0.6314960718154907, + "step": 1258 + }, + { + "epoch": 0.3681824828191256, + "grad_norm": 1.4026915606466803, + "learning_rate": 1.9035301344887445e-05, + "loss": 0.6483266949653625, + "step": 1259 + }, + { + "epoch": 0.368474923234391, + "grad_norm": 1.1963714897771014, + "learning_rate": 1.903322849828904e-05, + "loss": 0.5896739959716797, + "step": 1260 + }, + { + "epoch": 0.3687673636496564, + "grad_norm": 1.3246139419549132, + "learning_rate": 1.9031153540228398e-05, + "loss": 0.6760983467102051, + "step": 1261 + }, + { + "epoch": 0.3690598040649218, + "grad_norm": 1.409129098147532, + "learning_rate": 1.9029076471190525e-05, + "loss": 0.7453440427780151, + "step": 1262 + }, + { + "epoch": 0.36935224448018716, + "grad_norm": 1.4768395375517958, + "learning_rate": 1.9026997291660926e-05, + "loss": 0.7382408380508423, + "step": 1263 + }, + { + "epoch": 0.36964468489545255, + "grad_norm": 1.3416426687197567, + "learning_rate": 1.9024916002125594e-05, + "loss": 0.6420471668243408, + "step": 1264 + }, + { + "epoch": 0.36993712531071793, + "grad_norm": 1.406350116015231, + "learning_rate": 1.9022832603071017e-05, + "loss": 0.6436389684677124, + "step": 1265 + }, + { + "epoch": 0.3702295657259833, + "grad_norm": 1.3047843220477244, + "learning_rate": 1.9020747094984182e-05, + "loss": 0.689171314239502, + "step": 1266 + }, + { + "epoch": 0.3705220061412487, + "grad_norm": 1.2640328794263636, + "learning_rate": 1.9018659478352556e-05, + "loss": 0.6704196333885193, + "step": 1267 + }, + { + "epoch": 0.37081444655651413, + 
"grad_norm": 2.0690106215423536, + "learning_rate": 1.9016569753664118e-05, + "loss": 0.6598329544067383, + "step": 1268 + }, + { + "epoch": 0.3711068869717795, + "grad_norm": 1.8262603065561684, + "learning_rate": 1.901447792140732e-05, + "loss": 0.7353986501693726, + "step": 1269 + }, + { + "epoch": 0.3713993273870449, + "grad_norm": 1.4285098808767827, + "learning_rate": 1.9012383982071112e-05, + "loss": 0.666167140007019, + "step": 1270 + }, + { + "epoch": 0.3716917678023103, + "grad_norm": 1.2598465904930443, + "learning_rate": 1.9010287936144948e-05, + "loss": 0.6097015738487244, + "step": 1271 + }, + { + "epoch": 0.37198420821757566, + "grad_norm": 1.107025542737965, + "learning_rate": 1.9008189784118764e-05, + "loss": 0.6352437138557434, + "step": 1272 + }, + { + "epoch": 0.37227664863284105, + "grad_norm": 1.5662430122293758, + "learning_rate": 1.9006089526482982e-05, + "loss": 0.6686104536056519, + "step": 1273 + }, + { + "epoch": 0.3725690890481064, + "grad_norm": 1.1719719158143125, + "learning_rate": 1.9003987163728535e-05, + "loss": 0.6504377126693726, + "step": 1274 + }, + { + "epoch": 0.3728615294633718, + "grad_norm": 1.2550627286183815, + "learning_rate": 1.9001882696346835e-05, + "loss": 0.5834585428237915, + "step": 1275 + }, + { + "epoch": 0.37315396987863725, + "grad_norm": 1.5420452194055032, + "learning_rate": 1.8999776124829788e-05, + "loss": 0.665432870388031, + "step": 1276 + }, + { + "epoch": 0.37344641029390263, + "grad_norm": 1.4022956370096276, + "learning_rate": 1.899766744966979e-05, + "loss": 0.659697949886322, + "step": 1277 + }, + { + "epoch": 0.373738850709168, + "grad_norm": 1.194404836566078, + "learning_rate": 1.899555667135973e-05, + "loss": 0.5703476071357727, + "step": 1278 + }, + { + "epoch": 0.3740312911244334, + "grad_norm": 1.5482464090140011, + "learning_rate": 1.8993443790392994e-05, + "loss": 0.809308648109436, + "step": 1279 + }, + { + "epoch": 0.3743237315396988, + "grad_norm": 1.293354946450912, + 
"learning_rate": 1.8991328807263455e-05, + "loss": 0.7120508551597595, + "step": 1280 + }, + { + "epoch": 0.37461617195496416, + "grad_norm": 1.2261555314771986, + "learning_rate": 1.898921172246547e-05, + "loss": 0.625985860824585, + "step": 1281 + }, + { + "epoch": 0.37490861237022954, + "grad_norm": 1.3045001966325798, + "learning_rate": 1.898709253649391e-05, + "loss": 0.637261152267456, + "step": 1282 + }, + { + "epoch": 0.375201052785495, + "grad_norm": 1.3550273094265433, + "learning_rate": 1.89849712498441e-05, + "loss": 0.7420133352279663, + "step": 1283 + }, + { + "epoch": 0.37549349320076036, + "grad_norm": 1.2854448504545577, + "learning_rate": 1.8982847863011898e-05, + "loss": 0.6230417490005493, + "step": 1284 + }, + { + "epoch": 0.37578593361602575, + "grad_norm": 1.2127007776565961, + "learning_rate": 1.8980722376493622e-05, + "loss": 0.6896604299545288, + "step": 1285 + }, + { + "epoch": 0.37607837403129113, + "grad_norm": 1.3900367736992565, + "learning_rate": 1.8978594790786092e-05, + "loss": 0.5767710208892822, + "step": 1286 + }, + { + "epoch": 0.3763708144465565, + "grad_norm": 1.3829044432724817, + "learning_rate": 1.8976465106386625e-05, + "loss": 0.6945392489433289, + "step": 1287 + }, + { + "epoch": 0.3766632548618219, + "grad_norm": 1.215943914903153, + "learning_rate": 1.8974333323793014e-05, + "loss": 0.7208314538002014, + "step": 1288 + }, + { + "epoch": 0.3769556952770873, + "grad_norm": 1.5769052361743978, + "learning_rate": 1.8972199443503556e-05, + "loss": 0.7201139330863953, + "step": 1289 + }, + { + "epoch": 0.37724813569235266, + "grad_norm": 1.3366567930451483, + "learning_rate": 1.8970063466017028e-05, + "loss": 0.6791107654571533, + "step": 1290 + }, + { + "epoch": 0.3775405761076181, + "grad_norm": 1.2566261030582595, + "learning_rate": 1.89679253918327e-05, + "loss": 0.6535364389419556, + "step": 1291 + }, + { + "epoch": 0.3778330165228835, + "grad_norm": 1.3498011568256927, + "learning_rate": 1.8965785221450343e-05, + 
"loss": 0.5910370349884033, + "step": 1292 + }, + { + "epoch": 0.37812545693814886, + "grad_norm": 1.273886266732073, + "learning_rate": 1.8963642955370203e-05, + "loss": 0.7025415897369385, + "step": 1293 + }, + { + "epoch": 0.37841789735341425, + "grad_norm": 1.3743129752593892, + "learning_rate": 1.8961498594093018e-05, + "loss": 0.8007702827453613, + "step": 1294 + }, + { + "epoch": 0.37871033776867963, + "grad_norm": 1.3170193178053329, + "learning_rate": 1.895935213812003e-05, + "loss": 0.6947172284126282, + "step": 1295 + }, + { + "epoch": 0.379002778183945, + "grad_norm": 1.1178211676030798, + "learning_rate": 1.895720358795295e-05, + "loss": 0.7024818658828735, + "step": 1296 + }, + { + "epoch": 0.3792952185992104, + "grad_norm": 1.3447819598276562, + "learning_rate": 1.895505294409399e-05, + "loss": 0.8202607035636902, + "step": 1297 + }, + { + "epoch": 0.3795876590144758, + "grad_norm": 1.3114070841017331, + "learning_rate": 1.8952900207045853e-05, + "loss": 0.8001795411109924, + "step": 1298 + }, + { + "epoch": 0.3798800994297412, + "grad_norm": 1.4499936638579116, + "learning_rate": 1.895074537731173e-05, + "loss": 0.8068668842315674, + "step": 1299 + }, + { + "epoch": 0.3801725398450066, + "grad_norm": 1.5847094151692727, + "learning_rate": 1.8948588455395294e-05, + "loss": 0.7685220241546631, + "step": 1300 + }, + { + "epoch": 0.380464980260272, + "grad_norm": 1.5870604224504243, + "learning_rate": 1.8946429441800715e-05, + "loss": 0.695665717124939, + "step": 1301 + }, + { + "epoch": 0.38075742067553736, + "grad_norm": 1.2451750201018865, + "learning_rate": 1.894426833703265e-05, + "loss": 0.6073132753372192, + "step": 1302 + }, + { + "epoch": 0.38104986109080274, + "grad_norm": 1.3373381283666117, + "learning_rate": 1.894210514159624e-05, + "loss": 0.6334577798843384, + "step": 1303 + }, + { + "epoch": 0.3813423015060681, + "grad_norm": 1.519675902061051, + "learning_rate": 1.8939939855997125e-05, + "loss": 0.6448806524276733, + "step": 1304 + }, + 
{ + "epoch": 0.3816347419213335, + "grad_norm": 1.438744535892561, + "learning_rate": 1.8937772480741427e-05, + "loss": 0.7587993144989014, + "step": 1305 + }, + { + "epoch": 0.3819271823365989, + "grad_norm": 1.3270277700231368, + "learning_rate": 1.8935603016335752e-05, + "loss": 0.6924787759780884, + "step": 1306 + }, + { + "epoch": 0.38221962275186433, + "grad_norm": 1.4268553989545638, + "learning_rate": 1.8933431463287197e-05, + "loss": 0.678055465221405, + "step": 1307 + }, + { + "epoch": 0.3825120631671297, + "grad_norm": 1.5391207675187488, + "learning_rate": 1.8931257822103357e-05, + "loss": 0.7519007325172424, + "step": 1308 + }, + { + "epoch": 0.3828045035823951, + "grad_norm": 1.3654471111870499, + "learning_rate": 1.8929082093292306e-05, + "loss": 0.6905468702316284, + "step": 1309 + }, + { + "epoch": 0.3830969439976605, + "grad_norm": 1.2938870443591295, + "learning_rate": 1.8926904277362603e-05, + "loss": 0.6718122363090515, + "step": 1310 + }, + { + "epoch": 0.38338938441292586, + "grad_norm": 1.2471147738993698, + "learning_rate": 1.89247243748233e-05, + "loss": 0.6903961896896362, + "step": 1311 + }, + { + "epoch": 0.38368182482819124, + "grad_norm": 1.2542516264352948, + "learning_rate": 1.8922542386183942e-05, + "loss": 0.6947582960128784, + "step": 1312 + }, + { + "epoch": 0.3839742652434566, + "grad_norm": 1.188134072228004, + "learning_rate": 1.8920358311954548e-05, + "loss": 0.5850759148597717, + "step": 1313 + }, + { + "epoch": 0.38426670565872206, + "grad_norm": 1.3420186334522382, + "learning_rate": 1.891817215264564e-05, + "loss": 0.6512178778648376, + "step": 1314 + }, + { + "epoch": 0.38455914607398745, + "grad_norm": 1.217758250797112, + "learning_rate": 1.891598390876821e-05, + "loss": 0.5910850167274475, + "step": 1315 + }, + { + "epoch": 0.38485158648925283, + "grad_norm": 1.5593571397265127, + "learning_rate": 1.891379358083375e-05, + "loss": 0.7113536596298218, + "step": 1316 + }, + { + "epoch": 0.3851440269045182, + 
"grad_norm": 1.293224731928583, + "learning_rate": 1.891160116935424e-05, + "loss": 0.705318808555603, + "step": 1317 + }, + { + "epoch": 0.3854364673197836, + "grad_norm": 1.3559272013315313, + "learning_rate": 1.890940667484214e-05, + "loss": 0.7524716258049011, + "step": 1318 + }, + { + "epoch": 0.385728907735049, + "grad_norm": 1.4516012043532711, + "learning_rate": 1.89072100978104e-05, + "loss": 0.6130248308181763, + "step": 1319 + }, + { + "epoch": 0.38602134815031436, + "grad_norm": 1.4354959704098622, + "learning_rate": 1.8905011438772455e-05, + "loss": 0.6535071134567261, + "step": 1320 + }, + { + "epoch": 0.38631378856557974, + "grad_norm": 1.3663991139031981, + "learning_rate": 1.890281069824223e-05, + "loss": 0.7027082443237305, + "step": 1321 + }, + { + "epoch": 0.3866062289808452, + "grad_norm": 1.4293858623040305, + "learning_rate": 1.8900607876734133e-05, + "loss": 0.7055719494819641, + "step": 1322 + }, + { + "epoch": 0.38689866939611056, + "grad_norm": 1.3719150653410752, + "learning_rate": 1.8898402974763063e-05, + "loss": 0.7403384447097778, + "step": 1323 + }, + { + "epoch": 0.38719110981137594, + "grad_norm": 1.4816215708629428, + "learning_rate": 1.88961959928444e-05, + "loss": 0.6771470308303833, + "step": 1324 + }, + { + "epoch": 0.3874835502266413, + "grad_norm": 1.552809609148836, + "learning_rate": 1.8893986931494015e-05, + "loss": 0.7258767485618591, + "step": 1325 + }, + { + "epoch": 0.3877759906419067, + "grad_norm": 1.4168779145398758, + "learning_rate": 1.889177579122826e-05, + "loss": 0.7587069869041443, + "step": 1326 + }, + { + "epoch": 0.3880684310571721, + "grad_norm": 1.0432394702021985, + "learning_rate": 1.888956257256398e-05, + "loss": 0.5434668660163879, + "step": 1327 + }, + { + "epoch": 0.3883608714724375, + "grad_norm": 1.2927507112469059, + "learning_rate": 1.8887347276018496e-05, + "loss": 0.5311154127120972, + "step": 1328 + }, + { + "epoch": 0.38865331188770286, + "grad_norm": 1.2533915775325788, + "learning_rate": 
1.888512990210962e-05, + "loss": 0.5651747584342957, + "step": 1329 + }, + { + "epoch": 0.3889457523029683, + "grad_norm": 1.1863162008873491, + "learning_rate": 1.8882910451355654e-05, + "loss": 0.628046989440918, + "step": 1330 + }, + { + "epoch": 0.3892381927182337, + "grad_norm": 1.362511771688155, + "learning_rate": 1.888068892427538e-05, + "loss": 0.644639253616333, + "step": 1331 + }, + { + "epoch": 0.38953063313349906, + "grad_norm": 1.2081658901416763, + "learning_rate": 1.887846532138806e-05, + "loss": 0.6290382742881775, + "step": 1332 + }, + { + "epoch": 0.38982307354876444, + "grad_norm": 1.319310840364395, + "learning_rate": 1.8876239643213456e-05, + "loss": 0.6881425380706787, + "step": 1333 + }, + { + "epoch": 0.3901155139640298, + "grad_norm": 1.398816649776473, + "learning_rate": 1.8874011890271807e-05, + "loss": 0.645643949508667, + "step": 1334 + }, + { + "epoch": 0.3904079543792952, + "grad_norm": 1.3694554603281324, + "learning_rate": 1.887178206308383e-05, + "loss": 0.6965867280960083, + "step": 1335 + }, + { + "epoch": 0.3907003947945606, + "grad_norm": 1.1683610353079796, + "learning_rate": 1.886955016217074e-05, + "loss": 0.7326550483703613, + "step": 1336 + }, + { + "epoch": 0.390992835209826, + "grad_norm": 1.1781300264884254, + "learning_rate": 1.886731618805422e-05, + "loss": 0.6570208668708801, + "step": 1337 + }, + { + "epoch": 0.3912852756250914, + "grad_norm": 1.3563073747469718, + "learning_rate": 1.886508014125646e-05, + "loss": 0.7391610145568848, + "step": 1338 + }, + { + "epoch": 0.3915777160403568, + "grad_norm": 1.2946978380119605, + "learning_rate": 1.8862842022300124e-05, + "loss": 0.651665985584259, + "step": 1339 + }, + { + "epoch": 0.3918701564556222, + "grad_norm": 1.383137909559831, + "learning_rate": 1.8860601831708346e-05, + "loss": 0.695915699005127, + "step": 1340 + }, + { + "epoch": 0.39216259687088756, + "grad_norm": 1.4373340263094165, + "learning_rate": 1.885835957000476e-05, + "loss": 0.7209347486495972, + 
"step": 1341 + }, + { + "epoch": 0.39245503728615294, + "grad_norm": 1.3210736597005808, + "learning_rate": 1.885611523771349e-05, + "loss": 0.6083317995071411, + "step": 1342 + }, + { + "epoch": 0.3927474777014183, + "grad_norm": 1.1271235346878163, + "learning_rate": 1.8853868835359127e-05, + "loss": 0.5544713735580444, + "step": 1343 + }, + { + "epoch": 0.3930399181166837, + "grad_norm": 1.5309462253741093, + "learning_rate": 1.8851620363466756e-05, + "loss": 0.6333836317062378, + "step": 1344 + }, + { + "epoch": 0.3933323585319491, + "grad_norm": 1.2454908963117732, + "learning_rate": 1.8849369822561943e-05, + "loss": 0.554995059967041, + "step": 1345 + }, + { + "epoch": 0.3936247989472145, + "grad_norm": 1.1941759610212306, + "learning_rate": 1.884711721317074e-05, + "loss": 0.6696420907974243, + "step": 1346 + }, + { + "epoch": 0.3939172393624799, + "grad_norm": 1.3685512221597254, + "learning_rate": 1.8844862535819682e-05, + "loss": 0.6031695604324341, + "step": 1347 + }, + { + "epoch": 0.3942096797777453, + "grad_norm": 1.2956806020723108, + "learning_rate": 1.884260579103578e-05, + "loss": 0.686814546585083, + "step": 1348 + }, + { + "epoch": 0.3945021201930107, + "grad_norm": 1.3341047094387086, + "learning_rate": 1.884034697934654e-05, + "loss": 0.8113317489624023, + "step": 1349 + }, + { + "epoch": 0.39479456060827606, + "grad_norm": 1.2860694131453334, + "learning_rate": 1.8838086101279946e-05, + "loss": 0.5645952820777893, + "step": 1350 + }, + { + "epoch": 0.39508700102354144, + "grad_norm": 1.2016385972759884, + "learning_rate": 1.883582315736446e-05, + "loss": 0.6431643962860107, + "step": 1351 + }, + { + "epoch": 0.3953794414388068, + "grad_norm": 1.5868415992731069, + "learning_rate": 1.8833558148129034e-05, + "loss": 0.7691985368728638, + "step": 1352 + }, + { + "epoch": 0.39567188185407226, + "grad_norm": 1.2630640231572245, + "learning_rate": 1.88312910741031e-05, + "loss": 0.5951793789863586, + "step": 1353 + }, + { + "epoch": 
0.39596432226933764, + "grad_norm": 1.4300765958854422, + "learning_rate": 1.8829021935816572e-05, + "loss": 0.671844482421875, + "step": 1354 + }, + { + "epoch": 0.396256762684603, + "grad_norm": 1.1270834377200167, + "learning_rate": 1.8826750733799845e-05, + "loss": 0.5290843844413757, + "step": 1355 + }, + { + "epoch": 0.3965492030998684, + "grad_norm": 1.5593842090138308, + "learning_rate": 1.8824477468583806e-05, + "loss": 0.6492103934288025, + "step": 1356 + }, + { + "epoch": 0.3968416435151338, + "grad_norm": 1.5289372619537642, + "learning_rate": 1.882220214069981e-05, + "loss": 0.6111055016517639, + "step": 1357 + }, + { + "epoch": 0.3971340839303992, + "grad_norm": 1.5921417426189186, + "learning_rate": 1.8819924750679702e-05, + "loss": 0.8123398423194885, + "step": 1358 + }, + { + "epoch": 0.39742652434566456, + "grad_norm": 1.1798948792158594, + "learning_rate": 1.8817645299055815e-05, + "loss": 0.6118077039718628, + "step": 1359 + }, + { + "epoch": 0.39771896476092994, + "grad_norm": 1.3941391170101545, + "learning_rate": 1.8815363786360948e-05, + "loss": 0.6916248798370361, + "step": 1360 + }, + { + "epoch": 0.3980114051761954, + "grad_norm": 1.2139550861968382, + "learning_rate": 1.8813080213128394e-05, + "loss": 0.5586028099060059, + "step": 1361 + }, + { + "epoch": 0.39830384559146076, + "grad_norm": 1.4269154081314215, + "learning_rate": 1.8810794579891925e-05, + "loss": 0.6132841110229492, + "step": 1362 + }, + { + "epoch": 0.39859628600672614, + "grad_norm": 1.6891257084549798, + "learning_rate": 1.8808506887185793e-05, + "loss": 0.7329133749008179, + "step": 1363 + }, + { + "epoch": 0.3988887264219915, + "grad_norm": 1.2704606915664418, + "learning_rate": 1.8806217135544736e-05, + "loss": 0.5975138545036316, + "step": 1364 + }, + { + "epoch": 0.3991811668372569, + "grad_norm": 1.315662529163245, + "learning_rate": 1.8803925325503963e-05, + "loss": 0.5790295600891113, + "step": 1365 + }, + { + "epoch": 0.3994736072525223, + "grad_norm": 
1.2020939373145192, + "learning_rate": 1.8801631457599173e-05, + "loss": 0.6506124138832092, + "step": 1366 + }, + { + "epoch": 0.39976604766778767, + "grad_norm": 1.1981976421950613, + "learning_rate": 1.8799335532366547e-05, + "loss": 0.5577528476715088, + "step": 1367 + }, + { + "epoch": 0.40005848808305305, + "grad_norm": 1.4216027512167424, + "learning_rate": 1.879703755034274e-05, + "loss": 0.675471305847168, + "step": 1368 + }, + { + "epoch": 0.4003509284983185, + "grad_norm": 1.2857175045016282, + "learning_rate": 1.879473751206489e-05, + "loss": 0.5826357007026672, + "step": 1369 + }, + { + "epoch": 0.4006433689135839, + "grad_norm": 1.31090591162355, + "learning_rate": 1.8792435418070623e-05, + "loss": 0.5146772265434265, + "step": 1370 + }, + { + "epoch": 0.40093580932884926, + "grad_norm": 1.224194477069696, + "learning_rate": 1.879013126889804e-05, + "loss": 0.6049208641052246, + "step": 1371 + }, + { + "epoch": 0.40122824974411464, + "grad_norm": 1.4047657351006413, + "learning_rate": 1.878782506508571e-05, + "loss": 0.7058207392692566, + "step": 1372 + }, + { + "epoch": 0.40152069015938, + "grad_norm": 1.3782924104285919, + "learning_rate": 1.8785516807172704e-05, + "loss": 0.6281940937042236, + "step": 1373 + }, + { + "epoch": 0.4018131305746454, + "grad_norm": 1.5710053658084482, + "learning_rate": 1.878320649569856e-05, + "loss": 0.6318703889846802, + "step": 1374 + }, + { + "epoch": 0.4021055709899108, + "grad_norm": 1.369183615763356, + "learning_rate": 1.87808941312033e-05, + "loss": 0.6595311164855957, + "step": 1375 + }, + { + "epoch": 0.40239801140517617, + "grad_norm": 1.2850685362090555, + "learning_rate": 1.8778579714227433e-05, + "loss": 0.6964930295944214, + "step": 1376 + }, + { + "epoch": 0.4026904518204416, + "grad_norm": 1.338060741631637, + "learning_rate": 1.8776263245311926e-05, + "loss": 0.6093966364860535, + "step": 1377 + }, + { + "epoch": 0.402982892235707, + "grad_norm": 1.2438362189425571, + "learning_rate": 
1.8773944724998248e-05, + "loss": 0.5337893962860107, + "step": 1378 + }, + { + "epoch": 0.4032753326509724, + "grad_norm": 1.3665212711176857, + "learning_rate": 1.8771624153828336e-05, + "loss": 0.5899128317832947, + "step": 1379 + }, + { + "epoch": 0.40356777306623776, + "grad_norm": 1.41983053503157, + "learning_rate": 1.876930153234461e-05, + "loss": 0.7054699659347534, + "step": 1380 + }, + { + "epoch": 0.40386021348150314, + "grad_norm": 1.4240217447880834, + "learning_rate": 1.876697686108997e-05, + "loss": 0.6910602450370789, + "step": 1381 + }, + { + "epoch": 0.4041526538967685, + "grad_norm": 1.383183489617276, + "learning_rate": 1.876465014060779e-05, + "loss": 0.605659008026123, + "step": 1382 + }, + { + "epoch": 0.4044450943120339, + "grad_norm": 1.4798727853043008, + "learning_rate": 1.8762321371441934e-05, + "loss": 0.7159937620162964, + "step": 1383 + }, + { + "epoch": 0.4047375347272993, + "grad_norm": 1.394589338486404, + "learning_rate": 1.8759990554136733e-05, + "loss": 0.7568333148956299, + "step": 1384 + }, + { + "epoch": 0.4050299751425647, + "grad_norm": 1.613153659624872, + "learning_rate": 1.8757657689236998e-05, + "loss": 0.7117356657981873, + "step": 1385 + }, + { + "epoch": 0.4053224155578301, + "grad_norm": 1.4078248670298101, + "learning_rate": 1.8755322777288027e-05, + "loss": 0.6429109573364258, + "step": 1386 + }, + { + "epoch": 0.4056148559730955, + "grad_norm": 1.571804966063755, + "learning_rate": 1.875298581883559e-05, + "loss": 0.643811821937561, + "step": 1387 + }, + { + "epoch": 0.40590729638836087, + "grad_norm": 1.439883937827134, + "learning_rate": 1.875064681442594e-05, + "loss": 0.7143295407295227, + "step": 1388 + }, + { + "epoch": 0.40619973680362625, + "grad_norm": 1.5655377581350667, + "learning_rate": 1.8748305764605798e-05, + "loss": 0.732312023639679, + "step": 1389 + }, + { + "epoch": 0.40649217721889164, + "grad_norm": 1.2430462426936875, + "learning_rate": 1.8745962669922375e-05, + "loss": 0.6878848075866699, 
+ "step": 1390 + }, + { + "epoch": 0.406784617634157, + "grad_norm": 1.40828278888595, + "learning_rate": 1.8743617530923356e-05, + "loss": 0.6716262698173523, + "step": 1391 + }, + { + "epoch": 0.40707705804942246, + "grad_norm": 1.346254534859124, + "learning_rate": 1.87412703481569e-05, + "loss": 0.4990834593772888, + "step": 1392 + }, + { + "epoch": 0.40736949846468784, + "grad_norm": 1.4289189141042684, + "learning_rate": 1.8738921122171647e-05, + "loss": 0.6541857719421387, + "step": 1393 + }, + { + "epoch": 0.4076619388799532, + "grad_norm": 1.4587922420879296, + "learning_rate": 1.8736569853516715e-05, + "loss": 0.6310811042785645, + "step": 1394 + }, + { + "epoch": 0.4079543792952186, + "grad_norm": 1.5328348699666439, + "learning_rate": 1.8734216542741702e-05, + "loss": 0.6335423588752747, + "step": 1395 + }, + { + "epoch": 0.408246819710484, + "grad_norm": 1.4540701020587141, + "learning_rate": 1.873186119039667e-05, + "loss": 0.6315034627914429, + "step": 1396 + }, + { + "epoch": 0.40853926012574937, + "grad_norm": 1.2591166704430221, + "learning_rate": 1.872950379703218e-05, + "loss": 0.6822362542152405, + "step": 1397 + }, + { + "epoch": 0.40883170054101475, + "grad_norm": 1.3128671260601936, + "learning_rate": 1.8727144363199257e-05, + "loss": 0.699965238571167, + "step": 1398 + }, + { + "epoch": 0.40912414095628014, + "grad_norm": 1.376512615463435, + "learning_rate": 1.8724782889449397e-05, + "loss": 0.6769841313362122, + "step": 1399 + }, + { + "epoch": 0.4094165813715456, + "grad_norm": 1.242831468646962, + "learning_rate": 1.8722419376334584e-05, + "loss": 0.5219473838806152, + "step": 1400 + }, + { + "epoch": 0.40970902178681096, + "grad_norm": 1.1119386554431685, + "learning_rate": 1.872005382440728e-05, + "loss": 0.6091574430465698, + "step": 1401 + }, + { + "epoch": 0.41000146220207634, + "grad_norm": 1.3005524040148213, + "learning_rate": 1.8717686234220406e-05, + "loss": 0.7589390277862549, + "step": 1402 + }, + { + "epoch": 
0.4102939026173417, + "grad_norm": 1.2931582987016021, + "learning_rate": 1.8715316606327384e-05, + "loss": 0.7042895555496216, + "step": 1403 + }, + { + "epoch": 0.4105863430326071, + "grad_norm": 1.2509877991876854, + "learning_rate": 1.8712944941282095e-05, + "loss": 0.6490949988365173, + "step": 1404 + }, + { + "epoch": 0.4108787834478725, + "grad_norm": 1.3726951776657805, + "learning_rate": 1.87105712396389e-05, + "loss": 0.6614132523536682, + "step": 1405 + }, + { + "epoch": 0.41117122386313787, + "grad_norm": 1.3416970895813871, + "learning_rate": 1.8708195501952637e-05, + "loss": 0.666157603263855, + "step": 1406 + }, + { + "epoch": 0.41146366427840325, + "grad_norm": 1.5455429688837699, + "learning_rate": 1.8705817728778626e-05, + "loss": 0.7347884178161621, + "step": 1407 + }, + { + "epoch": 0.4117561046936687, + "grad_norm": 1.6323767587093516, + "learning_rate": 1.8703437920672652e-05, + "loss": 0.8129836320877075, + "step": 1408 + }, + { + "epoch": 0.4120485451089341, + "grad_norm": 1.2690047775005027, + "learning_rate": 1.870105607819098e-05, + "loss": 0.645210862159729, + "step": 1409 + }, + { + "epoch": 0.41234098552419945, + "grad_norm": 1.367879279910813, + "learning_rate": 1.8698672201890355e-05, + "loss": 0.6716916561126709, + "step": 1410 + }, + { + "epoch": 0.41263342593946484, + "grad_norm": 1.66933384894401, + "learning_rate": 1.869628629232799e-05, + "loss": 0.8190855383872986, + "step": 1411 + }, + { + "epoch": 0.4129258663547302, + "grad_norm": 1.3900061091611966, + "learning_rate": 1.8693898350061582e-05, + "loss": 0.7618075609207153, + "step": 1412 + }, + { + "epoch": 0.4132183067699956, + "grad_norm": 1.3486290329442485, + "learning_rate": 1.869150837564929e-05, + "loss": 0.719980001449585, + "step": 1413 + }, + { + "epoch": 0.413510747185261, + "grad_norm": 1.2278158990840933, + "learning_rate": 1.8689116369649763e-05, + "loss": 0.6601548194885254, + "step": 1414 + }, + { + "epoch": 0.41380318760052637, + "grad_norm": 
1.4157736896401232, + "learning_rate": 1.8686722332622112e-05, + "loss": 0.5991787314414978, + "step": 1415 + }, + { + "epoch": 0.4140956280157918, + "grad_norm": 1.2916299361998576, + "learning_rate": 1.8684326265125935e-05, + "loss": 0.6089641451835632, + "step": 1416 + }, + { + "epoch": 0.4143880684310572, + "grad_norm": 1.4857622706167455, + "learning_rate": 1.8681928167721297e-05, + "loss": 0.8143327236175537, + "step": 1417 + }, + { + "epoch": 0.41468050884632257, + "grad_norm": 1.1137129272750816, + "learning_rate": 1.8679528040968733e-05, + "loss": 0.6127045154571533, + "step": 1418 + }, + { + "epoch": 0.41497294926158795, + "grad_norm": 1.2684856043432204, + "learning_rate": 1.8677125885429262e-05, + "loss": 0.659069299697876, + "step": 1419 + }, + { + "epoch": 0.41526538967685334, + "grad_norm": 1.3122733176612695, + "learning_rate": 1.8674721701664377e-05, + "loss": 0.7277505397796631, + "step": 1420 + }, + { + "epoch": 0.4155578300921187, + "grad_norm": 1.2107555045955465, + "learning_rate": 1.8672315490236034e-05, + "loss": 0.6128710508346558, + "step": 1421 + }, + { + "epoch": 0.4158502705073841, + "grad_norm": 1.7155790773588848, + "learning_rate": 1.866990725170667e-05, + "loss": 0.7439340949058533, + "step": 1422 + }, + { + "epoch": 0.4161427109226495, + "grad_norm": 1.1423179387443951, + "learning_rate": 1.8667496986639206e-05, + "loss": 0.5855459570884705, + "step": 1423 + }, + { + "epoch": 0.4164351513379149, + "grad_norm": 1.3335637335552337, + "learning_rate": 1.866508469559702e-05, + "loss": 0.6865170001983643, + "step": 1424 + }, + { + "epoch": 0.4167275917531803, + "grad_norm": 1.3191415692644766, + "learning_rate": 1.866267037914397e-05, + "loss": 0.6648446917533875, + "step": 1425 + }, + { + "epoch": 0.4170200321684457, + "grad_norm": 1.5198580088053322, + "learning_rate": 1.866025403784439e-05, + "loss": 0.6919275522232056, + "step": 1426 + }, + { + "epoch": 0.41731247258371107, + "grad_norm": 1.3309988770277923, + "learning_rate": 
1.865783567226308e-05, + "loss": 0.7270313501358032, + "step": 1427 + }, + { + "epoch": 0.41760491299897645, + "grad_norm": 1.2814450794742573, + "learning_rate": 1.8655415282965327e-05, + "loss": 0.5938387513160706, + "step": 1428 + }, + { + "epoch": 0.41789735341424183, + "grad_norm": 1.378984312222445, + "learning_rate": 1.8652992870516872e-05, + "loss": 0.6517149209976196, + "step": 1429 + }, + { + "epoch": 0.4181897938295072, + "grad_norm": 1.1862439123900306, + "learning_rate": 1.8650568435483948e-05, + "loss": 0.6688356399536133, + "step": 1430 + }, + { + "epoch": 0.41848223424477266, + "grad_norm": 1.153419964025717, + "learning_rate": 1.864814197843325e-05, + "loss": 0.5300855040550232, + "step": 1431 + }, + { + "epoch": 0.41877467466003804, + "grad_norm": 1.270766161064103, + "learning_rate": 1.8645713499931943e-05, + "loss": 0.6404704451560974, + "step": 1432 + }, + { + "epoch": 0.4190671150753034, + "grad_norm": 1.5430855845367462, + "learning_rate": 1.8643283000547673e-05, + "loss": 0.6758813858032227, + "step": 1433 + }, + { + "epoch": 0.4193595554905688, + "grad_norm": 1.3783205387944717, + "learning_rate": 1.8640850480848552e-05, + "loss": 0.6328250169754028, + "step": 1434 + }, + { + "epoch": 0.4196519959058342, + "grad_norm": 1.26459826517306, + "learning_rate": 1.863841594140317e-05, + "loss": 0.6747157573699951, + "step": 1435 + }, + { + "epoch": 0.41994443632109957, + "grad_norm": 1.3504769695047412, + "learning_rate": 1.8635979382780584e-05, + "loss": 0.5314475893974304, + "step": 1436 + }, + { + "epoch": 0.42023687673636495, + "grad_norm": 1.345420561697831, + "learning_rate": 1.863354080555033e-05, + "loss": 0.478320837020874, + "step": 1437 + }, + { + "epoch": 0.42052931715163033, + "grad_norm": 1.7039787917499718, + "learning_rate": 1.86311002102824e-05, + "loss": 0.7389972805976868, + "step": 1438 + }, + { + "epoch": 0.42082175756689577, + "grad_norm": 1.5752835852867741, + "learning_rate": 1.8628657597547273e-05, + "loss": 
0.5449938178062439, + "step": 1439 + }, + { + "epoch": 0.42111419798216115, + "grad_norm": 1.417913338852298, + "learning_rate": 1.8626212967915897e-05, + "loss": 0.6752811670303345, + "step": 1440 + }, + { + "epoch": 0.42140663839742654, + "grad_norm": 1.3268814915367182, + "learning_rate": 1.862376632195969e-05, + "loss": 0.7750412821769714, + "step": 1441 + }, + { + "epoch": 0.4216990788126919, + "grad_norm": 1.4150998717703018, + "learning_rate": 1.8621317660250535e-05, + "loss": 0.5967680215835571, + "step": 1442 + }, + { + "epoch": 0.4219915192279573, + "grad_norm": 1.2836272802739963, + "learning_rate": 1.86188669833608e-05, + "loss": 0.6781327724456787, + "step": 1443 + }, + { + "epoch": 0.4222839596432227, + "grad_norm": 1.421988457915262, + "learning_rate": 1.8616414291863307e-05, + "loss": 0.7539681196212769, + "step": 1444 + }, + { + "epoch": 0.42257640005848807, + "grad_norm": 1.5265432564271315, + "learning_rate": 1.8613959586331364e-05, + "loss": 0.6976957321166992, + "step": 1445 + }, + { + "epoch": 0.42286884047375345, + "grad_norm": 1.3365892238255053, + "learning_rate": 1.861150286733874e-05, + "loss": 0.6616528034210205, + "step": 1446 + }, + { + "epoch": 0.4231612808890189, + "grad_norm": 1.4482994306877846, + "learning_rate": 1.860904413545968e-05, + "loss": 0.6407957077026367, + "step": 1447 + }, + { + "epoch": 0.42345372130428427, + "grad_norm": 1.4193133822561126, + "learning_rate": 1.86065833912689e-05, + "loss": 0.5918550491333008, + "step": 1448 + }, + { + "epoch": 0.42374616171954965, + "grad_norm": 1.421765780188314, + "learning_rate": 1.8604120635341574e-05, + "loss": 0.6142056584358215, + "step": 1449 + }, + { + "epoch": 0.42403860213481503, + "grad_norm": 1.4371201128611453, + "learning_rate": 1.8601655868253368e-05, + "loss": 0.6359597444534302, + "step": 1450 + }, + { + "epoch": 0.4243310425500804, + "grad_norm": 1.2914617625794835, + "learning_rate": 1.8599189090580402e-05, + "loss": 0.7149467468261719, + "step": 1451 + }, + { + 
"epoch": 0.4246234829653458, + "grad_norm": 1.2900964447275098, + "learning_rate": 1.8596720302899272e-05, + "loss": 0.6015822887420654, + "step": 1452 + }, + { + "epoch": 0.4249159233806112, + "grad_norm": 1.1866564154864978, + "learning_rate": 1.8594249505787035e-05, + "loss": 0.6389881372451782, + "step": 1453 + }, + { + "epoch": 0.42520836379587656, + "grad_norm": 1.381321058965008, + "learning_rate": 1.8591776699821235e-05, + "loss": 0.7479783892631531, + "step": 1454 + }, + { + "epoch": 0.425500804211142, + "grad_norm": 1.2271977568055246, + "learning_rate": 1.8589301885579866e-05, + "loss": 0.6574498414993286, + "step": 1455 + }, + { + "epoch": 0.4257932446264074, + "grad_norm": 1.3187836865578064, + "learning_rate": 1.858682506364141e-05, + "loss": 0.6314088702201843, + "step": 1456 + }, + { + "epoch": 0.42608568504167277, + "grad_norm": 1.4747450600155867, + "learning_rate": 1.85843462345848e-05, + "loss": 0.605385959148407, + "step": 1457 + }, + { + "epoch": 0.42637812545693815, + "grad_norm": 1.280849948973879, + "learning_rate": 1.8581865398989452e-05, + "loss": 0.6355551481246948, + "step": 1458 + }, + { + "epoch": 0.42667056587220353, + "grad_norm": 1.3012840164028812, + "learning_rate": 1.8579382557435247e-05, + "loss": 0.6303017139434814, + "step": 1459 + }, + { + "epoch": 0.4269630062874689, + "grad_norm": 1.2629380280411955, + "learning_rate": 1.8576897710502532e-05, + "loss": 0.5916526317596436, + "step": 1460 + }, + { + "epoch": 0.4272554467027343, + "grad_norm": 1.2467440963341316, + "learning_rate": 1.8574410858772126e-05, + "loss": 0.5709279179573059, + "step": 1461 + }, + { + "epoch": 0.4275478871179997, + "grad_norm": 1.2909430743502928, + "learning_rate": 1.8571922002825317e-05, + "loss": 0.571231484413147, + "step": 1462 + }, + { + "epoch": 0.4278403275332651, + "grad_norm": 1.310017395907512, + "learning_rate": 1.8569431143243856e-05, + "loss": 0.6352202892303467, + "step": 1463 + }, + { + "epoch": 0.4281327679485305, + "grad_norm": 
1.316165374470179, + "learning_rate": 1.8566938280609965e-05, + "loss": 0.553265392780304, + "step": 1464 + }, + { + "epoch": 0.4284252083637959, + "grad_norm": 1.1127868543655046, + "learning_rate": 1.8564443415506343e-05, + "loss": 0.4913727045059204, + "step": 1465 + }, + { + "epoch": 0.42871764877906127, + "grad_norm": 1.4457215110099157, + "learning_rate": 1.8561946548516143e-05, + "loss": 0.542539119720459, + "step": 1466 + }, + { + "epoch": 0.42901008919432665, + "grad_norm": 1.5261496853017646, + "learning_rate": 1.8559447680222994e-05, + "loss": 0.719292163848877, + "step": 1467 + }, + { + "epoch": 0.42930252960959203, + "grad_norm": 1.4842625427656275, + "learning_rate": 1.8556946811210993e-05, + "loss": 0.8443170785903931, + "step": 1468 + }, + { + "epoch": 0.4295949700248574, + "grad_norm": 1.4024545882927506, + "learning_rate": 1.8554443942064705e-05, + "loss": 0.7899821996688843, + "step": 1469 + }, + { + "epoch": 0.42988741044012285, + "grad_norm": 1.3637198474337424, + "learning_rate": 1.8551939073369155e-05, + "loss": 0.617426872253418, + "step": 1470 + }, + { + "epoch": 0.43017985085538823, + "grad_norm": 1.284473833943433, + "learning_rate": 1.8549432205709842e-05, + "loss": 0.5573505163192749, + "step": 1471 + }, + { + "epoch": 0.4304722912706536, + "grad_norm": 1.2050796372555104, + "learning_rate": 1.8546923339672734e-05, + "loss": 0.5571975111961365, + "step": 1472 + }, + { + "epoch": 0.430764731685919, + "grad_norm": 1.2452948917501594, + "learning_rate": 1.854441247584426e-05, + "loss": 0.6411981582641602, + "step": 1473 + }, + { + "epoch": 0.4310571721011844, + "grad_norm": 1.4342124934143161, + "learning_rate": 1.8541899614811323e-05, + "loss": 0.4766804277896881, + "step": 1474 + }, + { + "epoch": 0.43134961251644977, + "grad_norm": 1.5114551227786939, + "learning_rate": 1.8539384757161285e-05, + "loss": 0.7479405403137207, + "step": 1475 + }, + { + "epoch": 0.43164205293171515, + "grad_norm": 1.3476436799817348, + "learning_rate": 
1.8536867903481983e-05, + "loss": 0.6848211288452148, + "step": 1476 + }, + { + "epoch": 0.43193449334698053, + "grad_norm": 1.2973665530504777, + "learning_rate": 1.8534349054361708e-05, + "loss": 0.7413634061813354, + "step": 1477 + }, + { + "epoch": 0.43222693376224597, + "grad_norm": 1.1870657052305638, + "learning_rate": 1.8531828210389236e-05, + "loss": 0.5880843997001648, + "step": 1478 + }, + { + "epoch": 0.43251937417751135, + "grad_norm": 1.16075786792099, + "learning_rate": 1.852930537215379e-05, + "loss": 0.5885627269744873, + "step": 1479 + }, + { + "epoch": 0.43281181459277673, + "grad_norm": 1.3270242768891243, + "learning_rate": 1.8526780540245077e-05, + "loss": 0.706636905670166, + "step": 1480 + }, + { + "epoch": 0.4331042550080421, + "grad_norm": 1.3793959384028218, + "learning_rate": 1.8524253715253255e-05, + "loss": 0.6521843075752258, + "step": 1481 + }, + { + "epoch": 0.4333966954233075, + "grad_norm": 1.3825746336646279, + "learning_rate": 1.8521724897768955e-05, + "loss": 0.6231021881103516, + "step": 1482 + }, + { + "epoch": 0.4336891358385729, + "grad_norm": 1.4460679872410762, + "learning_rate": 1.851919408838327e-05, + "loss": 0.6859451532363892, + "step": 1483 + }, + { + "epoch": 0.43398157625383826, + "grad_norm": 1.2507527028404273, + "learning_rate": 1.851666128768777e-05, + "loss": 0.7948323488235474, + "step": 1484 + }, + { + "epoch": 0.43427401666910365, + "grad_norm": 1.3631419376990976, + "learning_rate": 1.8514126496274473e-05, + "loss": 0.7815203070640564, + "step": 1485 + }, + { + "epoch": 0.4345664570843691, + "grad_norm": 1.2904619284943133, + "learning_rate": 1.8511589714735875e-05, + "loss": 0.6941452622413635, + "step": 1486 + }, + { + "epoch": 0.43485889749963447, + "grad_norm": 1.41567858231915, + "learning_rate": 1.850905094366493e-05, + "loss": 0.5500549674034119, + "step": 1487 + }, + { + "epoch": 0.43515133791489985, + "grad_norm": 1.2918667262960315, + "learning_rate": 1.8506510183655066e-05, + "loss": 
0.6616400480270386, + "step": 1488 + }, + { + "epoch": 0.43544377833016523, + "grad_norm": 1.2491627898498192, + "learning_rate": 1.8503967435300166e-05, + "loss": 0.6920043230056763, + "step": 1489 + }, + { + "epoch": 0.4357362187454306, + "grad_norm": 1.215912086863742, + "learning_rate": 1.8501422699194584e-05, + "loss": 0.6080813407897949, + "step": 1490 + }, + { + "epoch": 0.436028659160696, + "grad_norm": 1.2215283867587456, + "learning_rate": 1.8498875975933135e-05, + "loss": 0.576184868812561, + "step": 1491 + }, + { + "epoch": 0.4363210995759614, + "grad_norm": 1.3544983329172053, + "learning_rate": 1.84963272661111e-05, + "loss": 0.6647310256958008, + "step": 1492 + }, + { + "epoch": 0.43661353999122676, + "grad_norm": 1.5126248587795905, + "learning_rate": 1.8493776570324224e-05, + "loss": 0.6738306283950806, + "step": 1493 + }, + { + "epoch": 0.4369059804064922, + "grad_norm": 1.306695091605799, + "learning_rate": 1.849122388916872e-05, + "loss": 0.681056022644043, + "step": 1494 + }, + { + "epoch": 0.4371984208217576, + "grad_norm": 1.2802492616875505, + "learning_rate": 1.848866922324126e-05, + "loss": 0.7844547033309937, + "step": 1495 + }, + { + "epoch": 0.43749086123702297, + "grad_norm": 1.278338668380481, + "learning_rate": 1.8486112573138977e-05, + "loss": 0.6478928327560425, + "step": 1496 + }, + { + "epoch": 0.43778330165228835, + "grad_norm": 1.1565510309984284, + "learning_rate": 1.8483553939459477e-05, + "loss": 0.6035341024398804, + "step": 1497 + }, + { + "epoch": 0.43807574206755373, + "grad_norm": 1.5407821231530743, + "learning_rate": 1.8480993322800826e-05, + "loss": 0.6664912700653076, + "step": 1498 + }, + { + "epoch": 0.4383681824828191, + "grad_norm": 1.2757017491830842, + "learning_rate": 1.847843072376155e-05, + "loss": 0.7171953916549683, + "step": 1499 + }, + { + "epoch": 0.4386606228980845, + "grad_norm": 1.6930649567828897, + "learning_rate": 1.8475866142940646e-05, + "loss": 0.8400344848632812, + "step": 1500 + }, + { + 
"epoch": 0.4389530633133499, + "grad_norm": 1.4411024776302432, + "learning_rate": 1.8473299580937563e-05, + "loss": 0.5119056701660156, + "step": 1501 + }, + { + "epoch": 0.4392455037286153, + "grad_norm": 1.2781692932924433, + "learning_rate": 1.847073103835222e-05, + "loss": 0.5864866375923157, + "step": 1502 + }, + { + "epoch": 0.4395379441438807, + "grad_norm": 1.1391351003013295, + "learning_rate": 1.8468160515785e-05, + "loss": 0.6389576196670532, + "step": 1503 + }, + { + "epoch": 0.4398303845591461, + "grad_norm": 1.3447539998849671, + "learning_rate": 1.846558801383675e-05, + "loss": 0.6745110750198364, + "step": 1504 + }, + { + "epoch": 0.44012282497441146, + "grad_norm": 1.4359844129069297, + "learning_rate": 1.846301353310877e-05, + "loss": 0.6207559704780579, + "step": 1505 + }, + { + "epoch": 0.44041526538967685, + "grad_norm": 1.4143769366285628, + "learning_rate": 1.8460437074202832e-05, + "loss": 0.6818139553070068, + "step": 1506 + }, + { + "epoch": 0.44070770580494223, + "grad_norm": 1.4877202307925406, + "learning_rate": 1.845785863772117e-05, + "loss": 0.652062714099884, + "step": 1507 + }, + { + "epoch": 0.4410001462202076, + "grad_norm": 1.340284980688535, + "learning_rate": 1.8455278224266476e-05, + "loss": 0.6842166185379028, + "step": 1508 + }, + { + "epoch": 0.44129258663547305, + "grad_norm": 1.3899905625699573, + "learning_rate": 1.8452695834441904e-05, + "loss": 0.6459342837333679, + "step": 1509 + }, + { + "epoch": 0.44158502705073843, + "grad_norm": 1.3677235686172902, + "learning_rate": 1.8450111468851078e-05, + "loss": 0.6036739349365234, + "step": 1510 + }, + { + "epoch": 0.4418774674660038, + "grad_norm": 1.401326082704981, + "learning_rate": 1.844752512809807e-05, + "loss": 0.7530199289321899, + "step": 1511 + }, + { + "epoch": 0.4421699078812692, + "grad_norm": 1.249585374389202, + "learning_rate": 1.8444936812787428e-05, + "loss": 0.6098290085792542, + "step": 1512 + }, + { + "epoch": 0.4424623482965346, + "grad_norm": 
1.6252323705163014, + "learning_rate": 1.844234652352415e-05, + "loss": 0.7142464518547058, + "step": 1513 + }, + { + "epoch": 0.44275478871179996, + "grad_norm": 1.3215155589821708, + "learning_rate": 1.8439754260913703e-05, + "loss": 0.4895970821380615, + "step": 1514 + }, + { + "epoch": 0.44304722912706535, + "grad_norm": 1.2855871920553614, + "learning_rate": 1.8437160025562012e-05, + "loss": 0.6166520118713379, + "step": 1515 + }, + { + "epoch": 0.4433396695423307, + "grad_norm": 1.3621423468696194, + "learning_rate": 1.8434563818075462e-05, + "loss": 0.6020585894584656, + "step": 1516 + }, + { + "epoch": 0.44363210995759617, + "grad_norm": 1.3215872914676274, + "learning_rate": 1.8431965639060904e-05, + "loss": 0.6879030466079712, + "step": 1517 + }, + { + "epoch": 0.44392455037286155, + "grad_norm": 1.2000763930073624, + "learning_rate": 1.8429365489125644e-05, + "loss": 0.5753897428512573, + "step": 1518 + }, + { + "epoch": 0.44421699078812693, + "grad_norm": 1.2916902596192155, + "learning_rate": 1.8426763368877455e-05, + "loss": 0.5165301561355591, + "step": 1519 + }, + { + "epoch": 0.4445094312033923, + "grad_norm": 1.630208225804633, + "learning_rate": 1.842415927892456e-05, + "loss": 0.6377310752868652, + "step": 1520 + }, + { + "epoch": 0.4448018716186577, + "grad_norm": 1.4221002668397775, + "learning_rate": 1.842155321987566e-05, + "loss": 0.7429912090301514, + "step": 1521 + }, + { + "epoch": 0.4450943120339231, + "grad_norm": 1.5079395076396265, + "learning_rate": 1.8418945192339892e-05, + "loss": 0.6177542209625244, + "step": 1522 + }, + { + "epoch": 0.44538675244918846, + "grad_norm": 1.2784904022569494, + "learning_rate": 1.8416335196926877e-05, + "loss": 0.662541389465332, + "step": 1523 + }, + { + "epoch": 0.44567919286445384, + "grad_norm": 1.2782173083325044, + "learning_rate": 1.841372323424668e-05, + "loss": 0.6026759743690491, + "step": 1524 + }, + { + "epoch": 0.4459716332797193, + "grad_norm": 1.5759742604234355, + "learning_rate": 
1.8411109304909837e-05, + "loss": 0.7902384400367737, + "step": 1525 + }, + { + "epoch": 0.44626407369498466, + "grad_norm": 1.4904175669631523, + "learning_rate": 1.840849340952733e-05, + "loss": 0.6588590145111084, + "step": 1526 + }, + { + "epoch": 0.44655651411025005, + "grad_norm": 1.1682358413615135, + "learning_rate": 1.8405875548710614e-05, + "loss": 0.49133825302124023, + "step": 1527 + }, + { + "epoch": 0.44684895452551543, + "grad_norm": 1.4464174570347765, + "learning_rate": 1.8403255723071597e-05, + "loss": 0.6644654273986816, + "step": 1528 + }, + { + "epoch": 0.4471413949407808, + "grad_norm": 1.2325053536943291, + "learning_rate": 1.8400633933222647e-05, + "loss": 0.6257454752922058, + "step": 1529 + }, + { + "epoch": 0.4474338353560462, + "grad_norm": 1.4100106920950097, + "learning_rate": 1.8398010179776597e-05, + "loss": 0.6671919226646423, + "step": 1530 + }, + { + "epoch": 0.4477262757713116, + "grad_norm": 1.1625081058782702, + "learning_rate": 1.839538446334672e-05, + "loss": 0.6001447439193726, + "step": 1531 + }, + { + "epoch": 0.44801871618657696, + "grad_norm": 1.6509081383772402, + "learning_rate": 1.8392756784546775e-05, + "loss": 0.8103213310241699, + "step": 1532 + }, + { + "epoch": 0.4483111566018424, + "grad_norm": 1.1675484766628168, + "learning_rate": 1.839012714399096e-05, + "loss": 0.7010835409164429, + "step": 1533 + }, + { + "epoch": 0.4486035970171078, + "grad_norm": 1.0773967688725017, + "learning_rate": 1.8387495542293935e-05, + "loss": 0.5709215402603149, + "step": 1534 + }, + { + "epoch": 0.44889603743237316, + "grad_norm": 1.3558935245332375, + "learning_rate": 1.8384861980070826e-05, + "loss": 0.6410949230194092, + "step": 1535 + }, + { + "epoch": 0.44918847784763855, + "grad_norm": 1.358963272892771, + "learning_rate": 1.838222645793721e-05, + "loss": 0.8036839962005615, + "step": 1536 + }, + { + "epoch": 0.44948091826290393, + "grad_norm": 1.1470889977158967, + "learning_rate": 1.8379588976509123e-05, + "loss": 
0.49213099479675293, + "step": 1537 + }, + { + "epoch": 0.4497733586781693, + "grad_norm": 1.5829843161961048, + "learning_rate": 1.8376949536403063e-05, + "loss": 0.7111018896102905, + "step": 1538 + }, + { + "epoch": 0.4500657990934347, + "grad_norm": 1.313995907545699, + "learning_rate": 1.837430813823598e-05, + "loss": 0.8506999015808105, + "step": 1539 + }, + { + "epoch": 0.4503582395087001, + "grad_norm": 1.2175571229137518, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.7369798421859741, + "step": 1540 + }, + { + "epoch": 0.4506506799239655, + "grad_norm": 1.3435168892785054, + "learning_rate": 1.8369019470188855e-05, + "loss": 0.5982831120491028, + "step": 1541 + }, + { + "epoch": 0.4509431203392309, + "grad_norm": 1.2303590063922416, + "learning_rate": 1.8366372201545002e-05, + "loss": 0.6129144430160522, + "step": 1542 + }, + { + "epoch": 0.4512355607544963, + "grad_norm": 1.5191607059455674, + "learning_rate": 1.8363722977312512e-05, + "loss": 0.7142921686172485, + "step": 1543 + }, + { + "epoch": 0.45152800116976166, + "grad_norm": 1.1545455601160404, + "learning_rate": 1.8361071798110635e-05, + "loss": 0.515651524066925, + "step": 1544 + }, + { + "epoch": 0.45182044158502704, + "grad_norm": 1.3144713138844157, + "learning_rate": 1.8358418664559058e-05, + "loss": 0.5544168949127197, + "step": 1545 + }, + { + "epoch": 0.4521128820002924, + "grad_norm": 1.2540637765053078, + "learning_rate": 1.8355763577277938e-05, + "loss": 0.6801918745040894, + "step": 1546 + }, + { + "epoch": 0.4524053224155578, + "grad_norm": 1.3664850716479517, + "learning_rate": 1.835310653688789e-05, + "loss": 0.683785080909729, + "step": 1547 + }, + { + "epoch": 0.45269776283082325, + "grad_norm": 1.363558169999723, + "learning_rate": 1.835044754400997e-05, + "loss": 0.5689892172813416, + "step": 1548 + }, + { + "epoch": 0.45299020324608863, + "grad_norm": 1.1621305276584806, + "learning_rate": 1.8347786599265713e-05, + "loss": 0.5260726809501648, + "step": 1549 + }, + { + 
"epoch": 0.453282643661354, + "grad_norm": 1.2201116845769602, + "learning_rate": 1.834512370327709e-05, + "loss": 0.6792432069778442, + "step": 1550 + }, + { + "epoch": 0.4535750840766194, + "grad_norm": 1.198643016289117, + "learning_rate": 1.8342458856666545e-05, + "loss": 0.6336524486541748, + "step": 1551 + }, + { + "epoch": 0.4538675244918848, + "grad_norm": 1.3472994421503108, + "learning_rate": 1.8339792060056965e-05, + "loss": 0.5929614901542664, + "step": 1552 + }, + { + "epoch": 0.45415996490715016, + "grad_norm": 1.2599505430948363, + "learning_rate": 1.8337123314071696e-05, + "loss": 0.6683382391929626, + "step": 1553 + }, + { + "epoch": 0.45445240532241554, + "grad_norm": 1.2860246628200298, + "learning_rate": 1.833445261933454e-05, + "loss": 0.6256811618804932, + "step": 1554 + }, + { + "epoch": 0.4547448457376809, + "grad_norm": 1.3499468606960694, + "learning_rate": 1.8331779976469765e-05, + "loss": 0.5974653959274292, + "step": 1555 + }, + { + "epoch": 0.45503728615294636, + "grad_norm": 1.2078321854850618, + "learning_rate": 1.8329105386102074e-05, + "loss": 0.5471535325050354, + "step": 1556 + }, + { + "epoch": 0.45532972656821175, + "grad_norm": 1.284169615938693, + "learning_rate": 1.832642884885664e-05, + "loss": 0.5751267075538635, + "step": 1557 + }, + { + "epoch": 0.45562216698347713, + "grad_norm": 1.3234326952626145, + "learning_rate": 1.8323750365359092e-05, + "loss": 0.7003380060195923, + "step": 1558 + }, + { + "epoch": 0.4559146073987425, + "grad_norm": 1.3333099062603002, + "learning_rate": 1.8321069936235503e-05, + "loss": 0.6351351737976074, + "step": 1559 + }, + { + "epoch": 0.4562070478140079, + "grad_norm": 1.4452410048586575, + "learning_rate": 1.8318387562112407e-05, + "loss": 0.6083345413208008, + "step": 1560 + }, + { + "epoch": 0.4564994882292733, + "grad_norm": 1.230127453588353, + "learning_rate": 1.83157032436168e-05, + "loss": 0.589935302734375, + "step": 1561 + }, + { + "epoch": 0.45679192864453866, + "grad_norm": 
1.3491229847821233, + "learning_rate": 1.8313016981376116e-05, + "loss": 0.7648014426231384, + "step": 1562 + }, + { + "epoch": 0.45708436905980404, + "grad_norm": 1.2461686063365083, + "learning_rate": 1.831032877601826e-05, + "loss": 0.7309973239898682, + "step": 1563 + }, + { + "epoch": 0.4573768094750695, + "grad_norm": 1.4691097869713072, + "learning_rate": 1.8307638628171575e-05, + "loss": 0.7231593728065491, + "step": 1564 + }, + { + "epoch": 0.45766924989033486, + "grad_norm": 1.4770239307253334, + "learning_rate": 1.8304946538464876e-05, + "loss": 0.7321262359619141, + "step": 1565 + }, + { + "epoch": 0.45796169030560024, + "grad_norm": 1.1157038717428966, + "learning_rate": 1.830225250752742e-05, + "loss": 0.5866271257400513, + "step": 1566 + }, + { + "epoch": 0.4582541307208656, + "grad_norm": 1.4899327841327124, + "learning_rate": 1.8299556535988917e-05, + "loss": 0.7146202325820923, + "step": 1567 + }, + { + "epoch": 0.458546571136131, + "grad_norm": 1.0989226716242009, + "learning_rate": 1.8296858624479536e-05, + "loss": 0.4600168466567993, + "step": 1568 + }, + { + "epoch": 0.4588390115513964, + "grad_norm": 1.5647421342147445, + "learning_rate": 1.8294158773629896e-05, + "loss": 0.5710705518722534, + "step": 1569 + }, + { + "epoch": 0.4591314519666618, + "grad_norm": 1.4737029572986353, + "learning_rate": 1.8291456984071073e-05, + "loss": 0.7075216770172119, + "step": 1570 + }, + { + "epoch": 0.45942389238192716, + "grad_norm": 1.2087048615463696, + "learning_rate": 1.828875325643459e-05, + "loss": 0.5262739062309265, + "step": 1571 + }, + { + "epoch": 0.4597163327971926, + "grad_norm": 1.2732843462549814, + "learning_rate": 1.8286047591352436e-05, + "loss": 0.724657416343689, + "step": 1572 + }, + { + "epoch": 0.460008773212458, + "grad_norm": 1.2778614004914874, + "learning_rate": 1.8283339989457033e-05, + "loss": 0.6047587394714355, + "step": 1573 + }, + { + "epoch": 0.46030121362772336, + "grad_norm": 1.481028950467352, + "learning_rate": 
1.828063045138127e-05, + "loss": 0.6647980213165283, + "step": 1574 + }, + { + "epoch": 0.46059365404298874, + "grad_norm": 1.3031844151965102, + "learning_rate": 1.827791897775849e-05, + "loss": 0.6081969738006592, + "step": 1575 + }, + { + "epoch": 0.4608860944582541, + "grad_norm": 1.2574668609577524, + "learning_rate": 1.827520556922248e-05, + "loss": 0.6815003156661987, + "step": 1576 + }, + { + "epoch": 0.4611785348735195, + "grad_norm": 1.25588669780601, + "learning_rate": 1.8272490226407476e-05, + "loss": 0.5571715235710144, + "step": 1577 + }, + { + "epoch": 0.4614709752887849, + "grad_norm": 1.241115553107667, + "learning_rate": 1.8269772949948185e-05, + "loss": 0.7562757730484009, + "step": 1578 + }, + { + "epoch": 0.4617634157040503, + "grad_norm": 1.3753582703744767, + "learning_rate": 1.8267053740479745e-05, + "loss": 0.6330382227897644, + "step": 1579 + }, + { + "epoch": 0.4620558561193157, + "grad_norm": 1.5331426598457012, + "learning_rate": 1.826433259863776e-05, + "loss": 0.7696597576141357, + "step": 1580 + }, + { + "epoch": 0.4623482965345811, + "grad_norm": 1.3594821877317964, + "learning_rate": 1.8261609525058275e-05, + "loss": 0.6953772306442261, + "step": 1581 + }, + { + "epoch": 0.4626407369498465, + "grad_norm": 1.3957443557298115, + "learning_rate": 1.8258884520377797e-05, + "loss": 0.5856037735939026, + "step": 1582 + }, + { + "epoch": 0.46293317736511186, + "grad_norm": 1.3245931479550002, + "learning_rate": 1.8256157585233277e-05, + "loss": 0.5988172888755798, + "step": 1583 + }, + { + "epoch": 0.46322561778037724, + "grad_norm": 1.3153037118046438, + "learning_rate": 1.8253428720262117e-05, + "loss": 0.6320241689682007, + "step": 1584 + }, + { + "epoch": 0.4635180581956426, + "grad_norm": 1.1680775814478943, + "learning_rate": 1.8250697926102182e-05, + "loss": 0.5758935213088989, + "step": 1585 + }, + { + "epoch": 0.463810498610908, + "grad_norm": 1.4295465315991271, + "learning_rate": 1.8247965203391763e-05, + "loss": 
0.7104986906051636, + "step": 1586 + }, + { + "epoch": 0.46410293902617344, + "grad_norm": 1.4739846709331708, + "learning_rate": 1.8245230552769634e-05, + "loss": 0.6322015523910522, + "step": 1587 + }, + { + "epoch": 0.4643953794414388, + "grad_norm": 1.4263760736603013, + "learning_rate": 1.824249397487499e-05, + "loss": 0.5881235003471375, + "step": 1588 + }, + { + "epoch": 0.4646878198567042, + "grad_norm": 1.5652864190332019, + "learning_rate": 1.8239755470347497e-05, + "loss": 0.8097240924835205, + "step": 1589 + }, + { + "epoch": 0.4649802602719696, + "grad_norm": 1.4192861983980027, + "learning_rate": 1.823701503982726e-05, + "loss": 0.6538649201393127, + "step": 1590 + }, + { + "epoch": 0.465272700687235, + "grad_norm": 1.2329012857349442, + "learning_rate": 1.8234272683954842e-05, + "loss": 0.5868922472000122, + "step": 1591 + }, + { + "epoch": 0.46556514110250036, + "grad_norm": 1.3076575357637654, + "learning_rate": 1.8231528403371248e-05, + "loss": 0.6747265458106995, + "step": 1592 + }, + { + "epoch": 0.46585758151776574, + "grad_norm": 1.2961728564371904, + "learning_rate": 1.8228782198717936e-05, + "loss": 0.6519996523857117, + "step": 1593 + }, + { + "epoch": 0.4661500219330311, + "grad_norm": 1.2212124627082057, + "learning_rate": 1.822603407063682e-05, + "loss": 0.7268975973129272, + "step": 1594 + }, + { + "epoch": 0.46644246234829656, + "grad_norm": 1.1603454255193932, + "learning_rate": 1.8223284019770252e-05, + "loss": 0.6554980278015137, + "step": 1595 + }, + { + "epoch": 0.46673490276356194, + "grad_norm": 1.350233636463654, + "learning_rate": 1.8220532046761047e-05, + "loss": 0.7014105319976807, + "step": 1596 + }, + { + "epoch": 0.4670273431788273, + "grad_norm": 1.4228663397014873, + "learning_rate": 1.821777815225245e-05, + "loss": 0.5766602158546448, + "step": 1597 + }, + { + "epoch": 0.4673197835940927, + "grad_norm": 1.304159292005938, + "learning_rate": 1.8215022336888182e-05, + "loss": 0.5106521844863892, + "step": 1598 + }, + { + 
"epoch": 0.4676122240093581, + "grad_norm": 1.419250792414019, + "learning_rate": 1.821226460131239e-05, + "loss": 0.801375150680542, + "step": 1599 + }, + { + "epoch": 0.4679046644246235, + "grad_norm": 1.2845833863087142, + "learning_rate": 1.8209504946169677e-05, + "loss": 0.6189062595367432, + "step": 1600 + }, + { + "epoch": 0.46819710483988886, + "grad_norm": 1.2468379881228138, + "learning_rate": 1.8206743372105098e-05, + "loss": 0.6719359159469604, + "step": 1601 + }, + { + "epoch": 0.46848954525515424, + "grad_norm": 1.4660205035921348, + "learning_rate": 1.8203979879764153e-05, + "loss": 0.7437123656272888, + "step": 1602 + }, + { + "epoch": 0.4687819856704197, + "grad_norm": 1.3639008290802046, + "learning_rate": 1.8201214469792793e-05, + "loss": 0.7273217439651489, + "step": 1603 + }, + { + "epoch": 0.46907442608568506, + "grad_norm": 1.332814377531963, + "learning_rate": 1.8198447142837416e-05, + "loss": 0.6467087268829346, + "step": 1604 + }, + { + "epoch": 0.46936686650095044, + "grad_norm": 1.1167815102053054, + "learning_rate": 1.8195677899544866e-05, + "loss": 0.5764428973197937, + "step": 1605 + }, + { + "epoch": 0.4696593069162158, + "grad_norm": 1.4761144768835275, + "learning_rate": 1.8192906740562437e-05, + "loss": 0.5969977378845215, + "step": 1606 + }, + { + "epoch": 0.4699517473314812, + "grad_norm": 1.3424638711815577, + "learning_rate": 1.819013366653787e-05, + "loss": 0.7237746119499207, + "step": 1607 + }, + { + "epoch": 0.4702441877467466, + "grad_norm": 1.4494789457227795, + "learning_rate": 1.8187358678119355e-05, + "loss": 0.6289568543434143, + "step": 1608 + }, + { + "epoch": 0.47053662816201197, + "grad_norm": 1.1494676131886132, + "learning_rate": 1.8184581775955533e-05, + "loss": 0.5773013234138489, + "step": 1609 + }, + { + "epoch": 0.47082906857727735, + "grad_norm": 1.3055308518970814, + "learning_rate": 1.818180296069548e-05, + "loss": 0.5940284729003906, + "step": 1610 + }, + { + "epoch": 0.4711215089925428, + "grad_norm": 
1.506634303312927, + "learning_rate": 1.8179022232988735e-05, + "loss": 0.7051881551742554, + "step": 1611 + }, + { + "epoch": 0.4714139494078082, + "grad_norm": 1.2817274142705404, + "learning_rate": 1.8176239593485267e-05, + "loss": 0.6427813768386841, + "step": 1612 + }, + { + "epoch": 0.47170638982307356, + "grad_norm": 1.3150009445137423, + "learning_rate": 1.817345504283551e-05, + "loss": 0.7041782736778259, + "step": 1613 + }, + { + "epoch": 0.47199883023833894, + "grad_norm": 1.1960422316530261, + "learning_rate": 1.817066858169033e-05, + "loss": 0.6568688154220581, + "step": 1614 + }, + { + "epoch": 0.4722912706536043, + "grad_norm": 1.1082706297141673, + "learning_rate": 1.816788021070105e-05, + "loss": 0.4784452021121979, + "step": 1615 + }, + { + "epoch": 0.4725837110688697, + "grad_norm": 1.403652579196444, + "learning_rate": 1.816508993051943e-05, + "loss": 0.6012705564498901, + "step": 1616 + }, + { + "epoch": 0.4728761514841351, + "grad_norm": 1.441258763214559, + "learning_rate": 1.8162297741797685e-05, + "loss": 0.6414428949356079, + "step": 1617 + }, + { + "epoch": 0.47316859189940047, + "grad_norm": 1.4131643644174843, + "learning_rate": 1.815950364518847e-05, + "loss": 0.6446187496185303, + "step": 1618 + }, + { + "epoch": 0.4734610323146659, + "grad_norm": 1.2552495046018781, + "learning_rate": 1.8156707641344885e-05, + "loss": 0.5153034329414368, + "step": 1619 + }, + { + "epoch": 0.4737534727299313, + "grad_norm": 1.5159052607593526, + "learning_rate": 1.8153909730920485e-05, + "loss": 0.7209463715553284, + "step": 1620 + }, + { + "epoch": 0.4740459131451967, + "grad_norm": 1.2933785450044248, + "learning_rate": 1.8151109914569267e-05, + "loss": 0.5990744829177856, + "step": 1621 + }, + { + "epoch": 0.47433835356046206, + "grad_norm": 1.3033668993107679, + "learning_rate": 1.814830819294566e-05, + "loss": 0.5706672668457031, + "step": 1622 + }, + { + "epoch": 0.47463079397572744, + "grad_norm": 1.1946317041445573, + "learning_rate": 
1.814550456670456e-05, + "loss": 0.538548469543457, + "step": 1623 + }, + { + "epoch": 0.4749232343909928, + "grad_norm": 1.3282078081285205, + "learning_rate": 1.8142699036501288e-05, + "loss": 0.6450623273849487, + "step": 1624 + }, + { + "epoch": 0.4752156748062582, + "grad_norm": 1.336508209824809, + "learning_rate": 1.813989160299163e-05, + "loss": 0.6537624597549438, + "step": 1625 + }, + { + "epoch": 0.47550811522152364, + "grad_norm": 1.2777879020397362, + "learning_rate": 1.8137082266831794e-05, + "loss": 0.7126362323760986, + "step": 1626 + }, + { + "epoch": 0.475800555636789, + "grad_norm": 1.4542616967071014, + "learning_rate": 1.813427102867846e-05, + "loss": 0.6686921119689941, + "step": 1627 + }, + { + "epoch": 0.4760929960520544, + "grad_norm": 1.4231643377055359, + "learning_rate": 1.8131457889188723e-05, + "loss": 0.5925619602203369, + "step": 1628 + }, + { + "epoch": 0.4763854364673198, + "grad_norm": 1.2702390975554385, + "learning_rate": 1.8128642849020147e-05, + "loss": 0.7251017689704895, + "step": 1629 + }, + { + "epoch": 0.47667787688258517, + "grad_norm": 1.5675645867645378, + "learning_rate": 1.8125825908830733e-05, + "loss": 0.7524283528327942, + "step": 1630 + }, + { + "epoch": 0.47697031729785055, + "grad_norm": 1.2843975237623166, + "learning_rate": 1.8123007069278914e-05, + "loss": 0.7593197226524353, + "step": 1631 + }, + { + "epoch": 0.47726275771311594, + "grad_norm": 1.2304771008785658, + "learning_rate": 1.812018633102358e-05, + "loss": 0.43353578448295593, + "step": 1632 + }, + { + "epoch": 0.4775551981283813, + "grad_norm": 1.1488804965894268, + "learning_rate": 1.8117363694724063e-05, + "loss": 0.6254708766937256, + "step": 1633 + }, + { + "epoch": 0.47784763854364676, + "grad_norm": 1.2467231401784862, + "learning_rate": 1.811453916104014e-05, + "loss": 0.5970091223716736, + "step": 1634 + }, + { + "epoch": 0.47814007895891214, + "grad_norm": 1.2798152763028137, + "learning_rate": 1.8111712730632024e-05, + "loss": 
0.6299331188201904, + "step": 1635 + }, + { + "epoch": 0.4784325193741775, + "grad_norm": 1.4325282365212126, + "learning_rate": 1.810888440416038e-05, + "loss": 0.7461789846420288, + "step": 1636 + }, + { + "epoch": 0.4787249597894429, + "grad_norm": 1.2539146793136515, + "learning_rate": 1.8106054182286305e-05, + "loss": 0.5053290724754333, + "step": 1637 + }, + { + "epoch": 0.4790174002047083, + "grad_norm": 1.2809048918941985, + "learning_rate": 1.810322206567135e-05, + "loss": 0.6853327751159668, + "step": 1638 + }, + { + "epoch": 0.47930984061997367, + "grad_norm": 1.4027979186429358, + "learning_rate": 1.8100388054977508e-05, + "loss": 0.5337134599685669, + "step": 1639 + }, + { + "epoch": 0.47960228103523905, + "grad_norm": 1.357622845311743, + "learning_rate": 1.809755215086721e-05, + "loss": 0.7082560062408447, + "step": 1640 + }, + { + "epoch": 0.47989472145050444, + "grad_norm": 1.3590974916852807, + "learning_rate": 1.8094714354003325e-05, + "loss": 0.680424153804779, + "step": 1641 + }, + { + "epoch": 0.4801871618657699, + "grad_norm": 1.1398057291819046, + "learning_rate": 1.8091874665049183e-05, + "loss": 0.5235139727592468, + "step": 1642 + }, + { + "epoch": 0.48047960228103526, + "grad_norm": 1.3822416905178454, + "learning_rate": 1.8089033084668535e-05, + "loss": 0.7843992114067078, + "step": 1643 + }, + { + "epoch": 0.48077204269630064, + "grad_norm": 1.4941957252025324, + "learning_rate": 1.8086189613525587e-05, + "loss": 0.6736497282981873, + "step": 1644 + }, + { + "epoch": 0.481064483111566, + "grad_norm": 1.3326594399820286, + "learning_rate": 1.808334425228498e-05, + "loss": 0.6898948550224304, + "step": 1645 + }, + { + "epoch": 0.4813569235268314, + "grad_norm": 1.3419429940100798, + "learning_rate": 1.80804970016118e-05, + "loss": 0.6719726324081421, + "step": 1646 + }, + { + "epoch": 0.4816493639420968, + "grad_norm": 1.316270232362313, + "learning_rate": 1.807764786217158e-05, + "loss": 0.6904356479644775, + "step": 1647 + }, + { + 
"epoch": 0.48194180435736217, + "grad_norm": 1.3009257254922486, + "learning_rate": 1.8074796834630285e-05, + "loss": 0.5956645011901855, + "step": 1648 + }, + { + "epoch": 0.48223424477262755, + "grad_norm": 1.162557710559535, + "learning_rate": 1.8071943919654323e-05, + "loss": 0.5676499009132385, + "step": 1649 + }, + { + "epoch": 0.482526685187893, + "grad_norm": 1.3145895725362904, + "learning_rate": 1.8069089117910547e-05, + "loss": 0.6006937026977539, + "step": 1650 + }, + { + "epoch": 0.48281912560315837, + "grad_norm": 1.3694341047830378, + "learning_rate": 1.806623243006625e-05, + "loss": 0.6241977214813232, + "step": 1651 + }, + { + "epoch": 0.48311156601842375, + "grad_norm": 1.4152304986784254, + "learning_rate": 1.806337385678917e-05, + "loss": 0.7359870672225952, + "step": 1652 + }, + { + "epoch": 0.48340400643368914, + "grad_norm": 1.155725074088707, + "learning_rate": 1.806051339874748e-05, + "loss": 0.6113119125366211, + "step": 1653 + }, + { + "epoch": 0.4836964468489545, + "grad_norm": 1.3288798785197886, + "learning_rate": 1.8057651056609784e-05, + "loss": 0.642951488494873, + "step": 1654 + }, + { + "epoch": 0.4839888872642199, + "grad_norm": 1.3081605749498326, + "learning_rate": 1.8054786831045147e-05, + "loss": 0.7020113468170166, + "step": 1655 + }, + { + "epoch": 0.4842813276794853, + "grad_norm": 1.355302216036822, + "learning_rate": 1.8051920722723063e-05, + "loss": 0.678231418132782, + "step": 1656 + }, + { + "epoch": 0.48457376809475067, + "grad_norm": 1.2407750790627203, + "learning_rate": 1.8049052732313466e-05, + "loss": 0.604765772819519, + "step": 1657 + }, + { + "epoch": 0.4848662085100161, + "grad_norm": 1.501775861517808, + "learning_rate": 1.8046182860486735e-05, + "loss": 0.6812270879745483, + "step": 1658 + }, + { + "epoch": 0.4851586489252815, + "grad_norm": 1.329019452940817, + "learning_rate": 1.8043311107913675e-05, + "loss": 0.6284930109977722, + "step": 1659 + }, + { + "epoch": 0.48545108934054687, + "grad_norm": 
1.4460160298748268, + "learning_rate": 1.8040437475265554e-05, + "loss": 0.665177583694458, + "step": 1660 + }, + { + "epoch": 0.48574352975581225, + "grad_norm": 1.365611165893268, + "learning_rate": 1.8037561963214058e-05, + "loss": 0.7628738284111023, + "step": 1661 + }, + { + "epoch": 0.48603597017107764, + "grad_norm": 1.4917601408905583, + "learning_rate": 1.8034684572431322e-05, + "loss": 0.6372654438018799, + "step": 1662 + }, + { + "epoch": 0.486328410586343, + "grad_norm": 1.2986927468884095, + "learning_rate": 1.803180530358992e-05, + "loss": 0.5915756225585938, + "step": 1663 + }, + { + "epoch": 0.4866208510016084, + "grad_norm": 1.3509164579114188, + "learning_rate": 1.802892415736286e-05, + "loss": 0.6821908950805664, + "step": 1664 + }, + { + "epoch": 0.48691329141687384, + "grad_norm": 1.3857679722145793, + "learning_rate": 1.80260411344236e-05, + "loss": 0.6418279409408569, + "step": 1665 + }, + { + "epoch": 0.4872057318321392, + "grad_norm": 1.154306591574384, + "learning_rate": 1.802315623544602e-05, + "loss": 0.5582526922225952, + "step": 1666 + }, + { + "epoch": 0.4874981722474046, + "grad_norm": 1.3431793608397968, + "learning_rate": 1.8020269461104448e-05, + "loss": 0.7145007848739624, + "step": 1667 + }, + { + "epoch": 0.48779061266267, + "grad_norm": 1.2110741699326812, + "learning_rate": 1.8017380812073658e-05, + "loss": 0.5415871739387512, + "step": 1668 + }, + { + "epoch": 0.48808305307793537, + "grad_norm": 1.488356994545647, + "learning_rate": 1.801449028902885e-05, + "loss": 0.728327751159668, + "step": 1669 + }, + { + "epoch": 0.48837549349320075, + "grad_norm": 1.3273378299589804, + "learning_rate": 1.8011597892645665e-05, + "loss": 0.6469160914421082, + "step": 1670 + }, + { + "epoch": 0.48866793390846613, + "grad_norm": 1.3096259850876997, + "learning_rate": 1.8008703623600185e-05, + "loss": 0.7107353210449219, + "step": 1671 + }, + { + "epoch": 0.4889603743237315, + "grad_norm": 1.4201847213896843, + "learning_rate": 
1.8005807482568926e-05, + "loss": 0.6918982267379761, + "step": 1672 + }, + { + "epoch": 0.48925281473899696, + "grad_norm": 1.4096024584844806, + "learning_rate": 1.800290947022884e-05, + "loss": 0.661738932132721, + "step": 1673 + }, + { + "epoch": 0.48954525515426234, + "grad_norm": 1.4938181766281158, + "learning_rate": 1.800000958725733e-05, + "loss": 0.6816283464431763, + "step": 1674 + }, + { + "epoch": 0.4898376955695277, + "grad_norm": 1.348689926804817, + "learning_rate": 1.7997107834332217e-05, + "loss": 0.6988941431045532, + "step": 1675 + }, + { + "epoch": 0.4901301359847931, + "grad_norm": 1.5696470599370025, + "learning_rate": 1.799420421213177e-05, + "loss": 0.7997519969940186, + "step": 1676 + }, + { + "epoch": 0.4904225764000585, + "grad_norm": 1.3512394042939826, + "learning_rate": 1.7991298721334697e-05, + "loss": 0.6552794575691223, + "step": 1677 + }, + { + "epoch": 0.49071501681532387, + "grad_norm": 1.2446219807906005, + "learning_rate": 1.7988391362620135e-05, + "loss": 0.6144021153450012, + "step": 1678 + }, + { + "epoch": 0.49100745723058925, + "grad_norm": 1.2086851376188177, + "learning_rate": 1.798548213666766e-05, + "loss": 0.5036276578903198, + "step": 1679 + }, + { + "epoch": 0.49129989764585463, + "grad_norm": 1.1620444251602322, + "learning_rate": 1.7982571044157288e-05, + "loss": 0.5152162313461304, + "step": 1680 + }, + { + "epoch": 0.49159233806112007, + "grad_norm": 1.4266855366652862, + "learning_rate": 1.797965808576947e-05, + "loss": 0.7249797582626343, + "step": 1681 + }, + { + "epoch": 0.49188477847638545, + "grad_norm": 1.138885414798186, + "learning_rate": 1.7976743262185094e-05, + "loss": 0.5769079923629761, + "step": 1682 + }, + { + "epoch": 0.49217721889165084, + "grad_norm": 1.2523240509929359, + "learning_rate": 1.797382657408548e-05, + "loss": 0.7017331123352051, + "step": 1683 + }, + { + "epoch": 0.4924696593069162, + "grad_norm": 1.3095438640742119, + "learning_rate": 1.797090802215238e-05, + "loss": 
0.788599967956543, + "step": 1684 + }, + { + "epoch": 0.4927620997221816, + "grad_norm": 1.3652642181905799, + "learning_rate": 1.7967987607067997e-05, + "loss": 0.5716612935066223, + "step": 1685 + }, + { + "epoch": 0.493054540137447, + "grad_norm": 1.396592202891807, + "learning_rate": 1.796506532951496e-05, + "loss": 0.6808345913887024, + "step": 1686 + }, + { + "epoch": 0.49334698055271237, + "grad_norm": 1.421363062787346, + "learning_rate": 1.7962141190176326e-05, + "loss": 0.6540817022323608, + "step": 1687 + }, + { + "epoch": 0.49363942096797775, + "grad_norm": 1.3162774070898267, + "learning_rate": 1.7959215189735604e-05, + "loss": 0.6522870063781738, + "step": 1688 + }, + { + "epoch": 0.4939318613832432, + "grad_norm": 1.2120992084575881, + "learning_rate": 1.7956287328876724e-05, + "loss": 0.5217882990837097, + "step": 1689 + }, + { + "epoch": 0.49422430179850857, + "grad_norm": 1.1456971313507769, + "learning_rate": 1.795335760828405e-05, + "loss": 0.6985372304916382, + "step": 1690 + }, + { + "epoch": 0.49451674221377395, + "grad_norm": 1.6308222645679713, + "learning_rate": 1.7950426028642397e-05, + "loss": 0.7199063301086426, + "step": 1691 + }, + { + "epoch": 0.49480918262903933, + "grad_norm": 1.2503132677681021, + "learning_rate": 1.7947492590636998e-05, + "loss": 0.5810575485229492, + "step": 1692 + }, + { + "epoch": 0.4951016230443047, + "grad_norm": 1.5393913616038981, + "learning_rate": 1.7944557294953528e-05, + "loss": 0.7443726658821106, + "step": 1693 + }, + { + "epoch": 0.4953940634595701, + "grad_norm": 1.4257690332105803, + "learning_rate": 1.7941620142278092e-05, + "loss": 0.6774560213088989, + "step": 1694 + }, + { + "epoch": 0.4956865038748355, + "grad_norm": 1.4876883296800856, + "learning_rate": 1.793868113329724e-05, + "loss": 0.6983137726783752, + "step": 1695 + }, + { + "epoch": 0.49597894429010086, + "grad_norm": 1.500775887710686, + "learning_rate": 1.793574026869793e-05, + "loss": 0.6481274366378784, + "step": 1696 + }, + { + 
"epoch": 0.4962713847053663, + "grad_norm": 1.5261372345633493, + "learning_rate": 1.793279754916759e-05, + "loss": 0.6489002704620361, + "step": 1697 + }, + { + "epoch": 0.4965638251206317, + "grad_norm": 1.200851338265551, + "learning_rate": 1.7929852975394056e-05, + "loss": 0.7054505348205566, + "step": 1698 + }, + { + "epoch": 0.49685626553589707, + "grad_norm": 1.1948769153228862, + "learning_rate": 1.79269065480656e-05, + "loss": 0.5257681608200073, + "step": 1699 + }, + { + "epoch": 0.49714870595116245, + "grad_norm": 1.2760885846913066, + "learning_rate": 1.7923958267870936e-05, + "loss": 0.8625251054763794, + "step": 1700 + }, + { + "epoch": 0.49744114636642783, + "grad_norm": 1.223950331700182, + "learning_rate": 1.7921008135499205e-05, + "loss": 0.6736147999763489, + "step": 1701 + }, + { + "epoch": 0.4977335867816932, + "grad_norm": 1.351351583663473, + "learning_rate": 1.7918056151639985e-05, + "loss": 0.5079643130302429, + "step": 1702 + }, + { + "epoch": 0.4980260271969586, + "grad_norm": 1.2324398794203584, + "learning_rate": 1.791510231698328e-05, + "loss": 0.597242534160614, + "step": 1703 + }, + { + "epoch": 0.49831846761222404, + "grad_norm": 1.3776511171825507, + "learning_rate": 1.791214663221953e-05, + "loss": 0.6695376038551331, + "step": 1704 + }, + { + "epoch": 0.4986109080274894, + "grad_norm": 1.2400454845090276, + "learning_rate": 1.7909189098039616e-05, + "loss": 0.6411684155464172, + "step": 1705 + }, + { + "epoch": 0.4989033484427548, + "grad_norm": 1.3917271277458743, + "learning_rate": 1.790622971513484e-05, + "loss": 0.6671754121780396, + "step": 1706 + }, + { + "epoch": 0.4991957888580202, + "grad_norm": 1.1384272276613905, + "learning_rate": 1.7903268484196936e-05, + "loss": 0.5312573909759521, + "step": 1707 + }, + { + "epoch": 0.49948822927328557, + "grad_norm": 1.3626241120949947, + "learning_rate": 1.7900305405918076e-05, + "loss": 0.643236517906189, + "step": 1708 + }, + { + "epoch": 0.49978066968855095, + "grad_norm": 
1.4093385837144417, + "learning_rate": 1.7897340480990863e-05, + "loss": 0.7942951321601868, + "step": 1709 + }, + { + "epoch": 0.5000731101038164, + "grad_norm": 1.3198251548980515, + "learning_rate": 1.789437371010833e-05, + "loss": 0.701362133026123, + "step": 1710 + }, + { + "epoch": 0.5003655505190817, + "grad_norm": 1.3304955567316399, + "learning_rate": 1.789140509396394e-05, + "loss": 0.6993157863616943, + "step": 1711 + }, + { + "epoch": 0.5006579909343472, + "grad_norm": 1.0719148279657758, + "learning_rate": 1.788843463325159e-05, + "loss": 0.568405270576477, + "step": 1712 + }, + { + "epoch": 0.5009504313496125, + "grad_norm": 0.976150644308567, + "learning_rate": 1.7885462328665605e-05, + "loss": 0.4948374032974243, + "step": 1713 + }, + { + "epoch": 0.5012428717648779, + "grad_norm": 1.4692514127239873, + "learning_rate": 1.7882488180900743e-05, + "loss": 0.6679480671882629, + "step": 1714 + }, + { + "epoch": 0.5015353121801432, + "grad_norm": 1.5018221461401142, + "learning_rate": 1.78795121906522e-05, + "loss": 0.706131100654602, + "step": 1715 + }, + { + "epoch": 0.5018277525954087, + "grad_norm": 1.207740414795638, + "learning_rate": 1.787653435861559e-05, + "loss": 0.6691830158233643, + "step": 1716 + }, + { + "epoch": 0.5021201930106741, + "grad_norm": 1.163150990025552, + "learning_rate": 1.787355468548696e-05, + "loss": 0.5624213218688965, + "step": 1717 + }, + { + "epoch": 0.5024126334259394, + "grad_norm": 1.3394004970303723, + "learning_rate": 1.78705731719628e-05, + "loss": 0.4589618444442749, + "step": 1718 + }, + { + "epoch": 0.5027050738412049, + "grad_norm": 1.384883869852314, + "learning_rate": 1.7867589818740012e-05, + "loss": 0.571403980255127, + "step": 1719 + }, + { + "epoch": 0.5029975142564702, + "grad_norm": 1.0668853872947273, + "learning_rate": 1.786460462651594e-05, + "loss": 0.5395561456680298, + "step": 1720 + }, + { + "epoch": 0.5032899546717357, + "grad_norm": 1.243223907233259, + "learning_rate": 1.7861617595988355e-05, 
+ "loss": 0.6166945695877075, + "step": 1721 + }, + { + "epoch": 0.503582395087001, + "grad_norm": 1.4857752879775032, + "learning_rate": 1.7858628727855458e-05, + "loss": 0.6812523603439331, + "step": 1722 + }, + { + "epoch": 0.5038748355022664, + "grad_norm": 1.2390654420633957, + "learning_rate": 1.7855638022815872e-05, + "loss": 0.6602752208709717, + "step": 1723 + }, + { + "epoch": 0.5041672759175319, + "grad_norm": 1.0873682718880517, + "learning_rate": 1.7852645481568665e-05, + "loss": 0.49925822019577026, + "step": 1724 + }, + { + "epoch": 0.5044597163327972, + "grad_norm": 1.3265310908908576, + "learning_rate": 1.784965110481332e-05, + "loss": 0.5557682514190674, + "step": 1725 + }, + { + "epoch": 0.5047521567480626, + "grad_norm": 1.2775644185514514, + "learning_rate": 1.7846654893249756e-05, + "loss": 0.6576372981071472, + "step": 1726 + }, + { + "epoch": 0.505044597163328, + "grad_norm": 2.047704943438843, + "learning_rate": 1.7843656847578317e-05, + "loss": 0.5266367197036743, + "step": 1727 + }, + { + "epoch": 0.5053370375785934, + "grad_norm": 1.6086224094226402, + "learning_rate": 1.7840656968499782e-05, + "loss": 0.7368261218070984, + "step": 1728 + }, + { + "epoch": 0.5056294779938587, + "grad_norm": 1.2755318597370908, + "learning_rate": 1.7837655256715355e-05, + "loss": 0.6583619117736816, + "step": 1729 + }, + { + "epoch": 0.5059219184091241, + "grad_norm": 1.4196511617190575, + "learning_rate": 1.7834651712926662e-05, + "loss": 0.7323073148727417, + "step": 1730 + }, + { + "epoch": 0.5062143588243895, + "grad_norm": 1.540686270234863, + "learning_rate": 1.783164633783577e-05, + "loss": 0.6059812307357788, + "step": 1731 + }, + { + "epoch": 0.5065067992396549, + "grad_norm": 1.451028079648097, + "learning_rate": 1.782863913214516e-05, + "loss": 0.5992608070373535, + "step": 1732 + }, + { + "epoch": 0.5067992396549204, + "grad_norm": 1.3452146161553644, + "learning_rate": 1.7825630096557754e-05, + "loss": 0.5729147791862488, + "step": 1733 + }, 
+ { + "epoch": 0.5070916800701857, + "grad_norm": 1.4383912240083958, + "learning_rate": 1.782261923177689e-05, + "loss": 0.6708269119262695, + "step": 1734 + }, + { + "epoch": 0.5073841204854511, + "grad_norm": 1.0922943221428454, + "learning_rate": 1.7819606538506347e-05, + "loss": 0.5377235412597656, + "step": 1735 + }, + { + "epoch": 0.5076765609007164, + "grad_norm": 1.3060450837457043, + "learning_rate": 1.781659201745032e-05, + "loss": 0.6899171471595764, + "step": 1736 + }, + { + "epoch": 0.5079690013159819, + "grad_norm": 1.2574262616785272, + "learning_rate": 1.7813575669313434e-05, + "loss": 0.6712576150894165, + "step": 1737 + }, + { + "epoch": 0.5082614417312472, + "grad_norm": 1.3797290531865334, + "learning_rate": 1.781055749480074e-05, + "loss": 0.6989667415618896, + "step": 1738 + }, + { + "epoch": 0.5085538821465126, + "grad_norm": 1.4976341004458755, + "learning_rate": 1.7807537494617723e-05, + "loss": 0.6103490591049194, + "step": 1739 + }, + { + "epoch": 0.5088463225617781, + "grad_norm": 1.2059878229475702, + "learning_rate": 1.7804515669470287e-05, + "loss": 0.4882289171218872, + "step": 1740 + }, + { + "epoch": 0.5091387629770434, + "grad_norm": 1.3963253268337052, + "learning_rate": 1.7801492020064764e-05, + "loss": 0.7244713306427002, + "step": 1741 + }, + { + "epoch": 0.5094312033923089, + "grad_norm": 1.2588544303384788, + "learning_rate": 1.7798466547107918e-05, + "loss": 0.6055952310562134, + "step": 1742 + }, + { + "epoch": 0.5097236438075742, + "grad_norm": 1.3449125705801426, + "learning_rate": 1.779543925130693e-05, + "loss": 0.5893995761871338, + "step": 1743 + }, + { + "epoch": 0.5100160842228396, + "grad_norm": 1.4169541262971606, + "learning_rate": 1.7792410133369413e-05, + "loss": 0.6154330968856812, + "step": 1744 + }, + { + "epoch": 0.5103085246381049, + "grad_norm": 1.294650393818464, + "learning_rate": 1.778937919400341e-05, + "loss": 0.6227806806564331, + "step": 1745 + }, + { + "epoch": 0.5106009650533704, + "grad_norm": 
1.563882907776874, + "learning_rate": 1.7786346433917376e-05, + "loss": 0.6192313432693481, + "step": 1746 + }, + { + "epoch": 0.5108934054686358, + "grad_norm": 1.324638073205218, + "learning_rate": 1.7783311853820205e-05, + "loss": 0.6175359487533569, + "step": 1747 + }, + { + "epoch": 0.5111858458839011, + "grad_norm": 1.17912928754983, + "learning_rate": 1.7780275454421218e-05, + "loss": 0.5588991641998291, + "step": 1748 + }, + { + "epoch": 0.5114782862991666, + "grad_norm": 1.0201894222615457, + "learning_rate": 1.777723723643014e-05, + "loss": 0.637115478515625, + "step": 1749 + }, + { + "epoch": 0.5117707267144319, + "grad_norm": 1.5101308062255179, + "learning_rate": 1.777419720055715e-05, + "loss": 0.6762860417366028, + "step": 1750 + }, + { + "epoch": 0.5120631671296973, + "grad_norm": 1.5211239881114056, + "learning_rate": 1.7771155347512828e-05, + "loss": 0.6980293989181519, + "step": 1751 + }, + { + "epoch": 0.5123556075449627, + "grad_norm": 1.3145597239587745, + "learning_rate": 1.7768111678008194e-05, + "loss": 0.6587250232696533, + "step": 1752 + }, + { + "epoch": 0.5126480479602281, + "grad_norm": 1.4750219793579704, + "learning_rate": 1.776506619275469e-05, + "loss": 0.6571120619773865, + "step": 1753 + }, + { + "epoch": 0.5129404883754934, + "grad_norm": 1.705487520120489, + "learning_rate": 1.7762018892464172e-05, + "loss": 0.8127633333206177, + "step": 1754 + }, + { + "epoch": 0.5132329287907589, + "grad_norm": 1.4136977790679228, + "learning_rate": 1.7758969777848935e-05, + "loss": 0.6585550308227539, + "step": 1755 + }, + { + "epoch": 0.5135253692060243, + "grad_norm": 1.5019600327645424, + "learning_rate": 1.7755918849621686e-05, + "loss": 0.6347511410713196, + "step": 1756 + }, + { + "epoch": 0.5138178096212896, + "grad_norm": 1.4489353235186164, + "learning_rate": 1.775286610849556e-05, + "loss": 0.5918457508087158, + "step": 1757 + }, + { + "epoch": 0.5141102500365551, + "grad_norm": 1.2541802522573693, + "learning_rate": 
1.774981155518412e-05, + "loss": 0.7042769193649292, + "step": 1758 + }, + { + "epoch": 0.5144026904518204, + "grad_norm": 1.4327318826910254, + "learning_rate": 1.7746755190401353e-05, + "loss": 0.8014250993728638, + "step": 1759 + }, + { + "epoch": 0.5146951308670858, + "grad_norm": 1.339232110324459, + "learning_rate": 1.774369701486166e-05, + "loss": 0.6703939437866211, + "step": 1760 + }, + { + "epoch": 0.5149875712823512, + "grad_norm": 1.1710558248660605, + "learning_rate": 1.774063702927987e-05, + "loss": 0.6189682483673096, + "step": 1761 + }, + { + "epoch": 0.5152800116976166, + "grad_norm": 1.4110546220906648, + "learning_rate": 1.7737575234371238e-05, + "loss": 0.5386991500854492, + "step": 1762 + }, + { + "epoch": 0.515572452112882, + "grad_norm": 1.4204019461155708, + "learning_rate": 1.773451163085144e-05, + "loss": 0.6389357447624207, + "step": 1763 + }, + { + "epoch": 0.5158648925281474, + "grad_norm": 1.1798787279597898, + "learning_rate": 1.7731446219436577e-05, + "loss": 0.7247746586799622, + "step": 1764 + }, + { + "epoch": 0.5161573329434128, + "grad_norm": 1.2114702713778023, + "learning_rate": 1.7728379000843164e-05, + "loss": 0.5538983941078186, + "step": 1765 + }, + { + "epoch": 0.5164497733586781, + "grad_norm": 1.155329008927324, + "learning_rate": 1.7725309975788155e-05, + "loss": 0.6003320813179016, + "step": 1766 + }, + { + "epoch": 0.5167422137739436, + "grad_norm": 1.4065479816352848, + "learning_rate": 1.7722239144988908e-05, + "loss": 0.603177011013031, + "step": 1767 + }, + { + "epoch": 0.5170346541892089, + "grad_norm": 1.1699743536266287, + "learning_rate": 1.771916650916321e-05, + "loss": 0.6071338653564453, + "step": 1768 + }, + { + "epoch": 0.5173270946044743, + "grad_norm": 1.4268603398797357, + "learning_rate": 1.7716092069029275e-05, + "loss": 0.6148535013198853, + "step": 1769 + }, + { + "epoch": 0.5176195350197397, + "grad_norm": 1.3460628970570976, + "learning_rate": 1.7713015825305735e-05, + "loss": 
0.6236969828605652, + "step": 1770 + }, + { + "epoch": 0.5179119754350051, + "grad_norm": 1.4613715991480511, + "learning_rate": 1.770993777871164e-05, + "loss": 0.5439775586128235, + "step": 1771 + }, + { + "epoch": 0.5182044158502705, + "grad_norm": 1.3246469866549868, + "learning_rate": 1.770685792996647e-05, + "loss": 0.6498249769210815, + "step": 1772 + }, + { + "epoch": 0.5184968562655359, + "grad_norm": 1.307598965769502, + "learning_rate": 1.7703776279790113e-05, + "loss": 0.5838749408721924, + "step": 1773 + }, + { + "epoch": 0.5187892966808013, + "grad_norm": 1.44861400348765, + "learning_rate": 1.770069282890289e-05, + "loss": 0.6467812657356262, + "step": 1774 + }, + { + "epoch": 0.5190817370960666, + "grad_norm": 1.3332181124442455, + "learning_rate": 1.7697607578025543e-05, + "loss": 0.5878627896308899, + "step": 1775 + }, + { + "epoch": 0.5193741775113321, + "grad_norm": 1.2905348700615993, + "learning_rate": 1.7694520527879223e-05, + "loss": 0.6252161264419556, + "step": 1776 + }, + { + "epoch": 0.5196666179265974, + "grad_norm": 1.2071686484495499, + "learning_rate": 1.7691431679185518e-05, + "loss": 0.6098401546478271, + "step": 1777 + }, + { + "epoch": 0.5199590583418628, + "grad_norm": 1.4529959736387221, + "learning_rate": 1.7688341032666415e-05, + "loss": 0.7401748299598694, + "step": 1778 + }, + { + "epoch": 0.5202514987571283, + "grad_norm": 1.278188059333223, + "learning_rate": 1.768524858904435e-05, + "loss": 0.5398571491241455, + "step": 1779 + }, + { + "epoch": 0.5205439391723936, + "grad_norm": 1.211971903081478, + "learning_rate": 1.768215434904215e-05, + "loss": 0.5565935969352722, + "step": 1780 + }, + { + "epoch": 0.520836379587659, + "grad_norm": 1.3982258941889667, + "learning_rate": 1.7679058313383078e-05, + "loss": 0.5510461926460266, + "step": 1781 + }, + { + "epoch": 0.5211288200029244, + "grad_norm": 1.5839871959956162, + "learning_rate": 1.7675960482790818e-05, + "loss": 0.670242428779602, + "step": 1782 + }, + { + "epoch": 
0.5214212604181898, + "grad_norm": 1.309838763427276, + "learning_rate": 1.7672860857989463e-05, + "loss": 0.6556246280670166, + "step": 1783 + }, + { + "epoch": 0.5217137008334551, + "grad_norm": 1.3555406156984307, + "learning_rate": 1.7669759439703537e-05, + "loss": 0.7133421897888184, + "step": 1784 + }, + { + "epoch": 0.5220061412487206, + "grad_norm": 1.340410804208978, + "learning_rate": 1.766665622865797e-05, + "loss": 0.5520647168159485, + "step": 1785 + }, + { + "epoch": 0.522298581663986, + "grad_norm": 1.2754706768801123, + "learning_rate": 1.766355122557813e-05, + "loss": 0.6906430125236511, + "step": 1786 + }, + { + "epoch": 0.5225910220792513, + "grad_norm": 1.331418831759662, + "learning_rate": 1.766044443118978e-05, + "loss": 0.6847748756408691, + "step": 1787 + }, + { + "epoch": 0.5228834624945168, + "grad_norm": 1.6656678493050783, + "learning_rate": 1.7657335846219125e-05, + "loss": 0.6690354347229004, + "step": 1788 + }, + { + "epoch": 0.5231759029097821, + "grad_norm": 1.5097667681145126, + "learning_rate": 1.765422547139277e-05, + "loss": 0.6508032083511353, + "step": 1789 + }, + { + "epoch": 0.5234683433250475, + "grad_norm": 1.3545274700404182, + "learning_rate": 1.7651113307437754e-05, + "loss": 0.7686585187911987, + "step": 1790 + }, + { + "epoch": 0.5237607837403129, + "grad_norm": 1.5694388106807053, + "learning_rate": 1.764799935508152e-05, + "loss": 0.7669490575790405, + "step": 1791 + }, + { + "epoch": 0.5240532241555783, + "grad_norm": 1.3694245126086426, + "learning_rate": 1.7644883615051936e-05, + "loss": 0.6630266308784485, + "step": 1792 + }, + { + "epoch": 0.5243456645708436, + "grad_norm": 1.350854180871217, + "learning_rate": 1.764176608807729e-05, + "loss": 0.6054951548576355, + "step": 1793 + }, + { + "epoch": 0.5246381049861091, + "grad_norm": 1.3573271710882402, + "learning_rate": 1.7638646774886282e-05, + "loss": 0.6519330739974976, + "step": 1794 + }, + { + "epoch": 0.5249305454013745, + "grad_norm": 1.3013890836364408, 
+ "learning_rate": 1.7635525676208034e-05, + "loss": 0.6797915101051331, + "step": 1795 + }, + { + "epoch": 0.5252229858166398, + "grad_norm": 1.4138018427804997, + "learning_rate": 1.7632402792772084e-05, + "loss": 0.7296736240386963, + "step": 1796 + }, + { + "epoch": 0.5255154262319053, + "grad_norm": 1.4894816204298726, + "learning_rate": 1.7629278125308388e-05, + "loss": 0.6371006965637207, + "step": 1797 + }, + { + "epoch": 0.5258078666471706, + "grad_norm": 1.1913157227609021, + "learning_rate": 1.762615167454732e-05, + "loss": 0.5315746068954468, + "step": 1798 + }, + { + "epoch": 0.526100307062436, + "grad_norm": 1.115665172593258, + "learning_rate": 1.762302344121966e-05, + "loss": 0.5285685062408447, + "step": 1799 + }, + { + "epoch": 0.5263927474777014, + "grad_norm": 1.269936179033053, + "learning_rate": 1.7619893426056622e-05, + "loss": 0.623146653175354, + "step": 1800 + }, + { + "epoch": 0.5266851878929668, + "grad_norm": 1.3314922698636598, + "learning_rate": 1.7616761629789824e-05, + "loss": 0.5433363318443298, + "step": 1801 + }, + { + "epoch": 0.5269776283082322, + "grad_norm": 1.422200045831386, + "learning_rate": 1.7613628053151307e-05, + "loss": 0.5035480260848999, + "step": 1802 + }, + { + "epoch": 0.5272700687234976, + "grad_norm": 1.3947936859584276, + "learning_rate": 1.7610492696873523e-05, + "loss": 0.678544819355011, + "step": 1803 + }, + { + "epoch": 0.527562509138763, + "grad_norm": 1.2973841494755158, + "learning_rate": 1.7607355561689347e-05, + "loss": 0.6237714290618896, + "step": 1804 + }, + { + "epoch": 0.5278549495540283, + "grad_norm": 1.8411758190439966, + "learning_rate": 1.760421664833206e-05, + "loss": 0.6943943500518799, + "step": 1805 + }, + { + "epoch": 0.5281473899692938, + "grad_norm": 1.1545458109151105, + "learning_rate": 1.7601075957535366e-05, + "loss": 0.5477268695831299, + "step": 1806 + }, + { + "epoch": 0.5284398303845591, + "grad_norm": 1.5589440207416567, + "learning_rate": 1.759793349003338e-05, + "loss": 
0.6627641320228577, + "step": 1807 + }, + { + "epoch": 0.5287322707998245, + "grad_norm": 1.169894530317387, + "learning_rate": 1.7594789246560638e-05, + "loss": 0.5394496917724609, + "step": 1808 + }, + { + "epoch": 0.5290247112150899, + "grad_norm": 1.5989109343746286, + "learning_rate": 1.759164322785209e-05, + "loss": 0.7824013233184814, + "step": 1809 + }, + { + "epoch": 0.5293171516303553, + "grad_norm": 1.5859531867022811, + "learning_rate": 1.7588495434643094e-05, + "loss": 0.6959671974182129, + "step": 1810 + }, + { + "epoch": 0.5296095920456207, + "grad_norm": 1.256097179377318, + "learning_rate": 1.7585345867669427e-05, + "loss": 0.7036902904510498, + "step": 1811 + }, + { + "epoch": 0.5299020324608861, + "grad_norm": 1.2520265115718123, + "learning_rate": 1.7582194527667285e-05, + "loss": 0.6700775623321533, + "step": 1812 + }, + { + "epoch": 0.5301944728761515, + "grad_norm": 1.4077714911889505, + "learning_rate": 1.7579041415373273e-05, + "loss": 0.648280918598175, + "step": 1813 + }, + { + "epoch": 0.5304869132914168, + "grad_norm": 1.3424741441047479, + "learning_rate": 1.757588653152441e-05, + "loss": 0.688485324382782, + "step": 1814 + }, + { + "epoch": 0.5307793537066823, + "grad_norm": 1.4718330240816029, + "learning_rate": 1.757272987685813e-05, + "loss": 0.6743370890617371, + "step": 1815 + }, + { + "epoch": 0.5310717941219476, + "grad_norm": 1.2524252340987996, + "learning_rate": 1.7569571452112288e-05, + "loss": 0.5597015619277954, + "step": 1816 + }, + { + "epoch": 0.531364234537213, + "grad_norm": 1.0387462800714626, + "learning_rate": 1.756641125802514e-05, + "loss": 0.48607051372528076, + "step": 1817 + }, + { + "epoch": 0.5316566749524785, + "grad_norm": 1.3375496888713005, + "learning_rate": 1.7563249295335366e-05, + "loss": 0.6712289452552795, + "step": 1818 + }, + { + "epoch": 0.5319491153677438, + "grad_norm": 1.4037646661677698, + "learning_rate": 1.7560085564782057e-05, + "loss": 0.5937772989273071, + "step": 1819 + }, + { + 
"epoch": 0.5322415557830092, + "grad_norm": 1.5529497860681427, + "learning_rate": 1.7556920067104714e-05, + "loss": 0.7416468262672424, + "step": 1820 + }, + { + "epoch": 0.5325339961982746, + "grad_norm": 1.1975217725231788, + "learning_rate": 1.7553752803043247e-05, + "loss": 0.6302096247673035, + "step": 1821 + }, + { + "epoch": 0.53282643661354, + "grad_norm": 1.268842982106158, + "learning_rate": 1.7550583773337992e-05, + "loss": 0.5576045513153076, + "step": 1822 + }, + { + "epoch": 0.5331188770288053, + "grad_norm": 1.3076658324014316, + "learning_rate": 1.7547412978729688e-05, + "loss": 0.5436257123947144, + "step": 1823 + }, + { + "epoch": 0.5334113174440708, + "grad_norm": 1.2387778464918946, + "learning_rate": 1.754424041995949e-05, + "loss": 0.5674831867218018, + "step": 1824 + }, + { + "epoch": 0.5337037578593362, + "grad_norm": 1.3729116406743342, + "learning_rate": 1.7541066097768965e-05, + "loss": 0.7254515290260315, + "step": 1825 + }, + { + "epoch": 0.5339961982746015, + "grad_norm": 1.1721694105309242, + "learning_rate": 1.7537890012900088e-05, + "loss": 0.5706701278686523, + "step": 1826 + }, + { + "epoch": 0.534288638689867, + "grad_norm": 1.4929452380767032, + "learning_rate": 1.7534712166095253e-05, + "loss": 0.6801357269287109, + "step": 1827 + }, + { + "epoch": 0.5345810791051323, + "grad_norm": 1.115878861059579, + "learning_rate": 1.753153255809726e-05, + "loss": 0.6851463317871094, + "step": 1828 + }, + { + "epoch": 0.5348735195203977, + "grad_norm": 1.3277835192492438, + "learning_rate": 1.7528351189649324e-05, + "loss": 0.6475861072540283, + "step": 1829 + }, + { + "epoch": 0.5351659599356631, + "grad_norm": 1.462925601634232, + "learning_rate": 1.752516806149507e-05, + "loss": 0.6953648924827576, + "step": 1830 + }, + { + "epoch": 0.5354584003509285, + "grad_norm": 1.5314952476377168, + "learning_rate": 1.7521983174378537e-05, + "loss": 0.5128777623176575, + "step": 1831 + }, + { + "epoch": 0.5357508407661938, + "grad_norm": 
1.3754167803768682, + "learning_rate": 1.751879652904417e-05, + "loss": 0.5780255198478699, + "step": 1832 + }, + { + "epoch": 0.5360432811814593, + "grad_norm": 1.1326334157819233, + "learning_rate": 1.751560812623683e-05, + "loss": 0.581814169883728, + "step": 1833 + }, + { + "epoch": 0.5363357215967247, + "grad_norm": 1.2244339664502468, + "learning_rate": 1.7512417966701788e-05, + "loss": 0.5609169006347656, + "step": 1834 + }, + { + "epoch": 0.53662816201199, + "grad_norm": 1.2348222464159622, + "learning_rate": 1.7509226051184716e-05, + "loss": 0.6029868125915527, + "step": 1835 + }, + { + "epoch": 0.5369206024272555, + "grad_norm": 1.5575658935823142, + "learning_rate": 1.7506032380431718e-05, + "loss": 0.6749545335769653, + "step": 1836 + }, + { + "epoch": 0.5372130428425208, + "grad_norm": 1.4261868258477342, + "learning_rate": 1.750283695518929e-05, + "loss": 0.7710991501808167, + "step": 1837 + }, + { + "epoch": 0.5375054832577862, + "grad_norm": 1.2797893583505542, + "learning_rate": 1.7499639776204334e-05, + "loss": 0.6330907940864563, + "step": 1838 + }, + { + "epoch": 0.5377979236730516, + "grad_norm": 1.3697405221939354, + "learning_rate": 1.7496440844224186e-05, + "loss": 0.655827522277832, + "step": 1839 + }, + { + "epoch": 0.538090364088317, + "grad_norm": 1.3640883815652403, + "learning_rate": 1.7493240159996565e-05, + "loss": 0.723412275314331, + "step": 1840 + }, + { + "epoch": 0.5383828045035824, + "grad_norm": 1.273855459734962, + "learning_rate": 1.7490037724269618e-05, + "loss": 0.5504157543182373, + "step": 1841 + }, + { + "epoch": 0.5386752449188478, + "grad_norm": 1.3867652356352673, + "learning_rate": 1.7486833537791895e-05, + "loss": 0.6258282661437988, + "step": 1842 + }, + { + "epoch": 0.5389676853341132, + "grad_norm": 1.3063024833172743, + "learning_rate": 1.748362760131235e-05, + "loss": 0.7044231295585632, + "step": 1843 + }, + { + "epoch": 0.5392601257493785, + "grad_norm": 1.329844005030904, + "learning_rate": 
1.7480419915580357e-05, + "loss": 0.5979568362236023, + "step": 1844 + }, + { + "epoch": 0.539552566164644, + "grad_norm": 1.2396904419147898, + "learning_rate": 1.7477210481345686e-05, + "loss": 0.558562159538269, + "step": 1845 + }, + { + "epoch": 0.5398450065799093, + "grad_norm": 1.5914882070233294, + "learning_rate": 1.747399929935853e-05, + "loss": 0.5965149402618408, + "step": 1846 + }, + { + "epoch": 0.5401374469951747, + "grad_norm": 1.2286076413347484, + "learning_rate": 1.7470786370369483e-05, + "loss": 0.6202878355979919, + "step": 1847 + }, + { + "epoch": 0.5404298874104401, + "grad_norm": 1.4696847585462156, + "learning_rate": 1.746757169512954e-05, + "loss": 0.652141273021698, + "step": 1848 + }, + { + "epoch": 0.5407223278257055, + "grad_norm": 1.3491880900702233, + "learning_rate": 1.746435527439012e-05, + "loss": 0.5713402628898621, + "step": 1849 + }, + { + "epoch": 0.541014768240971, + "grad_norm": 1.1036198614058235, + "learning_rate": 1.7461137108903042e-05, + "loss": 0.49776554107666016, + "step": 1850 + }, + { + "epoch": 0.5413072086562363, + "grad_norm": 1.3593053008733638, + "learning_rate": 1.7457917199420525e-05, + "loss": 0.7047991752624512, + "step": 1851 + }, + { + "epoch": 0.5415996490715017, + "grad_norm": 1.249302868601747, + "learning_rate": 1.7454695546695207e-05, + "loss": 0.7019875049591064, + "step": 1852 + }, + { + "epoch": 0.541892089486767, + "grad_norm": 1.1395410254023401, + "learning_rate": 1.745147215148013e-05, + "loss": 0.5448435544967651, + "step": 1853 + }, + { + "epoch": 0.5421845299020325, + "grad_norm": 1.3392616230054089, + "learning_rate": 1.7448247014528745e-05, + "loss": 0.6042202711105347, + "step": 1854 + }, + { + "epoch": 0.5424769703172978, + "grad_norm": 1.6632726033150385, + "learning_rate": 1.744502013659491e-05, + "loss": 0.8448539972305298, + "step": 1855 + }, + { + "epoch": 0.5427694107325632, + "grad_norm": 1.5168637416823716, + "learning_rate": 1.7441791518432877e-05, + "loss": 0.6541755795478821, 
+ "step": 1856 + }, + { + "epoch": 0.5430618511478287, + "grad_norm": 1.3214742528031191, + "learning_rate": 1.7438561160797326e-05, + "loss": 0.6700184345245361, + "step": 1857 + }, + { + "epoch": 0.543354291563094, + "grad_norm": 1.5975598198717695, + "learning_rate": 1.7435329064443335e-05, + "loss": 0.6407896280288696, + "step": 1858 + }, + { + "epoch": 0.5436467319783594, + "grad_norm": 1.1007084555597737, + "learning_rate": 1.7432095230126382e-05, + "loss": 0.5380120277404785, + "step": 1859 + }, + { + "epoch": 0.5439391723936248, + "grad_norm": 1.4184366915429367, + "learning_rate": 1.7428859658602353e-05, + "loss": 0.6561373472213745, + "step": 1860 + }, + { + "epoch": 0.5442316128088902, + "grad_norm": 1.7211281199225186, + "learning_rate": 1.7425622350627545e-05, + "loss": 0.724541962146759, + "step": 1861 + }, + { + "epoch": 0.5445240532241555, + "grad_norm": 1.3361773700031112, + "learning_rate": 1.7422383306958666e-05, + "loss": 0.6258946657180786, + "step": 1862 + }, + { + "epoch": 0.544816493639421, + "grad_norm": 1.4343211647036773, + "learning_rate": 1.7419142528352815e-05, + "loss": 0.560769259929657, + "step": 1863 + }, + { + "epoch": 0.5451089340546864, + "grad_norm": 1.3199774156859019, + "learning_rate": 1.741590001556751e-05, + "loss": 0.7782202363014221, + "step": 1864 + }, + { + "epoch": 0.5454013744699517, + "grad_norm": 1.1330260111547463, + "learning_rate": 1.7412655769360663e-05, + "loss": 0.5956888198852539, + "step": 1865 + }, + { + "epoch": 0.5456938148852172, + "grad_norm": 1.2304180375361309, + "learning_rate": 1.7409409790490602e-05, + "loss": 0.6251999139785767, + "step": 1866 + }, + { + "epoch": 0.5459862553004825, + "grad_norm": 1.201828702533108, + "learning_rate": 1.740616207971605e-05, + "loss": 0.5864061713218689, + "step": 1867 + }, + { + "epoch": 0.5462786957157479, + "grad_norm": 1.1335552643310969, + "learning_rate": 1.7402912637796146e-05, + "loss": 0.6241225004196167, + "step": 1868 + }, + { + "epoch": 
0.5465711361310133, + "grad_norm": 1.4457655679285375, + "learning_rate": 1.739966146549042e-05, + "loss": 0.7190053462982178, + "step": 1869 + }, + { + "epoch": 0.5468635765462787, + "grad_norm": 1.3107442552185273, + "learning_rate": 1.739640856355882e-05, + "loss": 0.6771985292434692, + "step": 1870 + }, + { + "epoch": 0.547156016961544, + "grad_norm": 1.3163112428890422, + "learning_rate": 1.7393153932761687e-05, + "loss": 0.5480636954307556, + "step": 1871 + }, + { + "epoch": 0.5474484573768095, + "grad_norm": 1.5272520029044583, + "learning_rate": 1.7389897573859773e-05, + "loss": 0.7362977862358093, + "step": 1872 + }, + { + "epoch": 0.5477408977920749, + "grad_norm": 1.3701377425052599, + "learning_rate": 1.7386639487614232e-05, + "loss": 0.6483198404312134, + "step": 1873 + }, + { + "epoch": 0.5480333382073402, + "grad_norm": 1.137215399363759, + "learning_rate": 1.7383379674786622e-05, + "loss": 0.479977548122406, + "step": 1874 + }, + { + "epoch": 0.5483257786226057, + "grad_norm": 1.2815568792711947, + "learning_rate": 1.738011813613891e-05, + "loss": 0.6824718117713928, + "step": 1875 + }, + { + "epoch": 0.548618219037871, + "grad_norm": 1.4252738341228008, + "learning_rate": 1.737685487243345e-05, + "loss": 0.564873218536377, + "step": 1876 + }, + { + "epoch": 0.5489106594531364, + "grad_norm": 1.208162951014484, + "learning_rate": 1.7373589884433015e-05, + "loss": 0.5748772621154785, + "step": 1877 + }, + { + "epoch": 0.5492030998684018, + "grad_norm": 1.329038884364766, + "learning_rate": 1.7370323172900778e-05, + "loss": 0.6403437852859497, + "step": 1878 + }, + { + "epoch": 0.5494955402836672, + "grad_norm": 1.7288162586927747, + "learning_rate": 1.7367054738600312e-05, + "loss": 0.8253078460693359, + "step": 1879 + }, + { + "epoch": 0.5497879806989326, + "grad_norm": 1.204164217164209, + "learning_rate": 1.7363784582295596e-05, + "loss": 0.6823058128356934, + "step": 1880 + }, + { + "epoch": 0.550080421114198, + "grad_norm": 1.0289811643005782, + 
"learning_rate": 1.7360512704751003e-05, + "loss": 0.48659563064575195, + "step": 1881 + }, + { + "epoch": 0.5503728615294634, + "grad_norm": 1.5395158772607802, + "learning_rate": 1.735723910673132e-05, + "loss": 0.6380710601806641, + "step": 1882 + }, + { + "epoch": 0.5506653019447287, + "grad_norm": 1.512121712639047, + "learning_rate": 1.7353963789001723e-05, + "loss": 0.6956683397293091, + "step": 1883 + }, + { + "epoch": 0.5509577423599942, + "grad_norm": 1.2992852551955654, + "learning_rate": 1.735068675232781e-05, + "loss": 0.5751473903656006, + "step": 1884 + }, + { + "epoch": 0.5512501827752595, + "grad_norm": 1.3297689987083825, + "learning_rate": 1.734740799747556e-05, + "loss": 0.7265490293502808, + "step": 1885 + }, + { + "epoch": 0.5515426231905249, + "grad_norm": 1.3317519459591716, + "learning_rate": 1.734412752521136e-05, + "loss": 0.7419843673706055, + "step": 1886 + }, + { + "epoch": 0.5518350636057903, + "grad_norm": 1.3385317545855182, + "learning_rate": 1.734084533630201e-05, + "loss": 0.7381073236465454, + "step": 1887 + }, + { + "epoch": 0.5521275040210557, + "grad_norm": 1.4687535531628584, + "learning_rate": 1.7337561431514692e-05, + "loss": 0.6542054414749146, + "step": 1888 + }, + { + "epoch": 0.5524199444363211, + "grad_norm": 1.3144027889366288, + "learning_rate": 1.7334275811617e-05, + "loss": 0.6283866167068481, + "step": 1889 + }, + { + "epoch": 0.5527123848515865, + "grad_norm": 1.34879443340803, + "learning_rate": 1.7330988477376935e-05, + "loss": 0.6171330809593201, + "step": 1890 + }, + { + "epoch": 0.5530048252668519, + "grad_norm": 1.309075407888037, + "learning_rate": 1.7327699429562887e-05, + "loss": 0.5181430578231812, + "step": 1891 + }, + { + "epoch": 0.5532972656821172, + "grad_norm": 1.4382455208413174, + "learning_rate": 1.7324408668943645e-05, + "loss": 0.7337771058082581, + "step": 1892 + }, + { + "epoch": 0.5535897060973827, + "grad_norm": 1.3677542553778577, + "learning_rate": 1.7321116196288413e-05, + "loss": 
0.5193721055984497, + "step": 1893 + }, + { + "epoch": 0.553882146512648, + "grad_norm": 1.2912522952038028, + "learning_rate": 1.731782201236678e-05, + "loss": 0.7743211388587952, + "step": 1894 + }, + { + "epoch": 0.5541745869279134, + "grad_norm": 1.5457463678190766, + "learning_rate": 1.731452611794875e-05, + "loss": 0.8244242072105408, + "step": 1895 + }, + { + "epoch": 0.5544670273431789, + "grad_norm": 1.3153817051947132, + "learning_rate": 1.7311228513804712e-05, + "loss": 0.6276153326034546, + "step": 1896 + }, + { + "epoch": 0.5547594677584442, + "grad_norm": 1.4741498614217154, + "learning_rate": 1.7307929200705463e-05, + "loss": 0.7919771671295166, + "step": 1897 + }, + { + "epoch": 0.5550519081737096, + "grad_norm": 1.527110359994231, + "learning_rate": 1.7304628179422192e-05, + "loss": 0.6187459230422974, + "step": 1898 + }, + { + "epoch": 0.555344348588975, + "grad_norm": 1.1766146767977552, + "learning_rate": 1.7301325450726497e-05, + "loss": 0.6190363764762878, + "step": 1899 + }, + { + "epoch": 0.5556367890042404, + "grad_norm": 1.209178127119406, + "learning_rate": 1.7298021015390375e-05, + "loss": 0.5537956953048706, + "step": 1900 + }, + { + "epoch": 0.5559292294195057, + "grad_norm": 1.434637926231007, + "learning_rate": 1.729471487418621e-05, + "loss": 0.7164788246154785, + "step": 1901 + }, + { + "epoch": 0.5562216698347712, + "grad_norm": 1.2878374944552806, + "learning_rate": 1.7291407027886796e-05, + "loss": 0.6101689338684082, + "step": 1902 + }, + { + "epoch": 0.5565141102500366, + "grad_norm": 1.4102535348815881, + "learning_rate": 1.7288097477265322e-05, + "loss": 0.7112093567848206, + "step": 1903 + }, + { + "epoch": 0.5568065506653019, + "grad_norm": 1.698804519808014, + "learning_rate": 1.7284786223095376e-05, + "loss": 0.7807149291038513, + "step": 1904 + }, + { + "epoch": 0.5570989910805674, + "grad_norm": 1.3150296925108194, + "learning_rate": 1.7281473266150942e-05, + "loss": 0.5723121166229248, + "step": 1905 + }, + { + 
"epoch": 0.5573914314958327, + "grad_norm": 1.4287078485940368, + "learning_rate": 1.7278158607206402e-05, + "loss": 0.6901307106018066, + "step": 1906 + }, + { + "epoch": 0.5576838719110981, + "grad_norm": 1.3895105915390893, + "learning_rate": 1.7274842247036547e-05, + "loss": 0.8247314095497131, + "step": 1907 + }, + { + "epoch": 0.5579763123263635, + "grad_norm": 1.2902939634670878, + "learning_rate": 1.727152418641654e-05, + "loss": 0.758405327796936, + "step": 1908 + }, + { + "epoch": 0.5582687527416289, + "grad_norm": 1.1507745861737273, + "learning_rate": 1.7268204426121967e-05, + "loss": 0.6448276042938232, + "step": 1909 + }, + { + "epoch": 0.5585611931568942, + "grad_norm": 1.4597983603763345, + "learning_rate": 1.7264882966928803e-05, + "loss": 0.6846790313720703, + "step": 1910 + }, + { + "epoch": 0.5588536335721597, + "grad_norm": 1.494960410585431, + "learning_rate": 1.726155980961342e-05, + "loss": 0.6427637338638306, + "step": 1911 + }, + { + "epoch": 0.5591460739874251, + "grad_norm": 1.6049335332675108, + "learning_rate": 1.7258234954952578e-05, + "loss": 0.7105496525764465, + "step": 1912 + }, + { + "epoch": 0.5594385144026904, + "grad_norm": 1.247874236176648, + "learning_rate": 1.7254908403723446e-05, + "loss": 0.6307404041290283, + "step": 1913 + }, + { + "epoch": 0.5597309548179559, + "grad_norm": 1.3460021193743466, + "learning_rate": 1.7251580156703587e-05, + "loss": 0.7194197177886963, + "step": 1914 + }, + { + "epoch": 0.5600233952332212, + "grad_norm": 1.4541814827650097, + "learning_rate": 1.7248250214670955e-05, + "loss": 0.676772952079773, + "step": 1915 + }, + { + "epoch": 0.5603158356484866, + "grad_norm": 1.4231220185819522, + "learning_rate": 1.724491857840391e-05, + "loss": 0.6047924160957336, + "step": 1916 + }, + { + "epoch": 0.560608276063752, + "grad_norm": 1.4639689581400968, + "learning_rate": 1.7241585248681192e-05, + "loss": 0.7412474155426025, + "step": 1917 + }, + { + "epoch": 0.5609007164790174, + "grad_norm": 
1.3634846491128696, + "learning_rate": 1.7238250226281952e-05, + "loss": 0.6337922215461731, + "step": 1918 + }, + { + "epoch": 0.5611931568942828, + "grad_norm": 1.325394488194612, + "learning_rate": 1.7234913511985733e-05, + "loss": 0.7192416787147522, + "step": 1919 + }, + { + "epoch": 0.5614855973095482, + "grad_norm": 1.5807591545293311, + "learning_rate": 1.723157510657247e-05, + "loss": 0.6576168537139893, + "step": 1920 + }, + { + "epoch": 0.5617780377248136, + "grad_norm": 1.2677184116479052, + "learning_rate": 1.722823501082249e-05, + "loss": 0.6592451333999634, + "step": 1921 + }, + { + "epoch": 0.5620704781400789, + "grad_norm": 1.3384834377307993, + "learning_rate": 1.722489322551653e-05, + "loss": 0.8042774796485901, + "step": 1922 + }, + { + "epoch": 0.5623629185553444, + "grad_norm": 1.4566017039283872, + "learning_rate": 1.7221549751435706e-05, + "loss": 0.727135181427002, + "step": 1923 + }, + { + "epoch": 0.5626553589706097, + "grad_norm": 1.3099994778880142, + "learning_rate": 1.7218204589361535e-05, + "loss": 0.5641134977340698, + "step": 1924 + }, + { + "epoch": 0.5629477993858751, + "grad_norm": 1.5113194940037022, + "learning_rate": 1.7214857740075924e-05, + "loss": 0.6354084610939026, + "step": 1925 + }, + { + "epoch": 0.5632402398011405, + "grad_norm": 1.3038206210364904, + "learning_rate": 1.7211509204361187e-05, + "loss": 0.6044377088546753, + "step": 1926 + }, + { + "epoch": 0.5635326802164059, + "grad_norm": 1.2045011077136063, + "learning_rate": 1.7208158983000022e-05, + "loss": 0.5519559383392334, + "step": 1927 + }, + { + "epoch": 0.5638251206316713, + "grad_norm": 1.162061868190052, + "learning_rate": 1.7204807076775514e-05, + "loss": 0.4480612277984619, + "step": 1928 + }, + { + "epoch": 0.5641175610469367, + "grad_norm": 1.3899173129631617, + "learning_rate": 1.7201453486471167e-05, + "loss": 0.5929607152938843, + "step": 1929 + }, + { + "epoch": 0.5644100014622021, + "grad_norm": 2.021763483016241, + "learning_rate": 
1.7198098212870847e-05, + "loss": 0.6863572001457214, + "step": 1930 + }, + { + "epoch": 0.5647024418774674, + "grad_norm": 1.0938398450209694, + "learning_rate": 1.719474125675884e-05, + "loss": 0.5551834106445312, + "step": 1931 + }, + { + "epoch": 0.5649948822927329, + "grad_norm": 1.3644128319132816, + "learning_rate": 1.7191382618919802e-05, + "loss": 0.6113166809082031, + "step": 1932 + }, + { + "epoch": 0.5652873227079982, + "grad_norm": 1.419009993473521, + "learning_rate": 1.7188022300138805e-05, + "loss": 0.7833362817764282, + "step": 1933 + }, + { + "epoch": 0.5655797631232636, + "grad_norm": 1.3899666208681147, + "learning_rate": 1.71846603012013e-05, + "loss": 0.5981882810592651, + "step": 1934 + }, + { + "epoch": 0.5658722035385291, + "grad_norm": 1.3211180154101085, + "learning_rate": 1.7181296622893132e-05, + "loss": 0.6009912490844727, + "step": 1935 + }, + { + "epoch": 0.5661646439537944, + "grad_norm": 1.5201002205446237, + "learning_rate": 1.717793126600054e-05, + "loss": 0.5605272054672241, + "step": 1936 + }, + { + "epoch": 0.5664570843690598, + "grad_norm": 1.9581129231236365, + "learning_rate": 1.717456423131016e-05, + "loss": 0.6310821771621704, + "step": 1937 + }, + { + "epoch": 0.5667495247843252, + "grad_norm": 1.3290964241159713, + "learning_rate": 1.7171195519609013e-05, + "loss": 0.6776266694068909, + "step": 1938 + }, + { + "epoch": 0.5670419651995906, + "grad_norm": 1.5744599660597636, + "learning_rate": 1.7167825131684516e-05, + "loss": 0.6369091868400574, + "step": 1939 + }, + { + "epoch": 0.5673344056148559, + "grad_norm": 1.5708596771950396, + "learning_rate": 1.7164453068324472e-05, + "loss": 0.6241647005081177, + "step": 1940 + }, + { + "epoch": 0.5676268460301214, + "grad_norm": 1.1863544042032323, + "learning_rate": 1.7161079330317086e-05, + "loss": 0.6411961317062378, + "step": 1941 + }, + { + "epoch": 0.5679192864453868, + "grad_norm": 1.4635134179889109, + "learning_rate": 1.7157703918450942e-05, + "loss": 
0.6148936152458191, + "step": 1942 + }, + { + "epoch": 0.5682117268606521, + "grad_norm": 1.3183225060577142, + "learning_rate": 1.7154326833515034e-05, + "loss": 0.5006934404373169, + "step": 1943 + }, + { + "epoch": 0.5685041672759176, + "grad_norm": 1.462356689812602, + "learning_rate": 1.7150948076298722e-05, + "loss": 0.7446701526641846, + "step": 1944 + }, + { + "epoch": 0.5687966076911829, + "grad_norm": 1.2052848826016378, + "learning_rate": 1.7147567647591777e-05, + "loss": 0.6159533262252808, + "step": 1945 + }, + { + "epoch": 0.5690890481064483, + "grad_norm": 1.4298530885651661, + "learning_rate": 1.7144185548184355e-05, + "loss": 0.6437554359436035, + "step": 1946 + }, + { + "epoch": 0.5693814885217137, + "grad_norm": 1.3361469734250542, + "learning_rate": 1.7140801778866995e-05, + "loss": 0.6229397654533386, + "step": 1947 + }, + { + "epoch": 0.5696739289369791, + "grad_norm": 1.4197238006731758, + "learning_rate": 1.7137416340430636e-05, + "loss": 0.5777184963226318, + "step": 1948 + }, + { + "epoch": 0.5699663693522444, + "grad_norm": 1.543436374887725, + "learning_rate": 1.7134029233666603e-05, + "loss": 0.7817827463150024, + "step": 1949 + }, + { + "epoch": 0.5702588097675099, + "grad_norm": 1.3527927450904613, + "learning_rate": 1.713064045936662e-05, + "loss": 0.6784861087799072, + "step": 1950 + }, + { + "epoch": 0.5705512501827753, + "grad_norm": 1.2839254399050724, + "learning_rate": 1.7127250018322777e-05, + "loss": 0.6883150339126587, + "step": 1951 + }, + { + "epoch": 0.5708436905980406, + "grad_norm": 1.093202890209594, + "learning_rate": 1.712385791132758e-05, + "loss": 0.5464504957199097, + "step": 1952 + }, + { + "epoch": 0.5711361310133061, + "grad_norm": 1.2617859237604026, + "learning_rate": 1.7120464139173908e-05, + "loss": 0.5950040817260742, + "step": 1953 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 1.374864335037442, + "learning_rate": 1.7117068702655034e-05, + "loss": 0.6381576061248779, + "step": 1954 + }, + { + 
"epoch": 0.5717210118438368, + "grad_norm": 1.2624571465966312, + "learning_rate": 1.7113671602564628e-05, + "loss": 0.6611777544021606, + "step": 1955 + }, + { + "epoch": 0.5720134522591022, + "grad_norm": 1.2625162580462326, + "learning_rate": 1.7110272839696735e-05, + "loss": 0.5057446956634521, + "step": 1956 + }, + { + "epoch": 0.5723058926743676, + "grad_norm": 1.3802970727547992, + "learning_rate": 1.7106872414845798e-05, + "loss": 0.6095671653747559, + "step": 1957 + }, + { + "epoch": 0.572598333089633, + "grad_norm": 1.4171107803407814, + "learning_rate": 1.710347032880664e-05, + "loss": 0.5514808893203735, + "step": 1958 + }, + { + "epoch": 0.5728907735048984, + "grad_norm": 2.1059044775107516, + "learning_rate": 1.7100066582374487e-05, + "loss": 0.6491304039955139, + "step": 1959 + }, + { + "epoch": 0.5731832139201638, + "grad_norm": 1.2887931231971388, + "learning_rate": 1.7096661176344936e-05, + "loss": 0.6759692430496216, + "step": 1960 + }, + { + "epoch": 0.5734756543354291, + "grad_norm": 1.4738884192318065, + "learning_rate": 1.709325411151399e-05, + "loss": 0.5897858142852783, + "step": 1961 + }, + { + "epoch": 0.5737680947506946, + "grad_norm": 1.537196415964603, + "learning_rate": 1.7089845388678015e-05, + "loss": 0.6822922229766846, + "step": 1962 + }, + { + "epoch": 0.5740605351659599, + "grad_norm": 1.2963583337618676, + "learning_rate": 1.7086435008633792e-05, + "loss": 0.7694820165634155, + "step": 1963 + }, + { + "epoch": 0.5743529755812253, + "grad_norm": 1.5109651591265172, + "learning_rate": 1.7083022972178473e-05, + "loss": 0.702151358127594, + "step": 1964 + }, + { + "epoch": 0.5746454159964907, + "grad_norm": 1.564445011536072, + "learning_rate": 1.7079609280109597e-05, + "loss": 0.768844485282898, + "step": 1965 + }, + { + "epoch": 0.5749378564117561, + "grad_norm": 1.4251497195478635, + "learning_rate": 1.7076193933225097e-05, + "loss": 0.6641331911087036, + "step": 1966 + }, + { + "epoch": 0.5752302968270215, + "grad_norm": 
1.3577479649866828, + "learning_rate": 1.707277693232329e-05, + "loss": 0.7176777124404907, + "step": 1967 + }, + { + "epoch": 0.5755227372422869, + "grad_norm": 1.4539026175393464, + "learning_rate": 1.7069358278202877e-05, + "loss": 0.6543929576873779, + "step": 1968 + }, + { + "epoch": 0.5758151776575523, + "grad_norm": 1.422676342883674, + "learning_rate": 1.7065937971662953e-05, + "loss": 0.7501214742660522, + "step": 1969 + }, + { + "epoch": 0.5761076180728176, + "grad_norm": 1.1830543705848042, + "learning_rate": 1.7062516013502984e-05, + "loss": 0.6013212203979492, + "step": 1970 + }, + { + "epoch": 0.5764000584880831, + "grad_norm": 1.489892931502725, + "learning_rate": 1.7059092404522843e-05, + "loss": 0.5920547246932983, + "step": 1971 + }, + { + "epoch": 0.5766924989033484, + "grad_norm": 1.1082983109051399, + "learning_rate": 1.7055667145522767e-05, + "loss": 0.6720744371414185, + "step": 1972 + }, + { + "epoch": 0.5769849393186138, + "grad_norm": 1.3476214386922525, + "learning_rate": 1.70522402373034e-05, + "loss": 0.6938234567642212, + "step": 1973 + }, + { + "epoch": 0.5772773797338793, + "grad_norm": 1.321699429936501, + "learning_rate": 1.704881168066575e-05, + "loss": 0.6430555582046509, + "step": 1974 + }, + { + "epoch": 0.5775698201491446, + "grad_norm": 1.331724408429167, + "learning_rate": 1.7045381476411234e-05, + "loss": 0.7738221883773804, + "step": 1975 + }, + { + "epoch": 0.57786226056441, + "grad_norm": 1.2033511527827634, + "learning_rate": 1.704194962534163e-05, + "loss": 0.5335453748703003, + "step": 1976 + }, + { + "epoch": 0.5781547009796754, + "grad_norm": 1.4123366931040846, + "learning_rate": 1.7038516128259118e-05, + "loss": 0.691404402256012, + "step": 1977 + }, + { + "epoch": 0.5784471413949408, + "grad_norm": 1.6032589522393152, + "learning_rate": 1.7035080985966253e-05, + "loss": 0.7371880412101746, + "step": 1978 + }, + { + "epoch": 0.5787395818102061, + "grad_norm": 1.356558066648364, + "learning_rate": 
1.7031644199265987e-05, + "loss": 0.5661574602127075, + "step": 1979 + }, + { + "epoch": 0.5790320222254716, + "grad_norm": 1.069750621474732, + "learning_rate": 1.702820576896164e-05, + "loss": 0.5823863744735718, + "step": 1980 + }, + { + "epoch": 0.579324462640737, + "grad_norm": 1.608685609966537, + "learning_rate": 1.7024765695856924e-05, + "loss": 0.6228796243667603, + "step": 1981 + }, + { + "epoch": 0.5796169030560023, + "grad_norm": 1.3395261062815815, + "learning_rate": 1.702132398075594e-05, + "loss": 0.5788040161132812, + "step": 1982 + }, + { + "epoch": 0.5799093434712678, + "grad_norm": 1.1540676629937416, + "learning_rate": 1.701788062446317e-05, + "loss": 0.5950253009796143, + "step": 1983 + }, + { + "epoch": 0.5802017838865331, + "grad_norm": 1.2446098890682338, + "learning_rate": 1.7014435627783466e-05, + "loss": 0.5672034025192261, + "step": 1984 + }, + { + "epoch": 0.5804942243017985, + "grad_norm": 1.329055336569987, + "learning_rate": 1.7010988991522085e-05, + "loss": 0.6646316051483154, + "step": 1985 + }, + { + "epoch": 0.5807866647170639, + "grad_norm": 1.2423480846022465, + "learning_rate": 1.7007540716484657e-05, + "loss": 0.6430097818374634, + "step": 1986 + }, + { + "epoch": 0.5810791051323293, + "grad_norm": 1.2889752174339557, + "learning_rate": 1.700409080347719e-05, + "loss": 0.5803329348564148, + "step": 1987 + }, + { + "epoch": 0.5813715455475946, + "grad_norm": 1.613226747300198, + "learning_rate": 1.7000639253306085e-05, + "loss": 0.7526525259017944, + "step": 1988 + }, + { + "epoch": 0.5816639859628601, + "grad_norm": 1.329271357875936, + "learning_rate": 1.6997186066778118e-05, + "loss": 0.6679468750953674, + "step": 1989 + }, + { + "epoch": 0.5819564263781255, + "grad_norm": 1.5773364597040387, + "learning_rate": 1.6993731244700454e-05, + "loss": 0.7233256101608276, + "step": 1990 + }, + { + "epoch": 0.5822488667933908, + "grad_norm": 1.3632345541871926, + "learning_rate": 1.6990274787880633e-05, + "loss": 0.5986290574073792, 
+ "step": 1991 + }, + { + "epoch": 0.5825413072086563, + "grad_norm": 1.3136772281139917, + "learning_rate": 1.6986816697126583e-05, + "loss": 0.6898672580718994, + "step": 1992 + }, + { + "epoch": 0.5828337476239216, + "grad_norm": 1.6057802032529045, + "learning_rate": 1.698335697324661e-05, + "loss": 0.6888613104820251, + "step": 1993 + }, + { + "epoch": 0.583126188039187, + "grad_norm": 1.3469913891844598, + "learning_rate": 1.6979895617049404e-05, + "loss": 0.6002428531646729, + "step": 1994 + }, + { + "epoch": 0.5834186284544524, + "grad_norm": 1.3517104173069454, + "learning_rate": 1.6976432629344036e-05, + "loss": 0.6372438669204712, + "step": 1995 + }, + { + "epoch": 0.5837110688697178, + "grad_norm": 1.0868680846473084, + "learning_rate": 1.6972968010939953e-05, + "loss": 0.529569149017334, + "step": 1996 + }, + { + "epoch": 0.5840035092849832, + "grad_norm": 1.415626330345063, + "learning_rate": 1.6969501762647002e-05, + "loss": 0.5534025430679321, + "step": 1997 + }, + { + "epoch": 0.5842959497002486, + "grad_norm": 1.5855609078257513, + "learning_rate": 1.6966033885275384e-05, + "loss": 0.8105937242507935, + "step": 1998 + }, + { + "epoch": 0.584588390115514, + "grad_norm": 1.4927698791899027, + "learning_rate": 1.6962564379635702e-05, + "loss": 0.7657530903816223, + "step": 1999 + }, + { + "epoch": 0.5848808305307793, + "grad_norm": 1.2186213815751603, + "learning_rate": 1.6959093246538927e-05, + "loss": 0.5941641330718994, + "step": 2000 + }, + { + "epoch": 0.5851732709460448, + "grad_norm": 1.2310851342087676, + "learning_rate": 1.695562048679642e-05, + "loss": 0.6130149364471436, + "step": 2001 + }, + { + "epoch": 0.5854657113613101, + "grad_norm": 1.4904324383349616, + "learning_rate": 1.6952146101219914e-05, + "loss": 0.7078043222427368, + "step": 2002 + }, + { + "epoch": 0.5857581517765755, + "grad_norm": 1.4412882425814895, + "learning_rate": 1.6948670090621528e-05, + "loss": 0.6330863237380981, + "step": 2003 + }, + { + "epoch": 
0.5860505921918409, + "grad_norm": 1.2835823957491164, + "learning_rate": 1.6945192455813755e-05, + "loss": 0.6631220579147339, + "step": 2004 + }, + { + "epoch": 0.5863430326071063, + "grad_norm": 1.411600977622384, + "learning_rate": 1.6941713197609476e-05, + "loss": 0.6669473648071289, + "step": 2005 + }, + { + "epoch": 0.5866354730223717, + "grad_norm": 1.370088328820007, + "learning_rate": 1.6938232316821938e-05, + "loss": 0.608252763748169, + "step": 2006 + }, + { + "epoch": 0.5869279134376371, + "grad_norm": 1.3777699704962545, + "learning_rate": 1.6934749814264786e-05, + "loss": 0.5979427695274353, + "step": 2007 + }, + { + "epoch": 0.5872203538529025, + "grad_norm": 1.541200433158731, + "learning_rate": 1.6931265690752027e-05, + "loss": 0.5653454661369324, + "step": 2008 + }, + { + "epoch": 0.5875127942681678, + "grad_norm": 1.1212005773159774, + "learning_rate": 1.6927779947098052e-05, + "loss": 0.6399147510528564, + "step": 2009 + }, + { + "epoch": 0.5878052346834333, + "grad_norm": 1.1797468758477498, + "learning_rate": 1.6924292584117642e-05, + "loss": 0.41824793815612793, + "step": 2010 + }, + { + "epoch": 0.5880976750986986, + "grad_norm": 1.917297128854583, + "learning_rate": 1.6920803602625938e-05, + "loss": 0.8881042003631592, + "step": 2011 + }, + { + "epoch": 0.588390115513964, + "grad_norm": 1.331713386917835, + "learning_rate": 1.6917313003438473e-05, + "loss": 0.636030912399292, + "step": 2012 + }, + { + "epoch": 0.5886825559292295, + "grad_norm": 1.4002891525649699, + "learning_rate": 1.6913820787371147e-05, + "loss": 0.6038305759429932, + "step": 2013 + }, + { + "epoch": 0.5889749963444948, + "grad_norm": 1.1435051787090085, + "learning_rate": 1.6910326955240252e-05, + "loss": 0.7073840498924255, + "step": 2014 + }, + { + "epoch": 0.5892674367597602, + "grad_norm": 1.4386346426993692, + "learning_rate": 1.6906831507862446e-05, + "loss": 0.5804994106292725, + "step": 2015 + }, + { + "epoch": 0.5895598771750256, + "grad_norm": 
1.5741785374654678, + "learning_rate": 1.6903334446054768e-05, + "loss": 0.8194780349731445, + "step": 2016 + }, + { + "epoch": 0.589852317590291, + "grad_norm": 1.812303850133564, + "learning_rate": 1.689983577063464e-05, + "loss": 0.7348685264587402, + "step": 2017 + }, + { + "epoch": 0.5901447580055563, + "grad_norm": 1.1971589423872142, + "learning_rate": 1.689633548241985e-05, + "loss": 0.5855007171630859, + "step": 2018 + }, + { + "epoch": 0.5904371984208218, + "grad_norm": 1.3707253561652837, + "learning_rate": 1.689283358222857e-05, + "loss": 0.7387616634368896, + "step": 2019 + }, + { + "epoch": 0.5907296388360872, + "grad_norm": 1.1680954205847025, + "learning_rate": 1.688933007087935e-05, + "loss": 0.688759446144104, + "step": 2020 + }, + { + "epoch": 0.5910220792513525, + "grad_norm": 1.5341989172452428, + "learning_rate": 1.6885824949191117e-05, + "loss": 0.7203953266143799, + "step": 2021 + }, + { + "epoch": 0.591314519666618, + "grad_norm": 1.2850552689542662, + "learning_rate": 1.6882318217983165e-05, + "loss": 0.6465663909912109, + "step": 2022 + }, + { + "epoch": 0.5916069600818833, + "grad_norm": 1.1725524993946357, + "learning_rate": 1.6878809878075176e-05, + "loss": 0.6625394821166992, + "step": 2023 + }, + { + "epoch": 0.5918994004971487, + "grad_norm": 1.1518619162929866, + "learning_rate": 1.68752999302872e-05, + "loss": 0.6577074527740479, + "step": 2024 + }, + { + "epoch": 0.5921918409124141, + "grad_norm": 1.2660442226503865, + "learning_rate": 1.6871788375439667e-05, + "loss": 0.50509113073349, + "step": 2025 + }, + { + "epoch": 0.5924842813276795, + "grad_norm": 1.1506732126554624, + "learning_rate": 1.6868275214353387e-05, + "loss": 0.5723974704742432, + "step": 2026 + }, + { + "epoch": 0.5927767217429448, + "grad_norm": 1.5630741195611901, + "learning_rate": 1.6864760447849533e-05, + "loss": 0.6383459568023682, + "step": 2027 + }, + { + "epoch": 0.5930691621582103, + "grad_norm": 1.5937791400894217, + "learning_rate": 
1.6861244076749663e-05, + "loss": 0.5307388305664062, + "step": 2028 + }, + { + "epoch": 0.5933616025734757, + "grad_norm": 1.3756662975981515, + "learning_rate": 1.6857726101875706e-05, + "loss": 0.8009265661239624, + "step": 2029 + }, + { + "epoch": 0.593654042988741, + "grad_norm": 1.3635510886639874, + "learning_rate": 1.685420652404997e-05, + "loss": 0.5505321025848389, + "step": 2030 + }, + { + "epoch": 0.5939464834040065, + "grad_norm": 1.2645625310092812, + "learning_rate": 1.6850685344095134e-05, + "loss": 0.680927038192749, + "step": 2031 + }, + { + "epoch": 0.5942389238192718, + "grad_norm": 1.419624052256642, + "learning_rate": 1.684716256283425e-05, + "loss": 0.7357309460639954, + "step": 2032 + }, + { + "epoch": 0.5945313642345372, + "grad_norm": 1.2277919560967578, + "learning_rate": 1.6843638181090748e-05, + "loss": 0.5896620750427246, + "step": 2033 + }, + { + "epoch": 0.5948238046498026, + "grad_norm": 1.261982037348603, + "learning_rate": 1.6840112199688432e-05, + "loss": 0.5567387342453003, + "step": 2034 + }, + { + "epoch": 0.595116245065068, + "grad_norm": 1.2606984508496513, + "learning_rate": 1.6836584619451478e-05, + "loss": 0.6428712606430054, + "step": 2035 + }, + { + "epoch": 0.5954086854803334, + "grad_norm": 1.3387753764851709, + "learning_rate": 1.6833055441204436e-05, + "loss": 0.7430459260940552, + "step": 2036 + }, + { + "epoch": 0.5957011258955988, + "grad_norm": 1.250181817593343, + "learning_rate": 1.682952466577223e-05, + "loss": 0.5982654690742493, + "step": 2037 + }, + { + "epoch": 0.5959935663108642, + "grad_norm": 1.2721973260460164, + "learning_rate": 1.6825992293980158e-05, + "loss": 0.5807450413703918, + "step": 2038 + }, + { + "epoch": 0.5962860067261295, + "grad_norm": 1.4202543697420538, + "learning_rate": 1.6822458326653888e-05, + "loss": 0.7667814493179321, + "step": 2039 + }, + { + "epoch": 0.596578447141395, + "grad_norm": 1.4555539952275451, + "learning_rate": 1.6818922764619467e-05, + "loss": 0.8192781805992126, 
+ "step": 2040 + }, + { + "epoch": 0.5968708875566603, + "grad_norm": 1.3146767820144227, + "learning_rate": 1.681538560870331e-05, + "loss": 0.6652504205703735, + "step": 2041 + }, + { + "epoch": 0.5971633279719257, + "grad_norm": 1.4465108366403951, + "learning_rate": 1.6811846859732207e-05, + "loss": 0.6227332353591919, + "step": 2042 + }, + { + "epoch": 0.597455768387191, + "grad_norm": 1.1394575473936808, + "learning_rate": 1.6808306518533315e-05, + "loss": 0.5459558963775635, + "step": 2043 + }, + { + "epoch": 0.5977482088024565, + "grad_norm": 1.3498516241816683, + "learning_rate": 1.6804764585934167e-05, + "loss": 0.5176202058792114, + "step": 2044 + }, + { + "epoch": 0.5980406492177219, + "grad_norm": 1.5025501377940633, + "learning_rate": 1.6801221062762677e-05, + "loss": 0.5818016529083252, + "step": 2045 + }, + { + "epoch": 0.5983330896329873, + "grad_norm": 1.3397658451047565, + "learning_rate": 1.679767594984711e-05, + "loss": 0.622256875038147, + "step": 2046 + }, + { + "epoch": 0.5986255300482527, + "grad_norm": 1.2198859984633783, + "learning_rate": 1.6794129248016124e-05, + "loss": 0.5538911819458008, + "step": 2047 + }, + { + "epoch": 0.598917970463518, + "grad_norm": 1.1939205886096602, + "learning_rate": 1.6790580958098733e-05, + "loss": 0.4934890568256378, + "step": 2048 + }, + { + "epoch": 0.5992104108787835, + "grad_norm": 1.5628453531282531, + "learning_rate": 1.678703108092433e-05, + "loss": 0.6754223108291626, + "step": 2049 + }, + { + "epoch": 0.5995028512940488, + "grad_norm": 1.3047429440272302, + "learning_rate": 1.678347961732268e-05, + "loss": 0.48618268966674805, + "step": 2050 + }, + { + "epoch": 0.5997952917093142, + "grad_norm": 2.239352665042965, + "learning_rate": 1.6779926568123913e-05, + "loss": 0.6844758987426758, + "step": 2051 + }, + { + "epoch": 0.6000877321245797, + "grad_norm": 1.222439693123936, + "learning_rate": 1.677637193415853e-05, + "loss": 0.5258621573448181, + "step": 2052 + }, + { + "epoch": 
0.600380172539845, + "grad_norm": 1.5856950316684058, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.5571128129959106, + "step": 2053 + }, + { + "epoch": 0.6006726129551104, + "grad_norm": 1.514916071293939, + "learning_rate": 1.67692579152518e-05, + "loss": 0.5881344079971313, + "step": 2054 + }, + { + "epoch": 0.6009650533703758, + "grad_norm": 1.5804701546241575, + "learning_rate": 1.6765698531973305e-05, + "loss": 0.7162419557571411, + "step": 2055 + }, + { + "epoch": 0.6012574937856412, + "grad_norm": 1.487082432347586, + "learning_rate": 1.6762137567253917e-05, + "loss": 0.7470849752426147, + "step": 2056 + }, + { + "epoch": 0.6015499342009065, + "grad_norm": 1.4154424289161787, + "learning_rate": 1.6758575021925987e-05, + "loss": 0.6043628454208374, + "step": 2057 + }, + { + "epoch": 0.601842374616172, + "grad_norm": 1.4033964052969388, + "learning_rate": 1.6755010896822237e-05, + "loss": 0.6574143171310425, + "step": 2058 + }, + { + "epoch": 0.6021348150314374, + "grad_norm": 1.3508419478610747, + "learning_rate": 1.675144519277576e-05, + "loss": 0.605838418006897, + "step": 2059 + }, + { + "epoch": 0.6024272554467027, + "grad_norm": 1.2890691190480261, + "learning_rate": 1.6747877910620022e-05, + "loss": 0.5859218239784241, + "step": 2060 + }, + { + "epoch": 0.6027196958619682, + "grad_norm": 1.3985794655421304, + "learning_rate": 1.674430905118885e-05, + "loss": 0.7272971868515015, + "step": 2061 + }, + { + "epoch": 0.6030121362772335, + "grad_norm": 1.2870566467248659, + "learning_rate": 1.674073861531644e-05, + "loss": 0.606023907661438, + "step": 2062 + }, + { + "epoch": 0.6033045766924989, + "grad_norm": 1.345090429761192, + "learning_rate": 1.6737166603837364e-05, + "loss": 0.6029521822929382, + "step": 2063 + }, + { + "epoch": 0.6035970171077643, + "grad_norm": 1.1860277395685632, + "learning_rate": 1.673359301758656e-05, + "loss": 0.7544999122619629, + "step": 2064 + }, + { + "epoch": 0.6038894575230297, + "grad_norm": 1.3953376279645262, + 
"learning_rate": 1.6730017857399327e-05, + "loss": 0.7487601637840271, + "step": 2065 + }, + { + "epoch": 0.604181897938295, + "grad_norm": 1.3904468062872732, + "learning_rate": 1.672644112411134e-05, + "loss": 0.6429200172424316, + "step": 2066 + }, + { + "epoch": 0.6044743383535605, + "grad_norm": 1.4246263416975375, + "learning_rate": 1.6722862818558635e-05, + "loss": 0.7337179183959961, + "step": 2067 + }, + { + "epoch": 0.6047667787688259, + "grad_norm": 1.432290850861675, + "learning_rate": 1.671928294157762e-05, + "loss": 0.6644014120101929, + "step": 2068 + }, + { + "epoch": 0.6050592191840912, + "grad_norm": 1.3048966935224826, + "learning_rate": 1.6715701494005078e-05, + "loss": 0.5987672805786133, + "step": 2069 + }, + { + "epoch": 0.6053516595993567, + "grad_norm": 1.5176113056744007, + "learning_rate": 1.671211847667814e-05, + "loss": 0.5878695845603943, + "step": 2070 + }, + { + "epoch": 0.605644100014622, + "grad_norm": 1.3348485026555847, + "learning_rate": 1.670853389043432e-05, + "loss": 0.540128231048584, + "step": 2071 + }, + { + "epoch": 0.6059365404298874, + "grad_norm": 1.3888450119982874, + "learning_rate": 1.670494773611149e-05, + "loss": 0.667206346988678, + "step": 2072 + }, + { + "epoch": 0.6062289808451528, + "grad_norm": 1.5911825658421195, + "learning_rate": 1.6701360014547896e-05, + "loss": 0.6433641910552979, + "step": 2073 + }, + { + "epoch": 0.6065214212604182, + "grad_norm": 1.447981653333928, + "learning_rate": 1.669777072658214e-05, + "loss": 0.5803529024124146, + "step": 2074 + }, + { + "epoch": 0.6068138616756836, + "grad_norm": 1.125005009009719, + "learning_rate": 1.6694179873053202e-05, + "loss": 0.6203820705413818, + "step": 2075 + }, + { + "epoch": 0.607106302090949, + "grad_norm": 1.3092542979615172, + "learning_rate": 1.669058745480042e-05, + "loss": 0.6194918155670166, + "step": 2076 + }, + { + "epoch": 0.6073987425062144, + "grad_norm": 1.593480689755987, + "learning_rate": 1.66869934726635e-05, + "loss": 
0.6797547936439514, + "step": 2077 + }, + { + "epoch": 0.6076911829214797, + "grad_norm": 1.3923211889522802, + "learning_rate": 1.6683397927482512e-05, + "loss": 0.6076459884643555, + "step": 2078 + }, + { + "epoch": 0.6079836233367452, + "grad_norm": 1.3874225830336557, + "learning_rate": 1.6679800820097895e-05, + "loss": 0.6958068609237671, + "step": 2079 + }, + { + "epoch": 0.6082760637520105, + "grad_norm": 1.3355509335032223, + "learning_rate": 1.6676202151350453e-05, + "loss": 0.5819929242134094, + "step": 2080 + }, + { + "epoch": 0.6085685041672759, + "grad_norm": 1.3476445996808082, + "learning_rate": 1.6672601922081347e-05, + "loss": 0.7125047445297241, + "step": 2081 + }, + { + "epoch": 0.6088609445825413, + "grad_norm": 1.4432332437479862, + "learning_rate": 1.6669000133132108e-05, + "loss": 0.8046560287475586, + "step": 2082 + }, + { + "epoch": 0.6091533849978067, + "grad_norm": 1.192025927247586, + "learning_rate": 1.666539678534464e-05, + "loss": 0.5468478202819824, + "step": 2083 + }, + { + "epoch": 0.6094458254130721, + "grad_norm": 1.3403719695971306, + "learning_rate": 1.6661791879561204e-05, + "loss": 0.6387852430343628, + "step": 2084 + }, + { + "epoch": 0.6097382658283375, + "grad_norm": 1.3327872578740647, + "learning_rate": 1.6658185416624415e-05, + "loss": 0.643539547920227, + "step": 2085 + }, + { + "epoch": 0.6100307062436029, + "grad_norm": 1.2236148701775094, + "learning_rate": 1.6654577397377266e-05, + "loss": 0.5031965374946594, + "step": 2086 + }, + { + "epoch": 0.6103231466588682, + "grad_norm": 1.507439246425782, + "learning_rate": 1.6650967822663115e-05, + "loss": 0.6690273284912109, + "step": 2087 + }, + { + "epoch": 0.6106155870741337, + "grad_norm": 1.2924449065282086, + "learning_rate": 1.6647356693325672e-05, + "loss": 0.6396887302398682, + "step": 2088 + }, + { + "epoch": 0.610908027489399, + "grad_norm": 1.4444361497865652, + "learning_rate": 1.664374401020902e-05, + "loss": 0.6306549310684204, + "step": 2089 + }, + { + 
"epoch": 0.6112004679046644, + "grad_norm": 1.3565777173208147, + "learning_rate": 1.66401297741576e-05, + "loss": 0.5936366319656372, + "step": 2090 + }, + { + "epoch": 0.6114929083199299, + "grad_norm": 1.1669567203268514, + "learning_rate": 1.6636513986016215e-05, + "loss": 0.6153277158737183, + "step": 2091 + }, + { + "epoch": 0.6117853487351952, + "grad_norm": 1.2085146124175858, + "learning_rate": 1.663289664663004e-05, + "loss": 0.6361621618270874, + "step": 2092 + }, + { + "epoch": 0.6120777891504606, + "grad_norm": 1.2163858440552462, + "learning_rate": 1.6629277756844603e-05, + "loss": 0.6511524319648743, + "step": 2093 + }, + { + "epoch": 0.612370229565726, + "grad_norm": 1.2219001757495958, + "learning_rate": 1.6625657317505792e-05, + "loss": 0.5811333656311035, + "step": 2094 + }, + { + "epoch": 0.6126626699809914, + "grad_norm": 1.4531007944498606, + "learning_rate": 1.6622035329459872e-05, + "loss": 0.6935377717018127, + "step": 2095 + }, + { + "epoch": 0.6129551103962567, + "grad_norm": 1.3697721797296887, + "learning_rate": 1.6618411793553455e-05, + "loss": 0.6363199949264526, + "step": 2096 + }, + { + "epoch": 0.6132475508115222, + "grad_norm": 1.6107434013725794, + "learning_rate": 1.6614786710633525e-05, + "loss": 0.7325713634490967, + "step": 2097 + }, + { + "epoch": 0.6135399912267876, + "grad_norm": 1.3944095356365322, + "learning_rate": 1.6611160081547414e-05, + "loss": 0.5739182829856873, + "step": 2098 + }, + { + "epoch": 0.6138324316420529, + "grad_norm": 1.4193388816384238, + "learning_rate": 1.6607531907142835e-05, + "loss": 0.611133873462677, + "step": 2099 + }, + { + "epoch": 0.6141248720573184, + "grad_norm": 1.579788361702439, + "learning_rate": 1.6603902188267842e-05, + "loss": 0.6419532299041748, + "step": 2100 + }, + { + "epoch": 0.6144173124725837, + "grad_norm": 1.482873128334509, + "learning_rate": 1.660027092577087e-05, + "loss": 0.7736743688583374, + "step": 2101 + }, + { + "epoch": 0.6147097528878491, + "grad_norm": 
1.199857125427724, + "learning_rate": 1.6596638120500696e-05, + "loss": 0.5249119400978088, + "step": 2102 + }, + { + "epoch": 0.6150021933031145, + "grad_norm": 2.505852142425954, + "learning_rate": 1.6593003773306475e-05, + "loss": 0.7145636081695557, + "step": 2103 + }, + { + "epoch": 0.6152946337183799, + "grad_norm": 1.3335089477583737, + "learning_rate": 1.65893678850377e-05, + "loss": 0.5807666182518005, + "step": 2104 + }, + { + "epoch": 0.6155870741336452, + "grad_norm": 1.2437068513912055, + "learning_rate": 1.6585730456544255e-05, + "loss": 0.5049663782119751, + "step": 2105 + }, + { + "epoch": 0.6158795145489107, + "grad_norm": 1.4826397888996732, + "learning_rate": 1.658209148867635e-05, + "loss": 0.6744092702865601, + "step": 2106 + }, + { + "epoch": 0.6161719549641761, + "grad_norm": 1.4821897923446594, + "learning_rate": 1.6578450982284584e-05, + "loss": 0.605404794216156, + "step": 2107 + }, + { + "epoch": 0.6164643953794414, + "grad_norm": 1.1917544416711534, + "learning_rate": 1.6574808938219894e-05, + "loss": 0.6074866056442261, + "step": 2108 + }, + { + "epoch": 0.6167568357947069, + "grad_norm": 1.284543555588908, + "learning_rate": 1.6571165357333594e-05, + "loss": 0.6758207082748413, + "step": 2109 + }, + { + "epoch": 0.6170492762099722, + "grad_norm": 1.580962080275822, + "learning_rate": 1.6567520240477344e-05, + "loss": 0.7669274806976318, + "step": 2110 + }, + { + "epoch": 0.6173417166252376, + "grad_norm": 1.3997913559025885, + "learning_rate": 1.6563873588503173e-05, + "loss": 0.497562050819397, + "step": 2111 + }, + { + "epoch": 0.617634157040503, + "grad_norm": 1.6655652024231358, + "learning_rate": 1.656022540226345e-05, + "loss": 0.6398104429244995, + "step": 2112 + }, + { + "epoch": 0.6179265974557684, + "grad_norm": 1.4155810596985208, + "learning_rate": 1.6556575682610935e-05, + "loss": 0.6739988327026367, + "step": 2113 + }, + { + "epoch": 0.6182190378710338, + "grad_norm": 1.3164921836609038, + "learning_rate": 
1.6552924430398716e-05, + "loss": 0.5710165500640869, + "step": 2114 + }, + { + "epoch": 0.6185114782862992, + "grad_norm": 1.1567442833736337, + "learning_rate": 1.6549271646480253e-05, + "loss": 0.6087738871574402, + "step": 2115 + }, + { + "epoch": 0.6188039187015646, + "grad_norm": 1.1877649418617353, + "learning_rate": 1.6545617331709364e-05, + "loss": 0.5300824642181396, + "step": 2116 + }, + { + "epoch": 0.6190963591168299, + "grad_norm": 1.3759503189909044, + "learning_rate": 1.6541961486940222e-05, + "loss": 0.7384774684906006, + "step": 2117 + }, + { + "epoch": 0.6193887995320954, + "grad_norm": 1.1608035895573054, + "learning_rate": 1.6538304113027356e-05, + "loss": 0.5867838263511658, + "step": 2118 + }, + { + "epoch": 0.6196812399473607, + "grad_norm": 1.4435135524238625, + "learning_rate": 1.653464521082566e-05, + "loss": 0.617068886756897, + "step": 2119 + }, + { + "epoch": 0.6199736803626261, + "grad_norm": 1.2420433862943483, + "learning_rate": 1.6530984781190374e-05, + "loss": 0.7316439151763916, + "step": 2120 + }, + { + "epoch": 0.6202661207778914, + "grad_norm": 1.3153827472233475, + "learning_rate": 1.6527322824977104e-05, + "loss": 0.5469995737075806, + "step": 2121 + }, + { + "epoch": 0.6205585611931569, + "grad_norm": 1.4608354678316708, + "learning_rate": 1.6523659343041815e-05, + "loss": 0.6577411890029907, + "step": 2122 + }, + { + "epoch": 0.6208510016084223, + "grad_norm": 1.5130442860821829, + "learning_rate": 1.6519994336240816e-05, + "loss": 0.7425049543380737, + "step": 2123 + }, + { + "epoch": 0.6211434420236877, + "grad_norm": 1.7408354143028393, + "learning_rate": 1.6516327805430785e-05, + "loss": 0.7894090414047241, + "step": 2124 + }, + { + "epoch": 0.6214358824389531, + "grad_norm": 1.2267269656084083, + "learning_rate": 1.651265975146875e-05, + "loss": 0.5739543437957764, + "step": 2125 + }, + { + "epoch": 0.6217283228542184, + "grad_norm": 1.2973694692382243, + "learning_rate": 1.6508990175212092e-05, + "loss": 
0.6987308263778687, + "step": 2126 + }, + { + "epoch": 0.6220207632694839, + "grad_norm": 1.237403110571432, + "learning_rate": 1.650531907751856e-05, + "loss": 0.5956544280052185, + "step": 2127 + }, + { + "epoch": 0.6223132036847492, + "grad_norm": 1.3646659152675398, + "learning_rate": 1.6501646459246245e-05, + "loss": 0.582348108291626, + "step": 2128 + }, + { + "epoch": 0.6226056441000146, + "grad_norm": 1.327256978138479, + "learning_rate": 1.64979723212536e-05, + "loss": 0.8057917356491089, + "step": 2129 + }, + { + "epoch": 0.6228980845152801, + "grad_norm": 1.1623408864017983, + "learning_rate": 1.6494296664399428e-05, + "loss": 0.6237305402755737, + "step": 2130 + }, + { + "epoch": 0.6231905249305454, + "grad_norm": 1.3152067943219485, + "learning_rate": 1.6490619489542905e-05, + "loss": 0.6445767879486084, + "step": 2131 + }, + { + "epoch": 0.6234829653458108, + "grad_norm": 1.4611569228302668, + "learning_rate": 1.648694079754354e-05, + "loss": 0.6397994160652161, + "step": 2132 + }, + { + "epoch": 0.6237754057610762, + "grad_norm": 1.3955823025243248, + "learning_rate": 1.64832605892612e-05, + "loss": 0.8216533660888672, + "step": 2133 + }, + { + "epoch": 0.6240678461763416, + "grad_norm": 1.3134524569329014, + "learning_rate": 1.6479578865556115e-05, + "loss": 0.6894406080245972, + "step": 2134 + }, + { + "epoch": 0.6243602865916069, + "grad_norm": 1.2940264658828888, + "learning_rate": 1.6475895627288873e-05, + "loss": 0.6608946323394775, + "step": 2135 + }, + { + "epoch": 0.6246527270068724, + "grad_norm": 1.4094544295935185, + "learning_rate": 1.6472210875320397e-05, + "loss": 0.6070076823234558, + "step": 2136 + }, + { + "epoch": 0.6249451674221378, + "grad_norm": 1.4359082412623407, + "learning_rate": 1.6468524610511982e-05, + "loss": 0.7357348799705505, + "step": 2137 + }, + { + "epoch": 0.6252376078374031, + "grad_norm": 1.201965871501085, + "learning_rate": 1.6464836833725267e-05, + "loss": 0.5959880352020264, + "step": 2138 + }, + { + 
"epoch": 0.6255300482526686, + "grad_norm": 1.3046810888024383, + "learning_rate": 1.646114754582225e-05, + "loss": 0.7812649011611938, + "step": 2139 + }, + { + "epoch": 0.6258224886679339, + "grad_norm": 1.6609760293820528, + "learning_rate": 1.6457456747665282e-05, + "loss": 0.5985091924667358, + "step": 2140 + }, + { + "epoch": 0.6261149290831993, + "grad_norm": 1.5609316045902142, + "learning_rate": 1.645376444011706e-05, + "loss": 0.6610564589500427, + "step": 2141 + }, + { + "epoch": 0.6264073694984647, + "grad_norm": 1.3917319855245425, + "learning_rate": 1.6450070624040636e-05, + "loss": 0.6876299381256104, + "step": 2142 + }, + { + "epoch": 0.6266998099137301, + "grad_norm": 1.3567193814213938, + "learning_rate": 1.6446375300299425e-05, + "loss": 0.6715782284736633, + "step": 2143 + }, + { + "epoch": 0.6269922503289954, + "grad_norm": 1.6061237563072754, + "learning_rate": 1.644267846975718e-05, + "loss": 0.6066923141479492, + "step": 2144 + }, + { + "epoch": 0.6272846907442609, + "grad_norm": 1.2493532553829008, + "learning_rate": 1.6438980133278017e-05, + "loss": 0.5642968416213989, + "step": 2145 + }, + { + "epoch": 0.6275771311595263, + "grad_norm": 1.0703284322753808, + "learning_rate": 1.6435280291726394e-05, + "loss": 0.604590654373169, + "step": 2146 + }, + { + "epoch": 0.6278695715747916, + "grad_norm": 1.3292746736885825, + "learning_rate": 1.643157894596713e-05, + "loss": 0.6313889026641846, + "step": 2147 + }, + { + "epoch": 0.6281620119900571, + "grad_norm": 1.0767305616181233, + "learning_rate": 1.6427876096865394e-05, + "loss": 0.5084092617034912, + "step": 2148 + }, + { + "epoch": 0.6284544524053224, + "grad_norm": 1.250433663172197, + "learning_rate": 1.6424171745286704e-05, + "loss": 0.5191931128501892, + "step": 2149 + }, + { + "epoch": 0.6287468928205878, + "grad_norm": 1.3567625810681667, + "learning_rate": 1.6420465892096924e-05, + "loss": 0.7397615909576416, + "step": 2150 + }, + { + "epoch": 0.6290393332358531, + "grad_norm": 
1.1359315638082286, + "learning_rate": 1.641675853816228e-05, + "loss": 0.622586727142334, + "step": 2151 + }, + { + "epoch": 0.6293317736511186, + "grad_norm": 1.433028642480203, + "learning_rate": 1.6413049684349344e-05, + "loss": 0.7894928455352783, + "step": 2152 + }, + { + "epoch": 0.629624214066384, + "grad_norm": 1.4395392231763253, + "learning_rate": 1.640933933152504e-05, + "loss": 0.5752773284912109, + "step": 2153 + }, + { + "epoch": 0.6299166544816494, + "grad_norm": 1.3952520818076775, + "learning_rate": 1.640562748055663e-05, + "loss": 0.6738473176956177, + "step": 2154 + }, + { + "epoch": 0.6302090948969148, + "grad_norm": 1.2597002399242925, + "learning_rate": 1.6401914132311745e-05, + "loss": 0.5789517164230347, + "step": 2155 + }, + { + "epoch": 0.6305015353121801, + "grad_norm": 1.2840904364476742, + "learning_rate": 1.6398199287658358e-05, + "loss": 0.5925524830818176, + "step": 2156 + }, + { + "epoch": 0.6307939757274456, + "grad_norm": 1.4374336859820211, + "learning_rate": 1.6394482947464784e-05, + "loss": 0.6949414610862732, + "step": 2157 + }, + { + "epoch": 0.6310864161427109, + "grad_norm": 1.3617313094593515, + "learning_rate": 1.6390765112599705e-05, + "loss": 0.7435301542282104, + "step": 2158 + }, + { + "epoch": 0.6313788565579763, + "grad_norm": 1.5109256996682827, + "learning_rate": 1.6387045783932137e-05, + "loss": 0.6931856274604797, + "step": 2159 + }, + { + "epoch": 0.6316712969732416, + "grad_norm": 1.4369843702380298, + "learning_rate": 1.638332496233145e-05, + "loss": 0.7856471538543701, + "step": 2160 + }, + { + "epoch": 0.6319637373885071, + "grad_norm": 1.460850634730034, + "learning_rate": 1.6379602648667362e-05, + "loss": 0.6299946308135986, + "step": 2161 + }, + { + "epoch": 0.6322561778037725, + "grad_norm": 1.5299113211206812, + "learning_rate": 1.6375878843809946e-05, + "loss": 0.6209328174591064, + "step": 2162 + }, + { + "epoch": 0.6325486182190379, + "grad_norm": 1.4269696757613273, + "learning_rate": 
1.6372153548629617e-05, + "loss": 0.6498390436172485, + "step": 2163 + }, + { + "epoch": 0.6328410586343033, + "grad_norm": 2.1028833494160573, + "learning_rate": 1.6368426763997137e-05, + "loss": 0.6757122278213501, + "step": 2164 + }, + { + "epoch": 0.6331334990495686, + "grad_norm": 1.289589419762841, + "learning_rate": 1.6364698490783623e-05, + "loss": 0.5137026906013489, + "step": 2165 + }, + { + "epoch": 0.633425939464834, + "grad_norm": 1.3914324771074273, + "learning_rate": 1.6360968729860536e-05, + "loss": 0.5876519680023193, + "step": 2166 + }, + { + "epoch": 0.6337183798800994, + "grad_norm": 1.2533286000898018, + "learning_rate": 1.6357237482099682e-05, + "loss": 0.5804057717323303, + "step": 2167 + }, + { + "epoch": 0.6340108202953648, + "grad_norm": 1.361440329822907, + "learning_rate": 1.635350474837322e-05, + "loss": 0.6186444759368896, + "step": 2168 + }, + { + "epoch": 0.6343032607106303, + "grad_norm": 1.4479908785794617, + "learning_rate": 1.6349770529553654e-05, + "loss": 0.6358560919761658, + "step": 2169 + }, + { + "epoch": 0.6345957011258956, + "grad_norm": 1.2507636068938528, + "learning_rate": 1.6346034826513834e-05, + "loss": 0.64283686876297, + "step": 2170 + }, + { + "epoch": 0.634888141541161, + "grad_norm": 1.3854516647796151, + "learning_rate": 1.6342297640126955e-05, + "loss": 0.5269169807434082, + "step": 2171 + }, + { + "epoch": 0.6351805819564263, + "grad_norm": 1.3233372829927026, + "learning_rate": 1.6338558971266563e-05, + "loss": 0.5338561534881592, + "step": 2172 + }, + { + "epoch": 0.6354730223716918, + "grad_norm": 1.365606957045604, + "learning_rate": 1.6334818820806555e-05, + "loss": 0.5587184429168701, + "step": 2173 + }, + { + "epoch": 0.6357654627869571, + "grad_norm": 1.2288709810094502, + "learning_rate": 1.633107718962116e-05, + "loss": 0.6468764543533325, + "step": 2174 + }, + { + "epoch": 0.6360579032022226, + "grad_norm": 1.4431243955955453, + "learning_rate": 1.6327334078584967e-05, + "loss": 
0.7305203676223755, + "step": 2175 + }, + { + "epoch": 0.636350343617488, + "grad_norm": 1.3207763162749322, + "learning_rate": 1.6323589488572908e-05, + "loss": 0.6226189136505127, + "step": 2176 + }, + { + "epoch": 0.6366427840327533, + "grad_norm": 1.4828987038724675, + "learning_rate": 1.631984342046025e-05, + "loss": 0.6552053093910217, + "step": 2177 + }, + { + "epoch": 0.6369352244480188, + "grad_norm": 1.6836072588979352, + "learning_rate": 1.6316095875122617e-05, + "loss": 0.8121978044509888, + "step": 2178 + }, + { + "epoch": 0.6372276648632841, + "grad_norm": 1.3359221660901908, + "learning_rate": 1.6312346853435976e-05, + "loss": 0.5826296806335449, + "step": 2179 + }, + { + "epoch": 0.6375201052785495, + "grad_norm": 1.3567795832303162, + "learning_rate": 1.630859635627664e-05, + "loss": 0.5862709283828735, + "step": 2180 + }, + { + "epoch": 0.6378125456938148, + "grad_norm": 1.2132204868801326, + "learning_rate": 1.6304844384521263e-05, + "loss": 0.7081524133682251, + "step": 2181 + }, + { + "epoch": 0.6381049861090803, + "grad_norm": 1.2359384159808198, + "learning_rate": 1.6301090939046843e-05, + "loss": 0.6394449472427368, + "step": 2182 + }, + { + "epoch": 0.6383974265243456, + "grad_norm": 1.25131780401235, + "learning_rate": 1.6297336020730727e-05, + "loss": 0.6184799075126648, + "step": 2183 + }, + { + "epoch": 0.638689866939611, + "grad_norm": 1.3090426226978378, + "learning_rate": 1.6293579630450606e-05, + "loss": 0.6877666711807251, + "step": 2184 + }, + { + "epoch": 0.6389823073548765, + "grad_norm": 1.3648594367613462, + "learning_rate": 1.6289821769084512e-05, + "loss": 0.5596371293067932, + "step": 2185 + }, + { + "epoch": 0.6392747477701418, + "grad_norm": 1.1779148594123119, + "learning_rate": 1.6286062437510823e-05, + "loss": 0.5378291010856628, + "step": 2186 + }, + { + "epoch": 0.6395671881854073, + "grad_norm": 1.2132664638530417, + "learning_rate": 1.6282301636608256e-05, + "loss": 0.6965627670288086, + "step": 2187 + }, + { + 
"epoch": 0.6398596286006726, + "grad_norm": 1.3017112466193883, + "learning_rate": 1.6278539367255885e-05, + "loss": 0.5939220190048218, + "step": 2188 + }, + { + "epoch": 0.640152069015938, + "grad_norm": 1.3743138396251577, + "learning_rate": 1.6274775630333104e-05, + "loss": 0.6225341558456421, + "step": 2189 + }, + { + "epoch": 0.6404445094312033, + "grad_norm": 1.103061387587319, + "learning_rate": 1.6271010426719672e-05, + "loss": 0.471333384513855, + "step": 2190 + }, + { + "epoch": 0.6407369498464688, + "grad_norm": 1.3505910885858836, + "learning_rate": 1.626724375729568e-05, + "loss": 0.6066263914108276, + "step": 2191 + }, + { + "epoch": 0.6410293902617342, + "grad_norm": 1.2842885881869934, + "learning_rate": 1.626347562294157e-05, + "loss": 0.6525982618331909, + "step": 2192 + }, + { + "epoch": 0.6413218306769995, + "grad_norm": 1.375624970339684, + "learning_rate": 1.6259706024538113e-05, + "loss": 0.7395817041397095, + "step": 2193 + }, + { + "epoch": 0.641614271092265, + "grad_norm": 1.326045982489242, + "learning_rate": 1.6255934962966432e-05, + "loss": 0.720014214515686, + "step": 2194 + }, + { + "epoch": 0.6419067115075303, + "grad_norm": 1.4102074363113735, + "learning_rate": 1.625216243910799e-05, + "loss": 0.6905295252799988, + "step": 2195 + }, + { + "epoch": 0.6421991519227958, + "grad_norm": 1.3533501829991437, + "learning_rate": 1.6248388453844596e-05, + "loss": 0.6877295970916748, + "step": 2196 + }, + { + "epoch": 0.6424915923380611, + "grad_norm": 1.414790050061214, + "learning_rate": 1.6244613008058386e-05, + "loss": 0.5782181024551392, + "step": 2197 + }, + { + "epoch": 0.6427840327533265, + "grad_norm": 1.2129092557671588, + "learning_rate": 1.6240836102631856e-05, + "loss": 0.5253425240516663, + "step": 2198 + }, + { + "epoch": 0.6430764731685918, + "grad_norm": 1.2461747547364295, + "learning_rate": 1.623705773844783e-05, + "loss": 0.6631319522857666, + "step": 2199 + }, + { + "epoch": 0.6433689135838573, + "grad_norm": 
1.6130890971192966, + "learning_rate": 1.6233277916389482e-05, + "loss": 0.6458526849746704, + "step": 2200 + }, + { + "epoch": 0.6436613539991227, + "grad_norm": 1.5712729506149452, + "learning_rate": 1.622949663734032e-05, + "loss": 0.5723023414611816, + "step": 2201 + }, + { + "epoch": 0.643953794414388, + "grad_norm": 1.4119455791937807, + "learning_rate": 1.6225713902184193e-05, + "loss": 0.6852096319198608, + "step": 2202 + }, + { + "epoch": 0.6442462348296535, + "grad_norm": 1.460558869527006, + "learning_rate": 1.6221929711805297e-05, + "loss": 0.6343507170677185, + "step": 2203 + }, + { + "epoch": 0.6445386752449188, + "grad_norm": 1.217897103510346, + "learning_rate": 1.6218144067088157e-05, + "loss": 0.6378631591796875, + "step": 2204 + }, + { + "epoch": 0.6448311156601843, + "grad_norm": 1.1203441428966674, + "learning_rate": 1.621435696891765e-05, + "loss": 0.6550023555755615, + "step": 2205 + }, + { + "epoch": 0.6451235560754496, + "grad_norm": 1.3522778560223117, + "learning_rate": 1.6210568418178983e-05, + "loss": 0.5555052757263184, + "step": 2206 + }, + { + "epoch": 0.645415996490715, + "grad_norm": 1.330819772406298, + "learning_rate": 1.6206778415757715e-05, + "loss": 0.7171934247016907, + "step": 2207 + }, + { + "epoch": 0.6457084369059805, + "grad_norm": 1.2953726655501339, + "learning_rate": 1.6202986962539726e-05, + "loss": 0.6464889049530029, + "step": 2208 + }, + { + "epoch": 0.6460008773212458, + "grad_norm": 1.5324773487302452, + "learning_rate": 1.619919405941125e-05, + "loss": 0.6316033601760864, + "step": 2209 + }, + { + "epoch": 0.6462933177365112, + "grad_norm": 1.2083095479015487, + "learning_rate": 1.6195399707258855e-05, + "loss": 0.5548732876777649, + "step": 2210 + }, + { + "epoch": 0.6465857581517765, + "grad_norm": 1.088879983740594, + "learning_rate": 1.6191603906969447e-05, + "loss": 0.5055203437805176, + "step": 2211 + }, + { + "epoch": 0.646878198567042, + "grad_norm": 1.3416079726495937, + "learning_rate": 
1.6187806659430268e-05, + "loss": 0.7010073661804199, + "step": 2212 + }, + { + "epoch": 0.6471706389823073, + "grad_norm": 1.39696751963916, + "learning_rate": 1.6184007965528908e-05, + "loss": 0.6188487410545349, + "step": 2213 + }, + { + "epoch": 0.6474630793975727, + "grad_norm": 1.1122504211535682, + "learning_rate": 1.6180207826153284e-05, + "loss": 0.46920153498649597, + "step": 2214 + }, + { + "epoch": 0.6477555198128382, + "grad_norm": 1.1420938414191775, + "learning_rate": 1.617640624219166e-05, + "loss": 0.6811172962188721, + "step": 2215 + }, + { + "epoch": 0.6480479602281035, + "grad_norm": 1.456471656413964, + "learning_rate": 1.617260321453263e-05, + "loss": 0.6425800323486328, + "step": 2216 + }, + { + "epoch": 0.648340400643369, + "grad_norm": 1.5968265799871777, + "learning_rate": 1.6168798744065123e-05, + "loss": 0.7020897269248962, + "step": 2217 + }, + { + "epoch": 0.6486328410586343, + "grad_norm": 1.1227944263783516, + "learning_rate": 1.6164992831678422e-05, + "loss": 0.5872179865837097, + "step": 2218 + }, + { + "epoch": 0.6489252814738997, + "grad_norm": 1.6374275819992907, + "learning_rate": 1.6161185478262127e-05, + "loss": 0.7414118647575378, + "step": 2219 + }, + { + "epoch": 0.649217721889165, + "grad_norm": 1.2707285395428818, + "learning_rate": 1.615737668470619e-05, + "loss": 0.5408385396003723, + "step": 2220 + }, + { + "epoch": 0.6495101623044305, + "grad_norm": 1.2587309097221344, + "learning_rate": 1.6153566451900887e-05, + "loss": 0.6145513653755188, + "step": 2221 + }, + { + "epoch": 0.6498026027196958, + "grad_norm": 1.1746181148032837, + "learning_rate": 1.6149754780736847e-05, + "loss": 0.556422233581543, + "step": 2222 + }, + { + "epoch": 0.6500950431349612, + "grad_norm": 1.4903419319059785, + "learning_rate": 1.614594167210501e-05, + "loss": 0.7155405282974243, + "step": 2223 + }, + { + "epoch": 0.6503874835502267, + "grad_norm": 1.2945043385192228, + "learning_rate": 1.6142127126896682e-05, + "loss": 
0.4988427758216858, + "step": 2224 + }, + { + "epoch": 0.650679923965492, + "grad_norm": 1.3962995233264988, + "learning_rate": 1.6138311146003477e-05, + "loss": 0.6187007427215576, + "step": 2225 + }, + { + "epoch": 0.6509723643807575, + "grad_norm": 1.329312474096709, + "learning_rate": 1.6134493730317364e-05, + "loss": 0.5668798685073853, + "step": 2226 + }, + { + "epoch": 0.6512648047960228, + "grad_norm": 1.2528148742640925, + "learning_rate": 1.6130674880730642e-05, + "loss": 0.6354215145111084, + "step": 2227 + }, + { + "epoch": 0.6515572452112882, + "grad_norm": 1.3738601794334195, + "learning_rate": 1.612685459813594e-05, + "loss": 0.5409573912620544, + "step": 2228 + }, + { + "epoch": 0.6518496856265535, + "grad_norm": 1.24582725943008, + "learning_rate": 1.612303288342623e-05, + "loss": 0.5622435808181763, + "step": 2229 + }, + { + "epoch": 0.652142126041819, + "grad_norm": 1.3303126336426627, + "learning_rate": 1.6119209737494814e-05, + "loss": 0.786159873008728, + "step": 2230 + }, + { + "epoch": 0.6524345664570844, + "grad_norm": 1.3038971892359654, + "learning_rate": 1.611538516123532e-05, + "loss": 0.6359272003173828, + "step": 2231 + }, + { + "epoch": 0.6527270068723497, + "grad_norm": 1.2508619512631416, + "learning_rate": 1.6111559155541732e-05, + "loss": 0.5688974261283875, + "step": 2232 + }, + { + "epoch": 0.6530194472876152, + "grad_norm": 1.1877745994435736, + "learning_rate": 1.610773172130835e-05, + "loss": 0.581497311592102, + "step": 2233 + }, + { + "epoch": 0.6533118877028805, + "grad_norm": 1.6577687870030173, + "learning_rate": 1.6103902859429812e-05, + "loss": 0.674004316329956, + "step": 2234 + }, + { + "epoch": 0.653604328118146, + "grad_norm": 1.4167456148188138, + "learning_rate": 1.6100072570801092e-05, + "loss": 0.6798728108406067, + "step": 2235 + }, + { + "epoch": 0.6538967685334113, + "grad_norm": 1.245467514643811, + "learning_rate": 1.60962408563175e-05, + "loss": 0.5742023587226868, + "step": 2236 + }, + { + "epoch": 
0.6541892089486767, + "grad_norm": 1.1993067492933944, + "learning_rate": 1.6092407716874674e-05, + "loss": 0.470009446144104, + "step": 2237 + }, + { + "epoch": 0.654481649363942, + "grad_norm": 1.3725626324774514, + "learning_rate": 1.6088573153368586e-05, + "loss": 0.8113270998001099, + "step": 2238 + }, + { + "epoch": 0.6547740897792075, + "grad_norm": 1.4825942391015299, + "learning_rate": 1.6084737166695542e-05, + "loss": 0.7737559676170349, + "step": 2239 + }, + { + "epoch": 0.6550665301944729, + "grad_norm": 1.5932921988768602, + "learning_rate": 1.6080899757752183e-05, + "loss": 0.6499667167663574, + "step": 2240 + }, + { + "epoch": 0.6553589706097382, + "grad_norm": 1.5295213411109583, + "learning_rate": 1.6077060927435476e-05, + "loss": 0.6898500323295593, + "step": 2241 + }, + { + "epoch": 0.6556514110250037, + "grad_norm": 1.264521733401818, + "learning_rate": 1.6073220676642724e-05, + "loss": 0.5933262705802917, + "step": 2242 + }, + { + "epoch": 0.655943851440269, + "grad_norm": 1.6150723182894215, + "learning_rate": 1.606937900627157e-05, + "loss": 0.6566172242164612, + "step": 2243 + }, + { + "epoch": 0.6562362918555344, + "grad_norm": 1.5267009306631556, + "learning_rate": 1.606553591721997e-05, + "loss": 0.6955286264419556, + "step": 2244 + }, + { + "epoch": 0.6565287322707998, + "grad_norm": 1.2904648803296817, + "learning_rate": 1.6061691410386234e-05, + "loss": 0.6905182600021362, + "step": 2245 + }, + { + "epoch": 0.6568211726860652, + "grad_norm": 1.3780634556903595, + "learning_rate": 1.6057845486668984e-05, + "loss": 0.6733677387237549, + "step": 2246 + }, + { + "epoch": 0.6571136131013307, + "grad_norm": 1.2340466884298544, + "learning_rate": 1.6053998146967186e-05, + "loss": 0.5368545055389404, + "step": 2247 + }, + { + "epoch": 0.657406053516596, + "grad_norm": 1.4627351725055429, + "learning_rate": 1.6050149392180125e-05, + "loss": 0.6995619535446167, + "step": 2248 + }, + { + "epoch": 0.6576984939318614, + "grad_norm": 
1.2552392614352392, + "learning_rate": 1.6046299223207432e-05, + "loss": 0.6637085676193237, + "step": 2249 + }, + { + "epoch": 0.6579909343471267, + "grad_norm": 1.3894808498189977, + "learning_rate": 1.6042447640949058e-05, + "loss": 0.5834380388259888, + "step": 2250 + }, + { + "epoch": 0.6582833747623922, + "grad_norm": 1.1700440243092598, + "learning_rate": 1.6038594646305285e-05, + "loss": 0.5735288858413696, + "step": 2251 + }, + { + "epoch": 0.6585758151776575, + "grad_norm": 1.274727070163542, + "learning_rate": 1.6034740240176728e-05, + "loss": 0.6227413415908813, + "step": 2252 + }, + { + "epoch": 0.658868255592923, + "grad_norm": 1.5091805441488135, + "learning_rate": 1.6030884423464336e-05, + "loss": 0.6881246566772461, + "step": 2253 + }, + { + "epoch": 0.6591606960081884, + "grad_norm": 1.3237201049051734, + "learning_rate": 1.6027027197069376e-05, + "loss": 0.6059132814407349, + "step": 2254 + }, + { + "epoch": 0.6594531364234537, + "grad_norm": 1.5070949945133527, + "learning_rate": 1.6023168561893453e-05, + "loss": 0.5829097032546997, + "step": 2255 + }, + { + "epoch": 0.6597455768387191, + "grad_norm": 1.1821076640408643, + "learning_rate": 1.60193085188385e-05, + "loss": 0.5173588991165161, + "step": 2256 + }, + { + "epoch": 0.6600380172539845, + "grad_norm": 1.0404057140160172, + "learning_rate": 1.601544706880678e-05, + "loss": 0.5128534436225891, + "step": 2257 + }, + { + "epoch": 0.6603304576692499, + "grad_norm": 1.4274902732235735, + "learning_rate": 1.601158421270088e-05, + "loss": 0.5472848415374756, + "step": 2258 + }, + { + "epoch": 0.6606228980845152, + "grad_norm": 1.2505155913554076, + "learning_rate": 1.6007719951423725e-05, + "loss": 0.5775434970855713, + "step": 2259 + }, + { + "epoch": 0.6609153384997807, + "grad_norm": 1.2760490287043558, + "learning_rate": 1.6003854285878558e-05, + "loss": 0.5529654622077942, + "step": 2260 + }, + { + "epoch": 0.661207778915046, + "grad_norm": 1.2950239037035343, + "learning_rate": 
1.5999987216968954e-05, + "loss": 0.5295222997665405, + "step": 2261 + }, + { + "epoch": 0.6615002193303114, + "grad_norm": 1.42880093351922, + "learning_rate": 1.5996118745598817e-05, + "loss": 0.6782759428024292, + "step": 2262 + }, + { + "epoch": 0.6617926597455769, + "grad_norm": 1.5123560217291456, + "learning_rate": 1.5992248872672384e-05, + "loss": 0.7698723077774048, + "step": 2263 + }, + { + "epoch": 0.6620851001608422, + "grad_norm": 1.224014553870767, + "learning_rate": 1.5988377599094208e-05, + "loss": 0.5056325793266296, + "step": 2264 + }, + { + "epoch": 0.6623775405761076, + "grad_norm": 1.2811286417806291, + "learning_rate": 1.598450492576918e-05, + "loss": 0.6748740673065186, + "step": 2265 + }, + { + "epoch": 0.662669980991373, + "grad_norm": 1.4413699029522251, + "learning_rate": 1.598063085360251e-05, + "loss": 0.6594111919403076, + "step": 2266 + }, + { + "epoch": 0.6629624214066384, + "grad_norm": 1.490546706478741, + "learning_rate": 1.5976755383499743e-05, + "loss": 0.5942472815513611, + "step": 2267 + }, + { + "epoch": 0.6632548618219037, + "grad_norm": 1.4166382340274284, + "learning_rate": 1.5972878516366742e-05, + "loss": 0.6956725120544434, + "step": 2268 + }, + { + "epoch": 0.6635473022371692, + "grad_norm": 1.5479108671282409, + "learning_rate": 1.5969000253109707e-05, + "loss": 0.6743103265762329, + "step": 2269 + }, + { + "epoch": 0.6638397426524346, + "grad_norm": 1.2415014970437994, + "learning_rate": 1.596512059463515e-05, + "loss": 0.5452187061309814, + "step": 2270 + }, + { + "epoch": 0.6641321830676999, + "grad_norm": 1.305856048148522, + "learning_rate": 1.5961239541849923e-05, + "loss": 0.6064754128456116, + "step": 2271 + }, + { + "epoch": 0.6644246234829654, + "grad_norm": 1.1672873660489786, + "learning_rate": 1.59573570956612e-05, + "loss": 0.5879498720169067, + "step": 2272 + }, + { + "epoch": 0.6647170638982307, + "grad_norm": 1.2464190562799757, + "learning_rate": 1.595347325697648e-05, + "loss": 0.6610721945762634, + 
"step": 2273 + }, + { + "epoch": 0.6650095043134961, + "grad_norm": 1.5001752360693776, + "learning_rate": 1.594958802670358e-05, + "loss": 0.6674839854240417, + "step": 2274 + }, + { + "epoch": 0.6653019447287615, + "grad_norm": 1.2669024802691538, + "learning_rate": 1.5945701405750654e-05, + "loss": 0.5189186334609985, + "step": 2275 + }, + { + "epoch": 0.6655943851440269, + "grad_norm": 1.096047033017533, + "learning_rate": 1.5941813395026174e-05, + "loss": 0.5225304365158081, + "step": 2276 + }, + { + "epoch": 0.6658868255592922, + "grad_norm": 1.1982797539630743, + "learning_rate": 1.5937923995438942e-05, + "loss": 0.5426747798919678, + "step": 2277 + }, + { + "epoch": 0.6661792659745577, + "grad_norm": 1.1331316680397499, + "learning_rate": 1.593403320789808e-05, + "loss": 0.6408158540725708, + "step": 2278 + }, + { + "epoch": 0.6664717063898231, + "grad_norm": 1.2777185085969938, + "learning_rate": 1.5930141033313034e-05, + "loss": 0.6213311553001404, + "step": 2279 + }, + { + "epoch": 0.6667641468050884, + "grad_norm": 1.2938845863415658, + "learning_rate": 1.5926247472593575e-05, + "loss": 0.6538233757019043, + "step": 2280 + }, + { + "epoch": 0.6670565872203539, + "grad_norm": 1.4396815547692279, + "learning_rate": 1.5922352526649803e-05, + "loss": 0.6714701056480408, + "step": 2281 + }, + { + "epoch": 0.6673490276356192, + "grad_norm": 1.2875131974555427, + "learning_rate": 1.5918456196392137e-05, + "loss": 0.501068115234375, + "step": 2282 + }, + { + "epoch": 0.6676414680508846, + "grad_norm": 1.483722651200639, + "learning_rate": 1.5914558482731317e-05, + "loss": 0.6551339626312256, + "step": 2283 + }, + { + "epoch": 0.66793390846615, + "grad_norm": 1.575561891265528, + "learning_rate": 1.5910659386578415e-05, + "loss": 0.666611909866333, + "step": 2284 + }, + { + "epoch": 0.6682263488814154, + "grad_norm": 1.3058077151253007, + "learning_rate": 1.590675890884482e-05, + "loss": 0.6612483859062195, + "step": 2285 + }, + { + "epoch": 0.6685187892966808, 
+ "grad_norm": 1.535602248808955, + "learning_rate": 1.590285705044224e-05, + "loss": 0.5299272537231445, + "step": 2286 + }, + { + "epoch": 0.6688112297119462, + "grad_norm": 1.5209550044520355, + "learning_rate": 1.589895381228272e-05, + "loss": 0.6873815655708313, + "step": 2287 + }, + { + "epoch": 0.6691036701272116, + "grad_norm": 1.333463107294571, + "learning_rate": 1.5895049195278608e-05, + "loss": 0.6473613977432251, + "step": 2288 + }, + { + "epoch": 0.6693961105424769, + "grad_norm": 1.4389212790848083, + "learning_rate": 1.589114320034259e-05, + "loss": 0.6600902080535889, + "step": 2289 + }, + { + "epoch": 0.6696885509577424, + "grad_norm": 1.7581559017014303, + "learning_rate": 1.5887235828387667e-05, + "loss": 0.6066039800643921, + "step": 2290 + }, + { + "epoch": 0.6699809913730077, + "grad_norm": 1.2475073124572584, + "learning_rate": 1.5883327080327165e-05, + "loss": 0.5411461591720581, + "step": 2291 + }, + { + "epoch": 0.6702734317882731, + "grad_norm": 1.3264098990068387, + "learning_rate": 1.587941695707473e-05, + "loss": 0.5678138136863708, + "step": 2292 + }, + { + "epoch": 0.6705658722035386, + "grad_norm": 1.2017893940389541, + "learning_rate": 1.5875505459544327e-05, + "loss": 0.6175323724746704, + "step": 2293 + }, + { + "epoch": 0.6708583126188039, + "grad_norm": 1.2255154092981597, + "learning_rate": 1.587159258865025e-05, + "loss": 0.5790976285934448, + "step": 2294 + }, + { + "epoch": 0.6711507530340693, + "grad_norm": 1.4070059880127774, + "learning_rate": 1.58676783453071e-05, + "loss": 0.5891247391700745, + "step": 2295 + }, + { + "epoch": 0.6714431934493347, + "grad_norm": 1.3680740765730994, + "learning_rate": 1.5863762730429817e-05, + "loss": 0.5604299902915955, + "step": 2296 + }, + { + "epoch": 0.6717356338646001, + "grad_norm": 1.156075846793115, + "learning_rate": 1.585984574493365e-05, + "loss": 0.5402317047119141, + "step": 2297 + }, + { + "epoch": 0.6720280742798654, + "grad_norm": 1.2729484704762741, + "learning_rate": 
1.5855927389734163e-05, + "loss": 0.5569097995758057, + "step": 2298 + }, + { + "epoch": 0.6723205146951309, + "grad_norm": 1.792109537125727, + "learning_rate": 1.5852007665747255e-05, + "loss": 0.6754734516143799, + "step": 2299 + }, + { + "epoch": 0.6726129551103962, + "grad_norm": 1.2015482502693244, + "learning_rate": 1.584808657388914e-05, + "loss": 0.5555064678192139, + "step": 2300 + }, + { + "epoch": 0.6729053955256616, + "grad_norm": 1.2978798977032824, + "learning_rate": 1.584416411507634e-05, + "loss": 0.5735480785369873, + "step": 2301 + }, + { + "epoch": 0.6731978359409271, + "grad_norm": 1.3948021707686127, + "learning_rate": 1.5840240290225713e-05, + "loss": 0.6084697842597961, + "step": 2302 + }, + { + "epoch": 0.6734902763561924, + "grad_norm": 1.3972987341637648, + "learning_rate": 1.5836315100254427e-05, + "loss": 0.5747361779212952, + "step": 2303 + }, + { + "epoch": 0.6737827167714578, + "grad_norm": 1.3042539657521541, + "learning_rate": 1.583238854607997e-05, + "loss": 0.6597394943237305, + "step": 2304 + }, + { + "epoch": 0.6740751571867232, + "grad_norm": 1.2885200657030746, + "learning_rate": 1.582846062862016e-05, + "loss": 0.6054418087005615, + "step": 2305 + }, + { + "epoch": 0.6743675976019886, + "grad_norm": 1.4670353156004656, + "learning_rate": 1.5824531348793106e-05, + "loss": 0.6897715330123901, + "step": 2306 + }, + { + "epoch": 0.6746600380172539, + "grad_norm": 1.2379672312585208, + "learning_rate": 1.5820600707517265e-05, + "loss": 0.5438888072967529, + "step": 2307 + }, + { + "epoch": 0.6749524784325194, + "grad_norm": 1.3511076823584265, + "learning_rate": 1.5816668705711402e-05, + "loss": 0.5139850378036499, + "step": 2308 + }, + { + "epoch": 0.6752449188477848, + "grad_norm": 1.3878243291723096, + "learning_rate": 1.5812735344294594e-05, + "loss": 0.5970615744590759, + "step": 2309 + }, + { + "epoch": 0.6755373592630501, + "grad_norm": 1.5290136714699685, + "learning_rate": 1.580880062418624e-05, + "loss": 
0.6206730604171753, + "step": 2310 + }, + { + "epoch": 0.6758297996783156, + "grad_norm": 1.5283867982171593, + "learning_rate": 1.580486454630606e-05, + "loss": 0.6545864939689636, + "step": 2311 + }, + { + "epoch": 0.6761222400935809, + "grad_norm": 1.6726831788405112, + "learning_rate": 1.5800927111574084e-05, + "loss": 0.6284571290016174, + "step": 2312 + }, + { + "epoch": 0.6764146805088463, + "grad_norm": 1.3062366838416066, + "learning_rate": 1.5796988320910665e-05, + "loss": 0.6662822365760803, + "step": 2313 + }, + { + "epoch": 0.6767071209241117, + "grad_norm": 1.4857961720461585, + "learning_rate": 1.5793048175236477e-05, + "loss": 0.6952080130577087, + "step": 2314 + }, + { + "epoch": 0.6769995613393771, + "grad_norm": 1.1527122349254486, + "learning_rate": 1.5789106675472496e-05, + "loss": 0.55562424659729, + "step": 2315 + }, + { + "epoch": 0.6772920017546424, + "grad_norm": 1.417075363017466, + "learning_rate": 1.578516382254003e-05, + "loss": 0.696354866027832, + "step": 2316 + }, + { + "epoch": 0.6775844421699079, + "grad_norm": 1.2481046919985836, + "learning_rate": 1.5781219617360695e-05, + "loss": 0.5764954686164856, + "step": 2317 + }, + { + "epoch": 0.6778768825851733, + "grad_norm": 1.5617477082955222, + "learning_rate": 1.577727406085642e-05, + "loss": 0.6944533586502075, + "step": 2318 + }, + { + "epoch": 0.6781693230004386, + "grad_norm": 1.5273473613933928, + "learning_rate": 1.5773327153949465e-05, + "loss": 0.5517882704734802, + "step": 2319 + }, + { + "epoch": 0.6784617634157041, + "grad_norm": 1.3495609581159556, + "learning_rate": 1.576937889756239e-05, + "loss": 0.6151533126831055, + "step": 2320 + }, + { + "epoch": 0.6787542038309694, + "grad_norm": 1.3729348393231853, + "learning_rate": 1.5765429292618075e-05, + "loss": 0.6221417784690857, + "step": 2321 + }, + { + "epoch": 0.6790466442462348, + "grad_norm": 1.5561656408525308, + "learning_rate": 1.576147834003972e-05, + "loss": 0.6218827962875366, + "step": 2322 + }, + { + 
"epoch": 0.6793390846615002, + "grad_norm": 1.2844085482190328, + "learning_rate": 1.575752604075083e-05, + "loss": 0.689696192741394, + "step": 2323 + }, + { + "epoch": 0.6796315250767656, + "grad_norm": 1.459910366351317, + "learning_rate": 1.5753572395675234e-05, + "loss": 0.6457825899124146, + "step": 2324 + }, + { + "epoch": 0.679923965492031, + "grad_norm": 1.660980107305809, + "learning_rate": 1.5749617405737075e-05, + "loss": 0.6261845827102661, + "step": 2325 + }, + { + "epoch": 0.6802164059072964, + "grad_norm": 1.5113706854166593, + "learning_rate": 1.5745661071860802e-05, + "loss": 0.6631760597229004, + "step": 2326 + }, + { + "epoch": 0.6805088463225618, + "grad_norm": 1.4700703601826162, + "learning_rate": 1.574170339497119e-05, + "loss": 0.6223125457763672, + "step": 2327 + }, + { + "epoch": 0.6808012867378271, + "grad_norm": 1.4289384563362724, + "learning_rate": 1.5737744375993318e-05, + "loss": 0.5649152398109436, + "step": 2328 + }, + { + "epoch": 0.6810937271530926, + "grad_norm": 1.3637036537520066, + "learning_rate": 1.573378401585259e-05, + "loss": 0.6822011470794678, + "step": 2329 + }, + { + "epoch": 0.6813861675683579, + "grad_norm": 1.243454490323945, + "learning_rate": 1.5729822315474704e-05, + "loss": 0.4853206276893616, + "step": 2330 + }, + { + "epoch": 0.6816786079836233, + "grad_norm": 1.3491879449563893, + "learning_rate": 1.572585927578569e-05, + "loss": 0.6410783529281616, + "step": 2331 + }, + { + "epoch": 0.6819710483988888, + "grad_norm": 1.2349335330440738, + "learning_rate": 1.572189489771189e-05, + "loss": 0.607154369354248, + "step": 2332 + }, + { + "epoch": 0.6822634888141541, + "grad_norm": 1.2303800918258645, + "learning_rate": 1.571792918217994e-05, + "loss": 0.5079061388969421, + "step": 2333 + }, + { + "epoch": 0.6825559292294195, + "grad_norm": 1.355109139858454, + "learning_rate": 1.5713962130116812e-05, + "loss": 0.534178614616394, + "step": 2334 + }, + { + "epoch": 0.6828483696446849, + "grad_norm": 
1.099124567807314, + "learning_rate": 1.5709993742449777e-05, + "loss": 0.6172807812690735, + "step": 2335 + }, + { + "epoch": 0.6831408100599503, + "grad_norm": 1.468863618054796, + "learning_rate": 1.5706024020106425e-05, + "loss": 0.6863975524902344, + "step": 2336 + }, + { + "epoch": 0.6834332504752156, + "grad_norm": 1.3542187494807805, + "learning_rate": 1.570205296401465e-05, + "loss": 0.6314880847930908, + "step": 2337 + }, + { + "epoch": 0.6837256908904811, + "grad_norm": 1.4888474767820694, + "learning_rate": 1.5698080575102662e-05, + "loss": 0.5420910120010376, + "step": 2338 + }, + { + "epoch": 0.6840181313057464, + "grad_norm": 1.545548665208996, + "learning_rate": 1.5694106854298988e-05, + "loss": 0.6598352789878845, + "step": 2339 + }, + { + "epoch": 0.6843105717210118, + "grad_norm": 1.1855737189309028, + "learning_rate": 1.5690131802532454e-05, + "loss": 0.49957770109176636, + "step": 2340 + }, + { + "epoch": 0.6846030121362773, + "grad_norm": 1.3910703437631544, + "learning_rate": 1.568615542073221e-05, + "loss": 0.7217017412185669, + "step": 2341 + }, + { + "epoch": 0.6848954525515426, + "grad_norm": 1.383168011584397, + "learning_rate": 1.5682177709827705e-05, + "loss": 0.5824606418609619, + "step": 2342 + }, + { + "epoch": 0.685187892966808, + "grad_norm": 1.4861418668417947, + "learning_rate": 1.567819867074871e-05, + "loss": 0.5932704210281372, + "step": 2343 + }, + { + "epoch": 0.6854803333820734, + "grad_norm": 1.1927307747773088, + "learning_rate": 1.5674218304425304e-05, + "loss": 0.6098836660385132, + "step": 2344 + }, + { + "epoch": 0.6857727737973388, + "grad_norm": 1.3302018518433079, + "learning_rate": 1.5670236611787865e-05, + "loss": 0.5158270597457886, + "step": 2345 + }, + { + "epoch": 0.6860652142126041, + "grad_norm": 1.431950758183516, + "learning_rate": 1.5666253593767095e-05, + "loss": 0.7840174436569214, + "step": 2346 + }, + { + "epoch": 0.6863576546278696, + "grad_norm": 1.3462478651155303, + "learning_rate": 
1.5662269251294e-05, + "loss": 0.5665150880813599, + "step": 2347 + }, + { + "epoch": 0.686650095043135, + "grad_norm": 1.2308130347699304, + "learning_rate": 1.5658283585299894e-05, + "loss": 0.5801588296890259, + "step": 2348 + }, + { + "epoch": 0.6869425354584003, + "grad_norm": 1.487298330014143, + "learning_rate": 1.56542965967164e-05, + "loss": 0.759188175201416, + "step": 2349 + }, + { + "epoch": 0.6872349758736658, + "grad_norm": 1.5717076197736846, + "learning_rate": 1.565030828647546e-05, + "loss": 0.7182703018188477, + "step": 2350 + }, + { + "epoch": 0.6875274162889311, + "grad_norm": 1.3681215378392677, + "learning_rate": 1.564631865550931e-05, + "loss": 0.7172018885612488, + "step": 2351 + }, + { + "epoch": 0.6878198567041965, + "grad_norm": 1.3897042930637002, + "learning_rate": 1.5642327704750502e-05, + "loss": 0.5959519743919373, + "step": 2352 + }, + { + "epoch": 0.6881122971194619, + "grad_norm": 1.3686338632915553, + "learning_rate": 1.5638335435131902e-05, + "loss": 0.5531836748123169, + "step": 2353 + }, + { + "epoch": 0.6884047375347273, + "grad_norm": 1.2097339017222586, + "learning_rate": 1.5634341847586676e-05, + "loss": 0.672225296497345, + "step": 2354 + }, + { + "epoch": 0.6886971779499926, + "grad_norm": 1.3740176007353215, + "learning_rate": 1.5630346943048297e-05, + "loss": 0.5721465349197388, + "step": 2355 + }, + { + "epoch": 0.6889896183652581, + "grad_norm": 1.2416767467837069, + "learning_rate": 1.5626350722450555e-05, + "loss": 0.6357900500297546, + "step": 2356 + }, + { + "epoch": 0.6892820587805235, + "grad_norm": 1.241847883566859, + "learning_rate": 1.5622353186727542e-05, + "loss": 0.6348878145217896, + "step": 2357 + }, + { + "epoch": 0.6895744991957888, + "grad_norm": 1.390537638221337, + "learning_rate": 1.5618354336813656e-05, + "loss": 0.5473623275756836, + "step": 2358 + }, + { + "epoch": 0.6898669396110543, + "grad_norm": 1.4299851255948683, + "learning_rate": 1.5614354173643606e-05, + "loss": 0.8284158706665039, + 
"step": 2359 + }, + { + "epoch": 0.6901593800263196, + "grad_norm": 1.3561063303885135, + "learning_rate": 1.5610352698152396e-05, + "loss": 0.5915359854698181, + "step": 2360 + }, + { + "epoch": 0.690451820441585, + "grad_norm": 1.434488423567872, + "learning_rate": 1.560634991127536e-05, + "loss": 0.6173555254936218, + "step": 2361 + }, + { + "epoch": 0.6907442608568504, + "grad_norm": 1.2348756002421877, + "learning_rate": 1.560234581394812e-05, + "loss": 0.5551577806472778, + "step": 2362 + }, + { + "epoch": 0.6910367012721158, + "grad_norm": 1.6912535037446208, + "learning_rate": 1.559834040710661e-05, + "loss": 0.7160264253616333, + "step": 2363 + }, + { + "epoch": 0.6913291416873812, + "grad_norm": 1.4348139771874249, + "learning_rate": 1.5594333691687062e-05, + "loss": 0.5986248850822449, + "step": 2364 + }, + { + "epoch": 0.6916215821026466, + "grad_norm": 1.6827348555719241, + "learning_rate": 1.559032566862603e-05, + "loss": 0.7347019910812378, + "step": 2365 + }, + { + "epoch": 0.691914022517912, + "grad_norm": 1.1496166027771255, + "learning_rate": 1.5586316338860363e-05, + "loss": 0.502663791179657, + "step": 2366 + }, + { + "epoch": 0.6922064629331773, + "grad_norm": 1.1610976211375774, + "learning_rate": 1.558230570332722e-05, + "loss": 0.5026617050170898, + "step": 2367 + }, + { + "epoch": 0.6924989033484428, + "grad_norm": 1.3196703072069724, + "learning_rate": 1.5578293762964057e-05, + "loss": 0.6091101169586182, + "step": 2368 + }, + { + "epoch": 0.6927913437637081, + "grad_norm": 1.1607138049044183, + "learning_rate": 1.5574280518708645e-05, + "loss": 0.6202579736709595, + "step": 2369 + }, + { + "epoch": 0.6930837841789735, + "grad_norm": 1.3867301068189375, + "learning_rate": 1.557026597149905e-05, + "loss": 0.6532948017120361, + "step": 2370 + }, + { + "epoch": 0.693376224594239, + "grad_norm": 1.2799465632685962, + "learning_rate": 1.5566250122273658e-05, + "loss": 0.6197448372840881, + "step": 2371 + }, + { + "epoch": 0.6936686650095043, + 
"grad_norm": 1.330123548058068, + "learning_rate": 1.556223297197114e-05, + "loss": 0.6181553602218628, + "step": 2372 + }, + { + "epoch": 0.6939611054247697, + "grad_norm": 1.3757625130132767, + "learning_rate": 1.5558214521530482e-05, + "loss": 0.6015427112579346, + "step": 2373 + }, + { + "epoch": 0.6942535458400351, + "grad_norm": 1.4511778478720454, + "learning_rate": 1.555419477189098e-05, + "loss": 0.6204534769058228, + "step": 2374 + }, + { + "epoch": 0.6945459862553005, + "grad_norm": 1.2237746404921626, + "learning_rate": 1.5550173723992218e-05, + "loss": 0.5914584994316101, + "step": 2375 + }, + { + "epoch": 0.6948384266705658, + "grad_norm": 1.2633817911858796, + "learning_rate": 1.554615137877409e-05, + "loss": 0.5077188611030579, + "step": 2376 + }, + { + "epoch": 0.6951308670858313, + "grad_norm": 1.1523903505061626, + "learning_rate": 1.55421277371768e-05, + "loss": 0.5560270547866821, + "step": 2377 + }, + { + "epoch": 0.6954233075010966, + "grad_norm": 1.6214020445600121, + "learning_rate": 1.553810280014085e-05, + "loss": 0.7064549922943115, + "step": 2378 + }, + { + "epoch": 0.695715747916362, + "grad_norm": 1.4249847873824701, + "learning_rate": 1.5534076568607043e-05, + "loss": 0.7433110475540161, + "step": 2379 + }, + { + "epoch": 0.6960081883316275, + "grad_norm": 1.4661372034410074, + "learning_rate": 1.553004904351648e-05, + "loss": 0.6061110496520996, + "step": 2380 + }, + { + "epoch": 0.6963006287468928, + "grad_norm": 1.3530915937691412, + "learning_rate": 1.5526020225810583e-05, + "loss": 0.604006290435791, + "step": 2381 + }, + { + "epoch": 0.6965930691621582, + "grad_norm": 1.3193058416919141, + "learning_rate": 1.5521990116431052e-05, + "loss": 0.6221635341644287, + "step": 2382 + }, + { + "epoch": 0.6968855095774236, + "grad_norm": 1.17260855579956, + "learning_rate": 1.551795871631991e-05, + "loss": 0.5848093032836914, + "step": 2383 + }, + { + "epoch": 0.697177949992689, + "grad_norm": 1.3909866883805502, + "learning_rate": 
1.5513926026419464e-05, + "loss": 0.6451606154441833, + "step": 2384 + }, + { + "epoch": 0.6974703904079543, + "grad_norm": 1.2515682694896817, + "learning_rate": 1.5509892047672336e-05, + "loss": 0.7922245264053345, + "step": 2385 + }, + { + "epoch": 0.6977628308232198, + "grad_norm": 1.501698757307051, + "learning_rate": 1.5505856781021443e-05, + "loss": 0.6458885073661804, + "step": 2386 + }, + { + "epoch": 0.6980552712384852, + "grad_norm": 1.3253141303151825, + "learning_rate": 1.5501820227410002e-05, + "loss": 0.5989570617675781, + "step": 2387 + }, + { + "epoch": 0.6983477116537505, + "grad_norm": 1.4240123629840666, + "learning_rate": 1.5497782387781536e-05, + "loss": 0.740998387336731, + "step": 2388 + }, + { + "epoch": 0.698640152069016, + "grad_norm": 1.4547948512453808, + "learning_rate": 1.5493743263079866e-05, + "loss": 0.63981032371521, + "step": 2389 + }, + { + "epoch": 0.6989325924842813, + "grad_norm": 1.325001348454028, + "learning_rate": 1.5489702854249106e-05, + "loss": 0.766716480255127, + "step": 2390 + }, + { + "epoch": 0.6992250328995467, + "grad_norm": 1.541044208915787, + "learning_rate": 1.5485661162233684e-05, + "loss": 0.7879365086555481, + "step": 2391 + }, + { + "epoch": 0.6995174733148121, + "grad_norm": 1.3532949065271656, + "learning_rate": 1.5481618187978322e-05, + "loss": 0.6005786657333374, + "step": 2392 + }, + { + "epoch": 0.6998099137300775, + "grad_norm": 1.2952910023515818, + "learning_rate": 1.5477573932428033e-05, + "loss": 0.6207927465438843, + "step": 2393 + }, + { + "epoch": 0.7001023541453428, + "grad_norm": 1.4490674696543298, + "learning_rate": 1.5473528396528144e-05, + "loss": 0.5582053661346436, + "step": 2394 + }, + { + "epoch": 0.7003947945606083, + "grad_norm": 1.6315416515790502, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.5701307058334351, + "step": 2395 + }, + { + "epoch": 0.7006872349758737, + "grad_norm": 1.3804181292115258, + "learning_rate": 1.546543348746233e-05, + "loss": 
0.6201068162918091, + "step": 2396 + }, + { + "epoch": 0.700979675391139, + "grad_norm": 1.3282086716914991, + "learning_rate": 1.5461384116188546e-05, + "loss": 0.6102321147918701, + "step": 2397 + }, + { + "epoch": 0.7012721158064045, + "grad_norm": 1.361382387889105, + "learning_rate": 1.545733346834943e-05, + "loss": 0.5445820093154907, + "step": 2398 + }, + { + "epoch": 0.7015645562216698, + "grad_norm": 1.3134018034606705, + "learning_rate": 1.5453281544891797e-05, + "loss": 0.5278012752532959, + "step": 2399 + }, + { + "epoch": 0.7018569966369352, + "grad_norm": 1.6159840401286016, + "learning_rate": 1.544922834676276e-05, + "loss": 0.7051252126693726, + "step": 2400 + }, + { + "epoch": 0.7021494370522006, + "grad_norm": 1.3552623655435003, + "learning_rate": 1.544517387490973e-05, + "loss": 0.6024646759033203, + "step": 2401 + }, + { + "epoch": 0.702441877467466, + "grad_norm": 1.3323978020414873, + "learning_rate": 1.5441118130280406e-05, + "loss": 0.5563746094703674, + "step": 2402 + }, + { + "epoch": 0.7027343178827314, + "grad_norm": 1.3671297363224464, + "learning_rate": 1.5437061113822805e-05, + "loss": 0.5971669554710388, + "step": 2403 + }, + { + "epoch": 0.7030267582979968, + "grad_norm": 1.5082475685517047, + "learning_rate": 1.5433002826485234e-05, + "loss": 0.5846019983291626, + "step": 2404 + }, + { + "epoch": 0.7033191987132622, + "grad_norm": 1.2921876796744827, + "learning_rate": 1.5428943269216278e-05, + "loss": 0.5571885108947754, + "step": 2405 + }, + { + "epoch": 0.7036116391285275, + "grad_norm": 1.15652993390593, + "learning_rate": 1.542488244296484e-05, + "loss": 0.4770846962928772, + "step": 2406 + }, + { + "epoch": 0.703904079543793, + "grad_norm": 1.6398352091801953, + "learning_rate": 1.542082034868012e-05, + "loss": 0.636760950088501, + "step": 2407 + }, + { + "epoch": 0.7041965199590583, + "grad_norm": 1.6877906333209267, + "learning_rate": 1.5416756987311603e-05, + "loss": 0.7264662981033325, + "step": 2408 + }, + { + "epoch": 
0.7044889603743237, + "grad_norm": 1.372256728403267, + "learning_rate": 1.5412692359809073e-05, + "loss": 0.6723978519439697, + "step": 2409 + }, + { + "epoch": 0.7047814007895892, + "grad_norm": 1.4362583031777838, + "learning_rate": 1.5408626467122612e-05, + "loss": 0.6205083727836609, + "step": 2410 + }, + { + "epoch": 0.7050738412048545, + "grad_norm": 1.4495567778043355, + "learning_rate": 1.54045593102026e-05, + "loss": 0.5980903506278992, + "step": 2411 + }, + { + "epoch": 0.7053662816201199, + "grad_norm": 1.4897959908790472, + "learning_rate": 1.540049088999971e-05, + "loss": 0.6311691999435425, + "step": 2412 + }, + { + "epoch": 0.7056587220353853, + "grad_norm": 1.428243709143454, + "learning_rate": 1.539642120746491e-05, + "loss": 0.5872593522071838, + "step": 2413 + }, + { + "epoch": 0.7059511624506507, + "grad_norm": 1.351001450570791, + "learning_rate": 1.5392350263549462e-05, + "loss": 0.5037539005279541, + "step": 2414 + }, + { + "epoch": 0.706243602865916, + "grad_norm": 1.4775045660401276, + "learning_rate": 1.538827805920493e-05, + "loss": 0.5917855501174927, + "step": 2415 + }, + { + "epoch": 0.7065360432811815, + "grad_norm": 1.3687769613569196, + "learning_rate": 1.538420459538316e-05, + "loss": 0.6350749731063843, + "step": 2416 + }, + { + "epoch": 0.7068284836964468, + "grad_norm": 1.330110483636511, + "learning_rate": 1.53801298730363e-05, + "loss": 0.6828908920288086, + "step": 2417 + }, + { + "epoch": 0.7071209241117122, + "grad_norm": 1.5864329436081315, + "learning_rate": 1.5376053893116796e-05, + "loss": 0.6307995319366455, + "step": 2418 + }, + { + "epoch": 0.7074133645269777, + "grad_norm": 1.3609756396375527, + "learning_rate": 1.5371976656577385e-05, + "loss": 0.5305014252662659, + "step": 2419 + }, + { + "epoch": 0.707705804942243, + "grad_norm": 1.2953614031977334, + "learning_rate": 1.536789816437109e-05, + "loss": 0.560103178024292, + "step": 2420 + }, + { + "epoch": 0.7079982453575084, + "grad_norm": 1.4823675619867462, + 
"learning_rate": 1.5363818417451236e-05, + "loss": 0.5449249148368835, + "step": 2421 + }, + { + "epoch": 0.7082906857727738, + "grad_norm": 1.575423149049035, + "learning_rate": 1.5359737416771438e-05, + "loss": 0.7456427812576294, + "step": 2422 + }, + { + "epoch": 0.7085831261880392, + "grad_norm": 1.4606336998212586, + "learning_rate": 1.5355655163285607e-05, + "loss": 0.5401932597160339, + "step": 2423 + }, + { + "epoch": 0.7088755666033045, + "grad_norm": 1.4384817217494414, + "learning_rate": 1.5351571657947947e-05, + "loss": 0.6215255856513977, + "step": 2424 + }, + { + "epoch": 0.70916800701857, + "grad_norm": 1.454238489435378, + "learning_rate": 1.5347486901712946e-05, + "loss": 0.724073052406311, + "step": 2425 + }, + { + "epoch": 0.7094604474338354, + "grad_norm": 1.280381472439187, + "learning_rate": 1.5343400895535402e-05, + "loss": 0.6375223398208618, + "step": 2426 + }, + { + "epoch": 0.7097528878491007, + "grad_norm": 1.4740965908748953, + "learning_rate": 1.533931364037038e-05, + "loss": 0.6087045669555664, + "step": 2427 + }, + { + "epoch": 0.7100453282643662, + "grad_norm": 1.4709664710326, + "learning_rate": 1.5335225137173262e-05, + "loss": 0.7927658557891846, + "step": 2428 + }, + { + "epoch": 0.7103377686796315, + "grad_norm": 1.4583904783773962, + "learning_rate": 1.5331135386899702e-05, + "loss": 0.6312417387962341, + "step": 2429 + }, + { + "epoch": 0.7106302090948969, + "grad_norm": 1.3514647325044575, + "learning_rate": 1.5327044390505666e-05, + "loss": 0.6856948137283325, + "step": 2430 + }, + { + "epoch": 0.7109226495101623, + "grad_norm": 1.3486136616110067, + "learning_rate": 1.532295214894739e-05, + "loss": 0.5683865547180176, + "step": 2431 + }, + { + "epoch": 0.7112150899254277, + "grad_norm": 1.5290242403967753, + "learning_rate": 1.5318858663181412e-05, + "loss": 0.6208291053771973, + "step": 2432 + }, + { + "epoch": 0.711507530340693, + "grad_norm": 1.5265891330435364, + "learning_rate": 1.531476393416456e-05, + "loss": 
0.6751389503479004, + "step": 2433 + }, + { + "epoch": 0.7117999707559585, + "grad_norm": 1.1685210774635664, + "learning_rate": 1.5310667962853954e-05, + "loss": 0.422024667263031, + "step": 2434 + }, + { + "epoch": 0.7120924111712239, + "grad_norm": 1.377587949543332, + "learning_rate": 1.5306570750207003e-05, + "loss": 0.6714169979095459, + "step": 2435 + }, + { + "epoch": 0.7123848515864892, + "grad_norm": 1.4808127088080212, + "learning_rate": 1.53024722971814e-05, + "loss": 0.5757386088371277, + "step": 2436 + }, + { + "epoch": 0.7126772920017547, + "grad_norm": 1.4790386820456973, + "learning_rate": 1.529837260473514e-05, + "loss": 0.5686037540435791, + "step": 2437 + }, + { + "epoch": 0.71296973241702, + "grad_norm": 1.570681384959534, + "learning_rate": 1.5294271673826498e-05, + "loss": 0.7601959705352783, + "step": 2438 + }, + { + "epoch": 0.7132621728322854, + "grad_norm": 1.44814607189911, + "learning_rate": 1.529016950541404e-05, + "loss": 0.5654840469360352, + "step": 2439 + }, + { + "epoch": 0.7135546132475508, + "grad_norm": 1.5844428110219366, + "learning_rate": 1.5286066100456623e-05, + "loss": 0.7009234428405762, + "step": 2440 + }, + { + "epoch": 0.7138470536628162, + "grad_norm": 1.166961279939158, + "learning_rate": 1.52819614599134e-05, + "loss": 0.4856370687484741, + "step": 2441 + }, + { + "epoch": 0.7141394940780816, + "grad_norm": 1.2283639685035557, + "learning_rate": 1.52778555847438e-05, + "loss": 0.5135019421577454, + "step": 2442 + }, + { + "epoch": 0.714431934493347, + "grad_norm": 1.3437978900697465, + "learning_rate": 1.5273748475907542e-05, + "loss": 0.7350283861160278, + "step": 2443 + }, + { + "epoch": 0.7147243749086124, + "grad_norm": 1.1274424242274286, + "learning_rate": 1.5269640134364646e-05, + "loss": 0.5985803604125977, + "step": 2444 + }, + { + "epoch": 0.7150168153238777, + "grad_norm": 1.2982732418248375, + "learning_rate": 1.5265530561075407e-05, + "loss": 0.6840892434120178, + "step": 2445 + }, + { + "epoch": 
0.7153092557391432, + "grad_norm": 1.2979743249484705, + "learning_rate": 1.5261419757000417e-05, + "loss": 0.6921327114105225, + "step": 2446 + }, + { + "epoch": 0.7156016961544085, + "grad_norm": 1.429595570109343, + "learning_rate": 1.525730772310055e-05, + "loss": 0.6428500413894653, + "step": 2447 + }, + { + "epoch": 0.7158941365696739, + "grad_norm": 1.3812578358040712, + "learning_rate": 1.5253194460336964e-05, + "loss": 0.645559549331665, + "step": 2448 + }, + { + "epoch": 0.7161865769849394, + "grad_norm": 1.7222193716043204, + "learning_rate": 1.5249079969671114e-05, + "loss": 0.6211013793945312, + "step": 2449 + }, + { + "epoch": 0.7164790174002047, + "grad_norm": 1.2302035745629583, + "learning_rate": 1.5244964252064737e-05, + "loss": 0.5709721446037292, + "step": 2450 + }, + { + "epoch": 0.7167714578154701, + "grad_norm": 1.4516717315033434, + "learning_rate": 1.5240847308479855e-05, + "loss": 0.6781377196311951, + "step": 2451 + }, + { + "epoch": 0.7170638982307355, + "grad_norm": 1.5070563114338018, + "learning_rate": 1.523672913987878e-05, + "loss": 0.6476876735687256, + "step": 2452 + }, + { + "epoch": 0.7173563386460009, + "grad_norm": 1.5653544129198373, + "learning_rate": 1.523260974722411e-05, + "loss": 0.6564218997955322, + "step": 2453 + }, + { + "epoch": 0.7176487790612662, + "grad_norm": 1.3630096136191563, + "learning_rate": 1.5228489131478722e-05, + "loss": 0.6455773711204529, + "step": 2454 + }, + { + "epoch": 0.7179412194765317, + "grad_norm": 1.393672298684458, + "learning_rate": 1.5224367293605791e-05, + "loss": 0.6039570569992065, + "step": 2455 + }, + { + "epoch": 0.718233659891797, + "grad_norm": 1.337144764968105, + "learning_rate": 1.522024423456877e-05, + "loss": 0.7060747146606445, + "step": 2456 + }, + { + "epoch": 0.7185261003070624, + "grad_norm": 1.3843662041380984, + "learning_rate": 1.52161199553314e-05, + "loss": 0.561469316482544, + "step": 2457 + }, + { + "epoch": 0.7188185407223279, + "grad_norm": 1.4419398084710615, 
+ "learning_rate": 1.5211994456857706e-05, + "loss": 0.6682697534561157, + "step": 2458 + }, + { + "epoch": 0.7191109811375932, + "grad_norm": 1.5477256567407798, + "learning_rate": 1.5207867740111994e-05, + "loss": 0.7893983125686646, + "step": 2459 + }, + { + "epoch": 0.7194034215528586, + "grad_norm": 1.3454706918314496, + "learning_rate": 1.5203739806058863e-05, + "loss": 0.617809534072876, + "step": 2460 + }, + { + "epoch": 0.719695861968124, + "grad_norm": 1.2866429351470308, + "learning_rate": 1.5199610655663193e-05, + "loss": 0.5444413423538208, + "step": 2461 + }, + { + "epoch": 0.7199883023833894, + "grad_norm": 1.342633438363169, + "learning_rate": 1.5195480289890146e-05, + "loss": 0.615330696105957, + "step": 2462 + }, + { + "epoch": 0.7202807427986547, + "grad_norm": 1.7658118623485195, + "learning_rate": 1.5191348709705169e-05, + "loss": 0.6811497211456299, + "step": 2463 + }, + { + "epoch": 0.7205731832139202, + "grad_norm": 1.3224396770739022, + "learning_rate": 1.5187215916073997e-05, + "loss": 0.612322211265564, + "step": 2464 + }, + { + "epoch": 0.7208656236291856, + "grad_norm": 1.2201627110269677, + "learning_rate": 1.518308190996264e-05, + "loss": 0.6106880903244019, + "step": 2465 + }, + { + "epoch": 0.7211580640444509, + "grad_norm": 1.2431923365136468, + "learning_rate": 1.5178946692337405e-05, + "loss": 0.4901464581489563, + "step": 2466 + }, + { + "epoch": 0.7214505044597164, + "grad_norm": 2.053814058775723, + "learning_rate": 1.5174810264164865e-05, + "loss": 0.6777167320251465, + "step": 2467 + }, + { + "epoch": 0.7217429448749817, + "grad_norm": 1.4212256530727148, + "learning_rate": 1.5170672626411888e-05, + "loss": 0.6353746056556702, + "step": 2468 + }, + { + "epoch": 0.7220353852902471, + "grad_norm": 1.4867453474426244, + "learning_rate": 1.516653378004563e-05, + "loss": 0.6218847632408142, + "step": 2469 + }, + { + "epoch": 0.7223278257055125, + "grad_norm": 1.2225434595050702, + "learning_rate": 1.5162393726033508e-05, + 
"loss": 0.5001585483551025, + "step": 2470 + }, + { + "epoch": 0.7226202661207779, + "grad_norm": 1.5129628743171017, + "learning_rate": 1.5158252465343242e-05, + "loss": 0.6801280975341797, + "step": 2471 + }, + { + "epoch": 0.7229127065360432, + "grad_norm": 1.208746836224967, + "learning_rate": 1.5154109998942823e-05, + "loss": 0.6739565134048462, + "step": 2472 + }, + { + "epoch": 0.7232051469513087, + "grad_norm": 1.1414220178862078, + "learning_rate": 1.5149966327800532e-05, + "loss": 0.5970213413238525, + "step": 2473 + }, + { + "epoch": 0.7234975873665741, + "grad_norm": 1.295455082889375, + "learning_rate": 1.5145821452884923e-05, + "loss": 0.7367317080497742, + "step": 2474 + }, + { + "epoch": 0.7237900277818394, + "grad_norm": 1.3877158266331615, + "learning_rate": 1.5141675375164839e-05, + "loss": 0.6332153677940369, + "step": 2475 + }, + { + "epoch": 0.7240824681971049, + "grad_norm": 1.223786080062607, + "learning_rate": 1.5137528095609395e-05, + "loss": 0.6185739636421204, + "step": 2476 + }, + { + "epoch": 0.7243749086123702, + "grad_norm": 1.436341367228992, + "learning_rate": 1.5133379615187996e-05, + "loss": 0.5982746481895447, + "step": 2477 + }, + { + "epoch": 0.7246673490276356, + "grad_norm": 1.32306496712973, + "learning_rate": 1.512922993487032e-05, + "loss": 0.5946815013885498, + "step": 2478 + }, + { + "epoch": 0.724959789442901, + "grad_norm": 1.2916301226572995, + "learning_rate": 1.5125079055626337e-05, + "loss": 0.5645624399185181, + "step": 2479 + }, + { + "epoch": 0.7252522298581664, + "grad_norm": 1.0689440382368105, + "learning_rate": 1.5120926978426288e-05, + "loss": 0.43329858779907227, + "step": 2480 + }, + { + "epoch": 0.7255446702734318, + "grad_norm": 1.420557871943188, + "learning_rate": 1.5116773704240689e-05, + "loss": 0.64244544506073, + "step": 2481 + }, + { + "epoch": 0.7258371106886972, + "grad_norm": 1.3002221181867923, + "learning_rate": 1.5112619234040348e-05, + "loss": 0.6640222072601318, + "step": 2482 + }, + { + 
"epoch": 0.7261295511039626, + "grad_norm": 1.4810661665547034, + "learning_rate": 1.5108463568796346e-05, + "loss": 0.6346921324729919, + "step": 2483 + }, + { + "epoch": 0.7264219915192279, + "grad_norm": 1.4101536258246594, + "learning_rate": 1.5104306709480045e-05, + "loss": 0.5891947746276855, + "step": 2484 + }, + { + "epoch": 0.7267144319344934, + "grad_norm": 1.2478330500785222, + "learning_rate": 1.5100148657063089e-05, + "loss": 0.616216242313385, + "step": 2485 + }, + { + "epoch": 0.7270068723497587, + "grad_norm": 1.3541911638943873, + "learning_rate": 1.5095989412517389e-05, + "loss": 0.5961766242980957, + "step": 2486 + }, + { + "epoch": 0.7272993127650241, + "grad_norm": 1.27681624299837, + "learning_rate": 1.509182897681515e-05, + "loss": 0.5629050731658936, + "step": 2487 + }, + { + "epoch": 0.7275917531802896, + "grad_norm": 1.3918382252124497, + "learning_rate": 1.5087667350928844e-05, + "loss": 0.6640661954879761, + "step": 2488 + }, + { + "epoch": 0.7278841935955549, + "grad_norm": 1.1741006713729014, + "learning_rate": 1.5083504535831233e-05, + "loss": 0.5884503126144409, + "step": 2489 + }, + { + "epoch": 0.7281766340108203, + "grad_norm": 1.2387841976936662, + "learning_rate": 1.5079340532495344e-05, + "loss": 0.5395207405090332, + "step": 2490 + }, + { + "epoch": 0.7284690744260857, + "grad_norm": 1.5570127298934886, + "learning_rate": 1.5075175341894487e-05, + "loss": 0.5713212490081787, + "step": 2491 + }, + { + "epoch": 0.7287615148413511, + "grad_norm": 1.8811783299638292, + "learning_rate": 1.5071008965002252e-05, + "loss": 0.5732176303863525, + "step": 2492 + }, + { + "epoch": 0.7290539552566164, + "grad_norm": 1.3314823409610355, + "learning_rate": 1.50668414027925e-05, + "loss": 0.6381006240844727, + "step": 2493 + }, + { + "epoch": 0.7293463956718819, + "grad_norm": 1.5687830928425197, + "learning_rate": 1.5062672656239381e-05, + "loss": 0.6533833742141724, + "step": 2494 + }, + { + "epoch": 0.7296388360871472, + "grad_norm": 
1.213698756503139, + "learning_rate": 1.5058502726317309e-05, + "loss": 0.5919456481933594, + "step": 2495 + }, + { + "epoch": 0.7299312765024126, + "grad_norm": 1.3954865057419796, + "learning_rate": 1.5054331614000984e-05, + "loss": 0.6128921508789062, + "step": 2496 + }, + { + "epoch": 0.7302237169176781, + "grad_norm": 1.3910630571139424, + "learning_rate": 1.5050159320265371e-05, + "loss": 0.5949394702911377, + "step": 2497 + }, + { + "epoch": 0.7305161573329434, + "grad_norm": 1.5386167534502115, + "learning_rate": 1.5045985846085724e-05, + "loss": 0.6262483596801758, + "step": 2498 + }, + { + "epoch": 0.7308085977482088, + "grad_norm": 1.4477928134421267, + "learning_rate": 1.5041811192437563e-05, + "loss": 0.5032243728637695, + "step": 2499 + }, + { + "epoch": 0.7311010381634742, + "grad_norm": 1.31776348667592, + "learning_rate": 1.5037635360296695e-05, + "loss": 0.6721810102462769, + "step": 2500 + }, + { + "epoch": 0.7313934785787396, + "grad_norm": 1.3556666925406757, + "learning_rate": 1.5033458350639185e-05, + "loss": 0.7091001272201538, + "step": 2501 + }, + { + "epoch": 0.7316859189940049, + "grad_norm": 3.8317594491760163, + "learning_rate": 1.5029280164441395e-05, + "loss": 0.5414971113204956, + "step": 2502 + }, + { + "epoch": 0.7319783594092704, + "grad_norm": 1.364096425695391, + "learning_rate": 1.5025100802679944e-05, + "loss": 0.6714789271354675, + "step": 2503 + }, + { + "epoch": 0.7322707998245358, + "grad_norm": 1.3566105024089323, + "learning_rate": 1.5020920266331733e-05, + "loss": 0.5008493065834045, + "step": 2504 + }, + { + "epoch": 0.7325632402398011, + "grad_norm": 1.652155025588763, + "learning_rate": 1.5016738556373936e-05, + "loss": 0.563892126083374, + "step": 2505 + }, + { + "epoch": 0.7328556806550666, + "grad_norm": 1.3313159442091285, + "learning_rate": 1.5012555673784004e-05, + "loss": 0.6371973752975464, + "step": 2506 + }, + { + "epoch": 0.7331481210703319, + "grad_norm": 1.5289698261558242, + "learning_rate": 
1.5008371619539661e-05, + "loss": 0.7365365624427795, + "step": 2507 + }, + { + "epoch": 0.7334405614855973, + "grad_norm": 1.357001447635837, + "learning_rate": 1.5004186394618906e-05, + "loss": 0.5401967763900757, + "step": 2508 + }, + { + "epoch": 0.7337330019008627, + "grad_norm": 1.4338213701683389, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.5827134847640991, + "step": 2509 + }, + { + "epoch": 0.7340254423161281, + "grad_norm": 1.5173171956884226, + "learning_rate": 1.49958124366615e-05, + "loss": 0.7655869126319885, + "step": 2510 + }, + { + "epoch": 0.7343178827313934, + "grad_norm": 1.3360976464033478, + "learning_rate": 1.4991623705582216e-05, + "loss": 0.5410823822021484, + "step": 2511 + }, + { + "epoch": 0.7346103231466589, + "grad_norm": 1.6453007873220271, + "learning_rate": 1.4987433807741242e-05, + "loss": 0.6831178665161133, + "step": 2512 + }, + { + "epoch": 0.7349027635619243, + "grad_norm": 1.2152056235269613, + "learning_rate": 1.498324274411794e-05, + "loss": 0.4952821731567383, + "step": 2513 + }, + { + "epoch": 0.7351952039771896, + "grad_norm": 1.4425254779779118, + "learning_rate": 1.4979050515691944e-05, + "loss": 0.6973339319229126, + "step": 2514 + }, + { + "epoch": 0.7354876443924551, + "grad_norm": 1.428400853551732, + "learning_rate": 1.4974857123443163e-05, + "loss": 0.6604373455047607, + "step": 2515 + }, + { + "epoch": 0.7357800848077204, + "grad_norm": 1.3355207439959806, + "learning_rate": 1.4970662568351776e-05, + "loss": 0.6523034572601318, + "step": 2516 + }, + { + "epoch": 0.7360725252229858, + "grad_norm": 1.2739776061453822, + "learning_rate": 1.4966466851398238e-05, + "loss": 0.6557538509368896, + "step": 2517 + }, + { + "epoch": 0.7363649656382512, + "grad_norm": 1.3243836594251046, + "learning_rate": 1.4962269973563269e-05, + "loss": 0.6993967294692993, + "step": 2518 + }, + { + "epoch": 0.7366574060535166, + "grad_norm": 1.3043008466806634, + "learning_rate": 1.4958071935827862e-05, + "loss": 
0.611979067325592, + "step": 2519 + }, + { + "epoch": 0.736949846468782, + "grad_norm": 1.5837280682600245, + "learning_rate": 1.4953872739173289e-05, + "loss": 0.9108786582946777, + "step": 2520 + }, + { + "epoch": 0.7372422868840474, + "grad_norm": 1.5471791396278156, + "learning_rate": 1.4949672384581082e-05, + "loss": 0.7086392045021057, + "step": 2521 + }, + { + "epoch": 0.7375347272993128, + "grad_norm": 1.341070279173996, + "learning_rate": 1.494547087303305e-05, + "loss": 0.6103025674819946, + "step": 2522 + }, + { + "epoch": 0.7378271677145781, + "grad_norm": 1.223930383405044, + "learning_rate": 1.4941268205511272e-05, + "loss": 0.5597528219223022, + "step": 2523 + }, + { + "epoch": 0.7381196081298436, + "grad_norm": 1.4817126292023657, + "learning_rate": 1.4937064382998091e-05, + "loss": 0.6222598552703857, + "step": 2524 + }, + { + "epoch": 0.7384120485451089, + "grad_norm": 1.4738198225513357, + "learning_rate": 1.4932859406476131e-05, + "loss": 0.6083353757858276, + "step": 2525 + }, + { + "epoch": 0.7387044889603743, + "grad_norm": 1.2716230350108357, + "learning_rate": 1.4928653276928275e-05, + "loss": 0.47920671105384827, + "step": 2526 + }, + { + "epoch": 0.7389969293756398, + "grad_norm": 1.2356122713189879, + "learning_rate": 1.4924445995337685e-05, + "loss": 0.5752983093261719, + "step": 2527 + }, + { + "epoch": 0.7392893697909051, + "grad_norm": 1.3500870063925003, + "learning_rate": 1.4920237562687784e-05, + "loss": 0.6275333762168884, + "step": 2528 + }, + { + "epoch": 0.7395818102061705, + "grad_norm": 1.3423023519178945, + "learning_rate": 1.4916027979962266e-05, + "loss": 0.6362103223800659, + "step": 2529 + }, + { + "epoch": 0.7398742506214359, + "grad_norm": 1.4246415171584412, + "learning_rate": 1.49118172481451e-05, + "loss": 0.5902664661407471, + "step": 2530 + }, + { + "epoch": 0.7401666910367013, + "grad_norm": 1.3036213595476636, + "learning_rate": 1.4907605368220514e-05, + "loss": 0.5293874740600586, + "step": 2531 + }, + { + 
"epoch": 0.7404591314519666, + "grad_norm": 1.3590290047464213, + "learning_rate": 1.4903392341173013e-05, + "loss": 0.7298746109008789, + "step": 2532 + }, + { + "epoch": 0.7407515718672321, + "grad_norm": 1.3755489549876734, + "learning_rate": 1.4899178167987367e-05, + "loss": 0.6428382396697998, + "step": 2533 + }, + { + "epoch": 0.7410440122824974, + "grad_norm": 1.3444422145970576, + "learning_rate": 1.489496284964861e-05, + "loss": 0.6204425096511841, + "step": 2534 + }, + { + "epoch": 0.7413364526977628, + "grad_norm": 1.2627663029943075, + "learning_rate": 1.4890746387142052e-05, + "loss": 0.6025601625442505, + "step": 2535 + }, + { + "epoch": 0.7416288931130283, + "grad_norm": 1.212213289149315, + "learning_rate": 1.4886528781453258e-05, + "loss": 0.5570085644721985, + "step": 2536 + }, + { + "epoch": 0.7419213335282936, + "grad_norm": 1.387517207017057, + "learning_rate": 1.4882310033568072e-05, + "loss": 0.6816439628601074, + "step": 2537 + }, + { + "epoch": 0.742213773943559, + "grad_norm": 1.341130650337267, + "learning_rate": 1.4878090144472603e-05, + "loss": 0.5424396991729736, + "step": 2538 + }, + { + "epoch": 0.7425062143588244, + "grad_norm": 1.583973779595893, + "learning_rate": 1.4873869115153223e-05, + "loss": 0.58860182762146, + "step": 2539 + }, + { + "epoch": 0.7427986547740898, + "grad_norm": 1.227937032120959, + "learning_rate": 1.4869646946596568e-05, + "loss": 0.513140857219696, + "step": 2540 + }, + { + "epoch": 0.7430910951893551, + "grad_norm": 1.3321578929704418, + "learning_rate": 1.486542363978955e-05, + "loss": 0.5967035293579102, + "step": 2541 + }, + { + "epoch": 0.7433835356046206, + "grad_norm": 1.2958174333377406, + "learning_rate": 1.4861199195719334e-05, + "loss": 0.6988440752029419, + "step": 2542 + }, + { + "epoch": 0.743675976019886, + "grad_norm": 1.3279731889181368, + "learning_rate": 1.4856973615373366e-05, + "loss": 0.6176164746284485, + "step": 2543 + }, + { + "epoch": 0.7439684164351513, + "grad_norm": 
1.394214331783624, + "learning_rate": 1.4852746899739346e-05, + "loss": 0.5616505742073059, + "step": 2544 + }, + { + "epoch": 0.7442608568504168, + "grad_norm": 1.199172810090394, + "learning_rate": 1.4848519049805243e-05, + "loss": 0.5470465421676636, + "step": 2545 + }, + { + "epoch": 0.7445532972656821, + "grad_norm": 1.393649724579279, + "learning_rate": 1.4844290066559292e-05, + "loss": 0.6362754106521606, + "step": 2546 + }, + { + "epoch": 0.7448457376809475, + "grad_norm": 1.2298975206172837, + "learning_rate": 1.4840059950989992e-05, + "loss": 0.6290515661239624, + "step": 2547 + }, + { + "epoch": 0.7451381780962129, + "grad_norm": 1.4356832247939193, + "learning_rate": 1.4835828704086105e-05, + "loss": 0.7225647568702698, + "step": 2548 + }, + { + "epoch": 0.7454306185114783, + "grad_norm": 1.4603777863967904, + "learning_rate": 1.483159632683666e-05, + "loss": 0.6993023157119751, + "step": 2549 + }, + { + "epoch": 0.7457230589267436, + "grad_norm": 1.5062925776475273, + "learning_rate": 1.482736282023095e-05, + "loss": 0.6960086226463318, + "step": 2550 + }, + { + "epoch": 0.7460154993420091, + "grad_norm": 1.4783046017210701, + "learning_rate": 1.4823128185258535e-05, + "loss": 0.627712607383728, + "step": 2551 + }, + { + "epoch": 0.7463079397572745, + "grad_norm": 1.3756379084869055, + "learning_rate": 1.481889242290923e-05, + "loss": 0.6314729452133179, + "step": 2552 + }, + { + "epoch": 0.7466003801725398, + "grad_norm": 1.293029687195421, + "learning_rate": 1.4814655534173121e-05, + "loss": 0.5948070287704468, + "step": 2553 + }, + { + "epoch": 0.7468928205878053, + "grad_norm": 1.28283626174806, + "learning_rate": 1.4810417520040551e-05, + "loss": 0.6227586269378662, + "step": 2554 + }, + { + "epoch": 0.7471852610030706, + "grad_norm": 1.156874509923564, + "learning_rate": 1.4806178381502139e-05, + "loss": 0.589213490486145, + "step": 2555 + }, + { + "epoch": 0.747477701418336, + "grad_norm": 1.3920763104069633, + "learning_rate": 
1.4801938119548748e-05, + "loss": 0.6748968362808228, + "step": 2556 + }, + { + "epoch": 0.7477701418336014, + "grad_norm": 1.5278244850962377, + "learning_rate": 1.4797696735171521e-05, + "loss": 0.627450704574585, + "step": 2557 + }, + { + "epoch": 0.7480625822488668, + "grad_norm": 1.3979513679962843, + "learning_rate": 1.479345422936185e-05, + "loss": 0.5816184878349304, + "step": 2558 + }, + { + "epoch": 0.7483550226641322, + "grad_norm": 1.3403975244231432, + "learning_rate": 1.4789210603111399e-05, + "loss": 0.5184855461120605, + "step": 2559 + }, + { + "epoch": 0.7486474630793976, + "grad_norm": 1.3184163367774433, + "learning_rate": 1.4784965857412088e-05, + "loss": 0.5747300982475281, + "step": 2560 + }, + { + "epoch": 0.748939903494663, + "grad_norm": 1.5154750654158269, + "learning_rate": 1.4780719993256104e-05, + "loss": 0.6957682371139526, + "step": 2561 + }, + { + "epoch": 0.7492323439099283, + "grad_norm": 1.3790848349629903, + "learning_rate": 1.4776473011635886e-05, + "loss": 0.5711330771446228, + "step": 2562 + }, + { + "epoch": 0.7495247843251938, + "grad_norm": 1.260228471581513, + "learning_rate": 1.4772224913544142e-05, + "loss": 0.687350869178772, + "step": 2563 + }, + { + "epoch": 0.7498172247404591, + "grad_norm": 1.549796921470129, + "learning_rate": 1.476797569997384e-05, + "loss": 0.71396803855896, + "step": 2564 + }, + { + "epoch": 0.7501096651557245, + "grad_norm": 1.3620133851355087, + "learning_rate": 1.4763725371918209e-05, + "loss": 0.5457814335823059, + "step": 2565 + }, + { + "epoch": 0.75040210557099, + "grad_norm": 1.4687420339775556, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.5889413952827454, + "step": 2566 + }, + { + "epoch": 0.7506945459862553, + "grad_norm": 1.8883582542449355, + "learning_rate": 1.4755221376325171e-05, + "loss": 0.6222226619720459, + "step": 2567 + }, + { + "epoch": 0.7509869864015207, + "grad_norm": 1.17580934018018, + "learning_rate": 1.475096771077552e-05, + "loss": 0.5273243188858032, + 
"step": 2568 + }, + { + "epoch": 0.7512794268167861, + "grad_norm": 1.2062680853030614, + "learning_rate": 1.4746712934716055e-05, + "loss": 0.5665162801742554, + "step": 2569 + }, + { + "epoch": 0.7515718672320515, + "grad_norm": 1.6320800654071554, + "learning_rate": 1.4742457049141298e-05, + "loss": 0.5748391151428223, + "step": 2570 + }, + { + "epoch": 0.7518643076473168, + "grad_norm": 1.4197866961281498, + "learning_rate": 1.4738200055046044e-05, + "loss": 0.7002041339874268, + "step": 2571 + }, + { + "epoch": 0.7521567480625823, + "grad_norm": 1.3507056136966096, + "learning_rate": 1.4733941953425337e-05, + "loss": 0.6841630935668945, + "step": 2572 + }, + { + "epoch": 0.7524491884778476, + "grad_norm": 1.6017928671701795, + "learning_rate": 1.4729682745274478e-05, + "loss": 0.7047172784805298, + "step": 2573 + }, + { + "epoch": 0.752741628893113, + "grad_norm": 1.4397980876250445, + "learning_rate": 1.4725422431589035e-05, + "loss": 0.6979919672012329, + "step": 2574 + }, + { + "epoch": 0.7530340693083785, + "grad_norm": 1.3152000128748418, + "learning_rate": 1.4721161013364829e-05, + "loss": 0.6437125205993652, + "step": 2575 + }, + { + "epoch": 0.7533265097236438, + "grad_norm": 1.4573280156715103, + "learning_rate": 1.4716898491597942e-05, + "loss": 0.591254711151123, + "step": 2576 + }, + { + "epoch": 0.7536189501389092, + "grad_norm": 1.592793146861773, + "learning_rate": 1.4712634867284714e-05, + "loss": 0.6276297569274902, + "step": 2577 + }, + { + "epoch": 0.7539113905541746, + "grad_norm": 1.2004846116513588, + "learning_rate": 1.4708370141421737e-05, + "loss": 0.5310626029968262, + "step": 2578 + }, + { + "epoch": 0.75420383096944, + "grad_norm": 1.374287364754045, + "learning_rate": 1.4704104315005864e-05, + "loss": 0.5256849527359009, + "step": 2579 + }, + { + "epoch": 0.7544962713847053, + "grad_norm": 1.4473126972035357, + "learning_rate": 1.4699837389034212e-05, + "loss": 0.6050584316253662, + "step": 2580 + }, + { + "epoch": 
0.7547887117999708, + "grad_norm": 1.3425248874126274, + "learning_rate": 1.4695569364504144e-05, + "loss": 0.5124386548995972, + "step": 2581 + }, + { + "epoch": 0.7550811522152362, + "grad_norm": 1.1600080124683732, + "learning_rate": 1.4691300242413289e-05, + "loss": 0.5631951093673706, + "step": 2582 + }, + { + "epoch": 0.7553735926305015, + "grad_norm": 1.3017433820111879, + "learning_rate": 1.4687030023759527e-05, + "loss": 0.6352444291114807, + "step": 2583 + }, + { + "epoch": 0.755666033045767, + "grad_norm": 1.4490307646785157, + "learning_rate": 1.4682758709540992e-05, + "loss": 0.6717500686645508, + "step": 2584 + }, + { + "epoch": 0.7559584734610323, + "grad_norm": 3.0905292476778428, + "learning_rate": 1.467848630075608e-05, + "loss": 0.5889217853546143, + "step": 2585 + }, + { + "epoch": 0.7562509138762977, + "grad_norm": 1.258529998432557, + "learning_rate": 1.4674212798403443e-05, + "loss": 0.49069908261299133, + "step": 2586 + }, + { + "epoch": 0.756543354291563, + "grad_norm": 1.1729027861993524, + "learning_rate": 1.4669938203481982e-05, + "loss": 0.6272397041320801, + "step": 2587 + }, + { + "epoch": 0.7568357947068285, + "grad_norm": 1.5090841451643915, + "learning_rate": 1.466566251699086e-05, + "loss": 0.6218451261520386, + "step": 2588 + }, + { + "epoch": 0.7571282351220939, + "grad_norm": 1.4025085245751263, + "learning_rate": 1.4661385739929492e-05, + "loss": 0.6174849271774292, + "step": 2589 + }, + { + "epoch": 0.7574206755373593, + "grad_norm": 1.3554209784525295, + "learning_rate": 1.465710787329755e-05, + "loss": 0.5595160126686096, + "step": 2590 + }, + { + "epoch": 0.7577131159526247, + "grad_norm": 1.5657464206953444, + "learning_rate": 1.4652828918094954e-05, + "loss": 0.757240891456604, + "step": 2591 + }, + { + "epoch": 0.75800555636789, + "grad_norm": 1.3337551846990978, + "learning_rate": 1.4648548875321893e-05, + "loss": 0.630811333656311, + "step": 2592 + }, + { + "epoch": 0.7582979967831555, + "grad_norm": 
1.208341715070646, + "learning_rate": 1.4644267745978797e-05, + "loss": 0.5857812762260437, + "step": 2593 + }, + { + "epoch": 0.7585904371984208, + "grad_norm": 1.1785954348430454, + "learning_rate": 1.463998553106635e-05, + "loss": 0.5869519710540771, + "step": 2594 + }, + { + "epoch": 0.7588828776136862, + "grad_norm": 1.2035584714461103, + "learning_rate": 1.4635702231585498e-05, + "loss": 0.5610413551330566, + "step": 2595 + }, + { + "epoch": 0.7591753180289516, + "grad_norm": 1.255732340436211, + "learning_rate": 1.4631417848537435e-05, + "loss": 0.5634676218032837, + "step": 2596 + }, + { + "epoch": 0.759467758444217, + "grad_norm": 1.2847976698363035, + "learning_rate": 1.4627132382923607e-05, + "loss": 0.6813392639160156, + "step": 2597 + }, + { + "epoch": 0.7597601988594824, + "grad_norm": 1.5611350123657577, + "learning_rate": 1.4622845835745723e-05, + "loss": 0.644945502281189, + "step": 2598 + }, + { + "epoch": 0.7600526392747478, + "grad_norm": 1.4458723370490596, + "learning_rate": 1.461855820800573e-05, + "loss": 0.7432133555412292, + "step": 2599 + }, + { + "epoch": 0.7603450796900132, + "grad_norm": 1.1406983279122715, + "learning_rate": 1.4614269500705832e-05, + "loss": 0.4729112982749939, + "step": 2600 + }, + { + "epoch": 0.7606375201052785, + "grad_norm": 1.4806970647351285, + "learning_rate": 1.4609979714848499e-05, + "loss": 0.7146443128585815, + "step": 2601 + }, + { + "epoch": 0.760929960520544, + "grad_norm": 1.4348530933940364, + "learning_rate": 1.4605688851436436e-05, + "loss": 0.5959945917129517, + "step": 2602 + }, + { + "epoch": 0.7612224009358093, + "grad_norm": 1.3380784718799885, + "learning_rate": 1.4601396911472605e-05, + "loss": 0.6091525554656982, + "step": 2603 + }, + { + "epoch": 0.7615148413510747, + "grad_norm": 1.3043703832448297, + "learning_rate": 1.4597103895960228e-05, + "loss": 0.5101523399353027, + "step": 2604 + }, + { + "epoch": 0.7618072817663402, + "grad_norm": 1.3937793894568855, + "learning_rate": 
1.4592809805902762e-05, + "loss": 0.6036165952682495, + "step": 2605 + }, + { + "epoch": 0.7620997221816055, + "grad_norm": 1.361507946530242, + "learning_rate": 1.4588514642303928e-05, + "loss": 0.6094970703125, + "step": 2606 + }, + { + "epoch": 0.7623921625968709, + "grad_norm": 1.3770518433820003, + "learning_rate": 1.4584218406167697e-05, + "loss": 0.49754881858825684, + "step": 2607 + }, + { + "epoch": 0.7626846030121363, + "grad_norm": 1.3703785644048119, + "learning_rate": 1.4579921098498285e-05, + "loss": 0.6066807508468628, + "step": 2608 + }, + { + "epoch": 0.7629770434274017, + "grad_norm": 1.4768479795454132, + "learning_rate": 1.4575622720300162e-05, + "loss": 0.5758910179138184, + "step": 2609 + }, + { + "epoch": 0.763269483842667, + "grad_norm": 1.4281250780822374, + "learning_rate": 1.457132327257805e-05, + "loss": 0.6641621589660645, + "step": 2610 + }, + { + "epoch": 0.7635619242579325, + "grad_norm": 1.506727865728889, + "learning_rate": 1.4567022756336916e-05, + "loss": 0.7024788856506348, + "step": 2611 + }, + { + "epoch": 0.7638543646731978, + "grad_norm": 1.2921755321984356, + "learning_rate": 1.4562721172581982e-05, + "loss": 0.6066344380378723, + "step": 2612 + }, + { + "epoch": 0.7641468050884632, + "grad_norm": 1.3533854830579282, + "learning_rate": 1.4558418522318713e-05, + "loss": 0.566038966178894, + "step": 2613 + }, + { + "epoch": 0.7644392455037287, + "grad_norm": 1.3370326372322123, + "learning_rate": 1.4554114806552833e-05, + "loss": 0.5817335844039917, + "step": 2614 + }, + { + "epoch": 0.764731685918994, + "grad_norm": 1.2813703243908812, + "learning_rate": 1.4549810026290305e-05, + "loss": 0.6001763343811035, + "step": 2615 + }, + { + "epoch": 0.7650241263342594, + "grad_norm": 1.617460530676573, + "learning_rate": 1.4545504182537346e-05, + "loss": 0.6363068222999573, + "step": 2616 + }, + { + "epoch": 0.7653165667495248, + "grad_norm": 1.4805158326873171, + "learning_rate": 1.4541197276300424e-05, + "loss": 0.669566810131073, 
+ "step": 2617 + }, + { + "epoch": 0.7656090071647902, + "grad_norm": 1.2122677055370945, + "learning_rate": 1.4536889308586245e-05, + "loss": 0.47967004776000977, + "step": 2618 + }, + { + "epoch": 0.7659014475800555, + "grad_norm": 1.310958704364757, + "learning_rate": 1.4532580280401777e-05, + "loss": 0.5803399085998535, + "step": 2619 + }, + { + "epoch": 0.766193887995321, + "grad_norm": 1.3185113057937472, + "learning_rate": 1.452827019275423e-05, + "loss": 0.6870115995407104, + "step": 2620 + }, + { + "epoch": 0.7664863284105864, + "grad_norm": 1.307156915151953, + "learning_rate": 1.4523959046651058e-05, + "loss": 0.6190885901451111, + "step": 2621 + }, + { + "epoch": 0.7667787688258517, + "grad_norm": 1.4891479565012034, + "learning_rate": 1.4519646843099961e-05, + "loss": 0.6624859571456909, + "step": 2622 + }, + { + "epoch": 0.7670712092411172, + "grad_norm": 1.253302711959068, + "learning_rate": 1.4515333583108896e-05, + "loss": 0.5770546197891235, + "step": 2623 + }, + { + "epoch": 0.7673636496563825, + "grad_norm": 1.3410371709150275, + "learning_rate": 1.451101926768606e-05, + "loss": 0.6843355894088745, + "step": 2624 + }, + { + "epoch": 0.7676560900716479, + "grad_norm": 1.0930173610522418, + "learning_rate": 1.4506703897839895e-05, + "loss": 0.5293717384338379, + "step": 2625 + }, + { + "epoch": 0.7679485304869133, + "grad_norm": 1.1789701874259584, + "learning_rate": 1.45023874745791e-05, + "loss": 0.44534316658973694, + "step": 2626 + }, + { + "epoch": 0.7682409709021787, + "grad_norm": 4.2234169958332295, + "learning_rate": 1.4498069998912603e-05, + "loss": 0.7279446721076965, + "step": 2627 + }, + { + "epoch": 0.7685334113174441, + "grad_norm": 1.3924343198630234, + "learning_rate": 1.4493751471849596e-05, + "loss": 0.6990453600883484, + "step": 2628 + }, + { + "epoch": 0.7688258517327095, + "grad_norm": 1.3337373981179779, + "learning_rate": 1.44894318943995e-05, + "loss": 0.6610965728759766, + "step": 2629 + }, + { + "epoch": 
0.7691182921479749, + "grad_norm": 1.285212706548779, + "learning_rate": 1.4485111267571999e-05, + "loss": 0.5124749541282654, + "step": 2630 + }, + { + "epoch": 0.7694107325632402, + "grad_norm": 1.3445630320041935, + "learning_rate": 1.448078959237701e-05, + "loss": 0.7191518545150757, + "step": 2631 + }, + { + "epoch": 0.7697031729785057, + "grad_norm": 1.1499690572165278, + "learning_rate": 1.4476466869824694e-05, + "loss": 0.5798880457878113, + "step": 2632 + }, + { + "epoch": 0.769995613393771, + "grad_norm": 1.3900006441925277, + "learning_rate": 1.4472143100925467e-05, + "loss": 0.5187106728553772, + "step": 2633 + }, + { + "epoch": 0.7702880538090364, + "grad_norm": 1.1672945310140501, + "learning_rate": 1.4467818286689981e-05, + "loss": 0.5794588327407837, + "step": 2634 + }, + { + "epoch": 0.7705804942243017, + "grad_norm": 1.2435528275045493, + "learning_rate": 1.4463492428129133e-05, + "loss": 0.4884936809539795, + "step": 2635 + }, + { + "epoch": 0.7708729346395672, + "grad_norm": 1.3037745440935204, + "learning_rate": 1.4459165526254074e-05, + "loss": 0.5782946348190308, + "step": 2636 + }, + { + "epoch": 0.7711653750548326, + "grad_norm": 1.2531837165046444, + "learning_rate": 1.445483758207618e-05, + "loss": 0.5173349380493164, + "step": 2637 + }, + { + "epoch": 0.771457815470098, + "grad_norm": 1.4752149684021225, + "learning_rate": 1.4450508596607087e-05, + "loss": 0.616407573223114, + "step": 2638 + }, + { + "epoch": 0.7717502558853634, + "grad_norm": 1.4855666629653779, + "learning_rate": 1.4446178570858672e-05, + "loss": 0.537878155708313, + "step": 2639 + }, + { + "epoch": 0.7720426963006287, + "grad_norm": 1.2968861628303388, + "learning_rate": 1.4441847505843048e-05, + "loss": 0.674277663230896, + "step": 2640 + }, + { + "epoch": 0.7723351367158942, + "grad_norm": 1.440782866010467, + "learning_rate": 1.4437515402572576e-05, + "loss": 0.5064860582351685, + "step": 2641 + }, + { + "epoch": 0.7726275771311595, + "grad_norm": 
1.2859384806045262, + "learning_rate": 1.4433182262059861e-05, + "loss": 0.6256883144378662, + "step": 2642 + }, + { + "epoch": 0.7729200175464249, + "grad_norm": 1.2490391757844836, + "learning_rate": 1.4428848085317744e-05, + "loss": 0.6023700833320618, + "step": 2643 + }, + { + "epoch": 0.7732124579616904, + "grad_norm": 1.5137270909206324, + "learning_rate": 1.4424512873359316e-05, + "loss": 0.5670932531356812, + "step": 2644 + }, + { + "epoch": 0.7735048983769557, + "grad_norm": 1.406486208295682, + "learning_rate": 1.4420176627197906e-05, + "loss": 0.760460376739502, + "step": 2645 + }, + { + "epoch": 0.7737973387922211, + "grad_norm": 1.3383411751300025, + "learning_rate": 1.4415839347847082e-05, + "loss": 0.5680848956108093, + "step": 2646 + }, + { + "epoch": 0.7740897792074865, + "grad_norm": 1.2948318300140997, + "learning_rate": 1.4411501036320661e-05, + "loss": 0.5962368249893188, + "step": 2647 + }, + { + "epoch": 0.7743822196227519, + "grad_norm": 1.3851281269469669, + "learning_rate": 1.4407161693632697e-05, + "loss": 0.7149791121482849, + "step": 2648 + }, + { + "epoch": 0.7746746600380172, + "grad_norm": 1.4438569377090373, + "learning_rate": 1.440282132079748e-05, + "loss": 0.5943992733955383, + "step": 2649 + }, + { + "epoch": 0.7749671004532827, + "grad_norm": 1.681920535370579, + "learning_rate": 1.439847991882955e-05, + "loss": 0.7265899181365967, + "step": 2650 + }, + { + "epoch": 0.775259540868548, + "grad_norm": 1.257384791880329, + "learning_rate": 1.4394137488743682e-05, + "loss": 0.6011309027671814, + "step": 2651 + }, + { + "epoch": 0.7755519812838134, + "grad_norm": 1.4419500386554907, + "learning_rate": 1.4389794031554894e-05, + "loss": 0.6853964328765869, + "step": 2652 + }, + { + "epoch": 0.7758444216990789, + "grad_norm": 1.4140520249216477, + "learning_rate": 1.438544954827844e-05, + "loss": 0.6598547697067261, + "step": 2653 + }, + { + "epoch": 0.7761368621143442, + "grad_norm": 1.3919438302264315, + "learning_rate": 
1.4381104039929819e-05, + "loss": 0.5776119232177734, + "step": 2654 + }, + { + "epoch": 0.7764293025296096, + "grad_norm": 1.182931573556341, + "learning_rate": 1.4376757507524766e-05, + "loss": 0.6026376485824585, + "step": 2655 + }, + { + "epoch": 0.776721742944875, + "grad_norm": 1.2883148172478378, + "learning_rate": 1.4372409952079256e-05, + "loss": 0.5776997804641724, + "step": 2656 + }, + { + "epoch": 0.7770141833601404, + "grad_norm": 1.5317545348037325, + "learning_rate": 1.4368061374609505e-05, + "loss": 0.5766068696975708, + "step": 2657 + }, + { + "epoch": 0.7773066237754057, + "grad_norm": 1.0428168520269592, + "learning_rate": 1.4363711776131966e-05, + "loss": 0.4783105254173279, + "step": 2658 + }, + { + "epoch": 0.7775990641906712, + "grad_norm": 1.4837098758543301, + "learning_rate": 1.4359361157663332e-05, + "loss": 0.6563695073127747, + "step": 2659 + }, + { + "epoch": 0.7778915046059366, + "grad_norm": 1.0898257169197185, + "learning_rate": 1.4355009520220531e-05, + "loss": 0.5177119374275208, + "step": 2660 + }, + { + "epoch": 0.7781839450212019, + "grad_norm": 1.3520526907259511, + "learning_rate": 1.4350656864820733e-05, + "loss": 0.6590641736984253, + "step": 2661 + }, + { + "epoch": 0.7784763854364674, + "grad_norm": 1.2923155412118275, + "learning_rate": 1.4346303192481348e-05, + "loss": 0.6012274622917175, + "step": 2662 + }, + { + "epoch": 0.7787688258517327, + "grad_norm": 1.439032337982527, + "learning_rate": 1.4341948504220016e-05, + "loss": 0.6731704473495483, + "step": 2663 + }, + { + "epoch": 0.7790612662669981, + "grad_norm": 1.4598986218346195, + "learning_rate": 1.4337592801054623e-05, + "loss": 0.6827171444892883, + "step": 2664 + }, + { + "epoch": 0.7793537066822634, + "grad_norm": 1.3963311439466064, + "learning_rate": 1.4333236084003282e-05, + "loss": 0.6654937267303467, + "step": 2665 + }, + { + "epoch": 0.7796461470975289, + "grad_norm": 1.276825216432019, + "learning_rate": 1.4328878354084355e-05, + "loss": 
0.5673532485961914, + "step": 2666 + }, + { + "epoch": 0.7799385875127943, + "grad_norm": 1.3049192363130713, + "learning_rate": 1.432451961231643e-05, + "loss": 0.5401986241340637, + "step": 2667 + }, + { + "epoch": 0.7802310279280597, + "grad_norm": 1.2877259559166432, + "learning_rate": 1.4320159859718341e-05, + "loss": 0.6134701371192932, + "step": 2668 + }, + { + "epoch": 0.7805234683433251, + "grad_norm": 1.5022932512908924, + "learning_rate": 1.4315799097309152e-05, + "loss": 0.6913554668426514, + "step": 2669 + }, + { + "epoch": 0.7808159087585904, + "grad_norm": 1.6126405133572825, + "learning_rate": 1.4311437326108167e-05, + "loss": 0.6969482898712158, + "step": 2670 + }, + { + "epoch": 0.7811083491738559, + "grad_norm": 1.343855488902383, + "learning_rate": 1.4307074547134918e-05, + "loss": 0.6612537503242493, + "step": 2671 + }, + { + "epoch": 0.7814007895891212, + "grad_norm": 1.1627822310905236, + "learning_rate": 1.430271076140918e-05, + "loss": 0.5545899868011475, + "step": 2672 + }, + { + "epoch": 0.7816932300043866, + "grad_norm": 1.1885930128001867, + "learning_rate": 1.4298345969950965e-05, + "loss": 0.6635574698448181, + "step": 2673 + }, + { + "epoch": 0.781985670419652, + "grad_norm": 1.4316816688950922, + "learning_rate": 1.4293980173780514e-05, + "loss": 0.5859510898590088, + "step": 2674 + }, + { + "epoch": 0.7822781108349174, + "grad_norm": 1.246244040215616, + "learning_rate": 1.4289613373918304e-05, + "loss": 0.5839825868606567, + "step": 2675 + }, + { + "epoch": 0.7825705512501828, + "grad_norm": 1.7192756445293216, + "learning_rate": 1.428524557138505e-05, + "loss": 0.6376889944076538, + "step": 2676 + }, + { + "epoch": 0.7828629916654481, + "grad_norm": 1.2061132029389496, + "learning_rate": 1.4280876767201696e-05, + "loss": 0.5473129749298096, + "step": 2677 + }, + { + "epoch": 0.7831554320807136, + "grad_norm": 1.2355367438994083, + "learning_rate": 1.4276506962389429e-05, + "loss": 0.6723904609680176, + "step": 2678 + }, + { + 
"epoch": 0.7834478724959789, + "grad_norm": 1.318329485547163, + "learning_rate": 1.4272136157969658e-05, + "loss": 0.6036845445632935, + "step": 2679 + }, + { + "epoch": 0.7837403129112444, + "grad_norm": 1.4527977807212105, + "learning_rate": 1.4267764354964038e-05, + "loss": 0.5993655920028687, + "step": 2680 + }, + { + "epoch": 0.7840327533265097, + "grad_norm": 1.5159579383707373, + "learning_rate": 1.4263391554394448e-05, + "loss": 0.6678075194358826, + "step": 2681 + }, + { + "epoch": 0.7843251937417751, + "grad_norm": 1.2588619303254647, + "learning_rate": 1.4259017757283003e-05, + "loss": 0.5627151727676392, + "step": 2682 + }, + { + "epoch": 0.7846176341570406, + "grad_norm": 1.2632820141578516, + "learning_rate": 1.4254642964652053e-05, + "loss": 0.6060316562652588, + "step": 2683 + }, + { + "epoch": 0.7849100745723059, + "grad_norm": 1.590473454276912, + "learning_rate": 1.4250267177524177e-05, + "loss": 0.6535854935646057, + "step": 2684 + }, + { + "epoch": 0.7852025149875713, + "grad_norm": 1.499355267260573, + "learning_rate": 1.4245890396922195e-05, + "loss": 0.7141643762588501, + "step": 2685 + }, + { + "epoch": 0.7854949554028366, + "grad_norm": 1.5067703709229516, + "learning_rate": 1.4241512623869143e-05, + "loss": 0.6685847640037537, + "step": 2686 + }, + { + "epoch": 0.7857873958181021, + "grad_norm": 1.4195544467165693, + "learning_rate": 1.4237133859388305e-05, + "loss": 0.6745196580886841, + "step": 2687 + }, + { + "epoch": 0.7860798362333674, + "grad_norm": 1.5617010746630147, + "learning_rate": 1.423275410450319e-05, + "loss": 0.6891968250274658, + "step": 2688 + }, + { + "epoch": 0.7863722766486329, + "grad_norm": 1.3584703297700564, + "learning_rate": 1.422837336023754e-05, + "loss": 0.5614763498306274, + "step": 2689 + }, + { + "epoch": 0.7866647170638982, + "grad_norm": 1.3595148335065306, + "learning_rate": 1.4223991627615324e-05, + "loss": 0.5867494344711304, + "step": 2690 + }, + { + "epoch": 0.7869571574791636, + "grad_norm": 
1.453264768444311, + "learning_rate": 1.421960890766075e-05, + "loss": 0.644777774810791, + "step": 2691 + }, + { + "epoch": 0.787249597894429, + "grad_norm": 1.3023857436912896, + "learning_rate": 1.4215225201398249e-05, + "loss": 0.7237588167190552, + "step": 2692 + }, + { + "epoch": 0.7875420383096944, + "grad_norm": 1.45851809360972, + "learning_rate": 1.4210840509852484e-05, + "loss": 0.6314423680305481, + "step": 2693 + }, + { + "epoch": 0.7878344787249598, + "grad_norm": 1.2286351961246127, + "learning_rate": 1.4206454834048353e-05, + "loss": 0.5298433303833008, + "step": 2694 + }, + { + "epoch": 0.7881269191402251, + "grad_norm": 1.1185262454319822, + "learning_rate": 1.420206817501098e-05, + "loss": 0.507548451423645, + "step": 2695 + }, + { + "epoch": 0.7884193595554906, + "grad_norm": 1.7207072983596743, + "learning_rate": 1.4197680533765721e-05, + "loss": 0.7742520570755005, + "step": 2696 + }, + { + "epoch": 0.7887117999707559, + "grad_norm": 1.3752660802878722, + "learning_rate": 1.4193291911338161e-05, + "loss": 0.6261187195777893, + "step": 2697 + }, + { + "epoch": 0.7890042403860213, + "grad_norm": 1.521521524262885, + "learning_rate": 1.4188902308754108e-05, + "loss": 0.7501171827316284, + "step": 2698 + }, + { + "epoch": 0.7892966808012868, + "grad_norm": 1.3001128857102173, + "learning_rate": 1.4184511727039612e-05, + "loss": 0.5590647459030151, + "step": 2699 + }, + { + "epoch": 0.7895891212165521, + "grad_norm": 1.4479349527989895, + "learning_rate": 1.4180120167220941e-05, + "loss": 0.586786150932312, + "step": 2700 + }, + { + "epoch": 0.7898815616318176, + "grad_norm": 1.2133244570308048, + "learning_rate": 1.4175727630324598e-05, + "loss": 0.5208219289779663, + "step": 2701 + }, + { + "epoch": 0.7901740020470829, + "grad_norm": 1.2365924450408214, + "learning_rate": 1.4171334117377312e-05, + "loss": 0.5925623178482056, + "step": 2702 + }, + { + "epoch": 0.7904664424623483, + "grad_norm": 1.5006045037979843, + "learning_rate": 
1.4166939629406034e-05, + "loss": 0.7095032930374146, + "step": 2703 + }, + { + "epoch": 0.7907588828776136, + "grad_norm": 1.167282378609361, + "learning_rate": 1.4162544167437955e-05, + "loss": 0.5683872699737549, + "step": 2704 + }, + { + "epoch": 0.7910513232928791, + "grad_norm": 1.2605941476894575, + "learning_rate": 1.4158147732500482e-05, + "loss": 0.7079274654388428, + "step": 2705 + }, + { + "epoch": 0.7913437637081445, + "grad_norm": 1.3186161570017685, + "learning_rate": 1.415375032562126e-05, + "loss": 0.6336439847946167, + "step": 2706 + }, + { + "epoch": 0.7916362041234098, + "grad_norm": 1.14446239802259, + "learning_rate": 1.414935194782816e-05, + "loss": 0.4842381477355957, + "step": 2707 + }, + { + "epoch": 0.7919286445386753, + "grad_norm": 1.4296190875249344, + "learning_rate": 1.4144952600149267e-05, + "loss": 0.5439653396606445, + "step": 2708 + }, + { + "epoch": 0.7922210849539406, + "grad_norm": 1.2988205927389838, + "learning_rate": 1.4140552283612906e-05, + "loss": 0.6365468502044678, + "step": 2709 + }, + { + "epoch": 0.792513525369206, + "grad_norm": 1.3854921286863888, + "learning_rate": 1.4136150999247623e-05, + "loss": 0.6192438006401062, + "step": 2710 + }, + { + "epoch": 0.7928059657844714, + "grad_norm": 1.2293031316317269, + "learning_rate": 1.4131748748082191e-05, + "loss": 0.5695269703865051, + "step": 2711 + }, + { + "epoch": 0.7930984061997368, + "grad_norm": 1.3405661548900325, + "learning_rate": 1.4127345531145614e-05, + "loss": 0.6892319321632385, + "step": 2712 + }, + { + "epoch": 0.7933908466150021, + "grad_norm": 1.5220370415080073, + "learning_rate": 1.4122941349467109e-05, + "loss": 0.6294678449630737, + "step": 2713 + }, + { + "epoch": 0.7936832870302676, + "grad_norm": 1.2086123903849104, + "learning_rate": 1.4118536204076135e-05, + "loss": 0.6666272878646851, + "step": 2714 + }, + { + "epoch": 0.793975727445533, + "grad_norm": 1.2066166036349477, + "learning_rate": 1.4114130096002363e-05, + "loss": 
0.5981796383857727, + "step": 2715 + }, + { + "epoch": 0.7942681678607983, + "grad_norm": 1.5676320725913573, + "learning_rate": 1.4109723026275695e-05, + "loss": 0.6120023131370544, + "step": 2716 + }, + { + "epoch": 0.7945606082760638, + "grad_norm": 1.536602454646116, + "learning_rate": 1.4105314995926257e-05, + "loss": 0.5892866849899292, + "step": 2717 + }, + { + "epoch": 0.7948530486913291, + "grad_norm": 1.504529299257153, + "learning_rate": 1.4100906005984404e-05, + "loss": 0.7625553607940674, + "step": 2718 + }, + { + "epoch": 0.7951454891065945, + "grad_norm": 1.4565362056936688, + "learning_rate": 1.40964960574807e-05, + "loss": 0.643633246421814, + "step": 2719 + }, + { + "epoch": 0.7954379295218599, + "grad_norm": 1.2108583839611744, + "learning_rate": 1.4092085151445953e-05, + "loss": 0.46422284841537476, + "step": 2720 + }, + { + "epoch": 0.7957303699371253, + "grad_norm": 1.2654408745652597, + "learning_rate": 1.4087673288911182e-05, + "loss": 0.6290001273155212, + "step": 2721 + }, + { + "epoch": 0.7960228103523908, + "grad_norm": 1.2400549293858325, + "learning_rate": 1.4083260470907632e-05, + "loss": 0.5175197124481201, + "step": 2722 + }, + { + "epoch": 0.7963152507676561, + "grad_norm": 1.4748861405916942, + "learning_rate": 1.4078846698466776e-05, + "loss": 0.6475427150726318, + "step": 2723 + }, + { + "epoch": 0.7966076911829215, + "grad_norm": 1.3254407316825372, + "learning_rate": 1.40744319726203e-05, + "loss": 0.5978254079818726, + "step": 2724 + }, + { + "epoch": 0.7969001315981868, + "grad_norm": 1.2991181525686113, + "learning_rate": 1.4070016294400124e-05, + "loss": 0.5738629102706909, + "step": 2725 + }, + { + "epoch": 0.7971925720134523, + "grad_norm": 1.3493198611941248, + "learning_rate": 1.4065599664838388e-05, + "loss": 0.5809024572372437, + "step": 2726 + }, + { + "epoch": 0.7974850124287176, + "grad_norm": 1.1539725667160117, + "learning_rate": 1.4061182084967446e-05, + "loss": 0.5907782316207886, + "step": 2727 + }, + { + 
"epoch": 0.797777452843983, + "grad_norm": 1.4493981600012322, + "learning_rate": 1.4056763555819887e-05, + "loss": 0.7640036344528198, + "step": 2728 + }, + { + "epoch": 0.7980698932592484, + "grad_norm": 1.5601806517528776, + "learning_rate": 1.4052344078428513e-05, + "loss": 0.7472168207168579, + "step": 2729 + }, + { + "epoch": 0.7983623336745138, + "grad_norm": 1.6018546047693625, + "learning_rate": 1.4047923653826347e-05, + "loss": 0.6726990342140198, + "step": 2730 + }, + { + "epoch": 0.7986547740897793, + "grad_norm": 1.3791137229331067, + "learning_rate": 1.404350228304664e-05, + "loss": 0.5949650406837463, + "step": 2731 + }, + { + "epoch": 0.7989472145050446, + "grad_norm": 1.386756095528374, + "learning_rate": 1.403907996712286e-05, + "loss": 0.5578774213790894, + "step": 2732 + }, + { + "epoch": 0.79923965492031, + "grad_norm": 1.5271585141569006, + "learning_rate": 1.4034656707088692e-05, + "loss": 0.6092333197593689, + "step": 2733 + }, + { + "epoch": 0.7995320953355753, + "grad_norm": 1.3098390209876276, + "learning_rate": 1.4030232503978053e-05, + "loss": 0.5095718502998352, + "step": 2734 + }, + { + "epoch": 0.7998245357508408, + "grad_norm": 1.3675399597044373, + "learning_rate": 1.4025807358825072e-05, + "loss": 0.5155727863311768, + "step": 2735 + }, + { + "epoch": 0.8001169761661061, + "grad_norm": 1.3309663791332569, + "learning_rate": 1.4021381272664094e-05, + "loss": 0.5752589702606201, + "step": 2736 + }, + { + "epoch": 0.8004094165813715, + "grad_norm": 1.3619611747950222, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.6334787607192993, + "step": 2737 + }, + { + "epoch": 0.800701856996637, + "grad_norm": 1.3830503239164076, + "learning_rate": 1.4012526281456666e-05, + "loss": 0.7406032085418701, + "step": 2738 + }, + { + "epoch": 0.8009942974119023, + "grad_norm": 1.2904369174268238, + "learning_rate": 1.4008097378480014e-05, + "loss": 0.5805078744888306, + "step": 2739 + }, + { + "epoch": 0.8012867378271677, + "grad_norm": 
1.3584200788658642, + "learning_rate": 1.4003667538634972e-05, + "loss": 0.6849163770675659, + "step": 2740 + }, + { + "epoch": 0.8015791782424331, + "grad_norm": 1.5354340760410032, + "learning_rate": 1.3999236762956985e-05, + "loss": 0.7707695960998535, + "step": 2741 + }, + { + "epoch": 0.8018716186576985, + "grad_norm": 1.426293329050591, + "learning_rate": 1.3994805052481715e-05, + "loss": 0.6253059506416321, + "step": 2742 + }, + { + "epoch": 0.8021640590729638, + "grad_norm": 1.274928204575108, + "learning_rate": 1.3990372408245057e-05, + "loss": 0.6450316905975342, + "step": 2743 + }, + { + "epoch": 0.8024564994882293, + "grad_norm": 1.2867865996346037, + "learning_rate": 1.398593883128311e-05, + "loss": 0.672899603843689, + "step": 2744 + }, + { + "epoch": 0.8027489399034947, + "grad_norm": 1.38176481949922, + "learning_rate": 1.3981504322632198e-05, + "loss": 0.6203787326812744, + "step": 2745 + }, + { + "epoch": 0.80304138031876, + "grad_norm": 1.296034523853111, + "learning_rate": 1.3977068883328854e-05, + "loss": 0.541740894317627, + "step": 2746 + }, + { + "epoch": 0.8033338207340255, + "grad_norm": 1.3608273440615848, + "learning_rate": 1.3972632514409843e-05, + "loss": 0.5566504001617432, + "step": 2747 + }, + { + "epoch": 0.8036262611492908, + "grad_norm": 1.378445494532888, + "learning_rate": 1.3968195216912135e-05, + "loss": 0.6911404728889465, + "step": 2748 + }, + { + "epoch": 0.8039187015645562, + "grad_norm": 1.3758218413869647, + "learning_rate": 1.3963756991872921e-05, + "loss": 0.6744735240936279, + "step": 2749 + }, + { + "epoch": 0.8042111419798216, + "grad_norm": 1.3810636187989935, + "learning_rate": 1.3959317840329613e-05, + "loss": 0.6660502552986145, + "step": 2750 + }, + { + "epoch": 0.804503582395087, + "grad_norm": 1.611467815082346, + "learning_rate": 1.3954877763319832e-05, + "loss": 0.607395589351654, + "step": 2751 + }, + { + "epoch": 0.8047960228103523, + "grad_norm": 1.3065536354182021, + "learning_rate": 
1.395043676188142e-05, + "loss": 0.53249192237854, + "step": 2752 + }, + { + "epoch": 0.8050884632256178, + "grad_norm": 1.384670069600496, + "learning_rate": 1.394599483705243e-05, + "loss": 0.5728630423545837, + "step": 2753 + }, + { + "epoch": 0.8053809036408832, + "grad_norm": 1.354298055615179, + "learning_rate": 1.3941551989871142e-05, + "loss": 0.6912537813186646, + "step": 2754 + }, + { + "epoch": 0.8056733440561485, + "grad_norm": 1.2211163784496284, + "learning_rate": 1.3937108221376041e-05, + "loss": 0.6002523899078369, + "step": 2755 + }, + { + "epoch": 0.805965784471414, + "grad_norm": 1.165855753943377, + "learning_rate": 1.3932663532605832e-05, + "loss": 0.6573797464370728, + "step": 2756 + }, + { + "epoch": 0.8062582248866793, + "grad_norm": 1.2846173311931015, + "learning_rate": 1.3928217924599433e-05, + "loss": 0.6997278928756714, + "step": 2757 + }, + { + "epoch": 0.8065506653019447, + "grad_norm": 1.3457721921363819, + "learning_rate": 1.3923771398395978e-05, + "loss": 0.565264105796814, + "step": 2758 + }, + { + "epoch": 0.8068431057172101, + "grad_norm": 1.7064740069380804, + "learning_rate": 1.3919323955034815e-05, + "loss": 0.8065239191055298, + "step": 2759 + }, + { + "epoch": 0.8071355461324755, + "grad_norm": 1.4850507802988735, + "learning_rate": 1.3914875595555509e-05, + "loss": 0.556678056716919, + "step": 2760 + }, + { + "epoch": 0.807427986547741, + "grad_norm": 1.653442619870376, + "learning_rate": 1.3910426320997834e-05, + "loss": 0.5528635382652283, + "step": 2761 + }, + { + "epoch": 0.8077204269630063, + "grad_norm": 1.4210714864438183, + "learning_rate": 1.3905976132401785e-05, + "loss": 0.6127038598060608, + "step": 2762 + }, + { + "epoch": 0.8080128673782717, + "grad_norm": 1.4473812948635245, + "learning_rate": 1.390152503080756e-05, + "loss": 0.6311757564544678, + "step": 2763 + }, + { + "epoch": 0.808305307793537, + "grad_norm": 1.256496005559394, + "learning_rate": 1.389707301725558e-05, + "loss": 0.669788122177124, + 
"step": 2764 + }, + { + "epoch": 0.8085977482088025, + "grad_norm": 1.1602455830470428, + "learning_rate": 1.3892620092786477e-05, + "loss": 0.48408570885658264, + "step": 2765 + }, + { + "epoch": 0.8088901886240678, + "grad_norm": 1.3816192110102654, + "learning_rate": 1.3888166258441098e-05, + "loss": 0.5648288726806641, + "step": 2766 + }, + { + "epoch": 0.8091826290393332, + "grad_norm": 1.359222924847667, + "learning_rate": 1.3883711515260497e-05, + "loss": 0.5894806385040283, + "step": 2767 + }, + { + "epoch": 0.8094750694545986, + "grad_norm": 1.609438084965147, + "learning_rate": 1.3879255864285939e-05, + "loss": 0.8325392603874207, + "step": 2768 + }, + { + "epoch": 0.809767509869864, + "grad_norm": 1.3200888192290248, + "learning_rate": 1.387479930655891e-05, + "loss": 0.5282119512557983, + "step": 2769 + }, + { + "epoch": 0.8100599502851294, + "grad_norm": 1.2020970963419326, + "learning_rate": 1.3870341843121104e-05, + "loss": 0.7565277218818665, + "step": 2770 + }, + { + "epoch": 0.8103523907003948, + "grad_norm": 1.20769025145285, + "learning_rate": 1.3865883475014424e-05, + "loss": 0.5767146944999695, + "step": 2771 + }, + { + "epoch": 0.8106448311156602, + "grad_norm": 1.3747646237948088, + "learning_rate": 1.3861424203280987e-05, + "loss": 0.5988898873329163, + "step": 2772 + }, + { + "epoch": 0.8109372715309255, + "grad_norm": 1.2837797411261327, + "learning_rate": 1.3856964028963119e-05, + "loss": 0.5752500295639038, + "step": 2773 + }, + { + "epoch": 0.811229711946191, + "grad_norm": 1.3281997353125305, + "learning_rate": 1.385250295310336e-05, + "loss": 0.6834297776222229, + "step": 2774 + }, + { + "epoch": 0.8115221523614563, + "grad_norm": 1.376792748908409, + "learning_rate": 1.3848040976744459e-05, + "loss": 0.5667037963867188, + "step": 2775 + }, + { + "epoch": 0.8118145927767217, + "grad_norm": 1.33236222276005, + "learning_rate": 1.3843578100929375e-05, + "loss": 0.5618781447410583, + "step": 2776 + }, + { + "epoch": 0.8121070331919872, 
+ "grad_norm": 1.4974631308124338, + "learning_rate": 1.3839114326701281e-05, + "loss": 0.538033664226532, + "step": 2777 + }, + { + "epoch": 0.8123994736072525, + "grad_norm": 1.3236430994846111, + "learning_rate": 1.3834649655103556e-05, + "loss": 0.7218335270881653, + "step": 2778 + }, + { + "epoch": 0.812691914022518, + "grad_norm": 1.3045533775783231, + "learning_rate": 1.383018408717979e-05, + "loss": 0.5979611873626709, + "step": 2779 + }, + { + "epoch": 0.8129843544377833, + "grad_norm": 1.191818251767074, + "learning_rate": 1.3825717623973775e-05, + "loss": 0.4958215355873108, + "step": 2780 + }, + { + "epoch": 0.8132767948530487, + "grad_norm": 1.4132643925978479, + "learning_rate": 1.3821250266529531e-05, + "loss": 0.6759654879570007, + "step": 2781 + }, + { + "epoch": 0.813569235268314, + "grad_norm": 1.1873413404245543, + "learning_rate": 1.3816782015891272e-05, + "loss": 0.5499521493911743, + "step": 2782 + }, + { + "epoch": 0.8138616756835795, + "grad_norm": 1.327517100573182, + "learning_rate": 1.3812312873103425e-05, + "loss": 0.5308753252029419, + "step": 2783 + }, + { + "epoch": 0.8141541160988449, + "grad_norm": 1.4850132833469487, + "learning_rate": 1.3807842839210617e-05, + "loss": 0.585492730140686, + "step": 2784 + }, + { + "epoch": 0.8144465565141102, + "grad_norm": 1.5985853231384999, + "learning_rate": 1.3803371915257702e-05, + "loss": 0.6598281860351562, + "step": 2785 + }, + { + "epoch": 0.8147389969293757, + "grad_norm": 1.2500600856454092, + "learning_rate": 1.3798900102289726e-05, + "loss": 0.6819334030151367, + "step": 2786 + }, + { + "epoch": 0.815031437344641, + "grad_norm": 2.1106639284366877, + "learning_rate": 1.3794427401351946e-05, + "loss": 0.6548545360565186, + "step": 2787 + }, + { + "epoch": 0.8153238777599064, + "grad_norm": 1.4934248295829666, + "learning_rate": 1.3789953813489834e-05, + "loss": 0.7836263179779053, + "step": 2788 + }, + { + "epoch": 0.8156163181751718, + "grad_norm": 1.3092153960785353, + 
"learning_rate": 1.3785479339749062e-05, + "loss": 0.6108324527740479, + "step": 2789 + }, + { + "epoch": 0.8159087585904372, + "grad_norm": 1.4189973842835568, + "learning_rate": 1.378100398117551e-05, + "loss": 0.7079485058784485, + "step": 2790 + }, + { + "epoch": 0.8162011990057025, + "grad_norm": 1.2593140459847156, + "learning_rate": 1.3776527738815264e-05, + "loss": 0.5935578346252441, + "step": 2791 + }, + { + "epoch": 0.816493639420968, + "grad_norm": 1.159439153093783, + "learning_rate": 1.3772050613714623e-05, + "loss": 0.5559983253479004, + "step": 2792 + }, + { + "epoch": 0.8167860798362334, + "grad_norm": 1.2282449471592758, + "learning_rate": 1.3767572606920083e-05, + "loss": 0.6230447292327881, + "step": 2793 + }, + { + "epoch": 0.8170785202514987, + "grad_norm": 1.3750755360912204, + "learning_rate": 1.3763093719478357e-05, + "loss": 0.5672184824943542, + "step": 2794 + }, + { + "epoch": 0.8173709606667642, + "grad_norm": 1.3345649111405589, + "learning_rate": 1.3758613952436353e-05, + "loss": 0.6933468580245972, + "step": 2795 + }, + { + "epoch": 0.8176634010820295, + "grad_norm": 1.299919441217989, + "learning_rate": 1.3754133306841188e-05, + "loss": 0.5873827934265137, + "step": 2796 + }, + { + "epoch": 0.8179558414972949, + "grad_norm": 1.3238138716227077, + "learning_rate": 1.3749651783740188e-05, + "loss": 0.6061393022537231, + "step": 2797 + }, + { + "epoch": 0.8182482819125603, + "grad_norm": 1.3503137209197107, + "learning_rate": 1.3745169384180886e-05, + "loss": 0.6218947768211365, + "step": 2798 + }, + { + "epoch": 0.8185407223278257, + "grad_norm": 1.584036085033884, + "learning_rate": 1.3740686109211008e-05, + "loss": 0.6092264652252197, + "step": 2799 + }, + { + "epoch": 0.8188331627430911, + "grad_norm": 1.4327213465282531, + "learning_rate": 1.3736201959878497e-05, + "loss": 0.6145539283752441, + "step": 2800 + }, + { + "epoch": 0.8191256031583565, + "grad_norm": 1.1433366189059146, + "learning_rate": 1.3731716937231493e-05, + 
"loss": 0.4637746214866638, + "step": 2801 + }, + { + "epoch": 0.8194180435736219, + "grad_norm": 1.2802202387296946, + "learning_rate": 1.3727231042318345e-05, + "loss": 0.6102726459503174, + "step": 2802 + }, + { + "epoch": 0.8197104839888872, + "grad_norm": 1.3432330324336637, + "learning_rate": 1.3722744276187603e-05, + "loss": 0.5885297060012817, + "step": 2803 + }, + { + "epoch": 0.8200029244041527, + "grad_norm": 1.4575985112282515, + "learning_rate": 1.3718256639888021e-05, + "loss": 0.592369019985199, + "step": 2804 + }, + { + "epoch": 0.820295364819418, + "grad_norm": 1.4943856663354038, + "learning_rate": 1.3713768134468557e-05, + "loss": 0.5194098949432373, + "step": 2805 + }, + { + "epoch": 0.8205878052346834, + "grad_norm": 1.3716539173176907, + "learning_rate": 1.370927876097837e-05, + "loss": 0.6033506393432617, + "step": 2806 + }, + { + "epoch": 0.8208802456499488, + "grad_norm": 1.686602588559283, + "learning_rate": 1.3704788520466828e-05, + "loss": 0.6866108179092407, + "step": 2807 + }, + { + "epoch": 0.8211726860652142, + "grad_norm": 1.564205528186879, + "learning_rate": 1.3700297413983492e-05, + "loss": 0.7325261831283569, + "step": 2808 + }, + { + "epoch": 0.8214651264804796, + "grad_norm": 1.531257665763453, + "learning_rate": 1.3695805442578136e-05, + "loss": 0.5422608852386475, + "step": 2809 + }, + { + "epoch": 0.821757566895745, + "grad_norm": 1.5581516895112182, + "learning_rate": 1.369131260730073e-05, + "loss": 0.6124732494354248, + "step": 2810 + }, + { + "epoch": 0.8220500073110104, + "grad_norm": 1.3009124551880797, + "learning_rate": 1.3686818909201442e-05, + "loss": 0.6097716093063354, + "step": 2811 + }, + { + "epoch": 0.8223424477262757, + "grad_norm": 1.302794206877671, + "learning_rate": 1.3682324349330652e-05, + "loss": 0.6283478140830994, + "step": 2812 + }, + { + "epoch": 0.8226348881415412, + "grad_norm": 1.6179042229288885, + "learning_rate": 1.3677828928738934e-05, + "loss": 0.6590027213096619, + "step": 2813 + }, + { 
+ "epoch": 0.8229273285568065, + "grad_norm": 1.5247617474384554, + "learning_rate": 1.3673332648477065e-05, + "loss": 0.6417049169540405, + "step": 2814 + }, + { + "epoch": 0.8232197689720719, + "grad_norm": 1.510678230362789, + "learning_rate": 1.3668835509596023e-05, + "loss": 0.6217149496078491, + "step": 2815 + }, + { + "epoch": 0.8235122093873374, + "grad_norm": 1.9022694632783144, + "learning_rate": 1.3664337513146993e-05, + "loss": 0.7530043125152588, + "step": 2816 + }, + { + "epoch": 0.8238046498026027, + "grad_norm": 1.3235640761468095, + "learning_rate": 1.3659838660181341e-05, + "loss": 0.6690578460693359, + "step": 2817 + }, + { + "epoch": 0.8240970902178681, + "grad_norm": 1.5311368229830338, + "learning_rate": 1.3655338951750657e-05, + "loss": 0.5348777174949646, + "step": 2818 + }, + { + "epoch": 0.8243895306331335, + "grad_norm": 1.494896630136579, + "learning_rate": 1.3650838388906718e-05, + "loss": 0.7076361179351807, + "step": 2819 + }, + { + "epoch": 0.8246819710483989, + "grad_norm": 1.611810759372966, + "learning_rate": 1.3646336972701507e-05, + "loss": 0.6649855375289917, + "step": 2820 + }, + { + "epoch": 0.8249744114636642, + "grad_norm": 1.4188027146347701, + "learning_rate": 1.3641834704187194e-05, + "loss": 0.6484942436218262, + "step": 2821 + }, + { + "epoch": 0.8252668518789297, + "grad_norm": 1.066364944063908, + "learning_rate": 1.3637331584416163e-05, + "loss": 0.5167717337608337, + "step": 2822 + }, + { + "epoch": 0.8255592922941951, + "grad_norm": 1.4320675291883214, + "learning_rate": 1.3632827614440988e-05, + "loss": 0.7808440327644348, + "step": 2823 + }, + { + "epoch": 0.8258517327094604, + "grad_norm": 1.6437853600585473, + "learning_rate": 1.3628322795314449e-05, + "loss": 0.551183819770813, + "step": 2824 + }, + { + "epoch": 0.8261441731247259, + "grad_norm": 1.3439080199790612, + "learning_rate": 1.3623817128089513e-05, + "loss": 0.6084691286087036, + "step": 2825 + }, + { + "epoch": 0.8264366135399912, + "grad_norm": 
1.3974747336185755, + "learning_rate": 1.3619310613819363e-05, + "loss": 0.6251019239425659, + "step": 2826 + }, + { + "epoch": 0.8267290539552566, + "grad_norm": 1.237260204163714, + "learning_rate": 1.3614803253557358e-05, + "loss": 0.5037761926651001, + "step": 2827 + }, + { + "epoch": 0.827021494370522, + "grad_norm": 1.3461097726205675, + "learning_rate": 1.3610295048357072e-05, + "loss": 0.5606831312179565, + "step": 2828 + }, + { + "epoch": 0.8273139347857874, + "grad_norm": 1.3850167464051482, + "learning_rate": 1.360578599927227e-05, + "loss": 0.6664785146713257, + "step": 2829 + }, + { + "epoch": 0.8276063752010527, + "grad_norm": 1.3613746427457352, + "learning_rate": 1.360127610735691e-05, + "loss": 0.7105492353439331, + "step": 2830 + }, + { + "epoch": 0.8278988156163182, + "grad_norm": 1.3577681820511107, + "learning_rate": 1.3596765373665162e-05, + "loss": 0.6255359053611755, + "step": 2831 + }, + { + "epoch": 0.8281912560315836, + "grad_norm": 1.3150522794807806, + "learning_rate": 1.3592253799251377e-05, + "loss": 0.5422149300575256, + "step": 2832 + }, + { + "epoch": 0.8284836964468489, + "grad_norm": 1.4383576380181533, + "learning_rate": 1.3587741385170104e-05, + "loss": 0.6044044494628906, + "step": 2833 + }, + { + "epoch": 0.8287761368621144, + "grad_norm": 1.2478223452248756, + "learning_rate": 1.3583228132476094e-05, + "loss": 0.6256763935089111, + "step": 2834 + }, + { + "epoch": 0.8290685772773797, + "grad_norm": 1.2507601544621354, + "learning_rate": 1.3578714042224297e-05, + "loss": 0.6759064793586731, + "step": 2835 + }, + { + "epoch": 0.8293610176926451, + "grad_norm": 1.3610869198536528, + "learning_rate": 1.3574199115469852e-05, + "loss": 0.5819023251533508, + "step": 2836 + }, + { + "epoch": 0.8296534581079105, + "grad_norm": 1.331505314238688, + "learning_rate": 1.3569683353268098e-05, + "loss": 0.5412642359733582, + "step": 2837 + }, + { + "epoch": 0.8299458985231759, + "grad_norm": 1.0998151045906572, + "learning_rate": 
1.356516675667456e-05, + "loss": 0.5129171013832092, + "step": 2838 + }, + { + "epoch": 0.8302383389384413, + "grad_norm": 1.310393887156268, + "learning_rate": 1.356064932674497e-05, + "loss": 0.5165198445320129, + "step": 2839 + }, + { + "epoch": 0.8305307793537067, + "grad_norm": 1.287643091691659, + "learning_rate": 1.3556131064535249e-05, + "loss": 0.6545724272727966, + "step": 2840 + }, + { + "epoch": 0.8308232197689721, + "grad_norm": 1.2180901867245224, + "learning_rate": 1.3551611971101513e-05, + "loss": 0.5715968608856201, + "step": 2841 + }, + { + "epoch": 0.8311156601842374, + "grad_norm": 1.1619522611517994, + "learning_rate": 1.3547092047500074e-05, + "loss": 0.7063779830932617, + "step": 2842 + }, + { + "epoch": 0.8314081005995029, + "grad_norm": 1.2876429096537105, + "learning_rate": 1.3542571294787437e-05, + "loss": 0.6391212940216064, + "step": 2843 + }, + { + "epoch": 0.8317005410147682, + "grad_norm": 1.3047489403917027, + "learning_rate": 1.3538049714020298e-05, + "loss": 0.7145380973815918, + "step": 2844 + }, + { + "epoch": 0.8319929814300336, + "grad_norm": 1.4749234473747483, + "learning_rate": 1.3533527306255547e-05, + "loss": 0.7262213230133057, + "step": 2845 + }, + { + "epoch": 0.832285421845299, + "grad_norm": 1.5661213009447377, + "learning_rate": 1.3529004072550276e-05, + "loss": 0.7621959447860718, + "step": 2846 + }, + { + "epoch": 0.8325778622605644, + "grad_norm": 1.2349365167185542, + "learning_rate": 1.3524480013961757e-05, + "loss": 0.6372592449188232, + "step": 2847 + }, + { + "epoch": 0.8328703026758298, + "grad_norm": 1.5746526285594844, + "learning_rate": 1.3519955131547469e-05, + "loss": 0.6223774552345276, + "step": 2848 + }, + { + "epoch": 0.8331627430910952, + "grad_norm": 1.3246634087041118, + "learning_rate": 1.3515429426365066e-05, + "loss": 0.6500433683395386, + "step": 2849 + }, + { + "epoch": 0.8334551835063606, + "grad_norm": 1.4424195637381385, + "learning_rate": 1.3510902899472408e-05, + "loss": 
0.6136040687561035, + "step": 2850 + }, + { + "epoch": 0.8337476239216259, + "grad_norm": 1.512738908953339, + "learning_rate": 1.3506375551927546e-05, + "loss": 0.5297173261642456, + "step": 2851 + }, + { + "epoch": 0.8340400643368914, + "grad_norm": 1.4629352546381682, + "learning_rate": 1.3501847384788718e-05, + "loss": 0.6215870976448059, + "step": 2852 + }, + { + "epoch": 0.8343325047521567, + "grad_norm": 1.3184866454725659, + "learning_rate": 1.3497318399114354e-05, + "loss": 0.5507583618164062, + "step": 2853 + }, + { + "epoch": 0.8346249451674221, + "grad_norm": 1.6022185079697295, + "learning_rate": 1.349278859596308e-05, + "loss": 0.6348794102668762, + "step": 2854 + }, + { + "epoch": 0.8349173855826876, + "grad_norm": 1.4038791520130975, + "learning_rate": 1.3488257976393708e-05, + "loss": 0.7009605765342712, + "step": 2855 + }, + { + "epoch": 0.8352098259979529, + "grad_norm": 1.2288500000369813, + "learning_rate": 1.3483726541465238e-05, + "loss": 0.6268658638000488, + "step": 2856 + }, + { + "epoch": 0.8355022664132183, + "grad_norm": 1.1391793971559063, + "learning_rate": 1.3479194292236875e-05, + "loss": 0.7187683582305908, + "step": 2857 + }, + { + "epoch": 0.8357947068284837, + "grad_norm": 1.5724396660128028, + "learning_rate": 1.3474661229768002e-05, + "loss": 0.7016449570655823, + "step": 2858 + }, + { + "epoch": 0.8360871472437491, + "grad_norm": 1.5882858400771258, + "learning_rate": 1.347012735511819e-05, + "loss": 0.5852428674697876, + "step": 2859 + }, + { + "epoch": 0.8363795876590144, + "grad_norm": 1.4143289380031852, + "learning_rate": 1.3465592669347207e-05, + "loss": 0.6232450008392334, + "step": 2860 + }, + { + "epoch": 0.8366720280742799, + "grad_norm": 1.3444277392597084, + "learning_rate": 1.346105717351501e-05, + "loss": 0.526097297668457, + "step": 2861 + }, + { + "epoch": 0.8369644684895453, + "grad_norm": 1.5627282993073515, + "learning_rate": 1.3456520868681741e-05, + "loss": 0.6065535545349121, + "step": 2862 + }, + { + 
"epoch": 0.8372569089048106, + "grad_norm": 1.3941305759607394, + "learning_rate": 1.3451983755907736e-05, + "loss": 0.5836296677589417, + "step": 2863 + }, + { + "epoch": 0.8375493493200761, + "grad_norm": 1.336778139255592, + "learning_rate": 1.3447445836253519e-05, + "loss": 0.678827166557312, + "step": 2864 + }, + { + "epoch": 0.8378417897353414, + "grad_norm": 1.3002974651392025, + "learning_rate": 1.3442907110779794e-05, + "loss": 0.5206096172332764, + "step": 2865 + }, + { + "epoch": 0.8381342301506068, + "grad_norm": 1.3468789034772342, + "learning_rate": 1.3438367580547468e-05, + "loss": 0.6424980163574219, + "step": 2866 + }, + { + "epoch": 0.8384266705658722, + "grad_norm": 1.1467777796306478, + "learning_rate": 1.3433827246617624e-05, + "loss": 0.6293484568595886, + "step": 2867 + }, + { + "epoch": 0.8387191109811376, + "grad_norm": 1.2601562582063903, + "learning_rate": 1.3429286110051539e-05, + "loss": 0.5912167429924011, + "step": 2868 + }, + { + "epoch": 0.8390115513964029, + "grad_norm": 1.5181261084157656, + "learning_rate": 1.342474417191068e-05, + "loss": 0.6571674346923828, + "step": 2869 + }, + { + "epoch": 0.8393039918116684, + "grad_norm": 1.421037061270542, + "learning_rate": 1.342020143325669e-05, + "loss": 0.5519720911979675, + "step": 2870 + }, + { + "epoch": 0.8395964322269338, + "grad_norm": 1.3997247827352193, + "learning_rate": 1.341565789515141e-05, + "loss": 0.6465001106262207, + "step": 2871 + }, + { + "epoch": 0.8398888726421991, + "grad_norm": 1.398359818513133, + "learning_rate": 1.3411113558656865e-05, + "loss": 0.6022073030471802, + "step": 2872 + }, + { + "epoch": 0.8401813130574646, + "grad_norm": 1.361775248337709, + "learning_rate": 1.3406568424835264e-05, + "loss": 0.610893726348877, + "step": 2873 + }, + { + "epoch": 0.8404737534727299, + "grad_norm": 1.427563498701008, + "learning_rate": 1.340202249474901e-05, + "loss": 0.5296563506126404, + "step": 2874 + }, + { + "epoch": 0.8407661938879953, + "grad_norm": 
1.170906744718837, + "learning_rate": 1.3397475769460679e-05, + "loss": 0.6327008605003357, + "step": 2875 + }, + { + "epoch": 0.8410586343032607, + "grad_norm": 1.3517928558744952, + "learning_rate": 1.3392928250033045e-05, + "loss": 0.6437617540359497, + "step": 2876 + }, + { + "epoch": 0.8413510747185261, + "grad_norm": 1.3416431365752262, + "learning_rate": 1.3388379937529063e-05, + "loss": 0.5627291202545166, + "step": 2877 + }, + { + "epoch": 0.8416435151337915, + "grad_norm": 1.3602688623647594, + "learning_rate": 1.3383830833011871e-05, + "loss": 0.5921163558959961, + "step": 2878 + }, + { + "epoch": 0.8419359555490569, + "grad_norm": 1.2033937218328357, + "learning_rate": 1.3379280937544797e-05, + "loss": 0.5749082565307617, + "step": 2879 + }, + { + "epoch": 0.8422283959643223, + "grad_norm": 1.462463173522237, + "learning_rate": 1.3374730252191347e-05, + "loss": 0.6294553279876709, + "step": 2880 + }, + { + "epoch": 0.8425208363795876, + "grad_norm": 1.222130659730857, + "learning_rate": 1.3370178778015223e-05, + "loss": 0.5172078609466553, + "step": 2881 + }, + { + "epoch": 0.8428132767948531, + "grad_norm": 1.3695607626504847, + "learning_rate": 1.3365626516080301e-05, + "loss": 0.44069811701774597, + "step": 2882 + }, + { + "epoch": 0.8431057172101184, + "grad_norm": 1.31704500891114, + "learning_rate": 1.336107346745064e-05, + "loss": 0.72663813829422, + "step": 2883 + }, + { + "epoch": 0.8433981576253838, + "grad_norm": 1.3488066557741722, + "learning_rate": 1.3356519633190495e-05, + "loss": 0.6562269926071167, + "step": 2884 + }, + { + "epoch": 0.8436905980406492, + "grad_norm": 1.3994820366244107, + "learning_rate": 1.3351965014364293e-05, + "loss": 0.699925422668457, + "step": 2885 + }, + { + "epoch": 0.8439830384559146, + "grad_norm": 1.3294441855934318, + "learning_rate": 1.3347409612036651e-05, + "loss": 0.5902425646781921, + "step": 2886 + }, + { + "epoch": 0.84427547887118, + "grad_norm": 1.401705271294413, + "learning_rate": 
1.3342853427272362e-05, + "loss": 0.613966703414917, + "step": 2887 + }, + { + "epoch": 0.8445679192864454, + "grad_norm": 1.2630848315271062, + "learning_rate": 1.333829646113641e-05, + "loss": 0.5864139199256897, + "step": 2888 + }, + { + "epoch": 0.8448603597017108, + "grad_norm": 1.5447722719058155, + "learning_rate": 1.3333738714693958e-05, + "loss": 0.5851572751998901, + "step": 2889 + }, + { + "epoch": 0.8451528001169761, + "grad_norm": 1.4679598706703352, + "learning_rate": 1.3329180189010348e-05, + "loss": 0.6564328074455261, + "step": 2890 + }, + { + "epoch": 0.8454452405322416, + "grad_norm": 1.3794930949186583, + "learning_rate": 1.3324620885151115e-05, + "loss": 0.6745615005493164, + "step": 2891 + }, + { + "epoch": 0.8457376809475069, + "grad_norm": 1.277678612967463, + "learning_rate": 1.3320060804181962e-05, + "loss": 0.5003606081008911, + "step": 2892 + }, + { + "epoch": 0.8460301213627723, + "grad_norm": 1.4995028165986726, + "learning_rate": 1.3315499947168781e-05, + "loss": 0.6646369695663452, + "step": 2893 + }, + { + "epoch": 0.8463225617780378, + "grad_norm": 1.3696086888087433, + "learning_rate": 1.3310938315177647e-05, + "loss": 0.6903572082519531, + "step": 2894 + }, + { + "epoch": 0.8466150021933031, + "grad_norm": 1.633835119151456, + "learning_rate": 1.330637590927481e-05, + "loss": 0.6221956610679626, + "step": 2895 + }, + { + "epoch": 0.8469074426085685, + "grad_norm": 1.5369372818354106, + "learning_rate": 1.3301812730526713e-05, + "loss": 0.5602666139602661, + "step": 2896 + }, + { + "epoch": 0.8471998830238339, + "grad_norm": 1.2910113915198014, + "learning_rate": 1.3297248779999963e-05, + "loss": 0.5843783617019653, + "step": 2897 + }, + { + "epoch": 0.8474923234390993, + "grad_norm": 1.304495064263293, + "learning_rate": 1.3292684058761357e-05, + "loss": 0.5040254592895508, + "step": 2898 + }, + { + "epoch": 0.8477847638543646, + "grad_norm": 1.4968280315795712, + "learning_rate": 1.3288118567877874e-05, + "loss": 
0.6180210709571838, + "step": 2899 + }, + { + "epoch": 0.8480772042696301, + "grad_norm": 1.345230482752467, + "learning_rate": 1.3283552308416668e-05, + "loss": 0.5050851106643677, + "step": 2900 + }, + { + "epoch": 0.8483696446848955, + "grad_norm": 1.604217394640997, + "learning_rate": 1.3278985281445072e-05, + "loss": 0.6627126932144165, + "step": 2901 + }, + { + "epoch": 0.8486620851001608, + "grad_norm": 1.40930260394039, + "learning_rate": 1.3274417488030607e-05, + "loss": 0.5984441041946411, + "step": 2902 + }, + { + "epoch": 0.8489545255154263, + "grad_norm": 1.3584927833580034, + "learning_rate": 1.3269848929240958e-05, + "loss": 0.611599326133728, + "step": 2903 + }, + { + "epoch": 0.8492469659306916, + "grad_norm": 1.4743229169395644, + "learning_rate": 1.3265279606144006e-05, + "loss": 0.6057847142219543, + "step": 2904 + }, + { + "epoch": 0.849539406345957, + "grad_norm": 1.5324921987406994, + "learning_rate": 1.3260709519807797e-05, + "loss": 0.7123644948005676, + "step": 2905 + }, + { + "epoch": 0.8498318467612224, + "grad_norm": 1.4337194400937256, + "learning_rate": 1.3256138671300564e-05, + "loss": 0.6193811893463135, + "step": 2906 + }, + { + "epoch": 0.8501242871764878, + "grad_norm": 1.6102821646068017, + "learning_rate": 1.3251567061690717e-05, + "loss": 0.5775484442710876, + "step": 2907 + }, + { + "epoch": 0.8504167275917531, + "grad_norm": 1.5171257755680165, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.5655511617660522, + "step": 2908 + }, + { + "epoch": 0.8507091680070186, + "grad_norm": 1.8299545213851978, + "learning_rate": 1.3242421563437688e-05, + "loss": 0.6216102838516235, + "step": 2909 + }, + { + "epoch": 0.851001608422284, + "grad_norm": 1.4045274179517395, + "learning_rate": 1.3237847676932217e-05, + "loss": 0.649554967880249, + "step": 2910 + }, + { + "epoch": 0.8512940488375493, + "grad_norm": 1.5965021256139, + "learning_rate": 1.3233273033599534e-05, + "loss": 0.6688281297683716, + "step": 2911 + }, + { + "epoch": 
0.8515864892528148, + "grad_norm": 1.2158705367599922, + "learning_rate": 1.322869763450894e-05, + "loss": 0.664188027381897, + "step": 2912 + }, + { + "epoch": 0.8518789296680801, + "grad_norm": 1.27994094299147, + "learning_rate": 1.3224121480729905e-05, + "loss": 0.47189265489578247, + "step": 2913 + }, + { + "epoch": 0.8521713700833455, + "grad_norm": 1.387813816085696, + "learning_rate": 1.3219544573332075e-05, + "loss": 0.6190480589866638, + "step": 2914 + }, + { + "epoch": 0.8524638104986109, + "grad_norm": 1.3459335682790516, + "learning_rate": 1.3214966913385277e-05, + "loss": 0.6564091444015503, + "step": 2915 + }, + { + "epoch": 0.8527562509138763, + "grad_norm": 1.563994961699158, + "learning_rate": 1.321038850195951e-05, + "loss": 0.6083766222000122, + "step": 2916 + }, + { + "epoch": 0.8530486913291417, + "grad_norm": 1.2689051257322506, + "learning_rate": 1.3205809340124951e-05, + "loss": 0.5262473821640015, + "step": 2917 + }, + { + "epoch": 0.8533411317444071, + "grad_norm": 1.3633671661320785, + "learning_rate": 1.320122942895195e-05, + "loss": 0.6170297861099243, + "step": 2918 + }, + { + "epoch": 0.8536335721596725, + "grad_norm": 1.3838619263880951, + "learning_rate": 1.3196648769511036e-05, + "loss": 0.5791536569595337, + "step": 2919 + }, + { + "epoch": 0.8539260125749378, + "grad_norm": 1.4116909766151964, + "learning_rate": 1.3192067362872904e-05, + "loss": 0.5870766639709473, + "step": 2920 + }, + { + "epoch": 0.8542184529902033, + "grad_norm": 1.5317627298998806, + "learning_rate": 1.3187485210108438e-05, + "loss": 0.604548990726471, + "step": 2921 + }, + { + "epoch": 0.8545108934054686, + "grad_norm": 1.3458362989469688, + "learning_rate": 1.3182902312288682e-05, + "loss": 0.5292568206787109, + "step": 2922 + }, + { + "epoch": 0.854803333820734, + "grad_norm": 1.276264176970529, + "learning_rate": 1.3178318670484862e-05, + "loss": 0.5638582706451416, + "step": 2923 + }, + { + "epoch": 0.8550957742359994, + "grad_norm": 
1.5369089697533718, + "learning_rate": 1.317373428576838e-05, + "loss": 0.5730164051055908, + "step": 2924 + }, + { + "epoch": 0.8553882146512648, + "grad_norm": 1.369500285153578, + "learning_rate": 1.3169149159210803e-05, + "loss": 0.6170799732208252, + "step": 2925 + }, + { + "epoch": 0.8556806550665302, + "grad_norm": 1.3401436683949477, + "learning_rate": 1.3164563291883879e-05, + "loss": 0.591925323009491, + "step": 2926 + }, + { + "epoch": 0.8559730954817956, + "grad_norm": 1.5178314176439451, + "learning_rate": 1.3159976684859528e-05, + "loss": 0.7269439697265625, + "step": 2927 + }, + { + "epoch": 0.856265535897061, + "grad_norm": 1.552203527248451, + "learning_rate": 1.3155389339209839e-05, + "loss": 0.615471363067627, + "step": 2928 + }, + { + "epoch": 0.8565579763123263, + "grad_norm": 1.4397776020126687, + "learning_rate": 1.3150801256007076e-05, + "loss": 0.6264692544937134, + "step": 2929 + }, + { + "epoch": 0.8568504167275918, + "grad_norm": 1.203302342126932, + "learning_rate": 1.314621243632368e-05, + "loss": 0.5729779005050659, + "step": 2930 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 1.3833464526102248, + "learning_rate": 1.314162288123225e-05, + "loss": 0.6462980508804321, + "step": 2931 + }, + { + "epoch": 0.8574352975581225, + "grad_norm": 1.1795102455310789, + "learning_rate": 1.3137032591805577e-05, + "loss": 0.5493176579475403, + "step": 2932 + }, + { + "epoch": 0.857727737973388, + "grad_norm": 1.1422942251299026, + "learning_rate": 1.3132441569116608e-05, + "loss": 0.49161234498023987, + "step": 2933 + }, + { + "epoch": 0.8580201783886533, + "grad_norm": 1.428090020215004, + "learning_rate": 1.312784981423847e-05, + "loss": 0.6724506616592407, + "step": 2934 + }, + { + "epoch": 0.8583126188039187, + "grad_norm": 1.6216709335890533, + "learning_rate": 1.3123257328244455e-05, + "loss": 0.6180965900421143, + "step": 2935 + }, + { + "epoch": 0.8586050592191841, + "grad_norm": 1.6797724821518334, + "learning_rate": 
1.3118664112208027e-05, + "loss": 0.6676491498947144, + "step": 2936 + }, + { + "epoch": 0.8588974996344495, + "grad_norm": 1.1911121778916818, + "learning_rate": 1.3114070167202827e-05, + "loss": 0.5964041948318481, + "step": 2937 + }, + { + "epoch": 0.8591899400497148, + "grad_norm": 1.3660050885815391, + "learning_rate": 1.3109475494302657e-05, + "loss": 0.708328366279602, + "step": 2938 + }, + { + "epoch": 0.8594823804649803, + "grad_norm": 1.6146616988047677, + "learning_rate": 1.3104880094581495e-05, + "loss": 0.6360403299331665, + "step": 2939 + }, + { + "epoch": 0.8597748208802457, + "grad_norm": 1.5628439078603966, + "learning_rate": 1.3100283969113494e-05, + "loss": 0.5450131893157959, + "step": 2940 + }, + { + "epoch": 0.860067261295511, + "grad_norm": 1.2422442713506727, + "learning_rate": 1.3095687118972962e-05, + "loss": 0.4472329020500183, + "step": 2941 + }, + { + "epoch": 0.8603597017107765, + "grad_norm": 1.2824654152788901, + "learning_rate": 1.3091089545234387e-05, + "loss": 0.6853972673416138, + "step": 2942 + }, + { + "epoch": 0.8606521421260418, + "grad_norm": 1.5236765495118778, + "learning_rate": 1.3086491248972429e-05, + "loss": 0.6547979116439819, + "step": 2943 + }, + { + "epoch": 0.8609445825413072, + "grad_norm": 1.2521364069886292, + "learning_rate": 1.3081892231261903e-05, + "loss": 0.46194693446159363, + "step": 2944 + }, + { + "epoch": 0.8612370229565726, + "grad_norm": 1.3749685968664958, + "learning_rate": 1.307729249317781e-05, + "loss": 0.5715345144271851, + "step": 2945 + }, + { + "epoch": 0.861529463371838, + "grad_norm": 1.2925136251134925, + "learning_rate": 1.3072692035795305e-05, + "loss": 0.5590982437133789, + "step": 2946 + }, + { + "epoch": 0.8618219037871033, + "grad_norm": 1.4594997051230878, + "learning_rate": 1.3068090860189719e-05, + "loss": 0.5435009002685547, + "step": 2947 + }, + { + "epoch": 0.8621143442023688, + "grad_norm": 1.1604259212434795, + "learning_rate": 1.3063488967436548e-05, + "loss": 
0.4528965651988983, + "step": 2948 + }, + { + "epoch": 0.8624067846176342, + "grad_norm": 1.1967844606343032, + "learning_rate": 1.3058886358611457e-05, + "loss": 0.5520291328430176, + "step": 2949 + }, + { + "epoch": 0.8626992250328995, + "grad_norm": 1.3959982999797578, + "learning_rate": 1.305428303479028e-05, + "loss": 0.6444021463394165, + "step": 2950 + }, + { + "epoch": 0.862991665448165, + "grad_norm": 1.597979452275331, + "learning_rate": 1.3049678997049016e-05, + "loss": 0.7808041572570801, + "step": 2951 + }, + { + "epoch": 0.8632841058634303, + "grad_norm": 1.6855013913251111, + "learning_rate": 1.3045074246463825e-05, + "loss": 0.6297428607940674, + "step": 2952 + }, + { + "epoch": 0.8635765462786957, + "grad_norm": 1.5203533995419023, + "learning_rate": 1.3040468784111045e-05, + "loss": 0.5776612162590027, + "step": 2953 + }, + { + "epoch": 0.8638689866939611, + "grad_norm": 1.3696314111811954, + "learning_rate": 1.3035862611067169e-05, + "loss": 0.49298524856567383, + "step": 2954 + }, + { + "epoch": 0.8641614271092265, + "grad_norm": 1.7023849342400221, + "learning_rate": 1.303125572840887e-05, + "loss": 0.8061650991439819, + "step": 2955 + }, + { + "epoch": 0.8644538675244919, + "grad_norm": 1.4509821363343893, + "learning_rate": 1.3026648137212976e-05, + "loss": 0.7741662859916687, + "step": 2956 + }, + { + "epoch": 0.8647463079397573, + "grad_norm": 1.350671993753925, + "learning_rate": 1.302203983855648e-05, + "loss": 0.5589889287948608, + "step": 2957 + }, + { + "epoch": 0.8650387483550227, + "grad_norm": 1.400755532782556, + "learning_rate": 1.3017430833516547e-05, + "loss": 0.5801941752433777, + "step": 2958 + }, + { + "epoch": 0.865331188770288, + "grad_norm": 1.3298019485580883, + "learning_rate": 1.30128211231705e-05, + "loss": 0.5874185562133789, + "step": 2959 + }, + { + "epoch": 0.8656236291855535, + "grad_norm": 1.1737111706818832, + "learning_rate": 1.3008210708595837e-05, + "loss": 0.6062727570533752, + "step": 2960 + }, + { + 
"epoch": 0.8659160696008188, + "grad_norm": 1.3334829952801492, + "learning_rate": 1.3003599590870209e-05, + "loss": 0.571448802947998, + "step": 2961 + }, + { + "epoch": 0.8662085100160842, + "grad_norm": 1.3654619359177553, + "learning_rate": 1.2998987771071442e-05, + "loss": 0.7001944780349731, + "step": 2962 + }, + { + "epoch": 0.8665009504313496, + "grad_norm": 1.48577297171421, + "learning_rate": 1.2994375250277516e-05, + "loss": 0.49182790517807007, + "step": 2963 + }, + { + "epoch": 0.866793390846615, + "grad_norm": 1.342673325945858, + "learning_rate": 1.298976202956658e-05, + "loss": 0.5299041271209717, + "step": 2964 + }, + { + "epoch": 0.8670858312618804, + "grad_norm": 1.1975267191215118, + "learning_rate": 1.2985148110016947e-05, + "loss": 0.4955265522003174, + "step": 2965 + }, + { + "epoch": 0.8673782716771458, + "grad_norm": 1.132262479106049, + "learning_rate": 1.2980533492707094e-05, + "loss": 0.6395630836486816, + "step": 2966 + }, + { + "epoch": 0.8676707120924112, + "grad_norm": 1.1303573523984183, + "learning_rate": 1.2975918178715661e-05, + "loss": 0.5926274061203003, + "step": 2967 + }, + { + "epoch": 0.8679631525076765, + "grad_norm": 1.194805436445147, + "learning_rate": 1.2971302169121447e-05, + "loss": 0.5556914806365967, + "step": 2968 + }, + { + "epoch": 0.868255592922942, + "grad_norm": 1.2766981949480176, + "learning_rate": 1.2966685465003415e-05, + "loss": 0.5347195863723755, + "step": 2969 + }, + { + "epoch": 0.8685480333382073, + "grad_norm": 1.3728880032694415, + "learning_rate": 1.2962068067440694e-05, + "loss": 0.6839208006858826, + "step": 2970 + }, + { + "epoch": 0.8688404737534727, + "grad_norm": 1.1132776608061867, + "learning_rate": 1.295744997751257e-05, + "loss": 0.5741337537765503, + "step": 2971 + }, + { + "epoch": 0.8691329141687382, + "grad_norm": 1.536125480269087, + "learning_rate": 1.29528311962985e-05, + "loss": 0.7383404970169067, + "step": 2972 + }, + { + "epoch": 0.8694253545840035, + "grad_norm": 
1.4560088611056379, + "learning_rate": 1.294821172487809e-05, + "loss": 0.5075374245643616, + "step": 2973 + }, + { + "epoch": 0.8697177949992689, + "grad_norm": 1.235849675897421, + "learning_rate": 1.2943591564331113e-05, + "loss": 0.557248592376709, + "step": 2974 + }, + { + "epoch": 0.8700102354145343, + "grad_norm": 1.3655420768672006, + "learning_rate": 1.2938970715737506e-05, + "loss": 0.5687203407287598, + "step": 2975 + }, + { + "epoch": 0.8703026758297997, + "grad_norm": 1.3479345698129241, + "learning_rate": 1.2934349180177364e-05, + "loss": 0.5946108102798462, + "step": 2976 + }, + { + "epoch": 0.870595116245065, + "grad_norm": 1.258994257926457, + "learning_rate": 1.2929726958730942e-05, + "loss": 0.6103173494338989, + "step": 2977 + }, + { + "epoch": 0.8708875566603305, + "grad_norm": 1.4914714674105345, + "learning_rate": 1.2925104052478657e-05, + "loss": 0.7007244825363159, + "step": 2978 + }, + { + "epoch": 0.8711799970755959, + "grad_norm": 1.4140285074261345, + "learning_rate": 1.2920480462501082e-05, + "loss": 0.6157742142677307, + "step": 2979 + }, + { + "epoch": 0.8714724374908612, + "grad_norm": 1.4708644175648395, + "learning_rate": 1.2915856189878956e-05, + "loss": 0.6501113176345825, + "step": 2980 + }, + { + "epoch": 0.8717648779061267, + "grad_norm": 1.2555000815915451, + "learning_rate": 1.2911231235693178e-05, + "loss": 0.5084626078605652, + "step": 2981 + }, + { + "epoch": 0.872057318321392, + "grad_norm": 1.343175395168551, + "learning_rate": 1.2906605601024796e-05, + "loss": 0.5953651666641235, + "step": 2982 + }, + { + "epoch": 0.8723497587366574, + "grad_norm": 1.218776434986359, + "learning_rate": 1.290197928695503e-05, + "loss": 0.5733205676078796, + "step": 2983 + }, + { + "epoch": 0.8726421991519228, + "grad_norm": 1.5420791901099857, + "learning_rate": 1.2897352294565248e-05, + "loss": 0.5976133942604065, + "step": 2984 + }, + { + "epoch": 0.8729346395671882, + "grad_norm": 1.2904353456419873, + "learning_rate": 
1.2892724624936983e-05, + "loss": 0.5092414617538452, + "step": 2985 + }, + { + "epoch": 0.8732270799824535, + "grad_norm": 1.4935525581566107, + "learning_rate": 1.2888096279151926e-05, + "loss": 0.7244688272476196, + "step": 2986 + }, + { + "epoch": 0.873519520397719, + "grad_norm": 1.5818576721862576, + "learning_rate": 1.2883467258291922e-05, + "loss": 0.6943881511688232, + "step": 2987 + }, + { + "epoch": 0.8738119608129844, + "grad_norm": 1.422762914124539, + "learning_rate": 1.287883756343898e-05, + "loss": 0.6484338641166687, + "step": 2988 + }, + { + "epoch": 0.8741044012282497, + "grad_norm": 1.23046146833686, + "learning_rate": 1.2874207195675262e-05, + "loss": 0.620865523815155, + "step": 2989 + }, + { + "epoch": 0.8743968416435152, + "grad_norm": 1.245843663622743, + "learning_rate": 1.2869576156083085e-05, + "loss": 0.5290236473083496, + "step": 2990 + }, + { + "epoch": 0.8746892820587805, + "grad_norm": 1.383695697280258, + "learning_rate": 1.2864944445744932e-05, + "loss": 0.7140257358551025, + "step": 2991 + }, + { + "epoch": 0.8749817224740459, + "grad_norm": 1.3901579888827407, + "learning_rate": 1.286031206574343e-05, + "loss": 0.7167611122131348, + "step": 2992 + }, + { + "epoch": 0.8752741628893113, + "grad_norm": 1.4097752029885913, + "learning_rate": 1.2855679017161372e-05, + "loss": 0.5631322860717773, + "step": 2993 + }, + { + "epoch": 0.8755666033045767, + "grad_norm": 1.4535459078300315, + "learning_rate": 1.2851045301081714e-05, + "loss": 0.6250770092010498, + "step": 2994 + }, + { + "epoch": 0.8758590437198421, + "grad_norm": 1.3041015408341177, + "learning_rate": 1.2846410918587546e-05, + "loss": 0.5121266841888428, + "step": 2995 + }, + { + "epoch": 0.8761514841351075, + "grad_norm": 1.2982813372349626, + "learning_rate": 1.2841775870762134e-05, + "loss": 0.6075780987739563, + "step": 2996 + }, + { + "epoch": 0.8764439245503729, + "grad_norm": 1.2610269556078437, + "learning_rate": 1.283714015868889e-05, + "loss": 0.516838014125824, 
+ "step": 2997 + }, + { + "epoch": 0.8767363649656382, + "grad_norm": 1.3997368275790003, + "learning_rate": 1.2832503783451384e-05, + "loss": 0.6952051520347595, + "step": 2998 + }, + { + "epoch": 0.8770288053809037, + "grad_norm": 1.668277066498958, + "learning_rate": 1.2827866746133342e-05, + "loss": 0.8039685487747192, + "step": 2999 + }, + { + "epoch": 0.877321245796169, + "grad_norm": 1.3275322129226486, + "learning_rate": 1.2823229047818642e-05, + "loss": 0.6200549602508545, + "step": 3000 + }, + { + "epoch": 0.8776136862114344, + "grad_norm": 1.3153882408773916, + "learning_rate": 1.2818590689591315e-05, + "loss": 0.6666116714477539, + "step": 3001 + }, + { + "epoch": 0.8779061266266998, + "grad_norm": 1.4255915593552042, + "learning_rate": 1.2813951672535551e-05, + "loss": 0.566741943359375, + "step": 3002 + }, + { + "epoch": 0.8781985670419652, + "grad_norm": 1.2390037918473238, + "learning_rate": 1.2809311997735697e-05, + "loss": 0.6103402376174927, + "step": 3003 + }, + { + "epoch": 0.8784910074572306, + "grad_norm": 1.1444583076116077, + "learning_rate": 1.280467166627624e-05, + "loss": 0.48296916484832764, + "step": 3004 + }, + { + "epoch": 0.878783447872496, + "grad_norm": 1.4235586871910597, + "learning_rate": 1.2800030679241834e-05, + "loss": 0.5995723605155945, + "step": 3005 + }, + { + "epoch": 0.8790758882877614, + "grad_norm": 1.5173093942193803, + "learning_rate": 1.2795389037717286e-05, + "loss": 0.6199642419815063, + "step": 3006 + }, + { + "epoch": 0.8793683287030267, + "grad_norm": 1.5757356892284924, + "learning_rate": 1.279074674278754e-05, + "loss": 0.6740807294845581, + "step": 3007 + }, + { + "epoch": 0.8796607691182922, + "grad_norm": 1.4923318097982954, + "learning_rate": 1.2786103795537714e-05, + "loss": 0.7330688238143921, + "step": 3008 + }, + { + "epoch": 0.8799532095335575, + "grad_norm": 1.1357910142893406, + "learning_rate": 1.2781460197053066e-05, + "loss": 0.5048441290855408, + "step": 3009 + }, + { + "epoch": 
0.8802456499488229, + "grad_norm": 1.2484561154788956, + "learning_rate": 1.277681594841901e-05, + "loss": 0.6103702187538147, + "step": 3010 + }, + { + "epoch": 0.8805380903640884, + "grad_norm": 1.3117487221252475, + "learning_rate": 1.2772171050721107e-05, + "loss": 0.5223366022109985, + "step": 3011 + }, + { + "epoch": 0.8808305307793537, + "grad_norm": 1.5806437295259135, + "learning_rate": 1.2767525505045078e-05, + "loss": 0.708305835723877, + "step": 3012 + }, + { + "epoch": 0.8811229711946191, + "grad_norm": 1.324207789268205, + "learning_rate": 1.2762879312476785e-05, + "loss": 0.6827911734580994, + "step": 3013 + }, + { + "epoch": 0.8814154116098845, + "grad_norm": 1.7302207886555443, + "learning_rate": 1.2758232474102254e-05, + "loss": 0.6977027654647827, + "step": 3014 + }, + { + "epoch": 0.8817078520251499, + "grad_norm": 1.235299173012923, + "learning_rate": 1.2753584991007654e-05, + "loss": 0.5534720420837402, + "step": 3015 + }, + { + "epoch": 0.8820002924404152, + "grad_norm": 1.1722300923390174, + "learning_rate": 1.2748936864279305e-05, + "loss": 0.541682243347168, + "step": 3016 + }, + { + "epoch": 0.8822927328556807, + "grad_norm": 1.4134630737456748, + "learning_rate": 1.2744288095003674e-05, + "loss": 0.6195456981658936, + "step": 3017 + }, + { + "epoch": 0.8825851732709461, + "grad_norm": 1.1963339495389647, + "learning_rate": 1.2739638684267387e-05, + "loss": 0.5050234794616699, + "step": 3018 + }, + { + "epoch": 0.8828776136862114, + "grad_norm": 1.1967088542641229, + "learning_rate": 1.2734988633157218e-05, + "loss": 0.5397066473960876, + "step": 3019 + }, + { + "epoch": 0.8831700541014769, + "grad_norm": 1.3480056981854442, + "learning_rate": 1.273033794276008e-05, + "loss": 0.5932190418243408, + "step": 3020 + }, + { + "epoch": 0.8834624945167422, + "grad_norm": 1.2383533139434324, + "learning_rate": 1.2725686614163055e-05, + "loss": 0.5780059099197388, + "step": 3021 + }, + { + "epoch": 0.8837549349320076, + "grad_norm": 
1.4379159594856536, + "learning_rate": 1.2721034648453353e-05, + "loss": 0.5850226879119873, + "step": 3022 + }, + { + "epoch": 0.884047375347273, + "grad_norm": 1.351057706249645, + "learning_rate": 1.2716382046718346e-05, + "loss": 0.6684393882751465, + "step": 3023 + }, + { + "epoch": 0.8843398157625384, + "grad_norm": 1.3578422906902012, + "learning_rate": 1.271172881004555e-05, + "loss": 0.6045842170715332, + "step": 3024 + }, + { + "epoch": 0.8846322561778037, + "grad_norm": 1.4246831207517041, + "learning_rate": 1.2707074939522633e-05, + "loss": 0.6769551038742065, + "step": 3025 + }, + { + "epoch": 0.8849246965930692, + "grad_norm": 1.541147063192512, + "learning_rate": 1.2702420436237408e-05, + "loss": 0.5581091642379761, + "step": 3026 + }, + { + "epoch": 0.8852171370083346, + "grad_norm": 1.381695049653859, + "learning_rate": 1.269776530127784e-05, + "loss": 0.5010186433792114, + "step": 3027 + }, + { + "epoch": 0.8855095774235999, + "grad_norm": 1.3620137613749654, + "learning_rate": 1.2693109535732034e-05, + "loss": 0.4537884294986725, + "step": 3028 + }, + { + "epoch": 0.8858020178388654, + "grad_norm": 1.155156838639785, + "learning_rate": 1.2688453140688246e-05, + "loss": 0.5920443534851074, + "step": 3029 + }, + { + "epoch": 0.8860944582541307, + "grad_norm": 1.4193738144287875, + "learning_rate": 1.2683796117234884e-05, + "loss": 0.564072847366333, + "step": 3030 + }, + { + "epoch": 0.8863868986693961, + "grad_norm": 1.3206014730711304, + "learning_rate": 1.26791384664605e-05, + "loss": 0.657585620880127, + "step": 3031 + }, + { + "epoch": 0.8866793390846615, + "grad_norm": 1.4850669504718117, + "learning_rate": 1.2674480189453786e-05, + "loss": 0.6864298582077026, + "step": 3032 + }, + { + "epoch": 0.8869717794999269, + "grad_norm": 1.4143994971740543, + "learning_rate": 1.266982128730359e-05, + "loss": 0.6416069865226746, + "step": 3033 + }, + { + "epoch": 0.8872642199151923, + "grad_norm": 1.3298070008922416, + "learning_rate": 
1.2665161761098899e-05, + "loss": 0.6405118703842163, + "step": 3034 + }, + { + "epoch": 0.8875566603304577, + "grad_norm": 1.4036133965159712, + "learning_rate": 1.266050161192885e-05, + "loss": 0.649673342704773, + "step": 3035 + }, + { + "epoch": 0.8878491007457231, + "grad_norm": 1.359043965576467, + "learning_rate": 1.2655840840882729e-05, + "loss": 0.5914620161056519, + "step": 3036 + }, + { + "epoch": 0.8881415411609884, + "grad_norm": 1.5837746169822255, + "learning_rate": 1.2651179449049958e-05, + "loss": 0.6080621480941772, + "step": 3037 + }, + { + "epoch": 0.8884339815762539, + "grad_norm": 1.5302588008128089, + "learning_rate": 1.264651743752011e-05, + "loss": 0.657015860080719, + "step": 3038 + }, + { + "epoch": 0.8887264219915192, + "grad_norm": 1.3603604072518423, + "learning_rate": 1.26418548073829e-05, + "loss": 0.5384848713874817, + "step": 3039 + }, + { + "epoch": 0.8890188624067846, + "grad_norm": 1.5457096573294893, + "learning_rate": 1.2637191559728195e-05, + "loss": 0.7452554106712341, + "step": 3040 + }, + { + "epoch": 0.88931130282205, + "grad_norm": 1.4411555623785637, + "learning_rate": 1.2632527695645993e-05, + "loss": 0.743236780166626, + "step": 3041 + }, + { + "epoch": 0.8896037432373154, + "grad_norm": 1.5417347407679962, + "learning_rate": 1.2627863216226453e-05, + "loss": 0.557692289352417, + "step": 3042 + }, + { + "epoch": 0.8898961836525808, + "grad_norm": 1.3302198914823486, + "learning_rate": 1.2623198122559863e-05, + "loss": 0.5637259483337402, + "step": 3043 + }, + { + "epoch": 0.8901886240678462, + "grad_norm": 1.4403910054587767, + "learning_rate": 1.261853241573666e-05, + "loss": 0.5217350721359253, + "step": 3044 + }, + { + "epoch": 0.8904810644831116, + "grad_norm": 1.4659582389098327, + "learning_rate": 1.2613866096847423e-05, + "loss": 0.5971624255180359, + "step": 3045 + }, + { + "epoch": 0.8907735048983769, + "grad_norm": 1.5641010174504344, + "learning_rate": 1.260919916698288e-05, + "loss": 0.6586427092552185, + 
"step": 3046 + }, + { + "epoch": 0.8910659453136424, + "grad_norm": 1.8045032510726307, + "learning_rate": 1.2604531627233895e-05, + "loss": 0.7059915661811829, + "step": 3047 + }, + { + "epoch": 0.8913583857289077, + "grad_norm": 1.3406441666811264, + "learning_rate": 1.2599863478691483e-05, + "loss": 0.582252025604248, + "step": 3048 + }, + { + "epoch": 0.8916508261441731, + "grad_norm": 1.2760858553291834, + "learning_rate": 1.2595194722446786e-05, + "loss": 0.6901981830596924, + "step": 3049 + }, + { + "epoch": 0.8919432665594386, + "grad_norm": 1.5789638647855007, + "learning_rate": 1.2590525359591101e-05, + "loss": 0.7462388873100281, + "step": 3050 + }, + { + "epoch": 0.8922357069747039, + "grad_norm": 1.1893369289763132, + "learning_rate": 1.2585855391215866e-05, + "loss": 0.4963245391845703, + "step": 3051 + }, + { + "epoch": 0.8925281473899693, + "grad_norm": 1.427293357699651, + "learning_rate": 1.2581184818412655e-05, + "loss": 0.6408337354660034, + "step": 3052 + }, + { + "epoch": 0.8928205878052347, + "grad_norm": 1.3357664905418998, + "learning_rate": 1.257651364227319e-05, + "loss": 0.44528326392173767, + "step": 3053 + }, + { + "epoch": 0.8931130282205001, + "grad_norm": 1.4527206031665332, + "learning_rate": 1.2571841863889322e-05, + "loss": 0.4595017731189728, + "step": 3054 + }, + { + "epoch": 0.8934054686357654, + "grad_norm": 1.435143014894245, + "learning_rate": 1.2567169484353057e-05, + "loss": 0.6934910416603088, + "step": 3055 + }, + { + "epoch": 0.8936979090510309, + "grad_norm": 1.3543177360296097, + "learning_rate": 1.2562496504756535e-05, + "loss": 0.6392845511436462, + "step": 3056 + }, + { + "epoch": 0.8939903494662963, + "grad_norm": 1.3638361282130094, + "learning_rate": 1.255782292619203e-05, + "loss": 0.5506458878517151, + "step": 3057 + }, + { + "epoch": 0.8942827898815616, + "grad_norm": 1.3861859212756857, + "learning_rate": 1.255314874975197e-05, + "loss": 0.5871223211288452, + "step": 3058 + }, + { + "epoch": 
0.8945752302968271, + "grad_norm": 1.4446737131271559, + "learning_rate": 1.254847397652892e-05, + "loss": 0.603033185005188, + "step": 3059 + }, + { + "epoch": 0.8948676707120924, + "grad_norm": 1.4764688506929942, + "learning_rate": 1.2543798607615566e-05, + "loss": 0.667452335357666, + "step": 3060 + }, + { + "epoch": 0.8951601111273578, + "grad_norm": 1.5052245195755742, + "learning_rate": 1.2539122644104755e-05, + "loss": 0.6264449954032898, + "step": 3061 + }, + { + "epoch": 0.8954525515426232, + "grad_norm": 1.2694525054193362, + "learning_rate": 1.2534446087089465e-05, + "loss": 0.6085609793663025, + "step": 3062 + }, + { + "epoch": 0.8957449919578886, + "grad_norm": 1.5027824768205942, + "learning_rate": 1.252976893766281e-05, + "loss": 0.6414828896522522, + "step": 3063 + }, + { + "epoch": 0.8960374323731539, + "grad_norm": 1.5067492390612103, + "learning_rate": 1.2525091196918049e-05, + "loss": 0.714614987373352, + "step": 3064 + }, + { + "epoch": 0.8963298727884194, + "grad_norm": 1.4473594871396505, + "learning_rate": 1.2520412865948574e-05, + "loss": 0.5966176986694336, + "step": 3065 + }, + { + "epoch": 0.8966223132036848, + "grad_norm": 1.234582474772498, + "learning_rate": 1.2515733945847914e-05, + "loss": 0.5162957906723022, + "step": 3066 + }, + { + "epoch": 0.8969147536189501, + "grad_norm": 1.5378382727824902, + "learning_rate": 1.2511054437709743e-05, + "loss": 0.6460821628570557, + "step": 3067 + }, + { + "epoch": 0.8972071940342156, + "grad_norm": 1.3526579806372556, + "learning_rate": 1.2506374342627861e-05, + "loss": 0.6802507638931274, + "step": 3068 + }, + { + "epoch": 0.8974996344494809, + "grad_norm": 1.4306769896677902, + "learning_rate": 1.2501693661696218e-05, + "loss": 0.5966957807540894, + "step": 3069 + }, + { + "epoch": 0.8977920748647463, + "grad_norm": 1.336293797847081, + "learning_rate": 1.2497012396008893e-05, + "loss": 0.607227087020874, + "step": 3070 + }, + { + "epoch": 0.8980845152800117, + "grad_norm": 
1.360686606627987, + "learning_rate": 1.2492330546660098e-05, + "loss": 0.6544637084007263, + "step": 3071 + }, + { + "epoch": 0.8983769556952771, + "grad_norm": 1.410133865972111, + "learning_rate": 1.2487648114744196e-05, + "loss": 0.5896593332290649, + "step": 3072 + }, + { + "epoch": 0.8986693961105425, + "grad_norm": 1.296908458370691, + "learning_rate": 1.248296510135567e-05, + "loss": 0.5710231065750122, + "step": 3073 + }, + { + "epoch": 0.8989618365258079, + "grad_norm": 1.2057046094411794, + "learning_rate": 1.2478281507589147e-05, + "loss": 0.5918926000595093, + "step": 3074 + }, + { + "epoch": 0.8992542769410733, + "grad_norm": 1.5306817529094334, + "learning_rate": 1.2473597334539392e-05, + "loss": 0.681663453578949, + "step": 3075 + }, + { + "epoch": 0.8995467173563386, + "grad_norm": 1.2671727964507529, + "learning_rate": 1.24689125833013e-05, + "loss": 0.5229436159133911, + "step": 3076 + }, + { + "epoch": 0.8998391577716041, + "grad_norm": 1.5769374861363958, + "learning_rate": 1.2464227254969903e-05, + "loss": 0.7165119051933289, + "step": 3077 + }, + { + "epoch": 0.9001315981868694, + "grad_norm": 1.2324966791017462, + "learning_rate": 1.2459541350640368e-05, + "loss": 0.514594554901123, + "step": 3078 + }, + { + "epoch": 0.9004240386021348, + "grad_norm": 1.4144268048636097, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.6173784732818604, + "step": 3079 + }, + { + "epoch": 0.9007164790174002, + "grad_norm": 1.6555744107314199, + "learning_rate": 1.245016781836822e-05, + "loss": 0.6796407103538513, + "step": 3080 + }, + { + "epoch": 0.9010089194326656, + "grad_norm": 1.3666754181554102, + "learning_rate": 1.2445480192616619e-05, + "loss": 0.6901683807373047, + "step": 3081 + }, + { + "epoch": 0.901301359847931, + "grad_norm": 1.295839204252469, + "learning_rate": 1.2440791995248886e-05, + "loss": 0.6215920448303223, + "step": 3082 + }, + { + "epoch": 0.9015938002631964, + "grad_norm": 1.29381925555321, + "learning_rate": 
1.243610322736087e-05, + "loss": 0.6109690070152283, + "step": 3083 + }, + { + "epoch": 0.9018862406784618, + "grad_norm": 1.3751453546430485, + "learning_rate": 1.2431413890048534e-05, + "loss": 0.5273362398147583, + "step": 3084 + }, + { + "epoch": 0.9021786810937271, + "grad_norm": 1.197511083408015, + "learning_rate": 1.2426723984407982e-05, + "loss": 0.5219408273696899, + "step": 3085 + }, + { + "epoch": 0.9024711215089926, + "grad_norm": 1.4389803986869047, + "learning_rate": 1.2422033511535458e-05, + "loss": 0.6894690990447998, + "step": 3086 + }, + { + "epoch": 0.9027635619242579, + "grad_norm": 1.2949596320128054, + "learning_rate": 1.2417342472527325e-05, + "loss": 0.6135656833648682, + "step": 3087 + }, + { + "epoch": 0.9030560023395233, + "grad_norm": 1.4997841327771624, + "learning_rate": 1.2412650868480088e-05, + "loss": 0.595108151435852, + "step": 3088 + }, + { + "epoch": 0.9033484427547888, + "grad_norm": 1.4068106482758378, + "learning_rate": 1.2407958700490376e-05, + "loss": 0.6445261240005493, + "step": 3089 + }, + { + "epoch": 0.9036408831700541, + "grad_norm": 1.1391728287440939, + "learning_rate": 1.240326596965496e-05, + "loss": 0.5601890087127686, + "step": 3090 + }, + { + "epoch": 0.9039333235853195, + "grad_norm": 1.4556896662499954, + "learning_rate": 1.239857267707074e-05, + "loss": 0.6229134798049927, + "step": 3091 + }, + { + "epoch": 0.9042257640005849, + "grad_norm": 1.3633245090329542, + "learning_rate": 1.2393878823834737e-05, + "loss": 0.5769803524017334, + "step": 3092 + }, + { + "epoch": 0.9045182044158503, + "grad_norm": 1.5373386649577192, + "learning_rate": 1.2389184411044113e-05, + "loss": 0.8101233243942261, + "step": 3093 + }, + { + "epoch": 0.9048106448311156, + "grad_norm": 1.3507156228218853, + "learning_rate": 1.2384489439796159e-05, + "loss": 0.5562945604324341, + "step": 3094 + }, + { + "epoch": 0.9051030852463811, + "grad_norm": 1.6942487879562902, + "learning_rate": 1.2379793911188299e-05, + "loss": 
0.5764975547790527, + "step": 3095 + }, + { + "epoch": 0.9053955256616465, + "grad_norm": 1.4280019855873591, + "learning_rate": 1.2375097826318079e-05, + "loss": 0.5951659083366394, + "step": 3096 + }, + { + "epoch": 0.9056879660769118, + "grad_norm": 1.3804272066554735, + "learning_rate": 1.2370401186283186e-05, + "loss": 0.5550940632820129, + "step": 3097 + }, + { + "epoch": 0.9059804064921773, + "grad_norm": 1.5012418323017303, + "learning_rate": 1.2365703992181425e-05, + "loss": 0.5423737168312073, + "step": 3098 + }, + { + "epoch": 0.9062728469074426, + "grad_norm": 1.3277873552974655, + "learning_rate": 1.236100624511074e-05, + "loss": 0.633366048336029, + "step": 3099 + }, + { + "epoch": 0.906565287322708, + "grad_norm": 1.2113954677804317, + "learning_rate": 1.2356307946169202e-05, + "loss": 0.6067361831665039, + "step": 3100 + }, + { + "epoch": 0.9068577277379734, + "grad_norm": 1.1800329005672614, + "learning_rate": 1.2351609096455006e-05, + "loss": 0.6039519309997559, + "step": 3101 + }, + { + "epoch": 0.9071501681532388, + "grad_norm": 1.3373962705942997, + "learning_rate": 1.2346909697066486e-05, + "loss": 0.5643757581710815, + "step": 3102 + }, + { + "epoch": 0.9074426085685041, + "grad_norm": 1.4963223668806274, + "learning_rate": 1.2342209749102088e-05, + "loss": 0.5406394004821777, + "step": 3103 + }, + { + "epoch": 0.9077350489837696, + "grad_norm": 1.3377232980381308, + "learning_rate": 1.2337509253660404e-05, + "loss": 0.5845915079116821, + "step": 3104 + }, + { + "epoch": 0.908027489399035, + "grad_norm": 1.614536146442758, + "learning_rate": 1.2332808211840147e-05, + "loss": 0.6912981271743774, + "step": 3105 + }, + { + "epoch": 0.9083199298143003, + "grad_norm": 1.2433178855630291, + "learning_rate": 1.2328106624740151e-05, + "loss": 0.5571672320365906, + "step": 3106 + }, + { + "epoch": 0.9086123702295658, + "grad_norm": 1.0866011599268561, + "learning_rate": 1.2323404493459386e-05, + "loss": 0.5219087600708008, + "step": 3107 + }, + { + 
"epoch": 0.9089048106448311, + "grad_norm": 1.3110052749572634, + "learning_rate": 1.2318701819096952e-05, + "loss": 0.5780971050262451, + "step": 3108 + }, + { + "epoch": 0.9091972510600965, + "grad_norm": 1.4311943893173962, + "learning_rate": 1.2313998602752063e-05, + "loss": 0.6206589937210083, + "step": 3109 + }, + { + "epoch": 0.9094896914753618, + "grad_norm": 1.4768884476442792, + "learning_rate": 1.2309294845524068e-05, + "loss": 0.6063584089279175, + "step": 3110 + }, + { + "epoch": 0.9097821318906273, + "grad_norm": 1.7547035202334638, + "learning_rate": 1.2304590548512445e-05, + "loss": 0.5733555555343628, + "step": 3111 + }, + { + "epoch": 0.9100745723058927, + "grad_norm": 1.0786362412869268, + "learning_rate": 1.2299885712816792e-05, + "loss": 0.5227848887443542, + "step": 3112 + }, + { + "epoch": 0.910367012721158, + "grad_norm": 1.3268713618037162, + "learning_rate": 1.2295180339536839e-05, + "loss": 0.6357969045639038, + "step": 3113 + }, + { + "epoch": 0.9106594531364235, + "grad_norm": 1.4243975329678797, + "learning_rate": 1.2290474429772438e-05, + "loss": 0.6194056272506714, + "step": 3114 + }, + { + "epoch": 0.9109518935516888, + "grad_norm": 1.3151715542581663, + "learning_rate": 1.2285767984623563e-05, + "loss": 0.5274733304977417, + "step": 3115 + }, + { + "epoch": 0.9112443339669543, + "grad_norm": 1.370068266036648, + "learning_rate": 1.228106100519032e-05, + "loss": 0.5612698197364807, + "step": 3116 + }, + { + "epoch": 0.9115367743822196, + "grad_norm": 1.578530779654035, + "learning_rate": 1.2276353492572937e-05, + "loss": 0.6261074542999268, + "step": 3117 + }, + { + "epoch": 0.911829214797485, + "grad_norm": 1.2011662273206838, + "learning_rate": 1.2271645447871764e-05, + "loss": 0.6407681703567505, + "step": 3118 + }, + { + "epoch": 0.9121216552127503, + "grad_norm": 1.869370443317622, + "learning_rate": 1.226693687218728e-05, + "loss": 0.7862328290939331, + "step": 3119 + }, + { + "epoch": 0.9124140956280158, + "grad_norm": 
1.4175623746202768, + "learning_rate": 1.2262227766620083e-05, + "loss": 0.5079205632209778, + "step": 3120 + }, + { + "epoch": 0.9127065360432812, + "grad_norm": 1.5666620241066453, + "learning_rate": 1.2257518132270903e-05, + "loss": 0.6074210405349731, + "step": 3121 + }, + { + "epoch": 0.9129989764585466, + "grad_norm": 1.5222891825114737, + "learning_rate": 1.2252807970240582e-05, + "loss": 0.642460823059082, + "step": 3122 + }, + { + "epoch": 0.913291416873812, + "grad_norm": 1.5105961127505823, + "learning_rate": 1.22480972816301e-05, + "loss": 0.5996612310409546, + "step": 3123 + }, + { + "epoch": 0.9135838572890773, + "grad_norm": 1.4191755584361432, + "learning_rate": 1.2243386067540548e-05, + "loss": 0.5629523992538452, + "step": 3124 + }, + { + "epoch": 0.9138762977043428, + "grad_norm": 1.488297008451051, + "learning_rate": 1.223867432907314e-05, + "loss": 0.5794960260391235, + "step": 3125 + }, + { + "epoch": 0.9141687381196081, + "grad_norm": 1.4839380471480481, + "learning_rate": 1.2233962067329217e-05, + "loss": 0.6665213108062744, + "step": 3126 + }, + { + "epoch": 0.9144611785348735, + "grad_norm": 1.7069185609011637, + "learning_rate": 1.2229249283410245e-05, + "loss": 0.6834249496459961, + "step": 3127 + }, + { + "epoch": 0.914753618950139, + "grad_norm": 1.472483487554638, + "learning_rate": 1.2224535978417809e-05, + "loss": 0.5709845423698425, + "step": 3128 + }, + { + "epoch": 0.9150460593654043, + "grad_norm": 1.3783113695609808, + "learning_rate": 1.2219822153453613e-05, + "loss": 0.5455344915390015, + "step": 3129 + }, + { + "epoch": 0.9153384997806697, + "grad_norm": 1.5138708664001599, + "learning_rate": 1.2215107809619483e-05, + "loss": 0.6291406154632568, + "step": 3130 + }, + { + "epoch": 0.915630940195935, + "grad_norm": 1.340686035335307, + "learning_rate": 1.2210392948017371e-05, + "loss": 0.5953069925308228, + "step": 3131 + }, + { + "epoch": 0.9159233806112005, + "grad_norm": 1.3390197673162056, + "learning_rate": 
1.2205677569749347e-05, + "loss": 0.6958901882171631, + "step": 3132 + }, + { + "epoch": 0.9162158210264658, + "grad_norm": 2.251590691230911, + "learning_rate": 1.2200961675917605e-05, + "loss": 0.5867033004760742, + "step": 3133 + }, + { + "epoch": 0.9165082614417313, + "grad_norm": 1.2167957981489814, + "learning_rate": 1.2196245267624449e-05, + "loss": 0.5364042520523071, + "step": 3134 + }, + { + "epoch": 0.9168007018569967, + "grad_norm": 1.0997310314063415, + "learning_rate": 1.2191528345972318e-05, + "loss": 0.5141438841819763, + "step": 3135 + }, + { + "epoch": 0.917093142272262, + "grad_norm": 1.1435709173541644, + "learning_rate": 1.218681091206376e-05, + "loss": 0.5024605393409729, + "step": 3136 + }, + { + "epoch": 0.9173855826875275, + "grad_norm": 1.4583614763595478, + "learning_rate": 1.2182092967001447e-05, + "loss": 0.567114531993866, + "step": 3137 + }, + { + "epoch": 0.9176780231027928, + "grad_norm": 1.4993671644221835, + "learning_rate": 1.217737451188817e-05, + "loss": 0.7224113941192627, + "step": 3138 + }, + { + "epoch": 0.9179704635180582, + "grad_norm": 1.368376715547139, + "learning_rate": 1.2172655547826839e-05, + "loss": 0.6033936738967896, + "step": 3139 + }, + { + "epoch": 0.9182629039333235, + "grad_norm": 1.4327847369216065, + "learning_rate": 1.2167936075920486e-05, + "loss": 0.5555745363235474, + "step": 3140 + }, + { + "epoch": 0.918555344348589, + "grad_norm": 1.1757378939927343, + "learning_rate": 1.2163216097272255e-05, + "loss": 0.5939170718193054, + "step": 3141 + }, + { + "epoch": 0.9188477847638543, + "grad_norm": 1.49535441688526, + "learning_rate": 1.2158495612985415e-05, + "loss": 0.7141895294189453, + "step": 3142 + }, + { + "epoch": 0.9191402251791198, + "grad_norm": 1.5558405168210478, + "learning_rate": 1.2153774624163345e-05, + "loss": 0.585646390914917, + "step": 3143 + }, + { + "epoch": 0.9194326655943852, + "grad_norm": 1.114182805953909, + "learning_rate": 1.2149053131909556e-05, + "loss": 0.5378825068473816, 
+ "step": 3144 + }, + { + "epoch": 0.9197251060096505, + "grad_norm": 1.383902731385194, + "learning_rate": 1.2144331137327663e-05, + "loss": 0.569821834564209, + "step": 3145 + }, + { + "epoch": 0.920017546424916, + "grad_norm": 1.6457891792908532, + "learning_rate": 1.2139608641521406e-05, + "loss": 0.6101462244987488, + "step": 3146 + }, + { + "epoch": 0.9203099868401813, + "grad_norm": 1.2016357640033675, + "learning_rate": 1.2134885645594637e-05, + "loss": 0.5481746792793274, + "step": 3147 + }, + { + "epoch": 0.9206024272554467, + "grad_norm": 1.538402380383642, + "learning_rate": 1.2130162150651326e-05, + "loss": 0.7075197696685791, + "step": 3148 + }, + { + "epoch": 0.920894867670712, + "grad_norm": 1.7217246005422928, + "learning_rate": 1.2125438157795567e-05, + "loss": 0.6375464200973511, + "step": 3149 + }, + { + "epoch": 0.9211873080859775, + "grad_norm": 1.3850395600859229, + "learning_rate": 1.2120713668131558e-05, + "loss": 0.6954327821731567, + "step": 3150 + }, + { + "epoch": 0.9214797485012429, + "grad_norm": 1.3658544095341296, + "learning_rate": 1.2115988682763626e-05, + "loss": 0.5855636596679688, + "step": 3151 + }, + { + "epoch": 0.9217721889165083, + "grad_norm": 1.4751760026778278, + "learning_rate": 1.2111263202796206e-05, + "loss": 0.6056143641471863, + "step": 3152 + }, + { + "epoch": 0.9220646293317737, + "grad_norm": 1.551741495670365, + "learning_rate": 1.2106537229333848e-05, + "loss": 0.7918239831924438, + "step": 3153 + }, + { + "epoch": 0.922357069747039, + "grad_norm": 1.7033588700340108, + "learning_rate": 1.2101810763481218e-05, + "loss": 0.7772212028503418, + "step": 3154 + }, + { + "epoch": 0.9226495101623045, + "grad_norm": 1.511966147005096, + "learning_rate": 1.2097083806343104e-05, + "loss": 0.6332443356513977, + "step": 3155 + }, + { + "epoch": 0.9229419505775698, + "grad_norm": 1.358434184305942, + "learning_rate": 1.2092356359024399e-05, + "loss": 0.6254568099975586, + "step": 3156 + }, + { + "epoch": 
0.9232343909928352, + "grad_norm": 1.5630990314712985, + "learning_rate": 1.208762842263012e-05, + "loss": 0.6178697347640991, + "step": 3157 + }, + { + "epoch": 0.9235268314081005, + "grad_norm": 1.1998616171531247, + "learning_rate": 1.2082899998265387e-05, + "loss": 0.5049355030059814, + "step": 3158 + }, + { + "epoch": 0.923819271823366, + "grad_norm": 1.4513160919924062, + "learning_rate": 1.2078171087035444e-05, + "loss": 0.7013234496116638, + "step": 3159 + }, + { + "epoch": 0.9241117122386314, + "grad_norm": 1.4119575222677514, + "learning_rate": 1.2073441690045647e-05, + "loss": 0.576643705368042, + "step": 3160 + }, + { + "epoch": 0.9244041526538967, + "grad_norm": 1.2307321356514476, + "learning_rate": 1.2068711808401459e-05, + "loss": 0.5163617134094238, + "step": 3161 + }, + { + "epoch": 0.9246965930691622, + "grad_norm": 1.39625806011197, + "learning_rate": 1.2063981443208466e-05, + "loss": 0.571370005607605, + "step": 3162 + }, + { + "epoch": 0.9249890334844275, + "grad_norm": 1.3814954844513003, + "learning_rate": 1.2059250595572358e-05, + "loss": 0.7424927949905396, + "step": 3163 + }, + { + "epoch": 0.925281473899693, + "grad_norm": 1.398481393831642, + "learning_rate": 1.2054519266598946e-05, + "loss": 0.6661131381988525, + "step": 3164 + }, + { + "epoch": 0.9255739143149583, + "grad_norm": 1.382448951979987, + "learning_rate": 1.2049787457394145e-05, + "loss": 0.6416351795196533, + "step": 3165 + }, + { + "epoch": 0.9258663547302237, + "grad_norm": 1.5012000035545232, + "learning_rate": 1.2045055169063988e-05, + "loss": 0.6708394289016724, + "step": 3166 + }, + { + "epoch": 0.9261587951454892, + "grad_norm": 1.5269915566780659, + "learning_rate": 1.2040322402714624e-05, + "loss": 0.536340057849884, + "step": 3167 + }, + { + "epoch": 0.9264512355607545, + "grad_norm": 1.4556897812811458, + "learning_rate": 1.20355891594523e-05, + "loss": 0.5621340274810791, + "step": 3168 + }, + { + "epoch": 0.9267436759760199, + "grad_norm": 1.274628172323648, + 
"learning_rate": 1.2030855440383387e-05, + "loss": 0.5972496271133423, + "step": 3169 + }, + { + "epoch": 0.9270361163912852, + "grad_norm": 1.4230845419048714, + "learning_rate": 1.2026121246614362e-05, + "loss": 0.567542314529419, + "step": 3170 + }, + { + "epoch": 0.9273285568065507, + "grad_norm": 1.092340586033623, + "learning_rate": 1.2021386579251814e-05, + "loss": 0.5487483739852905, + "step": 3171 + }, + { + "epoch": 0.927620997221816, + "grad_norm": 1.6219751059797927, + "learning_rate": 1.2016651439402445e-05, + "loss": 0.7988057136535645, + "step": 3172 + }, + { + "epoch": 0.9279134376370815, + "grad_norm": 1.2231171520157942, + "learning_rate": 1.2011915828173066e-05, + "loss": 0.5333850979804993, + "step": 3173 + }, + { + "epoch": 0.9282058780523469, + "grad_norm": 1.1146388373256622, + "learning_rate": 1.2007179746670592e-05, + "loss": 0.5640296936035156, + "step": 3174 + }, + { + "epoch": 0.9284983184676122, + "grad_norm": 1.7918188640848236, + "learning_rate": 1.2002443196002057e-05, + "loss": 0.7154449820518494, + "step": 3175 + }, + { + "epoch": 0.9287907588828777, + "grad_norm": 1.533684329230312, + "learning_rate": 1.1997706177274597e-05, + "loss": 0.8660446405410767, + "step": 3176 + }, + { + "epoch": 0.929083199298143, + "grad_norm": 1.498753630747748, + "learning_rate": 1.1992968691595465e-05, + "loss": 0.601166307926178, + "step": 3177 + }, + { + "epoch": 0.9293756397134084, + "grad_norm": 1.4563708289231845, + "learning_rate": 1.1988230740072022e-05, + "loss": 0.6197638511657715, + "step": 3178 + }, + { + "epoch": 0.9296680801286737, + "grad_norm": 1.2218794629813654, + "learning_rate": 1.198349232381173e-05, + "loss": 0.5716423988342285, + "step": 3179 + }, + { + "epoch": 0.9299605205439392, + "grad_norm": 1.1601969521725652, + "learning_rate": 1.197875344392217e-05, + "loss": 0.4319373071193695, + "step": 3180 + }, + { + "epoch": 0.9302529609592045, + "grad_norm": 1.3226372570662766, + "learning_rate": 1.1974014101511018e-05, + "loss": 
0.5299028158187866, + "step": 3181 + }, + { + "epoch": 0.93054540137447, + "grad_norm": 1.4024951088839022, + "learning_rate": 1.1969274297686075e-05, + "loss": 0.7085509300231934, + "step": 3182 + }, + { + "epoch": 0.9308378417897354, + "grad_norm": 1.3237854936063287, + "learning_rate": 1.1964534033555237e-05, + "loss": 0.6025770902633667, + "step": 3183 + }, + { + "epoch": 0.9311302822050007, + "grad_norm": 1.2585066067859425, + "learning_rate": 1.1959793310226518e-05, + "loss": 0.5624677538871765, + "step": 3184 + }, + { + "epoch": 0.9314227226202662, + "grad_norm": 1.3607236544497474, + "learning_rate": 1.1955052128808025e-05, + "loss": 0.602645754814148, + "step": 3185 + }, + { + "epoch": 0.9317151630355315, + "grad_norm": 1.5196424442530971, + "learning_rate": 1.1950310490407984e-05, + "loss": 0.6495026350021362, + "step": 3186 + }, + { + "epoch": 0.9320076034507969, + "grad_norm": 1.2037819566859902, + "learning_rate": 1.1945568396134721e-05, + "loss": 0.50370192527771, + "step": 3187 + }, + { + "epoch": 0.9323000438660622, + "grad_norm": 1.4578860564520788, + "learning_rate": 1.1940825847096677e-05, + "loss": 0.5717373490333557, + "step": 3188 + }, + { + "epoch": 0.9325924842813277, + "grad_norm": 1.2463647398252022, + "learning_rate": 1.1936082844402395e-05, + "loss": 0.5863519310951233, + "step": 3189 + }, + { + "epoch": 0.9328849246965931, + "grad_norm": 1.3634372027202455, + "learning_rate": 1.1931339389160516e-05, + "loss": 0.6607284545898438, + "step": 3190 + }, + { + "epoch": 0.9331773651118584, + "grad_norm": 1.2667041686104175, + "learning_rate": 1.1926595482479799e-05, + "loss": 0.5578058958053589, + "step": 3191 + }, + { + "epoch": 0.9334698055271239, + "grad_norm": 1.577459199872034, + "learning_rate": 1.19218511254691e-05, + "loss": 0.6839171648025513, + "step": 3192 + }, + { + "epoch": 0.9337622459423892, + "grad_norm": 1.4197717809462, + "learning_rate": 1.1917106319237386e-05, + "loss": 0.5071141719818115, + "step": 3193 + }, + { + "epoch": 
0.9340546863576547, + "grad_norm": 1.3302825340941604, + "learning_rate": 1.1912361064893726e-05, + "loss": 0.5112525820732117, + "step": 3194 + }, + { + "epoch": 0.93434712677292, + "grad_norm": 1.3701575961238917, + "learning_rate": 1.1907615363547299e-05, + "loss": 0.5661873817443848, + "step": 3195 + }, + { + "epoch": 0.9346395671881854, + "grad_norm": 1.3078991902724904, + "learning_rate": 1.190286921630737e-05, + "loss": 0.5520195364952087, + "step": 3196 + }, + { + "epoch": 0.9349320076034507, + "grad_norm": 1.1923433518822224, + "learning_rate": 1.1898122624283337e-05, + "loss": 0.560089111328125, + "step": 3197 + }, + { + "epoch": 0.9352244480187162, + "grad_norm": 1.3393482355065873, + "learning_rate": 1.1893375588584681e-05, + "loss": 0.6431207656860352, + "step": 3198 + }, + { + "epoch": 0.9355168884339816, + "grad_norm": 1.6025933525200546, + "learning_rate": 1.1888628110320995e-05, + "loss": 0.7365666031837463, + "step": 3199 + }, + { + "epoch": 0.935809328849247, + "grad_norm": 1.5181397488734587, + "learning_rate": 1.1883880190601968e-05, + "loss": 0.5455417633056641, + "step": 3200 + }, + { + "epoch": 0.9361017692645124, + "grad_norm": 1.2648151177686433, + "learning_rate": 1.1879131830537403e-05, + "loss": 0.5749938488006592, + "step": 3201 + }, + { + "epoch": 0.9363942096797777, + "grad_norm": 1.4774526931967815, + "learning_rate": 1.1874383031237196e-05, + "loss": 0.588424563407898, + "step": 3202 + }, + { + "epoch": 0.9366866500950431, + "grad_norm": 1.7045519601542285, + "learning_rate": 1.1869633793811352e-05, + "loss": 0.7039792537689209, + "step": 3203 + }, + { + "epoch": 0.9369790905103085, + "grad_norm": 1.3777530310932211, + "learning_rate": 1.1864884119369977e-05, + "loss": 0.5972777009010315, + "step": 3204 + }, + { + "epoch": 0.9372715309255739, + "grad_norm": 1.5348242749242778, + "learning_rate": 1.1860134009023281e-05, + "loss": 0.6510647535324097, + "step": 3205 + }, + { + "epoch": 0.9375639713408394, + "grad_norm": 
1.3174058455781212, + "learning_rate": 1.1855383463881566e-05, + "loss": 0.606874406337738, + "step": 3206 + }, + { + "epoch": 0.9378564117561047, + "grad_norm": 1.4675285988638056, + "learning_rate": 1.1850632485055247e-05, + "loss": 0.5527048110961914, + "step": 3207 + }, + { + "epoch": 0.9381488521713701, + "grad_norm": 1.3531723389548285, + "learning_rate": 1.1845881073654838e-05, + "loss": 0.6297399997711182, + "step": 3208 + }, + { + "epoch": 0.9384412925866354, + "grad_norm": 1.4561464002236073, + "learning_rate": 1.184112923079095e-05, + "loss": 0.5852634310722351, + "step": 3209 + }, + { + "epoch": 0.9387337330019009, + "grad_norm": 1.276124242645333, + "learning_rate": 1.1836376957574301e-05, + "loss": 0.5648211240768433, + "step": 3210 + }, + { + "epoch": 0.9390261734171662, + "grad_norm": 1.4542765956455581, + "learning_rate": 1.1831624255115703e-05, + "loss": 0.5547506213188171, + "step": 3211 + }, + { + "epoch": 0.9393186138324316, + "grad_norm": 1.3882723904405088, + "learning_rate": 1.1826871124526072e-05, + "loss": 0.5927829146385193, + "step": 3212 + }, + { + "epoch": 0.9396110542476971, + "grad_norm": 1.4870159815211654, + "learning_rate": 1.182211756691642e-05, + "loss": 0.5705278515815735, + "step": 3213 + }, + { + "epoch": 0.9399034946629624, + "grad_norm": 1.3481561389317809, + "learning_rate": 1.1817363583397868e-05, + "loss": 0.547038197517395, + "step": 3214 + }, + { + "epoch": 0.9401959350782279, + "grad_norm": 1.6799026497887648, + "learning_rate": 1.1812609175081626e-05, + "loss": 0.6136760115623474, + "step": 3215 + }, + { + "epoch": 0.9404883754934932, + "grad_norm": 1.3697737055687615, + "learning_rate": 1.1807854343079015e-05, + "loss": 0.5784845352172852, + "step": 3216 + }, + { + "epoch": 0.9407808159087586, + "grad_norm": 1.306268521565337, + "learning_rate": 1.1803099088501439e-05, + "loss": 0.6629599332809448, + "step": 3217 + }, + { + "epoch": 0.9410732563240239, + "grad_norm": 1.3560413521315915, + "learning_rate": 
1.1798343412460416e-05, + "loss": 0.6058052778244019, + "step": 3218 + }, + { + "epoch": 0.9413656967392894, + "grad_norm": 1.236587656133179, + "learning_rate": 1.1793587316067552e-05, + "loss": 0.5689725875854492, + "step": 3219 + }, + { + "epoch": 0.9416581371545547, + "grad_norm": 1.2722209400014248, + "learning_rate": 1.1788830800434561e-05, + "loss": 0.5718861818313599, + "step": 3220 + }, + { + "epoch": 0.9419505775698201, + "grad_norm": 1.4517063699959183, + "learning_rate": 1.1784073866673245e-05, + "loss": 0.6061254739761353, + "step": 3221 + }, + { + "epoch": 0.9422430179850856, + "grad_norm": 1.3732176542504997, + "learning_rate": 1.1779316515895511e-05, + "loss": 0.6805517077445984, + "step": 3222 + }, + { + "epoch": 0.9425354584003509, + "grad_norm": 1.3828844754339646, + "learning_rate": 1.1774558749213358e-05, + "loss": 0.5553466081619263, + "step": 3223 + }, + { + "epoch": 0.9428278988156163, + "grad_norm": 1.2173236944216692, + "learning_rate": 1.176980056773889e-05, + "loss": 0.6408798694610596, + "step": 3224 + }, + { + "epoch": 0.9431203392308817, + "grad_norm": 1.222815565053331, + "learning_rate": 1.1765041972584296e-05, + "loss": 0.5269505381584167, + "step": 3225 + }, + { + "epoch": 0.9434127796461471, + "grad_norm": 1.424391391794669, + "learning_rate": 1.1760282964861873e-05, + "loss": 0.682415246963501, + "step": 3226 + }, + { + "epoch": 0.9437052200614124, + "grad_norm": 1.4623421356805024, + "learning_rate": 1.1755523545684016e-05, + "loss": 0.507567286491394, + "step": 3227 + }, + { + "epoch": 0.9439976604766779, + "grad_norm": 1.4192334343942388, + "learning_rate": 1.1750763716163199e-05, + "loss": 0.6977763175964355, + "step": 3228 + }, + { + "epoch": 0.9442901008919433, + "grad_norm": 1.3754010773945908, + "learning_rate": 1.1746003477412007e-05, + "loss": 0.5626407861709595, + "step": 3229 + }, + { + "epoch": 0.9445825413072086, + "grad_norm": 1.537446067568307, + "learning_rate": 1.1741242830543118e-05, + "loss": 
0.5280323624610901, + "step": 3230 + }, + { + "epoch": 0.9448749817224741, + "grad_norm": 1.564549447099706, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.6236885190010071, + "step": 3231 + }, + { + "epoch": 0.9451674221377394, + "grad_norm": 1.2957140073878561, + "learning_rate": 1.1731720316903435e-05, + "loss": 0.5250823497772217, + "step": 3232 + }, + { + "epoch": 0.9454598625530048, + "grad_norm": 1.3562245135276858, + "learning_rate": 1.1726958452358472e-05, + "loss": 0.5885770320892334, + "step": 3233 + }, + { + "epoch": 0.9457523029682702, + "grad_norm": 1.5466392002562799, + "learning_rate": 1.1722196184147467e-05, + "loss": 0.7812498807907104, + "step": 3234 + }, + { + "epoch": 0.9460447433835356, + "grad_norm": 2.1182720670568678, + "learning_rate": 1.1717433513383575e-05, + "loss": 0.6763796210289001, + "step": 3235 + }, + { + "epoch": 0.9463371837988009, + "grad_norm": 1.4130641179603503, + "learning_rate": 1.1712670441180045e-05, + "loss": 0.5983982682228088, + "step": 3236 + }, + { + "epoch": 0.9466296242140664, + "grad_norm": 1.4075974845813908, + "learning_rate": 1.1707906968650214e-05, + "loss": 0.6665002107620239, + "step": 3237 + }, + { + "epoch": 0.9469220646293318, + "grad_norm": 1.3129047594602676, + "learning_rate": 1.1703143096907507e-05, + "loss": 0.7676652669906616, + "step": 3238 + }, + { + "epoch": 0.9472145050445971, + "grad_norm": 1.552106023331421, + "learning_rate": 1.1698378827065461e-05, + "loss": 0.710014820098877, + "step": 3239 + }, + { + "epoch": 0.9475069454598626, + "grad_norm": 1.3709978679968329, + "learning_rate": 1.169361416023769e-05, + "loss": 0.5800554752349854, + "step": 3240 + }, + { + "epoch": 0.9477993858751279, + "grad_norm": 1.2790925568283578, + "learning_rate": 1.1688849097537904e-05, + "loss": 0.602012574672699, + "step": 3241 + }, + { + "epoch": 0.9480918262903933, + "grad_norm": 1.4089569844293444, + "learning_rate": 1.1684083640079912e-05, + "loss": 0.4943910241127014, + "step": 3242 + }, + { + 
"epoch": 0.9483842667056587, + "grad_norm": 1.3173293444454082, + "learning_rate": 1.1679317788977609e-05, + "loss": 0.49094298481941223, + "step": 3243 + }, + { + "epoch": 0.9486767071209241, + "grad_norm": 1.1684708220820899, + "learning_rate": 1.1674551545344983e-05, + "loss": 0.46416157484054565, + "step": 3244 + }, + { + "epoch": 0.9489691475361896, + "grad_norm": 1.3422229221849986, + "learning_rate": 1.1669784910296114e-05, + "loss": 0.5170255899429321, + "step": 3245 + }, + { + "epoch": 0.9492615879514549, + "grad_norm": 1.3467691134757651, + "learning_rate": 1.1665017884945174e-05, + "loss": 0.7673200368881226, + "step": 3246 + }, + { + "epoch": 0.9495540283667203, + "grad_norm": 1.194998950326605, + "learning_rate": 1.1660250470406426e-05, + "loss": 0.49335333704948425, + "step": 3247 + }, + { + "epoch": 0.9498464687819856, + "grad_norm": 1.5055569823397887, + "learning_rate": 1.1655482667794228e-05, + "loss": 0.6620640754699707, + "step": 3248 + }, + { + "epoch": 0.9501389091972511, + "grad_norm": 1.5536985980342881, + "learning_rate": 1.1650714478223022e-05, + "loss": 0.600047767162323, + "step": 3249 + }, + { + "epoch": 0.9504313496125164, + "grad_norm": 1.449375702915225, + "learning_rate": 1.164594590280734e-05, + "loss": 0.668572187423706, + "step": 3250 + }, + { + "epoch": 0.9507237900277818, + "grad_norm": 1.28696773590094, + "learning_rate": 1.1641176942661812e-05, + "loss": 0.4460945725440979, + "step": 3251 + }, + { + "epoch": 0.9510162304430473, + "grad_norm": 1.553130185640807, + "learning_rate": 1.1636407598901154e-05, + "loss": 0.6650545597076416, + "step": 3252 + }, + { + "epoch": 0.9513086708583126, + "grad_norm": 1.4537452557116313, + "learning_rate": 1.1631637872640166e-05, + "loss": 0.5631237030029297, + "step": 3253 + }, + { + "epoch": 0.951601111273578, + "grad_norm": 1.2642307643713007, + "learning_rate": 1.162686776499375e-05, + "loss": 0.650580883026123, + "step": 3254 + }, + { + "epoch": 0.9518935516888434, + "grad_norm": 
1.2808622379645098, + "learning_rate": 1.1622097277076883e-05, + "loss": 0.5606606602668762, + "step": 3255 + }, + { + "epoch": 0.9521859921041088, + "grad_norm": 1.6059525544711786, + "learning_rate": 1.1617326410004639e-05, + "loss": 0.667366623878479, + "step": 3256 + }, + { + "epoch": 0.9524784325193741, + "grad_norm": 1.2848877829061671, + "learning_rate": 1.1612555164892181e-05, + "loss": 0.5895084738731384, + "step": 3257 + }, + { + "epoch": 0.9527708729346396, + "grad_norm": 1.3031742059601414, + "learning_rate": 1.1607783542854759e-05, + "loss": 0.6468119025230408, + "step": 3258 + }, + { + "epoch": 0.9530633133499049, + "grad_norm": 1.567653748749065, + "learning_rate": 1.1603011545007708e-05, + "loss": 0.7178056240081787, + "step": 3259 + }, + { + "epoch": 0.9533557537651703, + "grad_norm": 1.0796246328531958, + "learning_rate": 1.1598239172466457e-05, + "loss": 0.42994585633277893, + "step": 3260 + }, + { + "epoch": 0.9536481941804358, + "grad_norm": 1.3208710287997751, + "learning_rate": 1.1593466426346513e-05, + "loss": 0.4939822554588318, + "step": 3261 + }, + { + "epoch": 0.9539406345957011, + "grad_norm": 1.4828958620285886, + "learning_rate": 1.1588693307763483e-05, + "loss": 0.4252137839794159, + "step": 3262 + }, + { + "epoch": 0.9542330750109665, + "grad_norm": 1.4293991408504185, + "learning_rate": 1.1583919817833051e-05, + "loss": 0.5772995948791504, + "step": 3263 + }, + { + "epoch": 0.9545255154262319, + "grad_norm": 1.4892265763022432, + "learning_rate": 1.1579145957670992e-05, + "loss": 0.6784560680389404, + "step": 3264 + }, + { + "epoch": 0.9548179558414973, + "grad_norm": 1.4340903064465058, + "learning_rate": 1.1574371728393169e-05, + "loss": 0.5373483896255493, + "step": 3265 + }, + { + "epoch": 0.9551103962567626, + "grad_norm": 1.5590731671081544, + "learning_rate": 1.1569597131115523e-05, + "loss": 0.7517837285995483, + "step": 3266 + }, + { + "epoch": 0.9554028366720281, + "grad_norm": 1.2323534514024168, + "learning_rate": 
1.1564822166954092e-05, + "loss": 0.6715551614761353, + "step": 3267 + }, + { + "epoch": 0.9556952770872935, + "grad_norm": 1.5740418428519831, + "learning_rate": 1.1560046837024994e-05, + "loss": 0.6892265677452087, + "step": 3268 + }, + { + "epoch": 0.9559877175025588, + "grad_norm": 1.1845546480418727, + "learning_rate": 1.1555271142444433e-05, + "loss": 0.5564894676208496, + "step": 3269 + }, + { + "epoch": 0.9562801579178243, + "grad_norm": 1.4735106062071393, + "learning_rate": 1.15504950843287e-05, + "loss": 0.6211465001106262, + "step": 3270 + }, + { + "epoch": 0.9565725983330896, + "grad_norm": 1.360797371118281, + "learning_rate": 1.1545718663794165e-05, + "loss": 0.6189093589782715, + "step": 3271 + }, + { + "epoch": 0.956865038748355, + "grad_norm": 1.332461163898103, + "learning_rate": 1.1540941881957293e-05, + "loss": 0.6600508689880371, + "step": 3272 + }, + { + "epoch": 0.9571574791636204, + "grad_norm": 1.1722369932825303, + "learning_rate": 1.1536164739934626e-05, + "loss": 0.5891202688217163, + "step": 3273 + }, + { + "epoch": 0.9574499195788858, + "grad_norm": 1.450456789269031, + "learning_rate": 1.1531387238842788e-05, + "loss": 0.5996856093406677, + "step": 3274 + }, + { + "epoch": 0.9577423599941511, + "grad_norm": 1.3947581203143906, + "learning_rate": 1.15266093797985e-05, + "loss": 0.5645085573196411, + "step": 3275 + }, + { + "epoch": 0.9580348004094166, + "grad_norm": 1.3192013477387883, + "learning_rate": 1.1521831163918545e-05, + "loss": 0.5934250354766846, + "step": 3276 + }, + { + "epoch": 0.958327240824682, + "grad_norm": 1.3125475487560205, + "learning_rate": 1.151705259231981e-05, + "loss": 0.6659657955169678, + "step": 3277 + }, + { + "epoch": 0.9586196812399473, + "grad_norm": 1.4439329469838202, + "learning_rate": 1.1512273666119255e-05, + "loss": 0.518921434879303, + "step": 3278 + }, + { + "epoch": 0.9589121216552128, + "grad_norm": 1.5520324796179028, + "learning_rate": 1.1507494386433927e-05, + "loss": 0.6015551686286926, 
+ "step": 3279 + }, + { + "epoch": 0.9592045620704781, + "grad_norm": 1.3864839845404684, + "learning_rate": 1.150271475438095e-05, + "loss": 0.5590265393257141, + "step": 3280 + }, + { + "epoch": 0.9594970024857435, + "grad_norm": 2.135782810317134, + "learning_rate": 1.149793477107754e-05, + "loss": 0.5820340514183044, + "step": 3281 + }, + { + "epoch": 0.9597894429010089, + "grad_norm": 1.5263684685914536, + "learning_rate": 1.1493154437640981e-05, + "loss": 0.5356709957122803, + "step": 3282 + }, + { + "epoch": 0.9600818833162743, + "grad_norm": 1.6754028625571513, + "learning_rate": 1.1488373755188651e-05, + "loss": 0.7024146318435669, + "step": 3283 + }, + { + "epoch": 0.9603743237315397, + "grad_norm": 1.1672092433368113, + "learning_rate": 1.1483592724838007e-05, + "loss": 0.4929785132408142, + "step": 3284 + }, + { + "epoch": 0.9606667641468051, + "grad_norm": 1.288237919875972, + "learning_rate": 1.147881134770658e-05, + "loss": 0.6902902126312256, + "step": 3285 + }, + { + "epoch": 0.9609592045620705, + "grad_norm": 1.3348356135288268, + "learning_rate": 1.1474029624911997e-05, + "loss": 0.5339258313179016, + "step": 3286 + }, + { + "epoch": 0.9612516449773358, + "grad_norm": 1.4657145875756896, + "learning_rate": 1.146924755757195e-05, + "loss": 0.6998730897903442, + "step": 3287 + }, + { + "epoch": 0.9615440853926013, + "grad_norm": 1.257948537764273, + "learning_rate": 1.1464465146804218e-05, + "loss": 0.6174519062042236, + "step": 3288 + }, + { + "epoch": 0.9618365258078666, + "grad_norm": 1.812192547108516, + "learning_rate": 1.145968239372666e-05, + "loss": 0.5395258665084839, + "step": 3289 + }, + { + "epoch": 0.962128966223132, + "grad_norm": 1.4759469600623887, + "learning_rate": 1.1454899299457221e-05, + "loss": 0.6355341672897339, + "step": 3290 + }, + { + "epoch": 0.9624214066383975, + "grad_norm": 1.519697305957534, + "learning_rate": 1.1450115865113916e-05, + "loss": 0.5315179228782654, + "step": 3291 + }, + { + "epoch": 0.9627138470536628, 
+ "grad_norm": 1.468105168017502, + "learning_rate": 1.1445332091814844e-05, + "loss": 0.5595142841339111, + "step": 3292 + }, + { + "epoch": 0.9630062874689282, + "grad_norm": 1.2033736096293444, + "learning_rate": 1.1440547980678185e-05, + "loss": 0.5509291291236877, + "step": 3293 + }, + { + "epoch": 0.9632987278841936, + "grad_norm": 1.5381505996084959, + "learning_rate": 1.1435763532822191e-05, + "loss": 0.6831322908401489, + "step": 3294 + }, + { + "epoch": 0.963591168299459, + "grad_norm": 1.3733453232745707, + "learning_rate": 1.1430978749365203e-05, + "loss": 0.5494598150253296, + "step": 3295 + }, + { + "epoch": 0.9638836087147243, + "grad_norm": 1.498661160088125, + "learning_rate": 1.142619363142563e-05, + "loss": 0.5613550543785095, + "step": 3296 + }, + { + "epoch": 0.9641760491299898, + "grad_norm": 1.5212850266198317, + "learning_rate": 1.1421408180121972e-05, + "loss": 0.656089186668396, + "step": 3297 + }, + { + "epoch": 0.9644684895452551, + "grad_norm": 1.1510410875603876, + "learning_rate": 1.1416622396572791e-05, + "loss": 0.5913431644439697, + "step": 3298 + }, + { + "epoch": 0.9647609299605205, + "grad_norm": 1.3644056514467953, + "learning_rate": 1.1411836281896737e-05, + "loss": 0.6706565022468567, + "step": 3299 + }, + { + "epoch": 0.965053370375786, + "grad_norm": 1.3661421058655916, + "learning_rate": 1.1407049837212539e-05, + "loss": 0.6169217824935913, + "step": 3300 + }, + { + "epoch": 0.9653458107910513, + "grad_norm": 1.2988460072876178, + "learning_rate": 1.1402263063638994e-05, + "loss": 0.5516680479049683, + "step": 3301 + }, + { + "epoch": 0.9656382512063167, + "grad_norm": 1.2914486970247845, + "learning_rate": 1.1397475962294986e-05, + "loss": 0.7105098962783813, + "step": 3302 + }, + { + "epoch": 0.9659306916215821, + "grad_norm": 1.5297340917133426, + "learning_rate": 1.139268853429947e-05, + "loss": 0.6183327436447144, + "step": 3303 + }, + { + "epoch": 0.9662231320368475, + "grad_norm": 1.4183780196378124, + 
"learning_rate": 1.1387900780771472e-05, + "loss": 0.6160033941268921, + "step": 3304 + }, + { + "epoch": 0.9665155724521128, + "grad_norm": 1.4212044707464202, + "learning_rate": 1.1383112702830108e-05, + "loss": 0.5526994466781616, + "step": 3305 + }, + { + "epoch": 0.9668080128673783, + "grad_norm": 1.381901469460175, + "learning_rate": 1.137832430159456e-05, + "loss": 0.5476477742195129, + "step": 3306 + }, + { + "epoch": 0.9671004532826437, + "grad_norm": 1.3794404018811846, + "learning_rate": 1.1373535578184083e-05, + "loss": 0.558393657207489, + "step": 3307 + }, + { + "epoch": 0.967392893697909, + "grad_norm": 1.4577860579810487, + "learning_rate": 1.1368746533718017e-05, + "loss": 0.6302276849746704, + "step": 3308 + }, + { + "epoch": 0.9676853341131745, + "grad_norm": 1.2805956031485568, + "learning_rate": 1.1363957169315773e-05, + "loss": 0.619697630405426, + "step": 3309 + }, + { + "epoch": 0.9679777745284398, + "grad_norm": 1.4119075289775231, + "learning_rate": 1.135916748609683e-05, + "loss": 0.564563512802124, + "step": 3310 + }, + { + "epoch": 0.9682702149437052, + "grad_norm": 1.6014783450991135, + "learning_rate": 1.1354377485180756e-05, + "loss": 0.6238751411437988, + "step": 3311 + }, + { + "epoch": 0.9685626553589706, + "grad_norm": 1.4620948350058627, + "learning_rate": 1.1349587167687177e-05, + "loss": 0.8079221844673157, + "step": 3312 + }, + { + "epoch": 0.968855095774236, + "grad_norm": 1.4034979651528738, + "learning_rate": 1.1344796534735805e-05, + "loss": 0.5547629594802856, + "step": 3313 + }, + { + "epoch": 0.9691475361895013, + "grad_norm": 1.2187187942390127, + "learning_rate": 1.134000558744642e-05, + "loss": 0.630042552947998, + "step": 3314 + }, + { + "epoch": 0.9694399766047668, + "grad_norm": 1.284912675244452, + "learning_rate": 1.1335214326938872e-05, + "loss": 0.5283412337303162, + "step": 3315 + }, + { + "epoch": 0.9697324170200322, + "grad_norm": 1.3484514955842084, + "learning_rate": 1.1330422754333097e-05, + "loss": 
0.6356452703475952, + "step": 3316 + }, + { + "epoch": 0.9700248574352975, + "grad_norm": 1.265116321608699, + "learning_rate": 1.132563087074909e-05, + "loss": 0.6531886458396912, + "step": 3317 + }, + { + "epoch": 0.970317297850563, + "grad_norm": 1.6209665553722108, + "learning_rate": 1.1320838677306927e-05, + "loss": 0.5725178718566895, + "step": 3318 + }, + { + "epoch": 0.9706097382658283, + "grad_norm": 1.460783947968998, + "learning_rate": 1.1316046175126758e-05, + "loss": 0.6341495513916016, + "step": 3319 + }, + { + "epoch": 0.9709021786810937, + "grad_norm": 1.428850290510927, + "learning_rate": 1.1311253365328794e-05, + "loss": 0.5792768597602844, + "step": 3320 + }, + { + "epoch": 0.9711946190963591, + "grad_norm": 1.2539734431492524, + "learning_rate": 1.1306460249033326e-05, + "loss": 0.5495700836181641, + "step": 3321 + }, + { + "epoch": 0.9714870595116245, + "grad_norm": 1.3779597112573112, + "learning_rate": 1.1301666827360721e-05, + "loss": 0.7092291116714478, + "step": 3322 + }, + { + "epoch": 0.97177949992689, + "grad_norm": 1.210154083257435, + "learning_rate": 1.1296873101431409e-05, + "loss": 0.5368257761001587, + "step": 3323 + }, + { + "epoch": 0.9720719403421553, + "grad_norm": 1.2901315838159502, + "learning_rate": 1.1292079072365898e-05, + "loss": 0.6116393804550171, + "step": 3324 + }, + { + "epoch": 0.9723643807574207, + "grad_norm": 1.6375876584807947, + "learning_rate": 1.1287284741284757e-05, + "loss": 0.5654028654098511, + "step": 3325 + }, + { + "epoch": 0.972656821172686, + "grad_norm": 1.4007947938241085, + "learning_rate": 1.1282490109308633e-05, + "loss": 0.6436389684677124, + "step": 3326 + }, + { + "epoch": 0.9729492615879515, + "grad_norm": 1.6286174854172328, + "learning_rate": 1.1277695177558243e-05, + "loss": 0.7687330842018127, + "step": 3327 + }, + { + "epoch": 0.9732417020032168, + "grad_norm": 1.3338540478099405, + "learning_rate": 1.1272899947154377e-05, + "loss": 0.5350443124771118, + "step": 3328 + }, + { + 
"epoch": 0.9735341424184822, + "grad_norm": 1.5528633763871835, + "learning_rate": 1.1268104419217884e-05, + "loss": 0.6032785773277283, + "step": 3329 + }, + { + "epoch": 0.9738265828337477, + "grad_norm": 1.410347655987774, + "learning_rate": 1.1263308594869697e-05, + "loss": 0.5756093263626099, + "step": 3330 + }, + { + "epoch": 0.974119023249013, + "grad_norm": 1.5831169693775362, + "learning_rate": 1.1258512475230807e-05, + "loss": 0.6977418065071106, + "step": 3331 + }, + { + "epoch": 0.9744114636642784, + "grad_norm": 1.3726893652594243, + "learning_rate": 1.1253716061422275e-05, + "loss": 0.5409448146820068, + "step": 3332 + }, + { + "epoch": 0.9747039040795438, + "grad_norm": 1.3626349639764654, + "learning_rate": 1.1248919354565237e-05, + "loss": 0.5863862037658691, + "step": 3333 + }, + { + "epoch": 0.9749963444948092, + "grad_norm": 1.313934697737098, + "learning_rate": 1.1244122355780895e-05, + "loss": 0.6039433479309082, + "step": 3334 + }, + { + "epoch": 0.9752887849100745, + "grad_norm": 1.4813691831553626, + "learning_rate": 1.1239325066190513e-05, + "loss": 0.6696581840515137, + "step": 3335 + }, + { + "epoch": 0.97558122532534, + "grad_norm": 1.5159715106591773, + "learning_rate": 1.1234527486915439e-05, + "loss": 0.6308715343475342, + "step": 3336 + }, + { + "epoch": 0.9758736657406053, + "grad_norm": 1.4927391317525602, + "learning_rate": 1.1229729619077065e-05, + "loss": 0.580268383026123, + "step": 3337 + }, + { + "epoch": 0.9761661061558707, + "grad_norm": 1.775582999909584, + "learning_rate": 1.1224931463796871e-05, + "loss": 0.8080834746360779, + "step": 3338 + }, + { + "epoch": 0.9764585465711362, + "grad_norm": 1.3814988427954438, + "learning_rate": 1.1220133022196395e-05, + "loss": 0.4933619499206543, + "step": 3339 + }, + { + "epoch": 0.9767509869864015, + "grad_norm": 1.26412210808527, + "learning_rate": 1.1215334295397244e-05, + "loss": 0.5639102458953857, + "step": 3340 + }, + { + "epoch": 0.9770434274016669, + "grad_norm": 
1.3947001629341338, + "learning_rate": 1.1210535284521094e-05, + "loss": 0.6332741975784302, + "step": 3341 + }, + { + "epoch": 0.9773358678169323, + "grad_norm": 1.4234927806293247, + "learning_rate": 1.1205735990689677e-05, + "loss": 0.5425227880477905, + "step": 3342 + }, + { + "epoch": 0.9776283082321977, + "grad_norm": 1.2841671137073696, + "learning_rate": 1.1200936415024804e-05, + "loss": 0.48746997117996216, + "step": 3343 + }, + { + "epoch": 0.977920748647463, + "grad_norm": 1.3045240526527524, + "learning_rate": 1.1196136558648345e-05, + "loss": 0.5509577393531799, + "step": 3344 + }, + { + "epoch": 0.9782131890627285, + "grad_norm": 1.5306708658005588, + "learning_rate": 1.1191336422682237e-05, + "loss": 0.5939484238624573, + "step": 3345 + }, + { + "epoch": 0.9785056294779939, + "grad_norm": 1.4772741629174198, + "learning_rate": 1.1186536008248487e-05, + "loss": 0.6078917384147644, + "step": 3346 + }, + { + "epoch": 0.9787980698932592, + "grad_norm": 1.4449426772113496, + "learning_rate": 1.1181735316469157e-05, + "loss": 0.5578145980834961, + "step": 3347 + }, + { + "epoch": 0.9790905103085247, + "grad_norm": 1.5556898331182667, + "learning_rate": 1.1176934348466384e-05, + "loss": 0.6809493899345398, + "step": 3348 + }, + { + "epoch": 0.97938295072379, + "grad_norm": 1.3454886518258895, + "learning_rate": 1.117213310536236e-05, + "loss": 0.6057093143463135, + "step": 3349 + }, + { + "epoch": 0.9796753911390554, + "grad_norm": 1.2918762120947054, + "learning_rate": 1.1167331588279351e-05, + "loss": 0.6656113266944885, + "step": 3350 + }, + { + "epoch": 0.9799678315543208, + "grad_norm": 1.3588186351553628, + "learning_rate": 1.1162529798339682e-05, + "loss": 0.5260547399520874, + "step": 3351 + }, + { + "epoch": 0.9802602719695862, + "grad_norm": 1.4059510686804249, + "learning_rate": 1.115772773666574e-05, + "loss": 0.6918379068374634, + "step": 3352 + }, + { + "epoch": 0.9805527123848515, + "grad_norm": 1.4859264660633271, + "learning_rate": 
1.115292540437998e-05, + "loss": 0.7128825187683105, + "step": 3353 + }, + { + "epoch": 0.980845152800117, + "grad_norm": 1.7806281788252345, + "learning_rate": 1.1148122802604913e-05, + "loss": 0.6858257055282593, + "step": 3354 + }, + { + "epoch": 0.9811375932153824, + "grad_norm": 1.3250069966815017, + "learning_rate": 1.1143319932463124e-05, + "loss": 0.540290117263794, + "step": 3355 + }, + { + "epoch": 0.9814300336306477, + "grad_norm": 1.3692222106755043, + "learning_rate": 1.1138516795077251e-05, + "loss": 0.7293038368225098, + "step": 3356 + }, + { + "epoch": 0.9817224740459132, + "grad_norm": 1.2337952733643827, + "learning_rate": 1.1133713391570003e-05, + "loss": 0.5981270670890808, + "step": 3357 + }, + { + "epoch": 0.9820149144611785, + "grad_norm": 1.282642205016649, + "learning_rate": 1.1128909723064138e-05, + "loss": 0.6175673604011536, + "step": 3358 + }, + { + "epoch": 0.9823073548764439, + "grad_norm": 1.233452486411816, + "learning_rate": 1.112410579068249e-05, + "loss": 0.5385074615478516, + "step": 3359 + }, + { + "epoch": 0.9825997952917093, + "grad_norm": 1.372295513124522, + "learning_rate": 1.1119301595547952e-05, + "loss": 0.5754122734069824, + "step": 3360 + }, + { + "epoch": 0.9828922357069747, + "grad_norm": 1.4139982265628481, + "learning_rate": 1.1114497138783469e-05, + "loss": 0.5817348957061768, + "step": 3361 + }, + { + "epoch": 0.9831846761222401, + "grad_norm": 1.5953096945649214, + "learning_rate": 1.1109692421512058e-05, + "loss": 0.7561115026473999, + "step": 3362 + }, + { + "epoch": 0.9834771165375055, + "grad_norm": 1.4339527302516233, + "learning_rate": 1.1104887444856786e-05, + "loss": 0.5972003936767578, + "step": 3363 + }, + { + "epoch": 0.9837695569527709, + "grad_norm": 1.7933233288020083, + "learning_rate": 1.1100082209940795e-05, + "loss": 0.7569154500961304, + "step": 3364 + }, + { + "epoch": 0.9840619973680362, + "grad_norm": 1.6291951934588174, + "learning_rate": 1.1095276717887273e-05, + "loss": 
0.587831437587738, + "step": 3365 + }, + { + "epoch": 0.9843544377833017, + "grad_norm": 1.3893746663182953, + "learning_rate": 1.109047096981948e-05, + "loss": 0.5265868902206421, + "step": 3366 + }, + { + "epoch": 0.984646878198567, + "grad_norm": 1.5308570155926502, + "learning_rate": 1.1085664966860728e-05, + "loss": 0.6065980792045593, + "step": 3367 + }, + { + "epoch": 0.9849393186138324, + "grad_norm": 1.2582827679300745, + "learning_rate": 1.1080858710134392e-05, + "loss": 0.5859705209732056, + "step": 3368 + }, + { + "epoch": 0.9852317590290979, + "grad_norm": 1.2323676627113982, + "learning_rate": 1.1076052200763903e-05, + "loss": 0.508766770362854, + "step": 3369 + }, + { + "epoch": 0.9855241994443632, + "grad_norm": 1.36193145330846, + "learning_rate": 1.1071245439872752e-05, + "loss": 0.569848358631134, + "step": 3370 + }, + { + "epoch": 0.9858166398596286, + "grad_norm": 1.5268801014665052, + "learning_rate": 1.1066438428584496e-05, + "loss": 0.6665600538253784, + "step": 3371 + }, + { + "epoch": 0.986109080274894, + "grad_norm": 4.0352208239875536, + "learning_rate": 1.1061631168022742e-05, + "loss": 0.5942315459251404, + "step": 3372 + }, + { + "epoch": 0.9864015206901594, + "grad_norm": 1.3552035470831052, + "learning_rate": 1.1056823659311158e-05, + "loss": 0.5270178318023682, + "step": 3373 + }, + { + "epoch": 0.9866939611054247, + "grad_norm": 1.484191192307279, + "learning_rate": 1.1052015903573465e-05, + "loss": 0.6879183053970337, + "step": 3374 + }, + { + "epoch": 0.9869864015206902, + "grad_norm": 1.3455375539569006, + "learning_rate": 1.1047207901933453e-05, + "loss": 0.5980993509292603, + "step": 3375 + }, + { + "epoch": 0.9872788419359555, + "grad_norm": 1.3905728698834559, + "learning_rate": 1.1042399655514961e-05, + "loss": 0.5616245865821838, + "step": 3376 + }, + { + "epoch": 0.9875712823512209, + "grad_norm": 1.186489901347366, + "learning_rate": 1.1037591165441887e-05, + "loss": 0.6233900785446167, + "step": 3377 + }, + { + 
"epoch": 0.9878637227664864, + "grad_norm": 1.2146885941659273, + "learning_rate": 1.1032782432838188e-05, + "loss": 0.612476110458374, + "step": 3378 + }, + { + "epoch": 0.9881561631817517, + "grad_norm": 1.4001611534955285, + "learning_rate": 1.1027973458827874e-05, + "loss": 0.7109482288360596, + "step": 3379 + }, + { + "epoch": 0.9884486035970171, + "grad_norm": 1.4339596644962305, + "learning_rate": 1.1023164244535013e-05, + "loss": 0.7105005383491516, + "step": 3380 + }, + { + "epoch": 0.9887410440122825, + "grad_norm": 1.1897152470249062, + "learning_rate": 1.1018354791083731e-05, + "loss": 0.5401301383972168, + "step": 3381 + }, + { + "epoch": 0.9890334844275479, + "grad_norm": 1.2391450524860042, + "learning_rate": 1.101354509959821e-05, + "loss": 0.504487156867981, + "step": 3382 + }, + { + "epoch": 0.9893259248428132, + "grad_norm": 1.5778073649668172, + "learning_rate": 1.1008735171202685e-05, + "loss": 0.5634675025939941, + "step": 3383 + }, + { + "epoch": 0.9896183652580787, + "grad_norm": 1.2596231385186676, + "learning_rate": 1.1003925007021444e-05, + "loss": 0.4828820824623108, + "step": 3384 + }, + { + "epoch": 0.9899108056733441, + "grad_norm": 1.5274466661026922, + "learning_rate": 1.0999114608178837e-05, + "loss": 0.7154384851455688, + "step": 3385 + }, + { + "epoch": 0.9902032460886094, + "grad_norm": 1.4762279403432657, + "learning_rate": 1.0994303975799268e-05, + "loss": 0.626085638999939, + "step": 3386 + }, + { + "epoch": 0.9904956865038749, + "grad_norm": 1.2276097303271793, + "learning_rate": 1.0989493111007186e-05, + "loss": 0.5179756283760071, + "step": 3387 + }, + { + "epoch": 0.9907881269191402, + "grad_norm": 1.443725456432181, + "learning_rate": 1.0984682014927108e-05, + "loss": 0.6992131471633911, + "step": 3388 + }, + { + "epoch": 0.9910805673344056, + "grad_norm": 1.3252934977411588, + "learning_rate": 1.0979870688683598e-05, + "loss": 0.5791709423065186, + "step": 3389 + }, + { + "epoch": 0.991373007749671, + "grad_norm": 
1.2293406038140111, + "learning_rate": 1.097505913340127e-05, + "loss": 0.4703817367553711, + "step": 3390 + }, + { + "epoch": 0.9916654481649364, + "grad_norm": 1.7130975290215298, + "learning_rate": 1.0970247350204797e-05, + "loss": 0.6042051911354065, + "step": 3391 + }, + { + "epoch": 0.9919578885802017, + "grad_norm": 1.5075227997294136, + "learning_rate": 1.0965435340218905e-05, + "loss": 0.6806557178497314, + "step": 3392 + }, + { + "epoch": 0.9922503289954672, + "grad_norm": 1.4336313879655775, + "learning_rate": 1.0960623104568373e-05, + "loss": 0.6372751593589783, + "step": 3393 + }, + { + "epoch": 0.9925427694107326, + "grad_norm": 1.2403325317456615, + "learning_rate": 1.0955810644378031e-05, + "loss": 0.48651185631752014, + "step": 3394 + }, + { + "epoch": 0.9928352098259979, + "grad_norm": 1.5056465468012041, + "learning_rate": 1.0950997960772764e-05, + "loss": 0.5244222283363342, + "step": 3395 + }, + { + "epoch": 0.9931276502412634, + "grad_norm": 1.4445958557594307, + "learning_rate": 1.0946185054877505e-05, + "loss": 0.6194322109222412, + "step": 3396 + }, + { + "epoch": 0.9934200906565287, + "grad_norm": 1.4199918179889868, + "learning_rate": 1.0941371927817241e-05, + "loss": 0.690010666847229, + "step": 3397 + }, + { + "epoch": 0.9937125310717941, + "grad_norm": 1.9110036566867663, + "learning_rate": 1.0936558580717013e-05, + "loss": 0.7332549095153809, + "step": 3398 + }, + { + "epoch": 0.9940049714870595, + "grad_norm": 1.428619260140058, + "learning_rate": 1.093174501470191e-05, + "loss": 0.5264838337898254, + "step": 3399 + }, + { + "epoch": 0.9942974119023249, + "grad_norm": 1.1922668548863515, + "learning_rate": 1.092693123089708e-05, + "loss": 0.624382734298706, + "step": 3400 + }, + { + "epoch": 0.9945898523175903, + "grad_norm": 1.6559518933415514, + "learning_rate": 1.0922117230427705e-05, + "loss": 0.6340548992156982, + "step": 3401 + }, + { + "epoch": 0.9948822927328557, + "grad_norm": 1.194444639014181, + "learning_rate": 
1.0917303014419036e-05, + "loss": 0.4452754855155945, + "step": 3402 + }, + { + "epoch": 0.9951747331481211, + "grad_norm": 1.4241998861848877, + "learning_rate": 1.0912488583996364e-05, + "loss": 0.6180763244628906, + "step": 3403 + }, + { + "epoch": 0.9954671735633864, + "grad_norm": 1.7347993099568695, + "learning_rate": 1.0907673940285032e-05, + "loss": 0.7079293727874756, + "step": 3404 + }, + { + "epoch": 0.9957596139786519, + "grad_norm": 1.6216897448198107, + "learning_rate": 1.090285908441044e-05, + "loss": 0.6608254909515381, + "step": 3405 + }, + { + "epoch": 0.9960520543939172, + "grad_norm": 1.6873856420041173, + "learning_rate": 1.0898044017498024e-05, + "loss": 0.6450251340866089, + "step": 3406 + }, + { + "epoch": 0.9963444948091826, + "grad_norm": 1.4055094844579619, + "learning_rate": 1.089322874067328e-05, + "loss": 0.6267623901367188, + "step": 3407 + }, + { + "epoch": 0.9966369352244481, + "grad_norm": 1.6519553259967432, + "learning_rate": 1.0888413255061747e-05, + "loss": 0.6756424903869629, + "step": 3408 + }, + { + "epoch": 0.9969293756397134, + "grad_norm": 1.4122044676522614, + "learning_rate": 1.0883597561789017e-05, + "loss": 0.6578212976455688, + "step": 3409 + }, + { + "epoch": 0.9972218160549788, + "grad_norm": 1.600222297323414, + "learning_rate": 1.087878166198073e-05, + "loss": 0.8186248540878296, + "step": 3410 + }, + { + "epoch": 0.9975142564702442, + "grad_norm": 1.4575083835366422, + "learning_rate": 1.0873965556762573e-05, + "loss": 0.6689319610595703, + "step": 3411 + }, + { + "epoch": 0.9978066968855096, + "grad_norm": 1.5562694813418687, + "learning_rate": 1.0869149247260282e-05, + "loss": 0.5471278429031372, + "step": 3412 + }, + { + "epoch": 0.9980991373007749, + "grad_norm": 1.239131034827953, + "learning_rate": 1.0864332734599636e-05, + "loss": 0.4673747420310974, + "step": 3413 + }, + { + "epoch": 0.9983915777160404, + "grad_norm": 1.4054798008983762, + "learning_rate": 1.085951601990647e-05, + "loss": 
0.5777568221092224, + "step": 3414 + }, + { + "epoch": 0.9986840181313057, + "grad_norm": 1.6708797545900484, + "learning_rate": 1.0854699104306661e-05, + "loss": 0.6758528351783752, + "step": 3415 + }, + { + "epoch": 0.9989764585465711, + "grad_norm": 1.169154860422915, + "learning_rate": 1.0849881988926132e-05, + "loss": 0.5759919881820679, + "step": 3416 + }, + { + "epoch": 0.9992688989618366, + "grad_norm": 1.3291108456245637, + "learning_rate": 1.0845064674890857e-05, + "loss": 0.606694221496582, + "step": 3417 + }, + { + "epoch": 0.9995613393771019, + "grad_norm": 1.475290016916602, + "learning_rate": 1.0840247163326851e-05, + "loss": 0.627873957157135, + "step": 3418 + }, + { + "epoch": 0.9998537797923673, + "grad_norm": 1.4144594545282698, + "learning_rate": 1.083542945536018e-05, + "loss": 0.5560880303382874, + "step": 3419 + }, + { + "epoch": 1.0, + "grad_norm": 2.3650000488034633, + "learning_rate": 1.0830611552116952e-05, + "loss": 0.5983354449272156, + "step": 3420 + }, + { + "epoch": 1.0002924404152653, + "grad_norm": 1.1169918975180415, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.5012353658676147, + "step": 3421 + }, + { + "epoch": 1.0005848808305309, + "grad_norm": 1.6136465051179143, + "learning_rate": 1.0820975164305498e-05, + "loss": 0.4585106372833252, + "step": 3422 + }, + { + "epoch": 1.0008773212457962, + "grad_norm": 1.2831850675969656, + "learning_rate": 1.0816156681989717e-05, + "loss": 0.5790318846702576, + "step": 3423 + }, + { + "epoch": 1.0011697616610615, + "grad_norm": 1.5258008126885618, + "learning_rate": 1.0811338008902277e-05, + "loss": 0.6016381978988647, + "step": 3424 + }, + { + "epoch": 1.0014622020763269, + "grad_norm": 1.328199543518758, + "learning_rate": 1.0806519146169507e-05, + "loss": 0.5756744146347046, + "step": 3425 + }, + { + "epoch": 1.0017546424915924, + "grad_norm": 1.1865012964818713, + "learning_rate": 1.0801700094917792e-05, + "loss": 0.4776861369609833, + "step": 3426 + }, + { + "epoch": 
1.0020470829068577, + "grad_norm": 1.8629358545914494, + "learning_rate": 1.0796880856273557e-05, + "loss": 0.645842969417572, + "step": 3427 + }, + { + "epoch": 1.002339523322123, + "grad_norm": 1.1125775865964678, + "learning_rate": 1.0792061431363266e-05, + "loss": 0.5645815134048462, + "step": 3428 + }, + { + "epoch": 1.0026319637373886, + "grad_norm": 1.4821141209987578, + "learning_rate": 1.0787241821313428e-05, + "loss": 0.5477975606918335, + "step": 3429 + }, + { + "epoch": 1.002924404152654, + "grad_norm": 1.0992693186116131, + "learning_rate": 1.0782422027250604e-05, + "loss": 0.4064188599586487, + "step": 3430 + }, + { + "epoch": 1.0032168445679193, + "grad_norm": 1.3634803374266724, + "learning_rate": 1.0777602050301384e-05, + "loss": 0.5360208749771118, + "step": 3431 + }, + { + "epoch": 1.0035092849831846, + "grad_norm": 1.4203435807547533, + "learning_rate": 1.0772781891592419e-05, + "loss": 0.6189982891082764, + "step": 3432 + }, + { + "epoch": 1.0038017253984501, + "grad_norm": 1.4406563602891276, + "learning_rate": 1.0767961552250382e-05, + "loss": 0.4623541533946991, + "step": 3433 + }, + { + "epoch": 1.0040941658137155, + "grad_norm": 1.4714321386033957, + "learning_rate": 1.0763141033402e-05, + "loss": 0.6094095706939697, + "step": 3434 + }, + { + "epoch": 1.0043866062289808, + "grad_norm": 1.8852494834868845, + "learning_rate": 1.0758320336174042e-05, + "loss": 0.6997445821762085, + "step": 3435 + }, + { + "epoch": 1.0046790466442463, + "grad_norm": 1.3591852438815977, + "learning_rate": 1.0753499461693316e-05, + "loss": 0.5447323322296143, + "step": 3436 + }, + { + "epoch": 1.0049714870595117, + "grad_norm": 1.526403087538078, + "learning_rate": 1.0748678411086672e-05, + "loss": 0.5851927995681763, + "step": 3437 + }, + { + "epoch": 1.005263927474777, + "grad_norm": 1.2443699762001765, + "learning_rate": 1.0743857185481006e-05, + "loss": 0.5897810459136963, + "step": 3438 + }, + { + "epoch": 1.0055563678900423, + "grad_norm": 
1.277276792826896, + "learning_rate": 1.073903578600324e-05, + "loss": 0.47671592235565186, + "step": 3439 + }, + { + "epoch": 1.0058488083053079, + "grad_norm": 1.5091606917661848, + "learning_rate": 1.0734214213780355e-05, + "loss": 0.5586696863174438, + "step": 3440 + }, + { + "epoch": 1.0061412487205732, + "grad_norm": 1.7171075095449666, + "learning_rate": 1.0729392469939362e-05, + "loss": 0.6817598342895508, + "step": 3441 + }, + { + "epoch": 1.0064336891358385, + "grad_norm": 1.4899951597044825, + "learning_rate": 1.0724570555607311e-05, + "loss": 0.6503750085830688, + "step": 3442 + }, + { + "epoch": 1.0067261295511039, + "grad_norm": 1.516461978227071, + "learning_rate": 1.07197484719113e-05, + "loss": 0.7121564149856567, + "step": 3443 + }, + { + "epoch": 1.0070185699663694, + "grad_norm": 1.2899445236891802, + "learning_rate": 1.071492621997846e-05, + "loss": 0.5760178565979004, + "step": 3444 + }, + { + "epoch": 1.0073110103816347, + "grad_norm": 1.2567067936293974, + "learning_rate": 1.0710103800935965e-05, + "loss": 0.4555765390396118, + "step": 3445 + }, + { + "epoch": 1.0076034507969, + "grad_norm": 1.73824720674272, + "learning_rate": 1.0705281215911021e-05, + "loss": 0.6098523736000061, + "step": 3446 + }, + { + "epoch": 1.0078958912121656, + "grad_norm": 1.3529009112365886, + "learning_rate": 1.070045846603088e-05, + "loss": 0.49828749895095825, + "step": 3447 + }, + { + "epoch": 1.008188331627431, + "grad_norm": 1.6747165622943363, + "learning_rate": 1.0695635552422834e-05, + "loss": 0.5134999752044678, + "step": 3448 + }, + { + "epoch": 1.0084807720426963, + "grad_norm": 1.6379844761327287, + "learning_rate": 1.0690812476214209e-05, + "loss": 0.53546142578125, + "step": 3449 + }, + { + "epoch": 1.0087732124579616, + "grad_norm": 1.353591975524027, + "learning_rate": 1.0685989238532364e-05, + "loss": 0.4955276846885681, + "step": 3450 + }, + { + "epoch": 1.0090656528732271, + "grad_norm": 1.5308502126967132, + "learning_rate": 
1.0681165840504708e-05, + "loss": 0.5693827271461487, + "step": 3451 + }, + { + "epoch": 1.0093580932884925, + "grad_norm": 1.2544327118971752, + "learning_rate": 1.0676342283258676e-05, + "loss": 0.5023596286773682, + "step": 3452 + }, + { + "epoch": 1.0096505337037578, + "grad_norm": 1.4830383604575028, + "learning_rate": 1.0671518567921748e-05, + "loss": 0.5601100921630859, + "step": 3453 + }, + { + "epoch": 1.0099429741190233, + "grad_norm": 1.5483896672555095, + "learning_rate": 1.0666694695621438e-05, + "loss": 0.5744563341140747, + "step": 3454 + }, + { + "epoch": 1.0102354145342887, + "grad_norm": 1.2243241739970807, + "learning_rate": 1.0661870667485298e-05, + "loss": 0.531909704208374, + "step": 3455 + }, + { + "epoch": 1.010527854949554, + "grad_norm": 1.5063779223920848, + "learning_rate": 1.0657046484640911e-05, + "loss": 0.5737274885177612, + "step": 3456 + }, + { + "epoch": 1.0108202953648193, + "grad_norm": 1.3852723907754825, + "learning_rate": 1.0652222148215905e-05, + "loss": 0.5550329089164734, + "step": 3457 + }, + { + "epoch": 1.0111127357800849, + "grad_norm": 1.6139287553682227, + "learning_rate": 1.0647397659337936e-05, + "loss": 0.47795504331588745, + "step": 3458 + }, + { + "epoch": 1.0114051761953502, + "grad_norm": 1.4543285146976004, + "learning_rate": 1.0642573019134703e-05, + "loss": 0.6817550659179688, + "step": 3459 + }, + { + "epoch": 1.0116976166106155, + "grad_norm": 1.1722820118460164, + "learning_rate": 1.063774822873393e-05, + "loss": 0.45271044969558716, + "step": 3460 + }, + { + "epoch": 1.011990057025881, + "grad_norm": 1.537598582173988, + "learning_rate": 1.0632923289263389e-05, + "loss": 0.611709475517273, + "step": 3461 + }, + { + "epoch": 1.0122824974411464, + "grad_norm": 1.4188302760105698, + "learning_rate": 1.0628098201850876e-05, + "loss": 0.5101709961891174, + "step": 3462 + }, + { + "epoch": 1.0125749378564117, + "grad_norm": 1.433548611715836, + "learning_rate": 1.0623272967624227e-05, + "loss": 
0.6550514698028564, + "step": 3463 + }, + { + "epoch": 1.012867378271677, + "grad_norm": 1.2796248072280718, + "learning_rate": 1.0618447587711312e-05, + "loss": 0.479978084564209, + "step": 3464 + }, + { + "epoch": 1.0131598186869426, + "grad_norm": 1.5575466316491844, + "learning_rate": 1.0613622063240035e-05, + "loss": 0.5616719722747803, + "step": 3465 + }, + { + "epoch": 1.013452259102208, + "grad_norm": 1.5865800035698945, + "learning_rate": 1.060879639533833e-05, + "loss": 0.5160953998565674, + "step": 3466 + }, + { + "epoch": 1.0137446995174733, + "grad_norm": 1.5690447549246889, + "learning_rate": 1.0603970585134168e-05, + "loss": 0.6069898009300232, + "step": 3467 + }, + { + "epoch": 1.0140371399327388, + "grad_norm": 1.4806335128762829, + "learning_rate": 1.0599144633755555e-05, + "loss": 0.5800961256027222, + "step": 3468 + }, + { + "epoch": 1.0143295803480041, + "grad_norm": 1.2794607035027592, + "learning_rate": 1.0594318542330528e-05, + "loss": 0.5286555290222168, + "step": 3469 + }, + { + "epoch": 1.0146220207632695, + "grad_norm": 1.3098421389423984, + "learning_rate": 1.0589492311987157e-05, + "loss": 0.44960829615592957, + "step": 3470 + }, + { + "epoch": 1.0149144611785348, + "grad_norm": 1.787788159345536, + "learning_rate": 1.0584665943853538e-05, + "loss": 0.5799434781074524, + "step": 3471 + }, + { + "epoch": 1.0152069015938003, + "grad_norm": 1.3655057393381103, + "learning_rate": 1.057983943905781e-05, + "loss": 0.5142421126365662, + "step": 3472 + }, + { + "epoch": 1.0154993420090657, + "grad_norm": 1.3605211166498987, + "learning_rate": 1.0575012798728141e-05, + "loss": 0.5184981226921082, + "step": 3473 + }, + { + "epoch": 1.015791782424331, + "grad_norm": 1.6630390830837942, + "learning_rate": 1.0570186023992724e-05, + "loss": 0.5747173428535461, + "step": 3474 + }, + { + "epoch": 1.0160842228395965, + "grad_norm": 1.4307323575447104, + "learning_rate": 1.0565359115979792e-05, + "loss": 0.5994119644165039, + "step": 3475 + }, + { + 
"epoch": 1.0163766632548619, + "grad_norm": 1.4001969418816858, + "learning_rate": 1.0560532075817605e-05, + "loss": 0.5020599365234375, + "step": 3476 + }, + { + "epoch": 1.0166691036701272, + "grad_norm": 1.5266027572877992, + "learning_rate": 1.0555704904634451e-05, + "loss": 0.5023698806762695, + "step": 3477 + }, + { + "epoch": 1.0169615440853925, + "grad_norm": 1.3247610849347196, + "learning_rate": 1.0550877603558656e-05, + "loss": 0.3998676538467407, + "step": 3478 + }, + { + "epoch": 1.017253984500658, + "grad_norm": 1.2513443496343235, + "learning_rate": 1.0546050173718569e-05, + "loss": 0.5083760619163513, + "step": 3479 + }, + { + "epoch": 1.0175464249159234, + "grad_norm": 1.3684676716830397, + "learning_rate": 1.0541222616242575e-05, + "loss": 0.49840620160102844, + "step": 3480 + }, + { + "epoch": 1.0178388653311887, + "grad_norm": 1.3303553104888959, + "learning_rate": 1.0536394932259085e-05, + "loss": 0.5302960276603699, + "step": 3481 + }, + { + "epoch": 1.018131305746454, + "grad_norm": 1.338379797222235, + "learning_rate": 1.0531567122896543e-05, + "loss": 0.5694236755371094, + "step": 3482 + }, + { + "epoch": 1.0184237461617196, + "grad_norm": 1.4305833876226657, + "learning_rate": 1.0526739189283414e-05, + "loss": 0.5155326128005981, + "step": 3483 + }, + { + "epoch": 1.018716186576985, + "grad_norm": 1.3829306833852764, + "learning_rate": 1.0521911132548207e-05, + "loss": 0.6254806518554688, + "step": 3484 + }, + { + "epoch": 1.0190086269922503, + "grad_norm": 1.9177430357611984, + "learning_rate": 1.0517082953819442e-05, + "loss": 0.5623525977134705, + "step": 3485 + }, + { + "epoch": 1.0193010674075158, + "grad_norm": 1.67092732120196, + "learning_rate": 1.051225465422568e-05, + "loss": 0.6289865970611572, + "step": 3486 + }, + { + "epoch": 1.0195935078227811, + "grad_norm": 1.4045798370952283, + "learning_rate": 1.050742623489551e-05, + "loss": 0.5935345888137817, + "step": 3487 + }, + { + "epoch": 1.0198859482380465, + "grad_norm": 
1.696103524125264, + "learning_rate": 1.0502597696957542e-05, + "loss": 0.5223839282989502, + "step": 3488 + }, + { + "epoch": 1.0201783886533118, + "grad_norm": 1.9382869881093494, + "learning_rate": 1.0497769041540418e-05, + "loss": 0.6766373515129089, + "step": 3489 + }, + { + "epoch": 1.0204708290685773, + "grad_norm": 1.7017290392950901, + "learning_rate": 1.0492940269772806e-05, + "loss": 0.4934672713279724, + "step": 3490 + }, + { + "epoch": 1.0207632694838427, + "grad_norm": 1.345123127698455, + "learning_rate": 1.0488111382783403e-05, + "loss": 0.5207735300064087, + "step": 3491 + }, + { + "epoch": 1.021055709899108, + "grad_norm": 1.6293706929191067, + "learning_rate": 1.0483282381700933e-05, + "loss": 0.6090695261955261, + "step": 3492 + }, + { + "epoch": 1.0213481503143735, + "grad_norm": 1.2927953162345942, + "learning_rate": 1.0478453267654147e-05, + "loss": 0.5777665376663208, + "step": 3493 + }, + { + "epoch": 1.0216405907296389, + "grad_norm": 1.5951555841510592, + "learning_rate": 1.0473624041771814e-05, + "loss": 0.7241395711898804, + "step": 3494 + }, + { + "epoch": 1.0219330311449042, + "grad_norm": 1.4480767991556562, + "learning_rate": 1.0468794705182742e-05, + "loss": 0.45545506477355957, + "step": 3495 + }, + { + "epoch": 1.0222254715601695, + "grad_norm": 1.422698945534055, + "learning_rate": 1.0463965259015761e-05, + "loss": 0.5519885420799255, + "step": 3496 + }, + { + "epoch": 1.022517911975435, + "grad_norm": 1.509316262763282, + "learning_rate": 1.045913570439972e-05, + "loss": 0.558646559715271, + "step": 3497 + }, + { + "epoch": 1.0228103523907004, + "grad_norm": 1.4960690347564465, + "learning_rate": 1.0454306042463499e-05, + "loss": 0.5259999632835388, + "step": 3498 + }, + { + "epoch": 1.0231027928059657, + "grad_norm": 1.2679527875669403, + "learning_rate": 1.0449476274336004e-05, + "loss": 0.4711627960205078, + "step": 3499 + }, + { + "epoch": 1.0233952332212313, + "grad_norm": 1.5395810801486782, + "learning_rate": 
1.0444646401146161e-05, + "loss": 0.5893874168395996, + "step": 3500 + }, + { + "epoch": 1.0236876736364966, + "grad_norm": 1.498228532943397, + "learning_rate": 1.0439816424022926e-05, + "loss": 0.5596123933792114, + "step": 3501 + }, + { + "epoch": 1.023980114051762, + "grad_norm": 1.3706228388690522, + "learning_rate": 1.0434986344095276e-05, + "loss": 0.5228658318519592, + "step": 3502 + }, + { + "epoch": 1.0242725544670273, + "grad_norm": 1.3956010390337459, + "learning_rate": 1.0430156162492216e-05, + "loss": 0.5520567297935486, + "step": 3503 + }, + { + "epoch": 1.0245649948822928, + "grad_norm": 1.2988010194163804, + "learning_rate": 1.0425325880342762e-05, + "loss": 0.531911313533783, + "step": 3504 + }, + { + "epoch": 1.0248574352975581, + "grad_norm": 1.5296749459710133, + "learning_rate": 1.0420495498775974e-05, + "loss": 0.58717942237854, + "step": 3505 + }, + { + "epoch": 1.0251498757128235, + "grad_norm": 1.3937094974123596, + "learning_rate": 1.0415665018920919e-05, + "loss": 0.4972108006477356, + "step": 3506 + }, + { + "epoch": 1.025442316128089, + "grad_norm": 1.4653045497635373, + "learning_rate": 1.0410834441906692e-05, + "loss": 0.567977249622345, + "step": 3507 + }, + { + "epoch": 1.0257347565433543, + "grad_norm": 1.4984249963013099, + "learning_rate": 1.0406003768862416e-05, + "loss": 0.568755567073822, + "step": 3508 + }, + { + "epoch": 1.0260271969586197, + "grad_norm": 1.5140899451878516, + "learning_rate": 1.0401173000917224e-05, + "loss": 0.5668960809707642, + "step": 3509 + }, + { + "epoch": 1.026319637373885, + "grad_norm": 1.5737165138245863, + "learning_rate": 1.0396342139200282e-05, + "loss": 0.5956743955612183, + "step": 3510 + }, + { + "epoch": 1.0266120777891505, + "grad_norm": 1.3000472899601168, + "learning_rate": 1.0391511184840775e-05, + "loss": 0.5258834362030029, + "step": 3511 + }, + { + "epoch": 1.0269045182044159, + "grad_norm": 1.52676259543146, + "learning_rate": 1.038668013896791e-05, + "loss": 0.7358168363571167, + 
"step": 3512 + }, + { + "epoch": 1.0271969586196812, + "grad_norm": 1.6868440270891885, + "learning_rate": 1.0381849002710914e-05, + "loss": 0.5845209956169128, + "step": 3513 + }, + { + "epoch": 1.0274893990349467, + "grad_norm": 1.4837942506085555, + "learning_rate": 1.0377017777199034e-05, + "loss": 0.4475495219230652, + "step": 3514 + }, + { + "epoch": 1.027781839450212, + "grad_norm": 1.2830033919091985, + "learning_rate": 1.0372186463561542e-05, + "loss": 0.5555804371833801, + "step": 3515 + }, + { + "epoch": 1.0280742798654774, + "grad_norm": 1.65016913167245, + "learning_rate": 1.0367355062927726e-05, + "loss": 0.5927316546440125, + "step": 3516 + }, + { + "epoch": 1.0283667202807427, + "grad_norm": 1.3376999356667882, + "learning_rate": 1.0362523576426897e-05, + "loss": 0.47281715273857117, + "step": 3517 + }, + { + "epoch": 1.0286591606960083, + "grad_norm": 1.4195049172993812, + "learning_rate": 1.0357692005188387e-05, + "loss": 0.5275483727455139, + "step": 3518 + }, + { + "epoch": 1.0289516011112736, + "grad_norm": 1.6670234220228792, + "learning_rate": 1.0352860350341547e-05, + "loss": 0.5740839242935181, + "step": 3519 + }, + { + "epoch": 1.029244041526539, + "grad_norm": 1.3668449892598942, + "learning_rate": 1.0348028613015747e-05, + "loss": 0.6030054688453674, + "step": 3520 + }, + { + "epoch": 1.0295364819418042, + "grad_norm": 1.4423080423666719, + "learning_rate": 1.034319679434037e-05, + "loss": 0.5415347814559937, + "step": 3521 + }, + { + "epoch": 1.0298289223570698, + "grad_norm": 1.4756281264212951, + "learning_rate": 1.033836489544483e-05, + "loss": 0.5850083231925964, + "step": 3522 + }, + { + "epoch": 1.0301213627723351, + "grad_norm": 1.516707487989418, + "learning_rate": 1.0333532917458556e-05, + "loss": 0.47614163160324097, + "step": 3523 + }, + { + "epoch": 1.0304138031876005, + "grad_norm": 1.5357316287676814, + "learning_rate": 1.0328700861510987e-05, + "loss": 0.5645745992660522, + "step": 3524 + }, + { + "epoch": 
1.030706243602866, + "grad_norm": 1.3186548714848774, + "learning_rate": 1.0323868728731591e-05, + "loss": 0.5729008913040161, + "step": 3525 + }, + { + "epoch": 1.0309986840181313, + "grad_norm": 1.373781447264802, + "learning_rate": 1.031903652024985e-05, + "loss": 0.5177778005599976, + "step": 3526 + }, + { + "epoch": 1.0312911244333967, + "grad_norm": 1.390457184292636, + "learning_rate": 1.0314204237195263e-05, + "loss": 0.49413079023361206, + "step": 3527 + }, + { + "epoch": 1.031583564848662, + "grad_norm": 1.4789369230243037, + "learning_rate": 1.0309371880697342e-05, + "loss": 0.5074756145477295, + "step": 3528 + }, + { + "epoch": 1.0318760052639275, + "grad_norm": 1.590543948205407, + "learning_rate": 1.0304539451885629e-05, + "loss": 0.5601285696029663, + "step": 3529 + }, + { + "epoch": 1.0321684456791929, + "grad_norm": 1.3273904087281212, + "learning_rate": 1.029970695188967e-05, + "loss": 0.48358121514320374, + "step": 3530 + }, + { + "epoch": 1.0324608860944582, + "grad_norm": 1.4772927313727484, + "learning_rate": 1.0294874381839033e-05, + "loss": 0.4472161829471588, + "step": 3531 + }, + { + "epoch": 1.0327533265097237, + "grad_norm": 1.4129544794929634, + "learning_rate": 1.02900417428633e-05, + "loss": 0.6011627912521362, + "step": 3532 + }, + { + "epoch": 1.033045766924989, + "grad_norm": 1.354725840134447, + "learning_rate": 1.0285209036092076e-05, + "loss": 0.5212395191192627, + "step": 3533 + }, + { + "epoch": 1.0333382073402544, + "grad_norm": 1.844431950477259, + "learning_rate": 1.0280376262654971e-05, + "loss": 0.5433810949325562, + "step": 3534 + }, + { + "epoch": 1.0336306477555197, + "grad_norm": 1.4124385690995565, + "learning_rate": 1.0275543423681622e-05, + "loss": 0.5215464234352112, + "step": 3535 + }, + { + "epoch": 1.0339230881707853, + "grad_norm": 1.3386210311441036, + "learning_rate": 1.0270710520301672e-05, + "loss": 0.511099100112915, + "step": 3536 + }, + { + "epoch": 1.0342155285860506, + "grad_norm": 1.3822305233430652, 
+ "learning_rate": 1.0265877553644783e-05, + "loss": 0.4954407811164856, + "step": 3537 + }, + { + "epoch": 1.034507969001316, + "grad_norm": 1.5424734752588294, + "learning_rate": 1.0261044524840633e-05, + "loss": 0.5491081476211548, + "step": 3538 + }, + { + "epoch": 1.0348004094165815, + "grad_norm": 1.5108040554468096, + "learning_rate": 1.0256211435018912e-05, + "loss": 0.43202829360961914, + "step": 3539 + }, + { + "epoch": 1.0350928498318468, + "grad_norm": 1.5814180623509084, + "learning_rate": 1.0251378285309326e-05, + "loss": 0.4721212089061737, + "step": 3540 + }, + { + "epoch": 1.0353852902471121, + "grad_norm": 1.6070602892086314, + "learning_rate": 1.0246545076841596e-05, + "loss": 0.5621099472045898, + "step": 3541 + }, + { + "epoch": 1.0356777306623774, + "grad_norm": 1.5170284121136077, + "learning_rate": 1.0241711810745452e-05, + "loss": 0.5572346448898315, + "step": 3542 + }, + { + "epoch": 1.035970171077643, + "grad_norm": 1.3590672633285579, + "learning_rate": 1.023687848815064e-05, + "loss": 0.40916550159454346, + "step": 3543 + }, + { + "epoch": 1.0362626114929083, + "grad_norm": 1.5018716604616227, + "learning_rate": 1.0232045110186926e-05, + "loss": 0.5370572805404663, + "step": 3544 + }, + { + "epoch": 1.0365550519081737, + "grad_norm": 1.603253593979403, + "learning_rate": 1.0227211677984074e-05, + "loss": 0.5381634831428528, + "step": 3545 + }, + { + "epoch": 1.0368474923234392, + "grad_norm": 1.3795492267662186, + "learning_rate": 1.0222378192671878e-05, + "loss": 0.4807749092578888, + "step": 3546 + }, + { + "epoch": 1.0371399327387045, + "grad_norm": 1.4973562396665303, + "learning_rate": 1.0217544655380129e-05, + "loss": 0.5673447847366333, + "step": 3547 + }, + { + "epoch": 1.0374323731539699, + "grad_norm": 1.6360254172890698, + "learning_rate": 1.0212711067238639e-05, + "loss": 0.5259549021720886, + "step": 3548 + }, + { + "epoch": 1.0377248135692352, + "grad_norm": 1.4439961362376934, + "learning_rate": 1.0207877429377232e-05, + 
"loss": 0.48267534375190735, + "step": 3549 + }, + { + "epoch": 1.0380172539845007, + "grad_norm": 1.438603988067733, + "learning_rate": 1.0203043742925738e-05, + "loss": 0.44843387603759766, + "step": 3550 + }, + { + "epoch": 1.038309694399766, + "grad_norm": 1.5765887333733293, + "learning_rate": 1.0198210009014005e-05, + "loss": 0.8050575256347656, + "step": 3551 + }, + { + "epoch": 1.0386021348150314, + "grad_norm": 1.3559927051954717, + "learning_rate": 1.0193376228771887e-05, + "loss": 0.590203046798706, + "step": 3552 + }, + { + "epoch": 1.0388945752302967, + "grad_norm": 1.4420953878245995, + "learning_rate": 1.0188542403329252e-05, + "loss": 0.5974458456039429, + "step": 3553 + }, + { + "epoch": 1.0391870156455623, + "grad_norm": 1.4408311686918343, + "learning_rate": 1.0183708533815975e-05, + "loss": 0.4628743827342987, + "step": 3554 + }, + { + "epoch": 1.0394794560608276, + "grad_norm": 1.538902326182442, + "learning_rate": 1.0178874621361944e-05, + "loss": 0.6738137006759644, + "step": 3555 + }, + { + "epoch": 1.039771896476093, + "grad_norm": 1.2584091446339778, + "learning_rate": 1.0174040667097061e-05, + "loss": 0.48062413930892944, + "step": 3556 + }, + { + "epoch": 1.0400643368913585, + "grad_norm": 1.4180020858721523, + "learning_rate": 1.016920667215123e-05, + "loss": 0.564401388168335, + "step": 3557 + }, + { + "epoch": 1.0403567773066238, + "grad_norm": 1.5220611788966263, + "learning_rate": 1.0164372637654367e-05, + "loss": 0.4035246968269348, + "step": 3558 + }, + { + "epoch": 1.0406492177218891, + "grad_norm": 1.3759176374876299, + "learning_rate": 1.0159538564736399e-05, + "loss": 0.4484536051750183, + "step": 3559 + }, + { + "epoch": 1.0409416581371547, + "grad_norm": 1.5320485493087415, + "learning_rate": 1.0154704454527265e-05, + "loss": 0.6257200837135315, + "step": 3560 + }, + { + "epoch": 1.04123409855242, + "grad_norm": 1.7250809702027206, + "learning_rate": 1.0149870308156899e-05, + "loss": 0.5541477799415588, + "step": 3561 + }, + 
{ + "epoch": 1.0415265389676853, + "grad_norm": 1.5360272319586679, + "learning_rate": 1.0145036126755264e-05, + "loss": 0.6248821020126343, + "step": 3562 + }, + { + "epoch": 1.0418189793829506, + "grad_norm": 1.3930925306710389, + "learning_rate": 1.0140201911452318e-05, + "loss": 0.574689507484436, + "step": 3563 + }, + { + "epoch": 1.0421114197982162, + "grad_norm": 1.45907196010364, + "learning_rate": 1.0135367663378025e-05, + "loss": 0.5873313546180725, + "step": 3564 + }, + { + "epoch": 1.0424038602134815, + "grad_norm": 1.7911480245961826, + "learning_rate": 1.0130533383662361e-05, + "loss": 0.6662088632583618, + "step": 3565 + }, + { + "epoch": 1.0426963006287469, + "grad_norm": 1.688392121046196, + "learning_rate": 1.0125699073435316e-05, + "loss": 0.6517773866653442, + "step": 3566 + }, + { + "epoch": 1.0429887410440122, + "grad_norm": 1.8273298961737783, + "learning_rate": 1.0120864733826877e-05, + "loss": 0.6311444640159607, + "step": 3567 + }, + { + "epoch": 1.0432811814592777, + "grad_norm": 1.4367651958960501, + "learning_rate": 1.0116030365967037e-05, + "loss": 0.49060457944869995, + "step": 3568 + }, + { + "epoch": 1.043573621874543, + "grad_norm": 1.609897253932932, + "learning_rate": 1.0111195970985813e-05, + "loss": 0.5405893921852112, + "step": 3569 + }, + { + "epoch": 1.0438660622898084, + "grad_norm": 1.4830806836977097, + "learning_rate": 1.01063615500132e-05, + "loss": 0.482162743806839, + "step": 3570 + }, + { + "epoch": 1.044158502705074, + "grad_norm": 1.4107369824500982, + "learning_rate": 1.0101527104179224e-05, + "loss": 0.4542362093925476, + "step": 3571 + }, + { + "epoch": 1.0444509431203393, + "grad_norm": 1.5628480243599212, + "learning_rate": 1.00966926346139e-05, + "loss": 0.6157265305519104, + "step": 3572 + }, + { + "epoch": 1.0447433835356046, + "grad_norm": 1.6143915430154057, + "learning_rate": 1.0091858142447266e-05, + "loss": 0.6591875553131104, + "step": 3573 + }, + { + "epoch": 1.04503582395087, + "grad_norm": 
1.410506710976703, + "learning_rate": 1.0087023628809347e-05, + "loss": 0.5686256885528564, + "step": 3574 + }, + { + "epoch": 1.0453282643661355, + "grad_norm": 1.2971662039691743, + "learning_rate": 1.0082189094830183e-05, + "loss": 0.45131799578666687, + "step": 3575 + }, + { + "epoch": 1.0456207047814008, + "grad_norm": 1.6508365467694242, + "learning_rate": 1.0077354541639821e-05, + "loss": 0.5787829160690308, + "step": 3576 + }, + { + "epoch": 1.0459131451966661, + "grad_norm": 1.6915833775625508, + "learning_rate": 1.0072519970368303e-05, + "loss": 0.5755574107170105, + "step": 3577 + }, + { + "epoch": 1.0462055856119317, + "grad_norm": 1.4591194150184388, + "learning_rate": 1.0067685382145683e-05, + "loss": 0.5017693638801575, + "step": 3578 + }, + { + "epoch": 1.046498026027197, + "grad_norm": 1.508478769597254, + "learning_rate": 1.0062850778102017e-05, + "loss": 0.5096016526222229, + "step": 3579 + }, + { + "epoch": 1.0467904664424623, + "grad_norm": 1.443966956114079, + "learning_rate": 1.0058016159367365e-05, + "loss": 0.4988967180252075, + "step": 3580 + }, + { + "epoch": 1.0470829068577276, + "grad_norm": 1.5186890104543016, + "learning_rate": 1.0053181527071786e-05, + "loss": 0.5410172939300537, + "step": 3581 + }, + { + "epoch": 1.0473753472729932, + "grad_norm": 1.7546625585964495, + "learning_rate": 1.004834688234535e-05, + "loss": 0.5980710983276367, + "step": 3582 + }, + { + "epoch": 1.0476677876882585, + "grad_norm": 1.347751797857706, + "learning_rate": 1.0043512226318124e-05, + "loss": 0.4737449586391449, + "step": 3583 + }, + { + "epoch": 1.0479602281035238, + "grad_norm": 1.5493397390355739, + "learning_rate": 1.003867756012018e-05, + "loss": 0.6106469631195068, + "step": 3584 + }, + { + "epoch": 1.0482526685187894, + "grad_norm": 1.6077524420960543, + "learning_rate": 1.0033842884881593e-05, + "loss": 0.48002901673316956, + "step": 3585 + }, + { + "epoch": 1.0485451089340547, + "grad_norm": 1.4065529576638647, + "learning_rate": 
1.0029008201732433e-05, + "loss": 0.5101731419563293, + "step": 3586 + }, + { + "epoch": 1.04883754934932, + "grad_norm": 1.6961382740739117, + "learning_rate": 1.0024173511802786e-05, + "loss": 0.6350706219673157, + "step": 3587 + }, + { + "epoch": 1.0491299897645854, + "grad_norm": 1.4947432010936612, + "learning_rate": 1.0019338816222725e-05, + "loss": 0.5268979072570801, + "step": 3588 + }, + { + "epoch": 1.049422430179851, + "grad_norm": 1.4955724361545546, + "learning_rate": 1.0014504116122335e-05, + "loss": 0.5670457482337952, + "step": 3589 + }, + { + "epoch": 1.0497148705951163, + "grad_norm": 1.7472274991386971, + "learning_rate": 1.0009669412631697e-05, + "loss": 0.6200711727142334, + "step": 3590 + }, + { + "epoch": 1.0500073110103816, + "grad_norm": 1.5117580085419962, + "learning_rate": 1.0004834706880891e-05, + "loss": 0.44014686346054077, + "step": 3591 + }, + { + "epoch": 1.050299751425647, + "grad_norm": 1.4806608082423456, + "learning_rate": 1e-05, + "loss": 0.4690900146961212, + "step": 3592 + }, + { + "epoch": 1.0505921918409125, + "grad_norm": 1.5061085663062508, + "learning_rate": 9.995165293119112e-06, + "loss": 0.5791969299316406, + "step": 3593 + }, + { + "epoch": 1.0508846322561778, + "grad_norm": 1.403652610849375, + "learning_rate": 9.990330587368306e-06, + "loss": 0.5566244125366211, + "step": 3594 + }, + { + "epoch": 1.0511770726714431, + "grad_norm": 1.47068511144412, + "learning_rate": 9.985495883877668e-06, + "loss": 0.5201646685600281, + "step": 3595 + }, + { + "epoch": 1.0514695130867087, + "grad_norm": 1.3147681531847344, + "learning_rate": 9.980661183777277e-06, + "loss": 0.44774526357650757, + "step": 3596 + }, + { + "epoch": 1.051761953501974, + "grad_norm": 1.641682032458417, + "learning_rate": 9.975826488197217e-06, + "loss": 0.5346901416778564, + "step": 3597 + }, + { + "epoch": 1.0520543939172393, + "grad_norm": 1.516503297952313, + "learning_rate": 9.970991798267568e-06, + "loss": 0.4639764428138733, + "step": 3598 + }, 
+ { + "epoch": 1.0523468343325049, + "grad_norm": 1.5385061459553095, + "learning_rate": 9.966157115118412e-06, + "loss": 0.5505763292312622, + "step": 3599 + }, + { + "epoch": 1.0526392747477702, + "grad_norm": 1.5065604638146801, + "learning_rate": 9.961322439879821e-06, + "loss": 0.5187631845474243, + "step": 3600 + }, + { + "epoch": 1.0529317151630355, + "grad_norm": 1.5837365707911437, + "learning_rate": 9.95648777368188e-06, + "loss": 0.5990081429481506, + "step": 3601 + }, + { + "epoch": 1.0532241555783008, + "grad_norm": 1.5943954940503307, + "learning_rate": 9.951653117654653e-06, + "loss": 0.5926306843757629, + "step": 3602 + }, + { + "epoch": 1.0535165959935664, + "grad_norm": 1.5828616151591308, + "learning_rate": 9.946818472928215e-06, + "loss": 0.5294582843780518, + "step": 3603 + }, + { + "epoch": 1.0538090364088317, + "grad_norm": 1.4492789926079117, + "learning_rate": 9.941983840632637e-06, + "loss": 0.5442140102386475, + "step": 3604 + }, + { + "epoch": 1.054101476824097, + "grad_norm": 1.5960181258924353, + "learning_rate": 9.937149221897984e-06, + "loss": 0.5888028740882874, + "step": 3605 + }, + { + "epoch": 1.0543939172393624, + "grad_norm": 1.6823030520405429, + "learning_rate": 9.93231461785432e-06, + "loss": 0.7545796632766724, + "step": 3606 + }, + { + "epoch": 1.054686357654628, + "grad_norm": 1.4193397986001617, + "learning_rate": 9.9274800296317e-06, + "loss": 0.4850383996963501, + "step": 3607 + }, + { + "epoch": 1.0549787980698933, + "grad_norm": 1.7761903590602732, + "learning_rate": 9.922645458360182e-06, + "loss": 0.5658243894577026, + "step": 3608 + }, + { + "epoch": 1.0552712384851586, + "grad_norm": 1.913627443584159, + "learning_rate": 9.917810905169818e-06, + "loss": 0.6526712775230408, + "step": 3609 + }, + { + "epoch": 1.0555636789004241, + "grad_norm": 1.7132894383948376, + "learning_rate": 9.912976371190657e-06, + "loss": 0.6125987768173218, + "step": 3610 + }, + { + "epoch": 1.0558561193156895, + "grad_norm": 
1.3139938490016692, + "learning_rate": 9.908141857552737e-06, + "loss": 0.40159785747528076, + "step": 3611 + }, + { + "epoch": 1.0561485597309548, + "grad_norm": 1.7052081125083998, + "learning_rate": 9.903307365386103e-06, + "loss": 0.6628924608230591, + "step": 3612 + }, + { + "epoch": 1.05644100014622, + "grad_norm": 1.638888923278887, + "learning_rate": 9.898472895820783e-06, + "loss": 0.6083816289901733, + "step": 3613 + }, + { + "epoch": 1.0567334405614857, + "grad_norm": 1.564812875636552, + "learning_rate": 9.893638449986806e-06, + "loss": 0.5349488854408264, + "step": 3614 + }, + { + "epoch": 1.057025880976751, + "grad_norm": 1.5340813216184335, + "learning_rate": 9.888804029014194e-06, + "loss": 0.6119222044944763, + "step": 3615 + }, + { + "epoch": 1.0573183213920163, + "grad_norm": 1.367693459120948, + "learning_rate": 9.883969634032964e-06, + "loss": 0.531359851360321, + "step": 3616 + }, + { + "epoch": 1.0576107618072819, + "grad_norm": 1.6344237981695606, + "learning_rate": 9.879135266173127e-06, + "loss": 0.6604791879653931, + "step": 3617 + }, + { + "epoch": 1.0579032022225472, + "grad_norm": 1.4352324880813543, + "learning_rate": 9.874300926564689e-06, + "loss": 0.4691445231437683, + "step": 3618 + }, + { + "epoch": 1.0581956426378125, + "grad_norm": 1.2910646539258182, + "learning_rate": 9.869466616337642e-06, + "loss": 0.5690087080001831, + "step": 3619 + }, + { + "epoch": 1.0584880830530778, + "grad_norm": 1.403700057828388, + "learning_rate": 9.86463233662198e-06, + "loss": 0.5426729917526245, + "step": 3620 + }, + { + "epoch": 1.0587805234683434, + "grad_norm": 1.578075476325045, + "learning_rate": 9.859798088547687e-06, + "loss": 0.5640411376953125, + "step": 3621 + }, + { + "epoch": 1.0590729638836087, + "grad_norm": 1.4838032713556162, + "learning_rate": 9.854963873244738e-06, + "loss": 0.6724091172218323, + "step": 3622 + }, + { + "epoch": 1.059365404298874, + "grad_norm": 1.4145337335983883, + "learning_rate": 9.850129691843105e-06, + 
"loss": 0.5448887348175049, + "step": 3623 + }, + { + "epoch": 1.0596578447141396, + "grad_norm": 1.5190623574509117, + "learning_rate": 9.845295545472742e-06, + "loss": 0.5555344820022583, + "step": 3624 + }, + { + "epoch": 1.059950285129405, + "grad_norm": 1.6879154347320564, + "learning_rate": 9.840461435263604e-06, + "loss": 0.5053969621658325, + "step": 3625 + }, + { + "epoch": 1.0602427255446703, + "grad_norm": 1.5675488432589333, + "learning_rate": 9.835627362345636e-06, + "loss": 0.5866390466690063, + "step": 3626 + }, + { + "epoch": 1.0605351659599356, + "grad_norm": 1.81247497722172, + "learning_rate": 9.830793327848773e-06, + "loss": 0.5936717987060547, + "step": 3627 + }, + { + "epoch": 1.0608276063752011, + "grad_norm": 1.5536122437945554, + "learning_rate": 9.82595933290294e-06, + "loss": 0.6009070873260498, + "step": 3628 + }, + { + "epoch": 1.0611200467904665, + "grad_norm": 1.588445125911092, + "learning_rate": 9.821125378638059e-06, + "loss": 0.5361435413360596, + "step": 3629 + }, + { + "epoch": 1.0614124872057318, + "grad_norm": 1.4856331412797505, + "learning_rate": 9.816291466184025e-06, + "loss": 0.5763939619064331, + "step": 3630 + }, + { + "epoch": 1.061704927620997, + "grad_norm": 1.618308780160016, + "learning_rate": 9.81145759667075e-06, + "loss": 0.57512366771698, + "step": 3631 + }, + { + "epoch": 1.0619973680362627, + "grad_norm": 1.4990484363196022, + "learning_rate": 9.806623771228115e-06, + "loss": 0.6144367456436157, + "step": 3632 + }, + { + "epoch": 1.062289808451528, + "grad_norm": 1.5222649609215075, + "learning_rate": 9.801789990985997e-06, + "loss": 0.5715698003768921, + "step": 3633 + }, + { + "epoch": 1.0625822488667933, + "grad_norm": 1.3438421364889925, + "learning_rate": 9.796956257074263e-06, + "loss": 0.632681131362915, + "step": 3634 + }, + { + "epoch": 1.0628746892820589, + "grad_norm": 1.2996961363437054, + "learning_rate": 9.79212257062277e-06, + "loss": 0.5362547636032104, + "step": 3635 + }, + { + "epoch": 
1.0631671296973242, + "grad_norm": 1.2451948790215157, + "learning_rate": 9.787288932761361e-06, + "loss": 0.553846538066864, + "step": 3636 + }, + { + "epoch": 1.0634595701125895, + "grad_norm": 2.0033616068213456, + "learning_rate": 9.782455344619871e-06, + "loss": 0.7200362682342529, + "step": 3637 + }, + { + "epoch": 1.063752010527855, + "grad_norm": 1.5986858016901493, + "learning_rate": 9.777621807328126e-06, + "loss": 0.5544596910476685, + "step": 3638 + }, + { + "epoch": 1.0640444509431204, + "grad_norm": 1.9336329750915207, + "learning_rate": 9.772788322015926e-06, + "loss": 0.687321126461029, + "step": 3639 + }, + { + "epoch": 1.0643368913583857, + "grad_norm": 1.4658162923896687, + "learning_rate": 9.767954889813076e-06, + "loss": 0.4986167550086975, + "step": 3640 + }, + { + "epoch": 1.064629331773651, + "grad_norm": 1.6835767903522258, + "learning_rate": 9.763121511849358e-06, + "loss": 0.5021307468414307, + "step": 3641 + }, + { + "epoch": 1.0649217721889166, + "grad_norm": 1.6084332451713093, + "learning_rate": 9.758288189254548e-06, + "loss": 0.5542711019515991, + "step": 3642 + }, + { + "epoch": 1.065214212604182, + "grad_norm": 1.4567212868909125, + "learning_rate": 9.753454923158407e-06, + "loss": 0.5161126852035522, + "step": 3643 + }, + { + "epoch": 1.0655066530194472, + "grad_norm": 1.3588587385016027, + "learning_rate": 9.748621714690674e-06, + "loss": 0.6041361093521118, + "step": 3644 + }, + { + "epoch": 1.0657990934347126, + "grad_norm": 1.5312936542968558, + "learning_rate": 9.74378856498109e-06, + "loss": 0.5252672433853149, + "step": 3645 + }, + { + "epoch": 1.0660915338499781, + "grad_norm": 1.508976518247356, + "learning_rate": 9.738955475159369e-06, + "loss": 0.5198208093643188, + "step": 3646 + }, + { + "epoch": 1.0663839742652435, + "grad_norm": 1.617831688267231, + "learning_rate": 9.734122446355219e-06, + "loss": 0.5547968149185181, + "step": 3647 + }, + { + "epoch": 1.0666764146805088, + "grad_norm": 1.3192996989880752, + 
"learning_rate": 9.72928947969833e-06, + "loss": 0.5854370594024658, + "step": 3648 + }, + { + "epoch": 1.0669688550957743, + "grad_norm": 1.4612935433441103, + "learning_rate": 9.724456576318383e-06, + "loss": 0.5199173092842102, + "step": 3649 + }, + { + "epoch": 1.0672612955110397, + "grad_norm": 1.5597306303032106, + "learning_rate": 9.71962373734503e-06, + "loss": 0.49684566259384155, + "step": 3650 + }, + { + "epoch": 1.067553735926305, + "grad_norm": 1.5081407431370675, + "learning_rate": 9.714790963907927e-06, + "loss": 0.593805193901062, + "step": 3651 + }, + { + "epoch": 1.0678461763415703, + "grad_norm": 1.6501383657240702, + "learning_rate": 9.7099582571367e-06, + "loss": 0.5524622201919556, + "step": 3652 + }, + { + "epoch": 1.0681386167568359, + "grad_norm": 1.589706723326761, + "learning_rate": 9.70512561816097e-06, + "loss": 0.5796955227851868, + "step": 3653 + }, + { + "epoch": 1.0684310571721012, + "grad_norm": 1.6252059263075247, + "learning_rate": 9.700293048110335e-06, + "loss": 0.5470535159111023, + "step": 3654 + }, + { + "epoch": 1.0687234975873665, + "grad_norm": 1.180447413588476, + "learning_rate": 9.695460548114374e-06, + "loss": 0.5438790321350098, + "step": 3655 + }, + { + "epoch": 1.069015938002632, + "grad_norm": 1.5271792603913512, + "learning_rate": 9.69062811930266e-06, + "loss": 0.6324823498725891, + "step": 3656 + }, + { + "epoch": 1.0693083784178974, + "grad_norm": 1.5347219744388463, + "learning_rate": 9.68579576280474e-06, + "loss": 0.5261266231536865, + "step": 3657 + }, + { + "epoch": 1.0696008188331627, + "grad_norm": 1.408009396375569, + "learning_rate": 9.680963479750152e-06, + "loss": 0.49827292561531067, + "step": 3658 + }, + { + "epoch": 1.069893259248428, + "grad_norm": 1.8715423798930795, + "learning_rate": 9.67613127126841e-06, + "loss": 0.5273935794830322, + "step": 3659 + }, + { + "epoch": 1.0701856996636936, + "grad_norm": 1.5578682729768194, + "learning_rate": 9.671299138489017e-06, + "loss": 
0.5816709995269775, + "step": 3660 + }, + { + "epoch": 1.070478140078959, + "grad_norm": 1.7016426471813102, + "learning_rate": 9.66646708254145e-06, + "loss": 0.5591616630554199, + "step": 3661 + }, + { + "epoch": 1.0707705804942242, + "grad_norm": 1.5738449439513973, + "learning_rate": 9.661635104555172e-06, + "loss": 0.581566572189331, + "step": 3662 + }, + { + "epoch": 1.0710630209094898, + "grad_norm": 1.5518333497561696, + "learning_rate": 9.656803205659632e-06, + "loss": 0.5339047312736511, + "step": 3663 + }, + { + "epoch": 1.0713554613247551, + "grad_norm": 1.6271916881343873, + "learning_rate": 9.651971386984258e-06, + "loss": 0.5200103521347046, + "step": 3664 + }, + { + "epoch": 1.0716479017400204, + "grad_norm": 1.6521270716003156, + "learning_rate": 9.647139649658454e-06, + "loss": 0.7201805114746094, + "step": 3665 + }, + { + "epoch": 1.0719403421552858, + "grad_norm": 1.534541270100013, + "learning_rate": 9.642307994811614e-06, + "loss": 0.4801551103591919, + "step": 3666 + }, + { + "epoch": 1.0722327825705513, + "grad_norm": 1.5215862158184845, + "learning_rate": 9.637476423573106e-06, + "loss": 0.5809728503227234, + "step": 3667 + }, + { + "epoch": 1.0725252229858167, + "grad_norm": 1.6423129831570165, + "learning_rate": 9.632644937072277e-06, + "loss": 0.6493573188781738, + "step": 3668 + }, + { + "epoch": 1.072817663401082, + "grad_norm": 1.5984538738730298, + "learning_rate": 9.627813536438461e-06, + "loss": 0.5858349800109863, + "step": 3669 + }, + { + "epoch": 1.0731101038163473, + "grad_norm": 1.5154205099747375, + "learning_rate": 9.622982222800968e-06, + "loss": 0.604835033416748, + "step": 3670 + }, + { + "epoch": 1.0734025442316129, + "grad_norm": 1.6814842296922758, + "learning_rate": 9.618150997289091e-06, + "loss": 0.6168441772460938, + "step": 3671 + }, + { + "epoch": 1.0736949846468782, + "grad_norm": 1.4221905571438933, + "learning_rate": 9.613319861032093e-06, + "loss": 0.5297094583511353, + "step": 3672 + }, + { + "epoch": 
1.0739874250621435, + "grad_norm": 1.4440813284349416, + "learning_rate": 9.608488815159226e-06, + "loss": 0.513571560382843, + "step": 3673 + }, + { + "epoch": 1.074279865477409, + "grad_norm": 1.4202335692197015, + "learning_rate": 9.603657860799721e-06, + "loss": 0.4383837580680847, + "step": 3674 + }, + { + "epoch": 1.0745723058926744, + "grad_norm": 1.660966167075539, + "learning_rate": 9.59882699908278e-06, + "loss": 0.5428420305252075, + "step": 3675 + }, + { + "epoch": 1.0748647463079397, + "grad_norm": 1.331252403406651, + "learning_rate": 9.593996231137587e-06, + "loss": 0.5193662047386169, + "step": 3676 + }, + { + "epoch": 1.0751571867232053, + "grad_norm": 1.1890998376752542, + "learning_rate": 9.589165558093311e-06, + "loss": 0.47949904203414917, + "step": 3677 + }, + { + "epoch": 1.0754496271384706, + "grad_norm": 1.4440336102087743, + "learning_rate": 9.584334981079085e-06, + "loss": 0.5092326402664185, + "step": 3678 + }, + { + "epoch": 1.075742067553736, + "grad_norm": 1.642845621448486, + "learning_rate": 9.579504501224028e-06, + "loss": 0.6627280712127686, + "step": 3679 + }, + { + "epoch": 1.0760345079690012, + "grad_norm": 1.4633415466571795, + "learning_rate": 9.57467411965724e-06, + "loss": 0.45087775588035583, + "step": 3680 + }, + { + "epoch": 1.0763269483842668, + "grad_norm": 1.5441336288481917, + "learning_rate": 9.569843837507788e-06, + "loss": 0.5745380520820618, + "step": 3681 + }, + { + "epoch": 1.0766193887995321, + "grad_norm": 1.4663672637613454, + "learning_rate": 9.565013655904728e-06, + "loss": 0.4410436749458313, + "step": 3682 + }, + { + "epoch": 1.0769118292147974, + "grad_norm": 1.5197962338342057, + "learning_rate": 9.560183575977079e-06, + "loss": 0.4991244375705719, + "step": 3683 + }, + { + "epoch": 1.0772042696300628, + "grad_norm": 1.760205368894331, + "learning_rate": 9.555353598853842e-06, + "loss": 0.6316145658493042, + "step": 3684 + }, + { + "epoch": 1.0774967100453283, + "grad_norm": 1.7400994246729, + 
"learning_rate": 9.550523725664e-06, + "loss": 0.5593908429145813, + "step": 3685 + }, + { + "epoch": 1.0777891504605936, + "grad_norm": 1.360696277932948, + "learning_rate": 9.545693957536503e-06, + "loss": 0.5491319894790649, + "step": 3686 + }, + { + "epoch": 1.078081590875859, + "grad_norm": 1.6733496726210937, + "learning_rate": 9.540864295600282e-06, + "loss": 0.6299821138381958, + "step": 3687 + }, + { + "epoch": 1.0783740312911245, + "grad_norm": 1.584478567774571, + "learning_rate": 9.536034740984244e-06, + "loss": 0.5673841238021851, + "step": 3688 + }, + { + "epoch": 1.0786664717063899, + "grad_norm": 1.2029070866459273, + "learning_rate": 9.53120529481726e-06, + "loss": 0.45966464281082153, + "step": 3689 + }, + { + "epoch": 1.0789589121216552, + "grad_norm": 1.5763188044346095, + "learning_rate": 9.526375958228191e-06, + "loss": 0.5831631422042847, + "step": 3690 + }, + { + "epoch": 1.0792513525369205, + "grad_norm": 1.6299976133727174, + "learning_rate": 9.52154673234586e-06, + "loss": 0.5456256866455078, + "step": 3691 + }, + { + "epoch": 1.079543792952186, + "grad_norm": 1.4868906970264604, + "learning_rate": 9.516717618299069e-06, + "loss": 0.46428292989730835, + "step": 3692 + }, + { + "epoch": 1.0798362333674514, + "grad_norm": 1.4498481381133475, + "learning_rate": 9.511888617216602e-06, + "loss": 0.47320839762687683, + "step": 3693 + }, + { + "epoch": 1.0801286737827167, + "grad_norm": 1.4932376641022789, + "learning_rate": 9.507059730227199e-06, + "loss": 0.5205492973327637, + "step": 3694 + }, + { + "epoch": 1.0804211141979823, + "grad_norm": 1.631704411581211, + "learning_rate": 9.502230958459587e-06, + "loss": 0.42696553468704224, + "step": 3695 + }, + { + "epoch": 1.0807135546132476, + "grad_norm": 1.5001123816983175, + "learning_rate": 9.497402303042463e-06, + "loss": 0.5147116780281067, + "step": 3696 + }, + { + "epoch": 1.081005995028513, + "grad_norm": 1.38029323867701, + "learning_rate": 9.492573765104494e-06, + "loss": 
0.5080294609069824, + "step": 3697 + }, + { + "epoch": 1.0812984354437782, + "grad_norm": 1.6652094239637947, + "learning_rate": 9.487745345774323e-06, + "loss": 0.6228866577148438, + "step": 3698 + }, + { + "epoch": 1.0815908758590438, + "grad_norm": 1.5822778586922481, + "learning_rate": 9.482917046180563e-06, + "loss": 0.5560915470123291, + "step": 3699 + }, + { + "epoch": 1.0818833162743091, + "grad_norm": 1.5535091238731367, + "learning_rate": 9.4780888674518e-06, + "loss": 0.5245859622955322, + "step": 3700 + }, + { + "epoch": 1.0821757566895744, + "grad_norm": 1.5051094804368905, + "learning_rate": 9.47326081071659e-06, + "loss": 0.6462790966033936, + "step": 3701 + }, + { + "epoch": 1.08246819710484, + "grad_norm": 1.5924758840128848, + "learning_rate": 9.468432877103462e-06, + "loss": 0.5196692943572998, + "step": 3702 + }, + { + "epoch": 1.0827606375201053, + "grad_norm": 1.7568328506180717, + "learning_rate": 9.463605067740917e-06, + "loss": 0.5487779974937439, + "step": 3703 + }, + { + "epoch": 1.0830530779353706, + "grad_norm": 1.6433197945872438, + "learning_rate": 9.458777383757428e-06, + "loss": 0.5471592545509338, + "step": 3704 + }, + { + "epoch": 1.083345518350636, + "grad_norm": 1.7295248979937683, + "learning_rate": 9.453949826281436e-06, + "loss": 0.6927378177642822, + "step": 3705 + }, + { + "epoch": 1.0836379587659015, + "grad_norm": 1.645450906929874, + "learning_rate": 9.449122396441344e-06, + "loss": 0.569003164768219, + "step": 3706 + }, + { + "epoch": 1.0839303991811668, + "grad_norm": 1.5204128580175535, + "learning_rate": 9.444295095365549e-06, + "loss": 0.5655964612960815, + "step": 3707 + }, + { + "epoch": 1.0842228395964322, + "grad_norm": 1.5653417821245283, + "learning_rate": 9.439467924182397e-06, + "loss": 0.6223032474517822, + "step": 3708 + }, + { + "epoch": 1.0845152800116975, + "grad_norm": 1.8058201614843348, + "learning_rate": 9.43464088402021e-06, + "loss": 0.6553555727005005, + "step": 3709 + }, + { + "epoch": 
1.084807720426963, + "grad_norm": 1.7065419655088354, + "learning_rate": 9.429813976007277e-06, + "loss": 0.534509539604187, + "step": 3710 + }, + { + "epoch": 1.0851001608422284, + "grad_norm": 1.7341944929762452, + "learning_rate": 9.42498720127186e-06, + "loss": 0.5801417827606201, + "step": 3711 + }, + { + "epoch": 1.0853926012574937, + "grad_norm": 1.4311879630985456, + "learning_rate": 9.42016056094219e-06, + "loss": 0.47260361909866333, + "step": 3712 + }, + { + "epoch": 1.0856850416727593, + "grad_norm": 1.5640804855296242, + "learning_rate": 9.415334056146464e-06, + "loss": 0.5924841165542603, + "step": 3713 + }, + { + "epoch": 1.0859774820880246, + "grad_norm": 1.7346051575584198, + "learning_rate": 9.410507688012847e-06, + "loss": 0.6029725074768066, + "step": 3714 + }, + { + "epoch": 1.08626992250329, + "grad_norm": 1.6762909361099274, + "learning_rate": 9.405681457669472e-06, + "loss": 0.5838413834571838, + "step": 3715 + }, + { + "epoch": 1.0865623629185555, + "grad_norm": 1.277586165055191, + "learning_rate": 9.400855366244445e-06, + "loss": 0.4739546775817871, + "step": 3716 + }, + { + "epoch": 1.0868548033338208, + "grad_norm": 1.5391172094714582, + "learning_rate": 9.396029414865832e-06, + "loss": 0.4870055913925171, + "step": 3717 + }, + { + "epoch": 1.0871472437490861, + "grad_norm": 1.4254039758246118, + "learning_rate": 9.39120360466167e-06, + "loss": 0.5572132468223572, + "step": 3718 + }, + { + "epoch": 1.0874396841643514, + "grad_norm": 1.6824352313774058, + "learning_rate": 9.386377936759966e-06, + "loss": 0.5601439476013184, + "step": 3719 + }, + { + "epoch": 1.087732124579617, + "grad_norm": 1.4548205788512927, + "learning_rate": 9.38155241228869e-06, + "loss": 0.4551504850387573, + "step": 3720 + }, + { + "epoch": 1.0880245649948823, + "grad_norm": 1.447968175073075, + "learning_rate": 9.376727032375773e-06, + "loss": 0.5656375885009766, + "step": 3721 + }, + { + "epoch": 1.0883170054101476, + "grad_norm": 1.4767808933411752, + 
"learning_rate": 9.371901798149124e-06, + "loss": 0.5597153902053833, + "step": 3722 + }, + { + "epoch": 1.088609445825413, + "grad_norm": 1.5252235269095387, + "learning_rate": 9.367076710736613e-06, + "loss": 0.5946288108825684, + "step": 3723 + }, + { + "epoch": 1.0889018862406785, + "grad_norm": 1.9924638298376933, + "learning_rate": 9.36225177126607e-06, + "loss": 0.5951449871063232, + "step": 3724 + }, + { + "epoch": 1.0891943266559438, + "grad_norm": 1.7845167649533908, + "learning_rate": 9.3574269808653e-06, + "loss": 0.5755487680435181, + "step": 3725 + }, + { + "epoch": 1.0894867670712092, + "grad_norm": 1.5254834641419546, + "learning_rate": 9.352602340662065e-06, + "loss": 0.5118892788887024, + "step": 3726 + }, + { + "epoch": 1.0897792074864747, + "grad_norm": 1.596558008598135, + "learning_rate": 9.347777851784097e-06, + "loss": 0.5652351975440979, + "step": 3727 + }, + { + "epoch": 1.09007164790174, + "grad_norm": 1.5215560380827415, + "learning_rate": 9.34295351535909e-06, + "loss": 0.624887228012085, + "step": 3728 + }, + { + "epoch": 1.0903640883170054, + "grad_norm": 1.447383452488018, + "learning_rate": 9.338129332514705e-06, + "loss": 0.534363329410553, + "step": 3729 + }, + { + "epoch": 1.0906565287322707, + "grad_norm": 1.477841435635963, + "learning_rate": 9.333305304378565e-06, + "loss": 0.6203521490097046, + "step": 3730 + }, + { + "epoch": 1.0909489691475363, + "grad_norm": 1.7401174715864398, + "learning_rate": 9.328481432078254e-06, + "loss": 0.64560866355896, + "step": 3731 + }, + { + "epoch": 1.0912414095628016, + "grad_norm": 1.5841972191853104, + "learning_rate": 9.323657716741327e-06, + "loss": 0.5389514565467834, + "step": 3732 + }, + { + "epoch": 1.091533849978067, + "grad_norm": 1.4621625707128454, + "learning_rate": 9.318834159495295e-06, + "loss": 0.5245277881622314, + "step": 3733 + }, + { + "epoch": 1.0918262903933325, + "grad_norm": 1.6486990138865423, + "learning_rate": 9.314010761467637e-06, + "loss": 0.603967010974884, + 
"step": 3734 + }, + { + "epoch": 1.0921187308085978, + "grad_norm": 1.7983997195133608, + "learning_rate": 9.309187523785794e-06, + "loss": 0.5426995754241943, + "step": 3735 + }, + { + "epoch": 1.092411171223863, + "grad_norm": 1.6248514181798874, + "learning_rate": 9.30436444757717e-06, + "loss": 0.5400352478027344, + "step": 3736 + }, + { + "epoch": 1.0927036116391284, + "grad_norm": 1.5009984854869718, + "learning_rate": 9.299541533969121e-06, + "loss": 0.5016524195671082, + "step": 3737 + }, + { + "epoch": 1.092996052054394, + "grad_norm": 1.7929437285814107, + "learning_rate": 9.294718784088982e-06, + "loss": 0.526217520236969, + "step": 3738 + }, + { + "epoch": 1.0932884924696593, + "grad_norm": 1.7293517567202035, + "learning_rate": 9.289896199064038e-06, + "loss": 0.525063157081604, + "step": 3739 + }, + { + "epoch": 1.0935809328849246, + "grad_norm": 1.269101628653969, + "learning_rate": 9.285073780021541e-06, + "loss": 0.3792048692703247, + "step": 3740 + }, + { + "epoch": 1.0938733733001902, + "grad_norm": 1.4416380651624152, + "learning_rate": 9.280251528088702e-06, + "loss": 0.5326308012008667, + "step": 3741 + }, + { + "epoch": 1.0941658137154555, + "grad_norm": 1.3946561055322027, + "learning_rate": 9.275429444392692e-06, + "loss": 0.5675199627876282, + "step": 3742 + }, + { + "epoch": 1.0944582541307208, + "grad_norm": 1.640552639536372, + "learning_rate": 9.270607530060643e-06, + "loss": 0.6525516510009766, + "step": 3743 + }, + { + "epoch": 1.0947506945459862, + "grad_norm": 1.563647681973335, + "learning_rate": 9.265785786219647e-06, + "loss": 0.6376343369483948, + "step": 3744 + }, + { + "epoch": 1.0950431349612517, + "grad_norm": 1.7701418719133022, + "learning_rate": 9.260964213996763e-06, + "loss": 0.6440377235412598, + "step": 3745 + }, + { + "epoch": 1.095335575376517, + "grad_norm": 1.886853414823259, + "learning_rate": 9.256142814518997e-06, + "loss": 0.5971434116363525, + "step": 3746 + }, + { + "epoch": 1.0956280157917824, + 
"grad_norm": 1.3797760891901851, + "learning_rate": 9.251321588913331e-06, + "loss": 0.5096890330314636, + "step": 3747 + }, + { + "epoch": 1.0959204562070477, + "grad_norm": 1.7099901744739332, + "learning_rate": 9.246500538306686e-06, + "loss": 0.4303498864173889, + "step": 3748 + }, + { + "epoch": 1.0962128966223132, + "grad_norm": 1.5934571510718554, + "learning_rate": 9.241679663825961e-06, + "loss": 0.5484192371368408, + "step": 3749 + }, + { + "epoch": 1.0965053370375786, + "grad_norm": 1.6268147624989107, + "learning_rate": 9.236858966598004e-06, + "loss": 0.6057884693145752, + "step": 3750 + }, + { + "epoch": 1.096797777452844, + "grad_norm": 1.565840426411154, + "learning_rate": 9.232038447749623e-06, + "loss": 0.5261536836624146, + "step": 3751 + }, + { + "epoch": 1.0970902178681095, + "grad_norm": 1.3242416099520606, + "learning_rate": 9.227218108407586e-06, + "loss": 0.470365047454834, + "step": 3752 + }, + { + "epoch": 1.0973826582833748, + "grad_norm": 1.8694075496184692, + "learning_rate": 9.222397949698618e-06, + "loss": 0.6158323287963867, + "step": 3753 + }, + { + "epoch": 1.09767509869864, + "grad_norm": 1.4353847976975904, + "learning_rate": 9.217577972749401e-06, + "loss": 0.582190990447998, + "step": 3754 + }, + { + "epoch": 1.0979675391139057, + "grad_norm": 1.5377732823861585, + "learning_rate": 9.212758178686575e-06, + "loss": 0.4939305782318115, + "step": 3755 + }, + { + "epoch": 1.098259979529171, + "grad_norm": 1.501946006392042, + "learning_rate": 9.207938568636739e-06, + "loss": 0.576829731464386, + "step": 3756 + }, + { + "epoch": 1.0985524199444363, + "grad_norm": 1.582387804664269, + "learning_rate": 9.203119143726445e-06, + "loss": 0.581257164478302, + "step": 3757 + }, + { + "epoch": 1.0988448603597016, + "grad_norm": 1.4501950316688965, + "learning_rate": 9.19829990508221e-06, + "loss": 0.6105127334594727, + "step": 3758 + }, + { + "epoch": 1.0991373007749672, + "grad_norm": 1.7379854400774775, + "learning_rate": 
9.193480853830495e-06, + "loss": 0.5311432480812073, + "step": 3759 + }, + { + "epoch": 1.0994297411902325, + "grad_norm": 1.3707297007944412, + "learning_rate": 9.188661991097726e-06, + "loss": 0.44334596395492554, + "step": 3760 + }, + { + "epoch": 1.0997221816054978, + "grad_norm": 1.6175971035022318, + "learning_rate": 9.183843318010285e-06, + "loss": 0.5795773267745972, + "step": 3761 + }, + { + "epoch": 1.1000146220207632, + "grad_norm": 1.4465404341375856, + "learning_rate": 9.179024835694504e-06, + "loss": 0.619825541973114, + "step": 3762 + }, + { + "epoch": 1.1003070624360287, + "grad_norm": 1.754450237430447, + "learning_rate": 9.174206545276678e-06, + "loss": 0.633934497833252, + "step": 3763 + }, + { + "epoch": 1.100599502851294, + "grad_norm": 1.34560762533496, + "learning_rate": 9.169388447883053e-06, + "loss": 0.48922473192214966, + "step": 3764 + }, + { + "epoch": 1.1008919432665594, + "grad_norm": 1.7340747304342141, + "learning_rate": 9.164570544639825e-06, + "loss": 0.6125025153160095, + "step": 3765 + }, + { + "epoch": 1.101184383681825, + "grad_norm": 1.4327034643571392, + "learning_rate": 9.159752836673154e-06, + "loss": 0.5428078174591064, + "step": 3766 + }, + { + "epoch": 1.1014768240970902, + "grad_norm": 1.4335551572069505, + "learning_rate": 9.154935325109148e-06, + "loss": 0.5848157405853271, + "step": 3767 + }, + { + "epoch": 1.1017692645123556, + "grad_norm": 1.5053579548838565, + "learning_rate": 9.150118011073872e-06, + "loss": 0.5150102376937866, + "step": 3768 + }, + { + "epoch": 1.102061704927621, + "grad_norm": 1.429407171536289, + "learning_rate": 9.145300895693344e-06, + "loss": 0.6106699705123901, + "step": 3769 + }, + { + "epoch": 1.1023541453428864, + "grad_norm": 1.4079938603953852, + "learning_rate": 9.140483980093534e-06, + "loss": 0.5819482803344727, + "step": 3770 + }, + { + "epoch": 1.1026465857581518, + "grad_norm": 1.7060315490040079, + "learning_rate": 9.135667265400369e-06, + "loss": 0.6499812602996826, + "step": 
3771 + }, + { + "epoch": 1.102939026173417, + "grad_norm": 1.520551323323022, + "learning_rate": 9.130850752739724e-06, + "loss": 0.5375189781188965, + "step": 3772 + }, + { + "epoch": 1.1032314665886827, + "grad_norm": 1.5200340564855783, + "learning_rate": 9.12603444323743e-06, + "loss": 0.5582318902015686, + "step": 3773 + }, + { + "epoch": 1.103523907003948, + "grad_norm": 1.6010357553720616, + "learning_rate": 9.121218338019273e-06, + "loss": 0.5549799203872681, + "step": 3774 + }, + { + "epoch": 1.1038163474192133, + "grad_norm": 1.672600820514396, + "learning_rate": 9.116402438210988e-06, + "loss": 0.4942197799682617, + "step": 3775 + }, + { + "epoch": 1.1041087878344786, + "grad_norm": 1.294858704528479, + "learning_rate": 9.11158674493826e-06, + "loss": 0.5039837956428528, + "step": 3776 + }, + { + "epoch": 1.1044012282497442, + "grad_norm": 1.4904483423531274, + "learning_rate": 9.106771259326726e-06, + "loss": 0.49781280755996704, + "step": 3777 + }, + { + "epoch": 1.1046936686650095, + "grad_norm": 1.5058975394537781, + "learning_rate": 9.101955982501981e-06, + "loss": 0.41755813360214233, + "step": 3778 + }, + { + "epoch": 1.1049861090802748, + "grad_norm": 1.3247888444316807, + "learning_rate": 9.097140915589564e-06, + "loss": 0.5605067014694214, + "step": 3779 + }, + { + "epoch": 1.1052785494955404, + "grad_norm": 1.6960736504408462, + "learning_rate": 9.092326059714971e-06, + "loss": 0.6291122436523438, + "step": 3780 + }, + { + "epoch": 1.1055709899108057, + "grad_norm": 1.5309788529424204, + "learning_rate": 9.087511416003636e-06, + "loss": 0.5164260864257812, + "step": 3781 + }, + { + "epoch": 1.105863430326071, + "grad_norm": 1.481065256446166, + "learning_rate": 9.082696985580964e-06, + "loss": 0.5002986192703247, + "step": 3782 + }, + { + "epoch": 1.1061558707413364, + "grad_norm": 1.8553995759252653, + "learning_rate": 9.077882769572295e-06, + "loss": 0.5149055123329163, + "step": 3783 + }, + { + "epoch": 1.106448311156602, + "grad_norm": 
1.4637547819206846, + "learning_rate": 9.073068769102925e-06, + "loss": 0.5375808477401733, + "step": 3784 + }, + { + "epoch": 1.1067407515718672, + "grad_norm": 1.4438276838658128, + "learning_rate": 9.06825498529809e-06, + "loss": 0.5574408173561096, + "step": 3785 + }, + { + "epoch": 1.1070331919871326, + "grad_norm": 1.8566533611842586, + "learning_rate": 9.063441419282989e-06, + "loss": 0.7410034537315369, + "step": 3786 + }, + { + "epoch": 1.107325632402398, + "grad_norm": 1.4780218137550694, + "learning_rate": 9.058628072182759e-06, + "loss": 0.4890757203102112, + "step": 3787 + }, + { + "epoch": 1.1076180728176634, + "grad_norm": 1.449027088222319, + "learning_rate": 9.053814945122496e-06, + "loss": 0.5012304782867432, + "step": 3788 + }, + { + "epoch": 1.1079105132329288, + "grad_norm": 1.6277147220392454, + "learning_rate": 9.049002039227239e-06, + "loss": 0.5235648155212402, + "step": 3789 + }, + { + "epoch": 1.108202953648194, + "grad_norm": 1.5099212526378973, + "learning_rate": 9.044189355621969e-06, + "loss": 0.44732457399368286, + "step": 3790 + }, + { + "epoch": 1.1084953940634596, + "grad_norm": 1.6131396298332503, + "learning_rate": 9.039376895431627e-06, + "loss": 0.5771712064743042, + "step": 3791 + }, + { + "epoch": 1.108787834478725, + "grad_norm": 2.537465666899194, + "learning_rate": 9.034564659781096e-06, + "loss": 0.5361784100532532, + "step": 3792 + }, + { + "epoch": 1.1090802748939903, + "grad_norm": 1.3520934517992165, + "learning_rate": 9.029752649795203e-06, + "loss": 0.5305893421173096, + "step": 3793 + }, + { + "epoch": 1.1093727153092559, + "grad_norm": 1.3303918593615456, + "learning_rate": 9.02494086659873e-06, + "loss": 0.5094715356826782, + "step": 3794 + }, + { + "epoch": 1.1096651557245212, + "grad_norm": 1.4162243148383913, + "learning_rate": 9.020129311316405e-06, + "loss": 0.5406676530838013, + "step": 3795 + }, + { + "epoch": 1.1099575961397865, + "grad_norm": 1.5431545303983976, + "learning_rate": 9.015317985072893e-06, 
+ "loss": 0.5170687437057495, + "step": 3796 + }, + { + "epoch": 1.1102500365550518, + "grad_norm": 1.454438976249235, + "learning_rate": 9.010506888992814e-06, + "loss": 0.4632429778575897, + "step": 3797 + }, + { + "epoch": 1.1105424769703174, + "grad_norm": 1.8257270837662332, + "learning_rate": 9.005696024200734e-06, + "loss": 0.5614180564880371, + "step": 3798 + }, + { + "epoch": 1.1108349173855827, + "grad_norm": 1.5187438448472135, + "learning_rate": 9.000885391821164e-06, + "loss": 0.5660920143127441, + "step": 3799 + }, + { + "epoch": 1.111127357800848, + "grad_norm": 1.616333702810617, + "learning_rate": 8.996074992978558e-06, + "loss": 0.6346436142921448, + "step": 3800 + }, + { + "epoch": 1.1114197982161134, + "grad_norm": 1.2613316779938173, + "learning_rate": 8.991264828797319e-06, + "loss": 0.4295850396156311, + "step": 3801 + }, + { + "epoch": 1.111712238631379, + "grad_norm": 1.4545086499056976, + "learning_rate": 8.986454900401791e-06, + "loss": 0.4797070622444153, + "step": 3802 + }, + { + "epoch": 1.1120046790466442, + "grad_norm": 1.3353593055033692, + "learning_rate": 8.98164520891627e-06, + "loss": 0.4912114143371582, + "step": 3803 + }, + { + "epoch": 1.1122971194619096, + "grad_norm": 1.6135433736276805, + "learning_rate": 8.976835755464988e-06, + "loss": 0.4156647026538849, + "step": 3804 + }, + { + "epoch": 1.1125895598771751, + "grad_norm": 1.6120031027815822, + "learning_rate": 8.97202654117213e-06, + "loss": 0.4527992010116577, + "step": 3805 + }, + { + "epoch": 1.1128820002924404, + "grad_norm": 1.6881758541294942, + "learning_rate": 8.967217567161817e-06, + "loss": 0.5969425439834595, + "step": 3806 + }, + { + "epoch": 1.1131744407077058, + "grad_norm": 1.5313114259080804, + "learning_rate": 8.962408834558116e-06, + "loss": 0.5867633819580078, + "step": 3807 + }, + { + "epoch": 1.113466881122971, + "grad_norm": 1.4924056676350326, + "learning_rate": 8.957600344485042e-06, + "loss": 0.549109697341919, + "step": 3808 + }, + { + 
"epoch": 1.1137593215382366, + "grad_norm": 1.6602567019426782, + "learning_rate": 8.952792098066549e-06, + "loss": 0.6336593627929688, + "step": 3809 + }, + { + "epoch": 1.114051761953502, + "grad_norm": 1.4867429859275132, + "learning_rate": 8.947984096426537e-06, + "loss": 0.5403220653533936, + "step": 3810 + }, + { + "epoch": 1.1143442023687673, + "grad_norm": 1.3422567204959701, + "learning_rate": 8.943176340688846e-06, + "loss": 0.37941914796829224, + "step": 3811 + }, + { + "epoch": 1.1146366427840328, + "grad_norm": 1.7322077540170269, + "learning_rate": 8.938368831977262e-06, + "loss": 0.5509335994720459, + "step": 3812 + }, + { + "epoch": 1.1149290831992982, + "grad_norm": 1.7077554301344111, + "learning_rate": 8.933561571415506e-06, + "loss": 0.5798860788345337, + "step": 3813 + }, + { + "epoch": 1.1152215236145635, + "grad_norm": 1.8693354922278385, + "learning_rate": 8.92875456012725e-06, + "loss": 0.5549412965774536, + "step": 3814 + }, + { + "epoch": 1.1155139640298288, + "grad_norm": 1.5992402094758784, + "learning_rate": 8.9239477992361e-06, + "loss": 0.4707058072090149, + "step": 3815 + }, + { + "epoch": 1.1158064044450944, + "grad_norm": 1.5838333385974708, + "learning_rate": 8.919141289865611e-06, + "loss": 0.4717002511024475, + "step": 3816 + }, + { + "epoch": 1.1160988448603597, + "grad_norm": 1.288572308356885, + "learning_rate": 8.914335033139274e-06, + "loss": 0.48403650522232056, + "step": 3817 + }, + { + "epoch": 1.116391285275625, + "grad_norm": 1.6715157915340426, + "learning_rate": 8.909529030180522e-06, + "loss": 0.48592090606689453, + "step": 3818 + }, + { + "epoch": 1.1166837256908906, + "grad_norm": 1.566157541574177, + "learning_rate": 8.904723282112728e-06, + "loss": 0.5052220225334167, + "step": 3819 + }, + { + "epoch": 1.116976166106156, + "grad_norm": 1.6151321192825796, + "learning_rate": 8.899917790059208e-06, + "loss": 0.7858535051345825, + "step": 3820 + }, + { + "epoch": 1.1172686065214212, + "grad_norm": 
1.8369545909174703, + "learning_rate": 8.895112555143217e-06, + "loss": 0.6768159866333008, + "step": 3821 + }, + { + "epoch": 1.1175610469366866, + "grad_norm": 1.8079763728482598, + "learning_rate": 8.890307578487947e-06, + "loss": 0.5661243200302124, + "step": 3822 + }, + { + "epoch": 1.1178534873519521, + "grad_norm": 1.7067515294047517, + "learning_rate": 8.885502861216535e-06, + "loss": 0.5129438638687134, + "step": 3823 + }, + { + "epoch": 1.1181459277672174, + "grad_norm": 1.5735393429941704, + "learning_rate": 8.880698404452051e-06, + "loss": 0.4813467264175415, + "step": 3824 + }, + { + "epoch": 1.1184383681824828, + "grad_norm": 1.5840908667031388, + "learning_rate": 8.87589420931751e-06, + "loss": 0.5165577530860901, + "step": 3825 + }, + { + "epoch": 1.118730808597748, + "grad_norm": 1.4425390765128903, + "learning_rate": 8.871090276935863e-06, + "loss": 0.47335073351860046, + "step": 3826 + }, + { + "epoch": 1.1190232490130136, + "grad_norm": 1.6934955516318184, + "learning_rate": 8.86628660843e-06, + "loss": 0.4902348518371582, + "step": 3827 + }, + { + "epoch": 1.119315689428279, + "grad_norm": 1.7245920316429901, + "learning_rate": 8.861483204922752e-06, + "loss": 0.5933388471603394, + "step": 3828 + }, + { + "epoch": 1.1196081298435443, + "grad_norm": 1.5505961542425288, + "learning_rate": 8.85668006753688e-06, + "loss": 0.4898201823234558, + "step": 3829 + }, + { + "epoch": 1.1199005702588098, + "grad_norm": 1.3287782031202422, + "learning_rate": 8.851877197395088e-06, + "loss": 0.4745003879070282, + "step": 3830 + }, + { + "epoch": 1.1201930106740752, + "grad_norm": 1.5081067046883336, + "learning_rate": 8.847074595620024e-06, + "loss": 0.5246972441673279, + "step": 3831 + }, + { + "epoch": 1.1204854510893405, + "grad_norm": 1.498399687409688, + "learning_rate": 8.842272263334263e-06, + "loss": 0.5196787714958191, + "step": 3832 + }, + { + "epoch": 1.120777891504606, + "grad_norm": 2.0301798075149446, + "learning_rate": 8.83747020166032e-06, + 
"loss": 0.6721034049987793, + "step": 3833 + }, + { + "epoch": 1.1210703319198714, + "grad_norm": 1.4930580521199184, + "learning_rate": 8.832668411720652e-06, + "loss": 0.5654234886169434, + "step": 3834 + }, + { + "epoch": 1.1213627723351367, + "grad_norm": 1.91001506609742, + "learning_rate": 8.827866894637642e-06, + "loss": 0.7520767450332642, + "step": 3835 + }, + { + "epoch": 1.121655212750402, + "grad_norm": 1.3753523987373926, + "learning_rate": 8.82306565153362e-06, + "loss": 0.43645960092544556, + "step": 3836 + }, + { + "epoch": 1.1219476531656676, + "grad_norm": 1.688542605024225, + "learning_rate": 8.818264683530845e-06, + "loss": 0.5802274942398071, + "step": 3837 + }, + { + "epoch": 1.122240093580933, + "grad_norm": 1.5706370149670577, + "learning_rate": 8.813463991751516e-06, + "loss": 0.5593410134315491, + "step": 3838 + }, + { + "epoch": 1.1225325339961982, + "grad_norm": 1.56949134961986, + "learning_rate": 8.808663577317765e-06, + "loss": 0.6126681566238403, + "step": 3839 + }, + { + "epoch": 1.1228249744114636, + "grad_norm": 1.6396156905409707, + "learning_rate": 8.80386344135166e-06, + "loss": 0.6245180368423462, + "step": 3840 + }, + { + "epoch": 1.123117414826729, + "grad_norm": 1.3541654958690765, + "learning_rate": 8.799063584975201e-06, + "loss": 0.6611473560333252, + "step": 3841 + }, + { + "epoch": 1.1234098552419944, + "grad_norm": 1.2310988629927149, + "learning_rate": 8.79426400931033e-06, + "loss": 0.40020978450775146, + "step": 3842 + }, + { + "epoch": 1.1237022956572598, + "grad_norm": 1.3644507366239775, + "learning_rate": 8.789464715478913e-06, + "loss": 0.4965318441390991, + "step": 3843 + }, + { + "epoch": 1.1239947360725253, + "grad_norm": 1.4326851806590044, + "learning_rate": 8.784665704602758e-06, + "loss": 0.4838374853134155, + "step": 3844 + }, + { + "epoch": 1.1242871764877906, + "grad_norm": 1.389039662475551, + "learning_rate": 8.77986697780361e-06, + "loss": 0.5756508708000183, + "step": 3845 + }, + { + "epoch": 
1.124579616903056, + "grad_norm": 1.484286888056792, + "learning_rate": 8.775068536203132e-06, + "loss": 0.5341511964797974, + "step": 3846 + }, + { + "epoch": 1.1248720573183213, + "grad_norm": 1.5007549282773276, + "learning_rate": 8.77027038092294e-06, + "loss": 0.6239134073257446, + "step": 3847 + }, + { + "epoch": 1.1251644977335868, + "grad_norm": 1.818555508500906, + "learning_rate": 8.765472513084566e-06, + "loss": 0.5642406940460205, + "step": 3848 + }, + { + "epoch": 1.1254569381488522, + "grad_norm": 1.5841401225303304, + "learning_rate": 8.760674933809488e-06, + "loss": 0.5242771506309509, + "step": 3849 + }, + { + "epoch": 1.1257493785641175, + "grad_norm": 1.5608207104848433, + "learning_rate": 8.755877644219108e-06, + "loss": 0.5205737352371216, + "step": 3850 + }, + { + "epoch": 1.126041818979383, + "grad_norm": 1.6760248633979633, + "learning_rate": 8.751080645434768e-06, + "loss": 0.5005168318748474, + "step": 3851 + }, + { + "epoch": 1.1263342593946484, + "grad_norm": 1.539730717074913, + "learning_rate": 8.74628393857773e-06, + "loss": 0.44978275895118713, + "step": 3852 + }, + { + "epoch": 1.1266266998099137, + "grad_norm": 1.3558571119447433, + "learning_rate": 8.741487524769198e-06, + "loss": 0.43631571531295776, + "step": 3853 + }, + { + "epoch": 1.126919140225179, + "grad_norm": 1.220093214706796, + "learning_rate": 8.736691405130306e-06, + "loss": 0.4196016788482666, + "step": 3854 + }, + { + "epoch": 1.1272115806404446, + "grad_norm": 1.6296314839875645, + "learning_rate": 8.731895580782118e-06, + "loss": 0.6389856338500977, + "step": 3855 + }, + { + "epoch": 1.12750402105571, + "grad_norm": 1.726359030533187, + "learning_rate": 8.72710005284563e-06, + "loss": 0.5465584993362427, + "step": 3856 + }, + { + "epoch": 1.1277964614709752, + "grad_norm": 1.469192647678069, + "learning_rate": 8.722304822441757e-06, + "loss": 0.5513765811920166, + "step": 3857 + }, + { + "epoch": 1.1280889018862408, + "grad_norm": 1.5516390698184288, + 
"learning_rate": 8.717509890691369e-06, + "loss": 0.6984349489212036, + "step": 3858 + }, + { + "epoch": 1.128381342301506, + "grad_norm": 1.6096511723205336, + "learning_rate": 8.712715258715248e-06, + "loss": 0.5311027765274048, + "step": 3859 + }, + { + "epoch": 1.1286737827167714, + "grad_norm": 1.5113126886002746, + "learning_rate": 8.707920927634105e-06, + "loss": 0.4598672091960907, + "step": 3860 + }, + { + "epoch": 1.1289662231320368, + "grad_norm": 1.8202302284240548, + "learning_rate": 8.703126898568591e-06, + "loss": 0.6177612543106079, + "step": 3861 + }, + { + "epoch": 1.1292586635473023, + "grad_norm": 1.8043964275332298, + "learning_rate": 8.69833317263928e-06, + "loss": 0.6442389488220215, + "step": 3862 + }, + { + "epoch": 1.1295511039625676, + "grad_norm": 1.4793698971631246, + "learning_rate": 8.693539750966672e-06, + "loss": 0.5925737023353577, + "step": 3863 + }, + { + "epoch": 1.129843544377833, + "grad_norm": 1.3730688779887357, + "learning_rate": 8.688746634671207e-06, + "loss": 0.46009114384651184, + "step": 3864 + }, + { + "epoch": 1.1301359847930983, + "grad_norm": 1.6065358861472605, + "learning_rate": 8.683953824873246e-06, + "loss": 0.5438460111618042, + "step": 3865 + }, + { + "epoch": 1.1304284252083638, + "grad_norm": 1.6436751318662282, + "learning_rate": 8.679161322693073e-06, + "loss": 0.5355101823806763, + "step": 3866 + }, + { + "epoch": 1.1307208656236292, + "grad_norm": 1.5636124606467166, + "learning_rate": 8.67436912925091e-06, + "loss": 0.4494459629058838, + "step": 3867 + }, + { + "epoch": 1.1310133060388945, + "grad_norm": 1.5118698872161136, + "learning_rate": 8.669577245666905e-06, + "loss": 0.5828550457954407, + "step": 3868 + }, + { + "epoch": 1.13130574645416, + "grad_norm": 1.43455699505813, + "learning_rate": 8.664785673061127e-06, + "loss": 0.4956590235233307, + "step": 3869 + }, + { + "epoch": 1.1315981868694254, + "grad_norm": 1.5146504272638424, + "learning_rate": 8.659994412553582e-06, + "loss": 
0.5447779893875122, + "step": 3870 + }, + { + "epoch": 1.1318906272846907, + "grad_norm": 1.6512585184867246, + "learning_rate": 8.655203465264196e-06, + "loss": 0.6275361776351929, + "step": 3871 + }, + { + "epoch": 1.1321830676999562, + "grad_norm": 1.564521343459816, + "learning_rate": 8.650412832312823e-06, + "loss": 0.47899991273880005, + "step": 3872 + }, + { + "epoch": 1.1324755081152216, + "grad_norm": 1.1875547206815094, + "learning_rate": 8.645622514819243e-06, + "loss": 0.3356127142906189, + "step": 3873 + }, + { + "epoch": 1.132767948530487, + "grad_norm": 1.4442401622701144, + "learning_rate": 8.640832513903168e-06, + "loss": 0.48855727910995483, + "step": 3874 + }, + { + "epoch": 1.1330603889457522, + "grad_norm": 1.4528018972795056, + "learning_rate": 8.636042830684227e-06, + "loss": 0.46642380952835083, + "step": 3875 + }, + { + "epoch": 1.1333528293610178, + "grad_norm": 1.8421536572224761, + "learning_rate": 8.631253466281984e-06, + "loss": 0.6179598569869995, + "step": 3876 + }, + { + "epoch": 1.133645269776283, + "grad_norm": 1.6762180368596016, + "learning_rate": 8.626464421815919e-06, + "loss": 0.6361704468727112, + "step": 3877 + }, + { + "epoch": 1.1339377101915484, + "grad_norm": 1.574443230288469, + "learning_rate": 8.621675698405446e-06, + "loss": 0.6243701577186584, + "step": 3878 + }, + { + "epoch": 1.1342301506068138, + "grad_norm": 1.6113304231540622, + "learning_rate": 8.616887297169895e-06, + "loss": 0.5402215123176575, + "step": 3879 + }, + { + "epoch": 1.1345225910220793, + "grad_norm": 1.6390191276422172, + "learning_rate": 8.61209921922853e-06, + "loss": 0.6050009727478027, + "step": 3880 + }, + { + "epoch": 1.1348150314373446, + "grad_norm": 1.6106875040973343, + "learning_rate": 8.607311465700534e-06, + "loss": 0.5705801248550415, + "step": 3881 + }, + { + "epoch": 1.13510747185261, + "grad_norm": 1.5380461037587805, + "learning_rate": 8.602524037705018e-06, + "loss": 0.5467248558998108, + "step": 3882 + }, + { + "epoch": 
1.1353999122678755, + "grad_norm": 1.7121108266736746, + "learning_rate": 8.597736936361007e-06, + "loss": 0.5903012752532959, + "step": 3883 + }, + { + "epoch": 1.1356923526831408, + "grad_norm": 1.6218348221942134, + "learning_rate": 8.592950162787463e-06, + "loss": 0.6034090518951416, + "step": 3884 + }, + { + "epoch": 1.1359847930984062, + "grad_norm": 1.3056254339924755, + "learning_rate": 8.588163718103264e-06, + "loss": 0.4282987117767334, + "step": 3885 + }, + { + "epoch": 1.1362772335136717, + "grad_norm": 1.5127630417626896, + "learning_rate": 8.583377603427212e-06, + "loss": 0.47374194860458374, + "step": 3886 + }, + { + "epoch": 1.136569673928937, + "grad_norm": 1.5841934947134406, + "learning_rate": 8.578591819878033e-06, + "loss": 0.43954724073410034, + "step": 3887 + }, + { + "epoch": 1.1368621143442024, + "grad_norm": 1.4278799477191386, + "learning_rate": 8.573806368574372e-06, + "loss": 0.4731065034866333, + "step": 3888 + }, + { + "epoch": 1.1371545547594677, + "grad_norm": 1.4184800646863156, + "learning_rate": 8.5690212506348e-06, + "loss": 0.5241256356239319, + "step": 3889 + }, + { + "epoch": 1.1374469951747332, + "grad_norm": 1.631021419370316, + "learning_rate": 8.56423646717781e-06, + "loss": 0.5823307037353516, + "step": 3890 + }, + { + "epoch": 1.1377394355899986, + "grad_norm": 1.654201038343883, + "learning_rate": 8.55945201932182e-06, + "loss": 0.5360631346702576, + "step": 3891 + }, + { + "epoch": 1.138031876005264, + "grad_norm": 1.5773624073994579, + "learning_rate": 8.554667908185158e-06, + "loss": 0.5227797627449036, + "step": 3892 + }, + { + "epoch": 1.1383243164205292, + "grad_norm": 1.7414634806893152, + "learning_rate": 8.549884134886089e-06, + "loss": 0.6232806444168091, + "step": 3893 + }, + { + "epoch": 1.1386167568357948, + "grad_norm": 1.567438316916472, + "learning_rate": 8.545100700542782e-06, + "loss": 0.6697877049446106, + "step": 3894 + }, + { + "epoch": 1.13890919725106, + "grad_norm": 1.5115348655280192, + 
"learning_rate": 8.540317606273343e-06, + "loss": 0.6348206400871277, + "step": 3895 + }, + { + "epoch": 1.1392016376663254, + "grad_norm": 1.5453537409734852, + "learning_rate": 8.535534853195786e-06, + "loss": 0.5578476190567017, + "step": 3896 + }, + { + "epoch": 1.139494078081591, + "grad_norm": 1.6674507953444782, + "learning_rate": 8.530752442428055e-06, + "loss": 0.6439946889877319, + "step": 3897 + }, + { + "epoch": 1.1397865184968563, + "grad_norm": 1.5233786551580588, + "learning_rate": 8.525970375088006e-06, + "loss": 0.5292261242866516, + "step": 3898 + }, + { + "epoch": 1.1400789589121216, + "grad_norm": 1.6870433422022266, + "learning_rate": 8.521188652293421e-06, + "loss": 0.5836480855941772, + "step": 3899 + }, + { + "epoch": 1.140371399327387, + "grad_norm": 1.731988866581243, + "learning_rate": 8.516407275161998e-06, + "loss": 0.5166354775428772, + "step": 3900 + }, + { + "epoch": 1.1406638397426525, + "grad_norm": 1.61853635946673, + "learning_rate": 8.511626244811352e-06, + "loss": 0.5236127972602844, + "step": 3901 + }, + { + "epoch": 1.1409562801579178, + "grad_norm": 1.3903461786321225, + "learning_rate": 8.506845562359022e-06, + "loss": 0.4900703430175781, + "step": 3902 + }, + { + "epoch": 1.1412487205731832, + "grad_norm": 1.484704749479714, + "learning_rate": 8.502065228922464e-06, + "loss": 0.5200212001800537, + "step": 3903 + }, + { + "epoch": 1.1415411609884485, + "grad_norm": 1.588712114908106, + "learning_rate": 8.497285245619053e-06, + "loss": 0.5553300976753235, + "step": 3904 + }, + { + "epoch": 1.141833601403714, + "grad_norm": 1.5947362241383982, + "learning_rate": 8.492505613566075e-06, + "loss": 0.5650131702423096, + "step": 3905 + }, + { + "epoch": 1.1421260418189794, + "grad_norm": 1.7370414648582224, + "learning_rate": 8.487726333880746e-06, + "loss": 0.4732077121734619, + "step": 3906 + }, + { + "epoch": 1.1424184822342447, + "grad_norm": 1.4560698890341355, + "learning_rate": 8.482947407680193e-06, + "loss": 
0.46741920709609985, + "step": 3907 + }, + { + "epoch": 1.1427109226495102, + "grad_norm": 1.717154367813477, + "learning_rate": 8.478168836081457e-06, + "loss": 0.606191873550415, + "step": 3908 + }, + { + "epoch": 1.1430033630647756, + "grad_norm": 1.4018605845855592, + "learning_rate": 8.473390620201505e-06, + "loss": 0.4373897314071655, + "step": 3909 + }, + { + "epoch": 1.143295803480041, + "grad_norm": 1.4705540951964773, + "learning_rate": 8.468612761157215e-06, + "loss": 0.5460623502731323, + "step": 3910 + }, + { + "epoch": 1.1435882438953064, + "grad_norm": 1.311062743935516, + "learning_rate": 8.463835260065379e-06, + "loss": 0.4939531087875366, + "step": 3911 + }, + { + "epoch": 1.1438806843105718, + "grad_norm": 1.4297382144675803, + "learning_rate": 8.459058118042708e-06, + "loss": 0.544964611530304, + "step": 3912 + }, + { + "epoch": 1.144173124725837, + "grad_norm": 1.600083847682917, + "learning_rate": 8.454281336205836e-06, + "loss": 0.6118921041488647, + "step": 3913 + }, + { + "epoch": 1.1444655651411024, + "grad_norm": 1.4018893231050742, + "learning_rate": 8.449504915671304e-06, + "loss": 0.561060905456543, + "step": 3914 + }, + { + "epoch": 1.144758005556368, + "grad_norm": 1.3967184209578762, + "learning_rate": 8.444728857555572e-06, + "loss": 0.430827796459198, + "step": 3915 + }, + { + "epoch": 1.1450504459716333, + "grad_norm": 1.7776445971156332, + "learning_rate": 8.439953162975011e-06, + "loss": 0.5482884645462036, + "step": 3916 + }, + { + "epoch": 1.1453428863868986, + "grad_norm": 1.8487697311002218, + "learning_rate": 8.435177833045911e-06, + "loss": 0.6614879965782166, + "step": 3917 + }, + { + "epoch": 1.145635326802164, + "grad_norm": 1.686073678884194, + "learning_rate": 8.430402868884482e-06, + "loss": 0.6290509104728699, + "step": 3918 + }, + { + "epoch": 1.1459277672174295, + "grad_norm": 1.479686951025449, + "learning_rate": 8.425628271606836e-06, + "loss": 0.404970645904541, + "step": 3919 + }, + { + "epoch": 
1.1462202076326948, + "grad_norm": 1.3978968237521616, + "learning_rate": 8.420854042329011e-06, + "loss": 0.4902762174606323, + "step": 3920 + }, + { + "epoch": 1.1465126480479602, + "grad_norm": 1.6869046118960203, + "learning_rate": 8.416080182166955e-06, + "loss": 0.5757346153259277, + "step": 3921 + }, + { + "epoch": 1.1468050884632257, + "grad_norm": 1.5541954452670608, + "learning_rate": 8.41130669223652e-06, + "loss": 0.5453485250473022, + "step": 3922 + }, + { + "epoch": 1.147097528878491, + "grad_norm": 1.7189844130617113, + "learning_rate": 8.40653357365349e-06, + "loss": 0.5660290122032166, + "step": 3923 + }, + { + "epoch": 1.1473899692937564, + "grad_norm": 1.576466831282747, + "learning_rate": 8.40176082753355e-06, + "loss": 0.46013498306274414, + "step": 3924 + }, + { + "epoch": 1.147682409709022, + "grad_norm": 1.4364824711460213, + "learning_rate": 8.396988454992296e-06, + "loss": 0.5183000564575195, + "step": 3925 + }, + { + "epoch": 1.1479748501242872, + "grad_norm": 1.7258149850246205, + "learning_rate": 8.392216457145246e-06, + "loss": 0.5407284498214722, + "step": 3926 + }, + { + "epoch": 1.1482672905395526, + "grad_norm": 1.735168999167248, + "learning_rate": 8.387444835107824e-06, + "loss": 0.5960655808448792, + "step": 3927 + }, + { + "epoch": 1.148559730954818, + "grad_norm": 1.2891916158500891, + "learning_rate": 8.382673589995365e-06, + "loss": 0.4363316297531128, + "step": 3928 + }, + { + "epoch": 1.1488521713700834, + "grad_norm": 1.4413045514377891, + "learning_rate": 8.377902722923122e-06, + "loss": 0.5143908262252808, + "step": 3929 + }, + { + "epoch": 1.1491446117853488, + "grad_norm": 1.696244956095385, + "learning_rate": 8.373132235006254e-06, + "loss": 0.6016460657119751, + "step": 3930 + }, + { + "epoch": 1.149437052200614, + "grad_norm": 1.4319561508465357, + "learning_rate": 8.368362127359835e-06, + "loss": 0.5120511651039124, + "step": 3931 + }, + { + "epoch": 1.1497294926158794, + "grad_norm": 1.5254857110351325, + 
"learning_rate": 8.363592401098853e-06, + "loss": 0.49658435583114624, + "step": 3932 + }, + { + "epoch": 1.150021933031145, + "grad_norm": 1.7705521617533395, + "learning_rate": 8.358823057338188e-06, + "loss": 0.584032416343689, + "step": 3933 + }, + { + "epoch": 1.1503143734464103, + "grad_norm": 1.5012587623360505, + "learning_rate": 8.35405409719266e-06, + "loss": 0.4673706293106079, + "step": 3934 + }, + { + "epoch": 1.1506068138616756, + "grad_norm": 1.618555555366979, + "learning_rate": 8.349285521776982e-06, + "loss": 0.633565366268158, + "step": 3935 + }, + { + "epoch": 1.1508992542769412, + "grad_norm": 1.6576478038135816, + "learning_rate": 8.344517332205774e-06, + "loss": 0.6029015779495239, + "step": 3936 + }, + { + "epoch": 1.1511916946922065, + "grad_norm": 1.519081286345544, + "learning_rate": 8.339749529593574e-06, + "loss": 0.45594489574432373, + "step": 3937 + }, + { + "epoch": 1.1514841351074718, + "grad_norm": 1.5262842564669963, + "learning_rate": 8.334982115054828e-06, + "loss": 0.4413541257381439, + "step": 3938 + }, + { + "epoch": 1.1517765755227372, + "grad_norm": 1.6373893488771099, + "learning_rate": 8.330215089703887e-06, + "loss": 0.5674389004707336, + "step": 3939 + }, + { + "epoch": 1.1520690159380027, + "grad_norm": 1.386401958621656, + "learning_rate": 8.325448454655019e-06, + "loss": 0.43449294567108154, + "step": 3940 + }, + { + "epoch": 1.152361456353268, + "grad_norm": 1.6283403091444353, + "learning_rate": 8.320682211022393e-06, + "loss": 0.5190714597702026, + "step": 3941 + }, + { + "epoch": 1.1526538967685334, + "grad_norm": 1.5774508757028434, + "learning_rate": 8.31591635992009e-06, + "loss": 0.56162428855896, + "step": 3942 + }, + { + "epoch": 1.1529463371837987, + "grad_norm": 1.4891934876919055, + "learning_rate": 8.311150902462096e-06, + "loss": 0.5588958263397217, + "step": 3943 + }, + { + "epoch": 1.1532387775990642, + "grad_norm": 1.5108312938903155, + "learning_rate": 8.306385839762312e-06, + "loss": 
0.5438264608383179, + "step": 3944 + }, + { + "epoch": 1.1535312180143296, + "grad_norm": 1.575513080138648, + "learning_rate": 8.30162117293454e-06, + "loss": 0.5860258340835571, + "step": 3945 + }, + { + "epoch": 1.153823658429595, + "grad_norm": 1.552005958726473, + "learning_rate": 8.296856903092494e-06, + "loss": 0.4742947220802307, + "step": 3946 + }, + { + "epoch": 1.1541160988448604, + "grad_norm": 1.44195573685015, + "learning_rate": 8.292093031349791e-06, + "loss": 0.47963109612464905, + "step": 3947 + }, + { + "epoch": 1.1544085392601258, + "grad_norm": 1.5340226225614597, + "learning_rate": 8.287329558819957e-06, + "loss": 0.5404704213142395, + "step": 3948 + }, + { + "epoch": 1.154700979675391, + "grad_norm": 1.8054477659796657, + "learning_rate": 8.282566486616425e-06, + "loss": 0.6559766530990601, + "step": 3949 + }, + { + "epoch": 1.1549934200906566, + "grad_norm": 1.507763379787764, + "learning_rate": 8.277803815852535e-06, + "loss": 0.4462929368019104, + "step": 3950 + }, + { + "epoch": 1.155285860505922, + "grad_norm": 1.6398920335039024, + "learning_rate": 8.273041547641531e-06, + "loss": 0.5672504901885986, + "step": 3951 + }, + { + "epoch": 1.1555783009211873, + "grad_norm": 1.5384582587859306, + "learning_rate": 8.268279683096567e-06, + "loss": 0.4040188193321228, + "step": 3952 + }, + { + "epoch": 1.1558707413364526, + "grad_norm": 1.4954603260099153, + "learning_rate": 8.263518223330698e-06, + "loss": 0.4639814794063568, + "step": 3953 + }, + { + "epoch": 1.1561631817517182, + "grad_norm": 1.3560290444841174, + "learning_rate": 8.258757169456885e-06, + "loss": 0.384866327047348, + "step": 3954 + }, + { + "epoch": 1.1564556221669835, + "grad_norm": 1.5360587849114566, + "learning_rate": 8.253996522587997e-06, + "loss": 0.452106773853302, + "step": 3955 + }, + { + "epoch": 1.1567480625822488, + "grad_norm": 1.5044138285106523, + "learning_rate": 8.249236283836806e-06, + "loss": 0.487504780292511, + "step": 3956 + }, + { + "epoch": 
1.1570405029975142, + "grad_norm": 1.6199121483000312, + "learning_rate": 8.244476454315989e-06, + "loss": 0.6225916147232056, + "step": 3957 + }, + { + "epoch": 1.1573329434127797, + "grad_norm": 1.7421167385988239, + "learning_rate": 8.239717035138128e-06, + "loss": 0.5254271030426025, + "step": 3958 + }, + { + "epoch": 1.157625383828045, + "grad_norm": 1.6240162719096014, + "learning_rate": 8.234958027415707e-06, + "loss": 0.5759135484695435, + "step": 3959 + }, + { + "epoch": 1.1579178242433104, + "grad_norm": 1.6959935899735565, + "learning_rate": 8.230199432261115e-06, + "loss": 0.5720966458320618, + "step": 3960 + }, + { + "epoch": 1.158210264658576, + "grad_norm": 1.5797174163929866, + "learning_rate": 8.225441250786643e-06, + "loss": 0.4807323217391968, + "step": 3961 + }, + { + "epoch": 1.1585027050738412, + "grad_norm": 1.6197693861653146, + "learning_rate": 8.22068348410449e-06, + "loss": 0.5049746036529541, + "step": 3962 + }, + { + "epoch": 1.1587951454891066, + "grad_norm": 1.673364031578337, + "learning_rate": 8.215926133326758e-06, + "loss": 0.5321973562240601, + "step": 3963 + }, + { + "epoch": 1.159087585904372, + "grad_norm": 1.3992709586079797, + "learning_rate": 8.211169199565444e-06, + "loss": 0.5176634788513184, + "step": 3964 + }, + { + "epoch": 1.1593800263196374, + "grad_norm": 1.5661593234971032, + "learning_rate": 8.20641268393245e-06, + "loss": 0.5345112681388855, + "step": 3965 + }, + { + "epoch": 1.1596724667349028, + "grad_norm": 1.8309312482061675, + "learning_rate": 8.201656587539589e-06, + "loss": 0.47578325867652893, + "step": 3966 + }, + { + "epoch": 1.159964907150168, + "grad_norm": 1.5996140092470157, + "learning_rate": 8.196900911498563e-06, + "loss": 0.5018264651298523, + "step": 3967 + }, + { + "epoch": 1.1602573475654336, + "grad_norm": 1.530612277867195, + "learning_rate": 8.192145656920989e-06, + "loss": 0.4643394351005554, + "step": 3968 + }, + { + "epoch": 1.160549787980699, + "grad_norm": 1.6066179328722245, + 
"learning_rate": 8.187390824918375e-06, + "loss": 0.5391045808792114, + "step": 3969 + }, + { + "epoch": 1.1608422283959643, + "grad_norm": 1.4691594768883462, + "learning_rate": 8.182636416602136e-06, + "loss": 0.5168124437332153, + "step": 3970 + }, + { + "epoch": 1.1611346688112296, + "grad_norm": 1.4702658109064293, + "learning_rate": 8.177882433083583e-06, + "loss": 0.5821055173873901, + "step": 3971 + }, + { + "epoch": 1.1614271092264952, + "grad_norm": 1.597748811964364, + "learning_rate": 8.173128875473933e-06, + "loss": 0.6031824946403503, + "step": 3972 + }, + { + "epoch": 1.1617195496417605, + "grad_norm": 1.598311083454874, + "learning_rate": 8.1683757448843e-06, + "loss": 0.5085259675979614, + "step": 3973 + }, + { + "epoch": 1.1620119900570258, + "grad_norm": 1.6218562380492636, + "learning_rate": 8.163623042425702e-06, + "loss": 0.5654903650283813, + "step": 3974 + }, + { + "epoch": 1.1623044304722914, + "grad_norm": 1.6279393236171642, + "learning_rate": 8.158870769209051e-06, + "loss": 0.3920902609825134, + "step": 3975 + }, + { + "epoch": 1.1625968708875567, + "grad_norm": 1.6100798425685794, + "learning_rate": 8.154118926345165e-06, + "loss": 0.5334979891777039, + "step": 3976 + }, + { + "epoch": 1.162889311302822, + "grad_norm": 1.7332980039574648, + "learning_rate": 8.149367514944754e-06, + "loss": 0.6212184429168701, + "step": 3977 + }, + { + "epoch": 1.1631817517180874, + "grad_norm": 1.847204612085083, + "learning_rate": 8.144616536118437e-06, + "loss": 0.71863853931427, + "step": 3978 + }, + { + "epoch": 1.163474192133353, + "grad_norm": 1.7297963031597574, + "learning_rate": 8.139865990976722e-06, + "loss": 0.5263794660568237, + "step": 3979 + }, + { + "epoch": 1.1637666325486182, + "grad_norm": 1.5706968019905152, + "learning_rate": 8.135115880630025e-06, + "loss": 0.5035576224327087, + "step": 3980 + }, + { + "epoch": 1.1640590729638836, + "grad_norm": 1.4183002447341373, + "learning_rate": 8.130366206188651e-06, + "loss": 
0.5695084929466248, + "step": 3981 + }, + { + "epoch": 1.1643515133791489, + "grad_norm": 1.51980370598088, + "learning_rate": 8.125616968762806e-06, + "loss": 0.5826396942138672, + "step": 3982 + }, + { + "epoch": 1.1646439537944144, + "grad_norm": 1.5991682342910063, + "learning_rate": 8.1208681694626e-06, + "loss": 0.5132841467857361, + "step": 3983 + }, + { + "epoch": 1.1649363942096798, + "grad_norm": 1.7073185800473716, + "learning_rate": 8.116119809398034e-06, + "loss": 0.6572669744491577, + "step": 3984 + }, + { + "epoch": 1.165228834624945, + "grad_norm": 1.8729301131644296, + "learning_rate": 8.111371889679007e-06, + "loss": 0.5365801453590393, + "step": 3985 + }, + { + "epoch": 1.1655212750402106, + "grad_norm": 1.4561472169130645, + "learning_rate": 8.10662441141532e-06, + "loss": 0.44511687755584717, + "step": 3986 + }, + { + "epoch": 1.165813715455476, + "grad_norm": 1.596383666869324, + "learning_rate": 8.101877375716666e-06, + "loss": 0.47212404012680054, + "step": 3987 + }, + { + "epoch": 1.1661061558707413, + "grad_norm": 1.5859450593798408, + "learning_rate": 8.097130783692631e-06, + "loss": 0.5942205786705017, + "step": 3988 + }, + { + "epoch": 1.1663985962860068, + "grad_norm": 1.6678058947227146, + "learning_rate": 8.092384636452708e-06, + "loss": 0.49162304401397705, + "step": 3989 + }, + { + "epoch": 1.1666910367012722, + "grad_norm": 2.635849062548634, + "learning_rate": 8.087638935106277e-06, + "loss": 0.6544803380966187, + "step": 3990 + }, + { + "epoch": 1.1669834771165375, + "grad_norm": 1.677008396527972, + "learning_rate": 8.082893680762619e-06, + "loss": 0.5572186708450317, + "step": 3991 + }, + { + "epoch": 1.1672759175318028, + "grad_norm": 1.6443546400872178, + "learning_rate": 8.078148874530906e-06, + "loss": 0.5836775898933411, + "step": 3992 + }, + { + "epoch": 1.1675683579470684, + "grad_norm": 1.3079140035223278, + "learning_rate": 8.073404517520208e-06, + "loss": 0.5507068634033203, + "step": 3993 + }, + { + "epoch": 
1.1678607983623337, + "grad_norm": 1.9861505555993526, + "learning_rate": 8.068660610839489e-06, + "loss": 0.5312684178352356, + "step": 3994 + }, + { + "epoch": 1.168153238777599, + "grad_norm": 1.6552821635427635, + "learning_rate": 8.06391715559761e-06, + "loss": 0.5555688142776489, + "step": 3995 + }, + { + "epoch": 1.1684456791928644, + "grad_norm": 1.6871575092969338, + "learning_rate": 8.059174152903324e-06, + "loss": 0.5724596977233887, + "step": 3996 + }, + { + "epoch": 1.16873811960813, + "grad_norm": 1.6937240718062052, + "learning_rate": 8.054431603865282e-06, + "loss": 0.6212218999862671, + "step": 3997 + }, + { + "epoch": 1.1690305600233952, + "grad_norm": 1.4230492945656301, + "learning_rate": 8.049689509592023e-06, + "loss": 0.5061509609222412, + "step": 3998 + }, + { + "epoch": 1.1693230004386606, + "grad_norm": 1.695677070671476, + "learning_rate": 8.044947871191982e-06, + "loss": 0.6143001914024353, + "step": 3999 + }, + { + "epoch": 1.169615440853926, + "grad_norm": 1.32203821023621, + "learning_rate": 8.040206689773487e-06, + "loss": 0.5079911351203918, + "step": 4000 + }, + { + "epoch": 1.1699078812691914, + "grad_norm": 1.6638666755727167, + "learning_rate": 8.035465966444764e-06, + "loss": 0.6104908585548401, + "step": 4001 + }, + { + "epoch": 1.1702003216844568, + "grad_norm": 1.680147004679776, + "learning_rate": 8.03072570231393e-06, + "loss": 0.5953013896942139, + "step": 4002 + }, + { + "epoch": 1.1704927620997223, + "grad_norm": 1.3075175590117196, + "learning_rate": 8.025985898488986e-06, + "loss": 0.4541323781013489, + "step": 4003 + }, + { + "epoch": 1.1707852025149876, + "grad_norm": 1.5142685350846732, + "learning_rate": 8.021246556077838e-06, + "loss": 0.5708850026130676, + "step": 4004 + }, + { + "epoch": 1.171077642930253, + "grad_norm": 1.6574340180310174, + "learning_rate": 8.016507676188275e-06, + "loss": 0.5430601835250854, + "step": 4005 + }, + { + "epoch": 1.1713700833455183, + "grad_norm": 1.504278683910439, + 
"learning_rate": 8.011769259927981e-06, + "loss": 0.5621174573898315, + "step": 4006 + }, + { + "epoch": 1.1716625237607838, + "grad_norm": 1.5473800123062453, + "learning_rate": 8.007031308404536e-06, + "loss": 0.48092782497406006, + "step": 4007 + }, + { + "epoch": 1.1719549641760492, + "grad_norm": 1.9401766125340165, + "learning_rate": 8.002293822725404e-06, + "loss": 0.5770663022994995, + "step": 4008 + }, + { + "epoch": 1.1722474045913145, + "grad_norm": 1.7123399188942874, + "learning_rate": 7.997556803997945e-06, + "loss": 0.5692728757858276, + "step": 4009 + }, + { + "epoch": 1.1725398450065798, + "grad_norm": 1.6496884851556144, + "learning_rate": 7.99282025332941e-06, + "loss": 0.6256895065307617, + "step": 4010 + }, + { + "epoch": 1.1728322854218454, + "grad_norm": 1.4529405935008253, + "learning_rate": 7.988084171826937e-06, + "loss": 0.4272884130477905, + "step": 4011 + }, + { + "epoch": 1.1731247258371107, + "grad_norm": 1.548325850009333, + "learning_rate": 7.983348560597557e-06, + "loss": 0.5113184452056885, + "step": 4012 + }, + { + "epoch": 1.173417166252376, + "grad_norm": 1.4489276426544837, + "learning_rate": 7.978613420748186e-06, + "loss": 0.45635539293289185, + "step": 4013 + }, + { + "epoch": 1.1737096066676416, + "grad_norm": 1.6347983233600756, + "learning_rate": 7.973878753385638e-06, + "loss": 0.5539636611938477, + "step": 4014 + }, + { + "epoch": 1.174002047082907, + "grad_norm": 1.4140693021111321, + "learning_rate": 7.969144559616615e-06, + "loss": 0.6083431243896484, + "step": 4015 + }, + { + "epoch": 1.1742944874981722, + "grad_norm": 1.372102806580561, + "learning_rate": 7.9644108405477e-06, + "loss": 0.5268326997756958, + "step": 4016 + }, + { + "epoch": 1.1745869279134376, + "grad_norm": 1.5989300144328094, + "learning_rate": 7.95967759728538e-06, + "loss": 0.5680301189422607, + "step": 4017 + }, + { + "epoch": 1.174879368328703, + "grad_norm": 1.62105042514946, + "learning_rate": 7.954944830936012e-06, + "loss": 
0.5457121133804321, + "step": 4018 + }, + { + "epoch": 1.1751718087439684, + "grad_norm": 1.59579517284719, + "learning_rate": 7.950212542605857e-06, + "loss": 0.5358338356018066, + "step": 4019 + }, + { + "epoch": 1.1754642491592338, + "grad_norm": 1.5630110417390142, + "learning_rate": 7.945480733401056e-06, + "loss": 0.6094579696655273, + "step": 4020 + }, + { + "epoch": 1.175756689574499, + "grad_norm": 1.6732097827507912, + "learning_rate": 7.940749404427642e-06, + "loss": 0.5108463764190674, + "step": 4021 + }, + { + "epoch": 1.1760491299897646, + "grad_norm": 1.6495017651653137, + "learning_rate": 7.936018556791537e-06, + "loss": 0.4946494698524475, + "step": 4022 + }, + { + "epoch": 1.17634157040503, + "grad_norm": 1.7757854212337651, + "learning_rate": 7.931288191598543e-06, + "loss": 0.5056017637252808, + "step": 4023 + }, + { + "epoch": 1.1766340108202953, + "grad_norm": 1.5276820294687934, + "learning_rate": 7.926558309954354e-06, + "loss": 0.5242294073104858, + "step": 4024 + }, + { + "epoch": 1.1769264512355608, + "grad_norm": 1.682705103807737, + "learning_rate": 7.921828912964556e-06, + "loss": 0.5667276382446289, + "step": 4025 + }, + { + "epoch": 1.1772188916508262, + "grad_norm": 1.6370912674167624, + "learning_rate": 7.917100001734614e-06, + "loss": 0.5282422304153442, + "step": 4026 + }, + { + "epoch": 1.1775113320660915, + "grad_norm": 1.6026370834828365, + "learning_rate": 7.912371577369881e-06, + "loss": 0.4887520670890808, + "step": 4027 + }, + { + "epoch": 1.177803772481357, + "grad_norm": 1.6287890532743194, + "learning_rate": 7.907643640975603e-06, + "loss": 0.5082155466079712, + "step": 4028 + }, + { + "epoch": 1.1780962128966224, + "grad_norm": 1.649507152949628, + "learning_rate": 7.902916193656898e-06, + "loss": 0.5432984828948975, + "step": 4029 + }, + { + "epoch": 1.1783886533118877, + "grad_norm": 1.7249606112651144, + "learning_rate": 7.898189236518783e-06, + "loss": 0.4313681721687317, + "step": 4030 + }, + { + "epoch": 
1.178681093727153, + "grad_norm": 1.494399406404666, + "learning_rate": 7.893462770666155e-06, + "loss": 0.6051831245422363, + "step": 4031 + }, + { + "epoch": 1.1789735341424186, + "grad_norm": 1.5057449817059945, + "learning_rate": 7.888736797203796e-06, + "loss": 0.45805442333221436, + "step": 4032 + }, + { + "epoch": 1.179265974557684, + "grad_norm": 1.7917233044229635, + "learning_rate": 7.884011317236376e-06, + "loss": 0.4998340606689453, + "step": 4033 + }, + { + "epoch": 1.1795584149729492, + "grad_norm": 1.651259706746187, + "learning_rate": 7.879286331868443e-06, + "loss": 0.5298212170600891, + "step": 4034 + }, + { + "epoch": 1.1798508553882145, + "grad_norm": 1.7028183419777814, + "learning_rate": 7.874561842204437e-06, + "loss": 0.5104682445526123, + "step": 4035 + }, + { + "epoch": 1.18014329580348, + "grad_norm": 1.6393724776910414, + "learning_rate": 7.869837849348676e-06, + "loss": 0.5793051719665527, + "step": 4036 + }, + { + "epoch": 1.1804357362187454, + "grad_norm": 1.4839435154715734, + "learning_rate": 7.865114354405367e-06, + "loss": 0.42913323640823364, + "step": 4037 + }, + { + "epoch": 1.1807281766340108, + "grad_norm": 2.104724599006863, + "learning_rate": 7.860391358478596e-06, + "loss": 0.5183675289154053, + "step": 4038 + }, + { + "epoch": 1.1810206170492763, + "grad_norm": 1.5685744104736703, + "learning_rate": 7.855668862672339e-06, + "loss": 0.444034218788147, + "step": 4039 + }, + { + "epoch": 1.1813130574645416, + "grad_norm": 1.486556561749613, + "learning_rate": 7.850946868090446e-06, + "loss": 0.4357207417488098, + "step": 4040 + }, + { + "epoch": 1.181605497879807, + "grad_norm": 1.6923285770365775, + "learning_rate": 7.846225375836657e-06, + "loss": 0.4517707824707031, + "step": 4041 + }, + { + "epoch": 1.1818979382950725, + "grad_norm": 1.5865011864132745, + "learning_rate": 7.841504387014589e-06, + "loss": 0.4437381625175476, + "step": 4042 + }, + { + "epoch": 1.1821903787103378, + "grad_norm": 1.4744521314451464, + 
"learning_rate": 7.836783902727746e-06, + "loss": 0.5364828109741211, + "step": 4043 + }, + { + "epoch": 1.1824828191256032, + "grad_norm": 1.650227369991675, + "learning_rate": 7.832063924079516e-06, + "loss": 0.4814251661300659, + "step": 4044 + }, + { + "epoch": 1.1827752595408685, + "grad_norm": 1.9016693432010778, + "learning_rate": 7.827344452173163e-06, + "loss": 0.5376232862472534, + "step": 4045 + }, + { + "epoch": 1.183067699956134, + "grad_norm": 1.7007887018924743, + "learning_rate": 7.822625488111833e-06, + "loss": 0.6005147695541382, + "step": 4046 + }, + { + "epoch": 1.1833601403713994, + "grad_norm": 1.5696670197669271, + "learning_rate": 7.817907032998556e-06, + "loss": 0.5276827216148376, + "step": 4047 + }, + { + "epoch": 1.1836525807866647, + "grad_norm": 2.1978111734105994, + "learning_rate": 7.813189087936243e-06, + "loss": 0.6425626277923584, + "step": 4048 + }, + { + "epoch": 1.18394502120193, + "grad_norm": 1.272646490936496, + "learning_rate": 7.808471654027685e-06, + "loss": 0.44388407468795776, + "step": 4049 + }, + { + "epoch": 1.1842374616171956, + "grad_norm": 1.743245771156321, + "learning_rate": 7.803754732375554e-06, + "loss": 0.5044336318969727, + "step": 4050 + }, + { + "epoch": 1.184529902032461, + "grad_norm": 1.9415496480441554, + "learning_rate": 7.7990383240824e-06, + "loss": 0.6964906454086304, + "step": 4051 + }, + { + "epoch": 1.1848223424477262, + "grad_norm": 1.9029191440552455, + "learning_rate": 7.794322430250654e-06, + "loss": 0.6093637943267822, + "step": 4052 + }, + { + "epoch": 1.1851147828629918, + "grad_norm": 1.8079016024144563, + "learning_rate": 7.78960705198263e-06, + "loss": 0.5264803171157837, + "step": 4053 + }, + { + "epoch": 1.185407223278257, + "grad_norm": 1.444425047773482, + "learning_rate": 7.78489219038052e-06, + "loss": 0.5336456298828125, + "step": 4054 + }, + { + "epoch": 1.1856996636935224, + "grad_norm": 1.7563642817078289, + "learning_rate": 7.78017784654639e-06, + "loss": 
0.5266311168670654, + "step": 4055 + }, + { + "epoch": 1.1859921041087877, + "grad_norm": 1.6538609406479838, + "learning_rate": 7.775464021582195e-06, + "loss": 0.6281685829162598, + "step": 4056 + }, + { + "epoch": 1.1862845445240533, + "grad_norm": 1.6081255371588656, + "learning_rate": 7.770750716589758e-06, + "loss": 0.560591995716095, + "step": 4057 + }, + { + "epoch": 1.1865769849393186, + "grad_norm": 1.7912692279763305, + "learning_rate": 7.766037932670786e-06, + "loss": 0.5751859545707703, + "step": 4058 + }, + { + "epoch": 1.186869425354584, + "grad_norm": 1.629657999448518, + "learning_rate": 7.761325670926864e-06, + "loss": 0.5404624938964844, + "step": 4059 + }, + { + "epoch": 1.1871618657698493, + "grad_norm": 1.6997280704374504, + "learning_rate": 7.756613932459456e-06, + "loss": 0.4714626669883728, + "step": 4060 + }, + { + "epoch": 1.1874543061851148, + "grad_norm": 1.4471766418666208, + "learning_rate": 7.751902718369903e-06, + "loss": 0.5449519157409668, + "step": 4061 + }, + { + "epoch": 1.1877467466003802, + "grad_norm": 1.6279611933236646, + "learning_rate": 7.747192029759419e-06, + "loss": 0.6518754959106445, + "step": 4062 + }, + { + "epoch": 1.1880391870156455, + "grad_norm": 1.4655931664348079, + "learning_rate": 7.7424818677291e-06, + "loss": 0.47224369645118713, + "step": 4063 + }, + { + "epoch": 1.188331627430911, + "grad_norm": 1.6924798895194766, + "learning_rate": 7.737772233379919e-06, + "loss": 0.5482417345046997, + "step": 4064 + }, + { + "epoch": 1.1886240678461764, + "grad_norm": 1.3910277085667344, + "learning_rate": 7.733063127812724e-06, + "loss": 0.5401996374130249, + "step": 4065 + }, + { + "epoch": 1.1889165082614417, + "grad_norm": 2.4517381628425547, + "learning_rate": 7.72835455212824e-06, + "loss": 0.4678424596786499, + "step": 4066 + }, + { + "epoch": 1.1892089486767072, + "grad_norm": 1.6156459518768798, + "learning_rate": 7.72364650742707e-06, + "loss": 0.5191294550895691, + "step": 4067 + }, + { + "epoch": 
1.1895013890919726, + "grad_norm": 1.4433917939096517, + "learning_rate": 7.718938994809685e-06, + "loss": 0.44018834829330444, + "step": 4068 + }, + { + "epoch": 1.1897938295072379, + "grad_norm": 1.3955169745603861, + "learning_rate": 7.714232015376442e-06, + "loss": 0.47852614521980286, + "step": 4069 + }, + { + "epoch": 1.1900862699225032, + "grad_norm": 1.523334975304476, + "learning_rate": 7.709525570227567e-06, + "loss": 0.5748994946479797, + "step": 4070 + }, + { + "epoch": 1.1903787103377688, + "grad_norm": 1.7714529908638612, + "learning_rate": 7.704819660463164e-06, + "loss": 0.5015645027160645, + "step": 4071 + }, + { + "epoch": 1.190671150753034, + "grad_norm": 1.8100962592275294, + "learning_rate": 7.70011428718321e-06, + "loss": 0.6200511455535889, + "step": 4072 + }, + { + "epoch": 1.1909635911682994, + "grad_norm": 1.531990990921369, + "learning_rate": 7.69540945148756e-06, + "loss": 0.6311289668083191, + "step": 4073 + }, + { + "epoch": 1.1912560315835647, + "grad_norm": 1.5403717728586237, + "learning_rate": 7.690705154475937e-06, + "loss": 0.5707247257232666, + "step": 4074 + }, + { + "epoch": 1.1915484719988303, + "grad_norm": 2.0693191702072107, + "learning_rate": 7.686001397247944e-06, + "loss": 0.5616360902786255, + "step": 4075 + }, + { + "epoch": 1.1918409124140956, + "grad_norm": 1.7144278887449431, + "learning_rate": 7.681298180903054e-06, + "loss": 0.5955555438995361, + "step": 4076 + }, + { + "epoch": 1.192133352829361, + "grad_norm": 1.6252826516162207, + "learning_rate": 7.676595506540615e-06, + "loss": 0.5057257413864136, + "step": 4077 + }, + { + "epoch": 1.1924257932446265, + "grad_norm": 1.8445544744897249, + "learning_rate": 7.671893375259854e-06, + "loss": 0.5795278549194336, + "step": 4078 + }, + { + "epoch": 1.1927182336598918, + "grad_norm": 1.5541021220011975, + "learning_rate": 7.66719178815986e-06, + "loss": 0.5213087797164917, + "step": 4079 + }, + { + "epoch": 1.1930106740751572, + "grad_norm": 1.5183242886274189, + 
"learning_rate": 7.662490746339601e-06, + "loss": 0.5333693027496338, + "step": 4080 + }, + { + "epoch": 1.1933031144904227, + "grad_norm": 1.5253876680230323, + "learning_rate": 7.657790250897916e-06, + "loss": 0.4705297648906708, + "step": 4081 + }, + { + "epoch": 1.193595554905688, + "grad_norm": 1.5875026444946445, + "learning_rate": 7.65309030293352e-06, + "loss": 0.5376054644584656, + "step": 4082 + }, + { + "epoch": 1.1938879953209534, + "grad_norm": 1.4103452849520708, + "learning_rate": 7.648390903544997e-06, + "loss": 0.47457355260849, + "step": 4083 + }, + { + "epoch": 1.1941804357362187, + "grad_norm": 1.5068528532277095, + "learning_rate": 7.6436920538308e-06, + "loss": 0.48752763867378235, + "step": 4084 + }, + { + "epoch": 1.1944728761514842, + "grad_norm": 1.5226531730849548, + "learning_rate": 7.63899375488926e-06, + "loss": 0.48227858543395996, + "step": 4085 + }, + { + "epoch": 1.1947653165667496, + "grad_norm": 1.4101996785965327, + "learning_rate": 7.634296007818576e-06, + "loss": 0.4294116497039795, + "step": 4086 + }, + { + "epoch": 1.1950577569820149, + "grad_norm": 1.24669252589954, + "learning_rate": 7.629598813716817e-06, + "loss": 0.5562552809715271, + "step": 4087 + }, + { + "epoch": 1.1953501973972802, + "grad_norm": 1.6628602240304204, + "learning_rate": 7.624902173681923e-06, + "loss": 0.6466431617736816, + "step": 4088 + }, + { + "epoch": 1.1956426378125458, + "grad_norm": 1.7596122427030323, + "learning_rate": 7.620206088811704e-06, + "loss": 0.7183903455734253, + "step": 4089 + }, + { + "epoch": 1.195935078227811, + "grad_norm": 1.666055880141139, + "learning_rate": 7.615510560203841e-06, + "loss": 0.5667496919631958, + "step": 4090 + }, + { + "epoch": 1.1962275186430764, + "grad_norm": 1.5232682591562918, + "learning_rate": 7.610815588955888e-06, + "loss": 0.5603050589561462, + "step": 4091 + }, + { + "epoch": 1.196519959058342, + "grad_norm": 1.6556784363331365, + "learning_rate": 7.606121176165267e-06, + "loss": 
0.5305474996566772, + "step": 4092 + }, + { + "epoch": 1.1968123994736073, + "grad_norm": 2.0140258709167163, + "learning_rate": 7.6014273229292625e-06, + "loss": 0.7321374416351318, + "step": 4093 + }, + { + "epoch": 1.1971048398888726, + "grad_norm": 1.5891169030075603, + "learning_rate": 7.5967340303450385e-06, + "loss": 0.44885972142219543, + "step": 4094 + }, + { + "epoch": 1.197397280304138, + "grad_norm": 1.7955283190373275, + "learning_rate": 7.592041299509624e-06, + "loss": 0.593859076499939, + "step": 4095 + }, + { + "epoch": 1.1976897207194035, + "grad_norm": 1.4553428657338656, + "learning_rate": 7.587349131519913e-06, + "loss": 0.6701182723045349, + "step": 4096 + }, + { + "epoch": 1.1979821611346688, + "grad_norm": 1.9268795339399152, + "learning_rate": 7.582657527472674e-06, + "loss": 0.6456711292266846, + "step": 4097 + }, + { + "epoch": 1.1982746015499341, + "grad_norm": 1.6602700214896833, + "learning_rate": 7.577966488464543e-06, + "loss": 0.5933864116668701, + "step": 4098 + }, + { + "epoch": 1.1985670419651995, + "grad_norm": 1.398305705152583, + "learning_rate": 7.5732760155920175e-06, + "loss": 0.4609876275062561, + "step": 4099 + }, + { + "epoch": 1.198859482380465, + "grad_norm": 1.4827488477589208, + "learning_rate": 7.568586109951468e-06, + "loss": 0.540961503982544, + "step": 4100 + }, + { + "epoch": 1.1991519227957304, + "grad_norm": 1.629963355664156, + "learning_rate": 7.563896772639132e-06, + "loss": 0.5522942543029785, + "step": 4101 + }, + { + "epoch": 1.1994443632109957, + "grad_norm": 1.739784480619601, + "learning_rate": 7.559208004751114e-06, + "loss": 0.483737587928772, + "step": 4102 + }, + { + "epoch": 1.1997368036262612, + "grad_norm": 1.2743684109876499, + "learning_rate": 7.554519807383384e-06, + "loss": 0.3760339915752411, + "step": 4103 + }, + { + "epoch": 1.2000292440415266, + "grad_norm": 1.494486709964621, + "learning_rate": 7.549832181631782e-06, + "loss": 0.5034801959991455, + "step": 4104 + }, + { + "epoch": 
1.2003216844567919, + "grad_norm": 1.6403057961263519, + "learning_rate": 7.545145128592009e-06, + "loss": 0.5605261325836182, + "step": 4105 + }, + { + "epoch": 1.2006141248720574, + "grad_norm": 1.4179033673825343, + "learning_rate": 7.540458649359637e-06, + "loss": 0.4724245071411133, + "step": 4106 + }, + { + "epoch": 1.2009065652873228, + "grad_norm": 1.609040907971216, + "learning_rate": 7.535772745030101e-06, + "loss": 0.564873218536377, + "step": 4107 + }, + { + "epoch": 1.201199005702588, + "grad_norm": 1.9431151220409157, + "learning_rate": 7.531087416698702e-06, + "loss": 0.699596643447876, + "step": 4108 + }, + { + "epoch": 1.2014914461178534, + "grad_norm": 1.5180492689699372, + "learning_rate": 7.526402665460612e-06, + "loss": 0.47448351979255676, + "step": 4109 + }, + { + "epoch": 1.201783886533119, + "grad_norm": 1.4606225624905942, + "learning_rate": 7.521718492410855e-06, + "loss": 0.4681323766708374, + "step": 4110 + }, + { + "epoch": 1.2020763269483843, + "grad_norm": 1.8550718864551587, + "learning_rate": 7.517034898644333e-06, + "loss": 0.6361842155456543, + "step": 4111 + }, + { + "epoch": 1.2023687673636496, + "grad_norm": 1.5211596606564617, + "learning_rate": 7.5123518852558075e-06, + "loss": 0.4732646942138672, + "step": 4112 + }, + { + "epoch": 1.202661207778915, + "grad_norm": 1.6512929892036816, + "learning_rate": 7.507669453339903e-06, + "loss": 0.57124263048172, + "step": 4113 + }, + { + "epoch": 1.2029536481941805, + "grad_norm": 1.758611342292707, + "learning_rate": 7.502987603991111e-06, + "loss": 0.5228173732757568, + "step": 4114 + }, + { + "epoch": 1.2032460886094458, + "grad_norm": 1.7352024129193708, + "learning_rate": 7.4983063383037864e-06, + "loss": 0.5501765012741089, + "step": 4115 + }, + { + "epoch": 1.2035385290247111, + "grad_norm": 1.6782467710972089, + "learning_rate": 7.493625657372141e-06, + "loss": 0.5062840580940247, + "step": 4116 + }, + { + "epoch": 1.2038309694399767, + "grad_norm": 1.6960273401585455, + 
"learning_rate": 7.4889455622902616e-06, + "loss": 0.7060763835906982, + "step": 4117 + }, + { + "epoch": 1.204123409855242, + "grad_norm": 1.478061987478783, + "learning_rate": 7.484266054152088e-06, + "loss": 0.42127668857574463, + "step": 4118 + }, + { + "epoch": 1.2044158502705073, + "grad_norm": 1.3574946815299211, + "learning_rate": 7.479587134051429e-06, + "loss": 0.490860253572464, + "step": 4119 + }, + { + "epoch": 1.204708290685773, + "grad_norm": 1.443033575116078, + "learning_rate": 7.474908803081955e-06, + "loss": 0.45786625146865845, + "step": 4120 + }, + { + "epoch": 1.2050007311010382, + "grad_norm": 1.810733388901398, + "learning_rate": 7.470231062337192e-06, + "loss": 0.5267277359962463, + "step": 4121 + }, + { + "epoch": 1.2052931715163036, + "grad_norm": 1.670838162040588, + "learning_rate": 7.465553912910539e-06, + "loss": 0.47834646701812744, + "step": 4122 + }, + { + "epoch": 1.2055856119315689, + "grad_norm": 1.4366745635956868, + "learning_rate": 7.460877355895249e-06, + "loss": 0.5348576903343201, + "step": 4123 + }, + { + "epoch": 1.2058780523468344, + "grad_norm": 1.7186674622129299, + "learning_rate": 7.456201392384437e-06, + "loss": 0.47992441058158875, + "step": 4124 + }, + { + "epoch": 1.2061704927620998, + "grad_norm": 1.6108537844876905, + "learning_rate": 7.451526023471085e-06, + "loss": 0.5693913698196411, + "step": 4125 + }, + { + "epoch": 1.206462933177365, + "grad_norm": 1.8995573488864546, + "learning_rate": 7.4468512502480305e-06, + "loss": 0.5165153741836548, + "step": 4126 + }, + { + "epoch": 1.2067553735926304, + "grad_norm": 1.4833110616884417, + "learning_rate": 7.442177073807973e-06, + "loss": 0.522534966468811, + "step": 4127 + }, + { + "epoch": 1.207047814007896, + "grad_norm": 2.0276890753098464, + "learning_rate": 7.43750349524347e-06, + "loss": 0.6298432946205139, + "step": 4128 + }, + { + "epoch": 1.2073402544231613, + "grad_norm": 1.5840348747117112, + "learning_rate": 7.432830515646947e-06, + "loss": 
0.5077394247055054, + "step": 4129 + }, + { + "epoch": 1.2076326948384266, + "grad_norm": 2.0187303897146682, + "learning_rate": 7.428158136110681e-06, + "loss": 0.6492841839790344, + "step": 4130 + }, + { + "epoch": 1.2079251352536922, + "grad_norm": 1.8651226738731277, + "learning_rate": 7.423486357726813e-06, + "loss": 0.5204535126686096, + "step": 4131 + }, + { + "epoch": 1.2082175756689575, + "grad_norm": 1.7208004693147547, + "learning_rate": 7.418815181587347e-06, + "loss": 0.56598961353302, + "step": 4132 + }, + { + "epoch": 1.2085100160842228, + "grad_norm": 1.7632065676998485, + "learning_rate": 7.4141446087841364e-06, + "loss": 0.486950159072876, + "step": 4133 + }, + { + "epoch": 1.2088024564994881, + "grad_norm": 1.8229002651567825, + "learning_rate": 7.4094746404089e-06, + "loss": 0.7218466997146606, + "step": 4134 + }, + { + "epoch": 1.2090948969147537, + "grad_norm": 1.715700034058204, + "learning_rate": 7.404805277553218e-06, + "loss": 0.6486172676086426, + "step": 4135 + }, + { + "epoch": 1.209387337330019, + "grad_norm": 1.8385918373460561, + "learning_rate": 7.400136521308521e-06, + "loss": 0.6160574555397034, + "step": 4136 + }, + { + "epoch": 1.2096797777452843, + "grad_norm": 1.6766631954981184, + "learning_rate": 7.395468372766107e-06, + "loss": 0.6184699535369873, + "step": 4137 + }, + { + "epoch": 1.2099722181605497, + "grad_norm": 1.6881704887676476, + "learning_rate": 7.390800833017124e-06, + "loss": 0.5795263051986694, + "step": 4138 + }, + { + "epoch": 1.2102646585758152, + "grad_norm": 1.6841718896097397, + "learning_rate": 7.386133903152581e-06, + "loss": 0.5409367084503174, + "step": 4139 + }, + { + "epoch": 1.2105570989910805, + "grad_norm": 1.678948206873695, + "learning_rate": 7.3814675842633465e-06, + "loss": 0.47924935817718506, + "step": 4140 + }, + { + "epoch": 1.2108495394063459, + "grad_norm": 1.6532202064740131, + "learning_rate": 7.376801877440143e-06, + "loss": 0.5737412571907043, + "step": 4141 + }, + { + "epoch": 
1.2111419798216114, + "grad_norm": 1.5307761286613382, + "learning_rate": 7.372136783773551e-06, + "loss": 0.538013219833374, + "step": 4142 + }, + { + "epoch": 1.2114344202368768, + "grad_norm": 1.4940902719253717, + "learning_rate": 7.367472304354011e-06, + "loss": 0.4523904323577881, + "step": 4143 + }, + { + "epoch": 1.211726860652142, + "grad_norm": 1.4793905716399964, + "learning_rate": 7.362808440271811e-06, + "loss": 0.5057293176651001, + "step": 4144 + }, + { + "epoch": 1.2120193010674076, + "grad_norm": 1.4373562566302274, + "learning_rate": 7.358145192617103e-06, + "loss": 0.4653171896934509, + "step": 4145 + }, + { + "epoch": 1.212311741482673, + "grad_norm": 1.6048946971271119, + "learning_rate": 7.353482562479896e-06, + "loss": 0.607070803642273, + "step": 4146 + }, + { + "epoch": 1.2126041818979383, + "grad_norm": 1.51939699208445, + "learning_rate": 7.348820550950047e-06, + "loss": 0.4721861481666565, + "step": 4147 + }, + { + "epoch": 1.2128966223132036, + "grad_norm": 1.8802239228266517, + "learning_rate": 7.3441591591172765e-06, + "loss": 0.6656746864318848, + "step": 4148 + }, + { + "epoch": 1.2131890627284692, + "grad_norm": 1.504596663567376, + "learning_rate": 7.339498388071154e-06, + "loss": 0.5231848359107971, + "step": 4149 + }, + { + "epoch": 1.2134815031437345, + "grad_norm": 1.4250712810936565, + "learning_rate": 7.334838238901106e-06, + "loss": 0.42241257429122925, + "step": 4150 + }, + { + "epoch": 1.2137739435589998, + "grad_norm": 1.4721862632309721, + "learning_rate": 7.3301787126964165e-06, + "loss": 0.427111953496933, + "step": 4151 + }, + { + "epoch": 1.2140663839742651, + "grad_norm": 2.038283523639075, + "learning_rate": 7.325519810546219e-06, + "loss": 0.6208339929580688, + "step": 4152 + }, + { + "epoch": 1.2143588243895307, + "grad_norm": 1.3970516014119925, + "learning_rate": 7.320861533539505e-06, + "loss": 0.5031273365020752, + "step": 4153 + }, + { + "epoch": 1.214651264804796, + "grad_norm": 1.5153681425347725, + 
"learning_rate": 7.3162038827651205e-06, + "loss": 0.5617444515228271, + "step": 4154 + }, + { + "epoch": 1.2149437052200613, + "grad_norm": 1.4855483785732004, + "learning_rate": 7.311546859311758e-06, + "loss": 0.4616255462169647, + "step": 4155 + }, + { + "epoch": 1.215236145635327, + "grad_norm": 1.5704453976932513, + "learning_rate": 7.306890464267972e-06, + "loss": 0.5799977779388428, + "step": 4156 + }, + { + "epoch": 1.2155285860505922, + "grad_norm": 1.521477491941422, + "learning_rate": 7.302234698722165e-06, + "loss": 0.5669786930084229, + "step": 4157 + }, + { + "epoch": 1.2158210264658575, + "grad_norm": 1.5325381791627977, + "learning_rate": 7.297579563762595e-06, + "loss": 0.5622642040252686, + "step": 4158 + }, + { + "epoch": 1.216113466881123, + "grad_norm": 1.8789411887268221, + "learning_rate": 7.292925060477367e-06, + "loss": 0.6896791458129883, + "step": 4159 + }, + { + "epoch": 1.2164059072963884, + "grad_norm": 1.5263918361022677, + "learning_rate": 7.288271189954451e-06, + "loss": 0.6704437136650085, + "step": 4160 + }, + { + "epoch": 1.2166983477116537, + "grad_norm": 1.6192057061391554, + "learning_rate": 7.2836179532816565e-06, + "loss": 0.6340646743774414, + "step": 4161 + }, + { + "epoch": 1.216990788126919, + "grad_norm": 1.4283430296516553, + "learning_rate": 7.278965351546648e-06, + "loss": 0.528992772102356, + "step": 4162 + }, + { + "epoch": 1.2172832285421846, + "grad_norm": 1.4842100691170903, + "learning_rate": 7.274313385836949e-06, + "loss": 0.45160621404647827, + "step": 4163 + }, + { + "epoch": 1.21757566895745, + "grad_norm": 1.3859373993268853, + "learning_rate": 7.269662057239919e-06, + "loss": 0.5398670434951782, + "step": 4164 + }, + { + "epoch": 1.2178681093727153, + "grad_norm": 1.7598892874276293, + "learning_rate": 7.265011366842785e-06, + "loss": 0.5174476504325867, + "step": 4165 + }, + { + "epoch": 1.2181605497879806, + "grad_norm": 1.663231631427072, + "learning_rate": 7.260361315732613e-06, + "loss": 
0.4830206632614136, + "step": 4166 + }, + { + "epoch": 1.2184529902032462, + "grad_norm": 1.4149457900973579, + "learning_rate": 7.2557119049963266e-06, + "loss": 0.42422181367874146, + "step": 4167 + }, + { + "epoch": 1.2187454306185115, + "grad_norm": 1.363467777836694, + "learning_rate": 7.251063135720699e-06, + "loss": 0.43544018268585205, + "step": 4168 + }, + { + "epoch": 1.2190378710337768, + "grad_norm": 1.4776092804767433, + "learning_rate": 7.2464150089923465e-06, + "loss": 0.5352005362510681, + "step": 4169 + }, + { + "epoch": 1.2193303114490424, + "grad_norm": 1.5459436268475357, + "learning_rate": 7.241767525897746e-06, + "loss": 0.4718678891658783, + "step": 4170 + }, + { + "epoch": 1.2196227518643077, + "grad_norm": 1.4994134423194976, + "learning_rate": 7.237120687523214e-06, + "loss": 0.618084192276001, + "step": 4171 + }, + { + "epoch": 1.219915192279573, + "grad_norm": 1.8137589794234399, + "learning_rate": 7.232474494954924e-06, + "loss": 0.625995397567749, + "step": 4172 + }, + { + "epoch": 1.2202076326948383, + "grad_norm": 1.4989590312422592, + "learning_rate": 7.227828949278894e-06, + "loss": 0.5382465124130249, + "step": 4173 + }, + { + "epoch": 1.220500073110104, + "grad_norm": 1.702878462884744, + "learning_rate": 7.223184051580992e-06, + "loss": 0.5299465656280518, + "step": 4174 + }, + { + "epoch": 1.2207925135253692, + "grad_norm": 1.7776293184889576, + "learning_rate": 7.218539802946934e-06, + "loss": 0.5899940729141235, + "step": 4175 + }, + { + "epoch": 1.2210849539406345, + "grad_norm": 1.9763552708522982, + "learning_rate": 7.213896204462286e-06, + "loss": 0.6126594543457031, + "step": 4176 + }, + { + "epoch": 1.2213773943558999, + "grad_norm": 1.6580044033592523, + "learning_rate": 7.20925325721246e-06, + "loss": 0.5576338768005371, + "step": 4177 + }, + { + "epoch": 1.2216698347711654, + "grad_norm": 1.5044012673537284, + "learning_rate": 7.204610962282717e-06, + "loss": 0.540515661239624, + "step": 4178 + }, + { + "epoch": 
1.2219622751864307, + "grad_norm": 1.5281012838641301, + "learning_rate": 7.1999693207581675e-06, + "loss": 0.5306440591812134, + "step": 4179 + }, + { + "epoch": 1.222254715601696, + "grad_norm": 2.02113466617051, + "learning_rate": 7.195328333723763e-06, + "loss": 0.6274853944778442, + "step": 4180 + }, + { + "epoch": 1.2225471560169616, + "grad_norm": 1.6954554706562375, + "learning_rate": 7.190688002264308e-06, + "loss": 0.5626333951950073, + "step": 4181 + }, + { + "epoch": 1.222839596432227, + "grad_norm": 1.6364457786315536, + "learning_rate": 7.18604832746445e-06, + "loss": 0.5938719511032104, + "step": 4182 + }, + { + "epoch": 1.2231320368474923, + "grad_norm": 1.4010331016668016, + "learning_rate": 7.181409310408688e-06, + "loss": 0.4599727988243103, + "step": 4183 + }, + { + "epoch": 1.2234244772627578, + "grad_norm": 1.516823379099723, + "learning_rate": 7.176770952181363e-06, + "loss": 0.5912302732467651, + "step": 4184 + }, + { + "epoch": 1.2237169176780232, + "grad_norm": 1.24563200951521, + "learning_rate": 7.172133253866662e-06, + "loss": 0.534631073474884, + "step": 4185 + }, + { + "epoch": 1.2240093580932885, + "grad_norm": 1.3825393422514298, + "learning_rate": 7.167496216548618e-06, + "loss": 0.5084418058395386, + "step": 4186 + }, + { + "epoch": 1.2243017985085538, + "grad_norm": 1.6343841724383257, + "learning_rate": 7.162859841311112e-06, + "loss": 0.6906956434249878, + "step": 4187 + }, + { + "epoch": 1.2245942389238194, + "grad_norm": 1.6583835426138527, + "learning_rate": 7.158224129237867e-06, + "loss": 0.5578658580780029, + "step": 4188 + }, + { + "epoch": 1.2248866793390847, + "grad_norm": 1.4116232043960963, + "learning_rate": 7.153589081412455e-06, + "loss": 0.4438907206058502, + "step": 4189 + }, + { + "epoch": 1.22517911975435, + "grad_norm": 1.9189119615156511, + "learning_rate": 7.148954698918289e-06, + "loss": 0.6366580724716187, + "step": 4190 + }, + { + "epoch": 1.2254715601696153, + "grad_norm": 1.674796821883658, + 
"learning_rate": 7.144320982838628e-06, + "loss": 0.5532524585723877, + "step": 4191 + }, + { + "epoch": 1.2257640005848809, + "grad_norm": 1.3678471530217577, + "learning_rate": 7.139687934256574e-06, + "loss": 0.4847594201564789, + "step": 4192 + }, + { + "epoch": 1.2260564410001462, + "grad_norm": 1.651582950772816, + "learning_rate": 7.135055554255073e-06, + "loss": 0.6273454427719116, + "step": 4193 + }, + { + "epoch": 1.2263488814154115, + "grad_norm": 1.826963047999446, + "learning_rate": 7.130423843916917e-06, + "loss": 0.6320512294769287, + "step": 4194 + }, + { + "epoch": 1.226641321830677, + "grad_norm": 1.9938752870068028, + "learning_rate": 7.125792804324741e-06, + "loss": 0.5499723553657532, + "step": 4195 + }, + { + "epoch": 1.2269337622459424, + "grad_norm": 1.531512294163018, + "learning_rate": 7.121162436561023e-06, + "loss": 0.5855484008789062, + "step": 4196 + }, + { + "epoch": 1.2272262026612077, + "grad_norm": 1.5092619418718032, + "learning_rate": 7.11653274170808e-06, + "loss": 0.5998305678367615, + "step": 4197 + }, + { + "epoch": 1.2275186430764733, + "grad_norm": 1.6613439290789596, + "learning_rate": 7.111903720848077e-06, + "loss": 0.6963703632354736, + "step": 4198 + }, + { + "epoch": 1.2278110834917386, + "grad_norm": 1.7273945695579416, + "learning_rate": 7.10727537506302e-06, + "loss": 0.5664974451065063, + "step": 4199 + }, + { + "epoch": 1.228103523907004, + "grad_norm": 1.8611907189119672, + "learning_rate": 7.102647705434755e-06, + "loss": 0.6502630710601807, + "step": 4200 + }, + { + "epoch": 1.2283959643222693, + "grad_norm": 1.5674599206950446, + "learning_rate": 7.098020713044973e-06, + "loss": 0.5727233290672302, + "step": 4201 + }, + { + "epoch": 1.2286884047375348, + "grad_norm": 1.4105219463780128, + "learning_rate": 7.093394398975206e-06, + "loss": 0.47885602712631226, + "step": 4202 + }, + { + "epoch": 1.2289808451528001, + "grad_norm": 1.62325320016664, + "learning_rate": 7.088768764306826e-06, + "loss": 
0.46089547872543335, + "step": 4203 + }, + { + "epoch": 1.2292732855680655, + "grad_norm": 1.3853973501267451, + "learning_rate": 7.084143810121044e-06, + "loss": 0.48920977115631104, + "step": 4204 + }, + { + "epoch": 1.2295657259833308, + "grad_norm": 1.4371671531095065, + "learning_rate": 7.07951953749892e-06, + "loss": 0.5320104956626892, + "step": 4205 + }, + { + "epoch": 1.2298581663985964, + "grad_norm": 1.693565977205871, + "learning_rate": 7.074895947521347e-06, + "loss": 0.6403206586837769, + "step": 4206 + }, + { + "epoch": 1.2301506068138617, + "grad_norm": 1.3774390509755927, + "learning_rate": 7.070273041269062e-06, + "loss": 0.5522217750549316, + "step": 4207 + }, + { + "epoch": 1.230443047229127, + "grad_norm": 1.644407790392686, + "learning_rate": 7.0656508198226405e-06, + "loss": 0.5235073566436768, + "step": 4208 + }, + { + "epoch": 1.2307354876443926, + "grad_norm": 1.9076552987416457, + "learning_rate": 7.061029284262497e-06, + "loss": 0.5972521305084229, + "step": 4209 + }, + { + "epoch": 1.2310279280596579, + "grad_norm": 1.7443828706372393, + "learning_rate": 7.0564084356688885e-06, + "loss": 0.5989280343055725, + "step": 4210 + }, + { + "epoch": 1.2313203684749232, + "grad_norm": 1.4723000244161777, + "learning_rate": 7.051788275121913e-06, + "loss": 0.5714213848114014, + "step": 4211 + }, + { + "epoch": 1.2316128088901885, + "grad_norm": 1.715005842824084, + "learning_rate": 7.047168803701502e-06, + "loss": 0.5588504076004028, + "step": 4212 + }, + { + "epoch": 1.231905249305454, + "grad_norm": 1.3648320017744335, + "learning_rate": 7.042550022487431e-06, + "loss": 0.47527533769607544, + "step": 4213 + }, + { + "epoch": 1.2321976897207194, + "grad_norm": 1.4838404108317171, + "learning_rate": 7.03793193255931e-06, + "loss": 0.5281137228012085, + "step": 4214 + }, + { + "epoch": 1.2324901301359847, + "grad_norm": 1.7839268972332825, + "learning_rate": 7.033314534996589e-06, + "loss": 0.5509631037712097, + "step": 4215 + }, + { + "epoch": 
1.23278257055125, + "grad_norm": 1.5991883103171023, + "learning_rate": 7.028697830878557e-06, + "loss": 0.5291438698768616, + "step": 4216 + }, + { + "epoch": 1.2330750109665156, + "grad_norm": 1.9253124571991533, + "learning_rate": 7.024081821284343e-06, + "loss": 0.5931780934333801, + "step": 4217 + }, + { + "epoch": 1.233367451381781, + "grad_norm": 1.5978832259158926, + "learning_rate": 7.019466507292908e-06, + "loss": 0.4883537292480469, + "step": 4218 + }, + { + "epoch": 1.2336598917970463, + "grad_norm": 1.5441369085427046, + "learning_rate": 7.014851889983058e-06, + "loss": 0.45155030488967896, + "step": 4219 + }, + { + "epoch": 1.2339523322123118, + "grad_norm": 1.7603110515675113, + "learning_rate": 7.010237970433426e-06, + "loss": 0.6107507944107056, + "step": 4220 + }, + { + "epoch": 1.2342447726275771, + "grad_norm": 1.4005214588133317, + "learning_rate": 7.0056247497224905e-06, + "loss": 0.41764840483665466, + "step": 4221 + }, + { + "epoch": 1.2345372130428425, + "grad_norm": 1.4727432689856292, + "learning_rate": 7.0010122289285635e-06, + "loss": 0.6786199808120728, + "step": 4222 + }, + { + "epoch": 1.234829653458108, + "grad_norm": 1.6328773458986388, + "learning_rate": 6.996400409129793e-06, + "loss": 0.5378292798995972, + "step": 4223 + }, + { + "epoch": 1.2351220938733734, + "grad_norm": 1.5470680329093456, + "learning_rate": 6.9917892914041685e-06, + "loss": 0.47646570205688477, + "step": 4224 + }, + { + "epoch": 1.2354145342886387, + "grad_norm": 1.6302332764801317, + "learning_rate": 6.987178876829503e-06, + "loss": 0.554225504398346, + "step": 4225 + }, + { + "epoch": 1.235706974703904, + "grad_norm": 1.8888599643549215, + "learning_rate": 6.982569166483459e-06, + "loss": 0.42614030838012695, + "step": 4226 + }, + { + "epoch": 1.2359994151191696, + "grad_norm": 1.523118498051214, + "learning_rate": 6.977960161443524e-06, + "loss": 0.5043676495552063, + "step": 4227 + }, + { + "epoch": 1.2362918555344349, + "grad_norm": 1.454372819437309, + 
"learning_rate": 6.973351862787029e-06, + "loss": 0.4905642569065094, + "step": 4228 + }, + { + "epoch": 1.2365842959497002, + "grad_norm": 1.6152329822736995, + "learning_rate": 6.9687442715911325e-06, + "loss": 0.5860332250595093, + "step": 4229 + }, + { + "epoch": 1.2368767363649655, + "grad_norm": 1.3841079659340747, + "learning_rate": 6.9641373889328345e-06, + "loss": 0.4900137782096863, + "step": 4230 + }, + { + "epoch": 1.237169176780231, + "grad_norm": 1.7249957815195471, + "learning_rate": 6.959531215888961e-06, + "loss": 0.5736855268478394, + "step": 4231 + }, + { + "epoch": 1.2374616171954964, + "grad_norm": 1.6635333389812996, + "learning_rate": 6.95492575353618e-06, + "loss": 0.6390400528907776, + "step": 4232 + }, + { + "epoch": 1.2377540576107617, + "grad_norm": 1.6623693676348965, + "learning_rate": 6.95032100295099e-06, + "loss": 0.6553822159767151, + "step": 4233 + }, + { + "epoch": 1.2380464980260273, + "grad_norm": 1.871056647578711, + "learning_rate": 6.945716965209723e-06, + "loss": 0.6685863733291626, + "step": 4234 + }, + { + "epoch": 1.2383389384412926, + "grad_norm": 1.7090289188063175, + "learning_rate": 6.941113641388542e-06, + "loss": 0.5172277688980103, + "step": 4235 + }, + { + "epoch": 1.238631378856558, + "grad_norm": 1.9648968097135298, + "learning_rate": 6.936511032563451e-06, + "loss": 0.6578007936477661, + "step": 4236 + }, + { + "epoch": 1.2389238192718235, + "grad_norm": 1.5304274814539944, + "learning_rate": 6.931909139810283e-06, + "loss": 0.5679500699043274, + "step": 4237 + }, + { + "epoch": 1.2392162596870888, + "grad_norm": 1.6592749019605815, + "learning_rate": 6.927307964204695e-06, + "loss": 0.49142318964004517, + "step": 4238 + }, + { + "epoch": 1.2395087001023541, + "grad_norm": 1.497996058585022, + "learning_rate": 6.9227075068221926e-06, + "loss": 0.5339487195014954, + "step": 4239 + }, + { + "epoch": 1.2398011405176195, + "grad_norm": 1.9993237065248757, + "learning_rate": 6.918107768738097e-06, + "loss": 
0.5845860242843628, + "step": 4240 + }, + { + "epoch": 1.240093580932885, + "grad_norm": 2.5543699126297823, + "learning_rate": 6.9135087510275735e-06, + "loss": 0.6767281889915466, + "step": 4241 + }, + { + "epoch": 1.2403860213481503, + "grad_norm": 1.850547226886836, + "learning_rate": 6.908910454765612e-06, + "loss": 0.6119472980499268, + "step": 4242 + }, + { + "epoch": 1.2406784617634157, + "grad_norm": 1.6013723709723773, + "learning_rate": 6.904312881027038e-06, + "loss": 0.6375409364700317, + "step": 4243 + }, + { + "epoch": 1.240970902178681, + "grad_norm": 1.9482571730059268, + "learning_rate": 6.899716030886508e-06, + "loss": 0.7059881687164307, + "step": 4244 + }, + { + "epoch": 1.2412633425939466, + "grad_norm": 1.9206862231453385, + "learning_rate": 6.895119905418504e-06, + "loss": 0.6463328003883362, + "step": 4245 + }, + { + "epoch": 1.2415557830092119, + "grad_norm": 1.5219372029025222, + "learning_rate": 6.890524505697345e-06, + "loss": 0.5374869108200073, + "step": 4246 + }, + { + "epoch": 1.2418482234244772, + "grad_norm": 1.625313205404651, + "learning_rate": 6.885929832797176e-06, + "loss": 0.5219276547431946, + "step": 4247 + }, + { + "epoch": 1.2421406638397428, + "grad_norm": 1.4315105659194174, + "learning_rate": 6.881335887791973e-06, + "loss": 0.4815624952316284, + "step": 4248 + }, + { + "epoch": 1.242433104255008, + "grad_norm": 1.318059168550072, + "learning_rate": 6.8767426717555475e-06, + "loss": 0.5111992955207825, + "step": 4249 + }, + { + "epoch": 1.2427255446702734, + "grad_norm": 1.6870166439076426, + "learning_rate": 6.872150185761533e-06, + "loss": 0.5331606268882751, + "step": 4250 + }, + { + "epoch": 1.2430179850855387, + "grad_norm": 1.5572023614320247, + "learning_rate": 6.867558430883393e-06, + "loss": 0.5375202894210815, + "step": 4251 + }, + { + "epoch": 1.2433104255008043, + "grad_norm": 1.495445158871636, + "learning_rate": 6.862967408194425e-06, + "loss": 0.5667152404785156, + "step": 4252 + }, + { + "epoch": 
1.2436028659160696, + "grad_norm": 2.036302557289267, + "learning_rate": 6.858377118767752e-06, + "loss": 0.5679255723953247, + "step": 4253 + }, + { + "epoch": 1.243895306331335, + "grad_norm": 1.7798647531094058, + "learning_rate": 6.853787563676324e-06, + "loss": 0.6097947359085083, + "step": 4254 + }, + { + "epoch": 1.2441877467466003, + "grad_norm": 1.458407608257313, + "learning_rate": 6.849198743992927e-06, + "loss": 0.41869044303894043, + "step": 4255 + }, + { + "epoch": 1.2444801871618658, + "grad_norm": 1.595586166137391, + "learning_rate": 6.8446106607901655e-06, + "loss": 0.6414821147918701, + "step": 4256 + }, + { + "epoch": 1.2447726275771311, + "grad_norm": 1.9180058965370612, + "learning_rate": 6.840023315140476e-06, + "loss": 0.5985021591186523, + "step": 4257 + }, + { + "epoch": 1.2450650679923965, + "grad_norm": 1.429348085027092, + "learning_rate": 6.8354367081161235e-06, + "loss": 0.4718092381954193, + "step": 4258 + }, + { + "epoch": 1.245357508407662, + "grad_norm": 1.374927912317877, + "learning_rate": 6.8308508407892e-06, + "loss": 0.46431800723075867, + "step": 4259 + }, + { + "epoch": 1.2456499488229273, + "grad_norm": 1.4906925043469428, + "learning_rate": 6.826265714231624e-06, + "loss": 0.5499997735023499, + "step": 4260 + }, + { + "epoch": 1.2459423892381927, + "grad_norm": 1.605653884930273, + "learning_rate": 6.8216813295151415e-06, + "loss": 0.6078206300735474, + "step": 4261 + }, + { + "epoch": 1.2462348296534582, + "grad_norm": 1.6116067904051048, + "learning_rate": 6.817097687711322e-06, + "loss": 0.5706520080566406, + "step": 4262 + }, + { + "epoch": 1.2465272700687235, + "grad_norm": 1.4579793726336556, + "learning_rate": 6.812514789891566e-06, + "loss": 0.5210137367248535, + "step": 4263 + }, + { + "epoch": 1.2468197104839889, + "grad_norm": 1.5969341972097826, + "learning_rate": 6.807932637127097e-06, + "loss": 0.42632028460502625, + "step": 4264 + }, + { + "epoch": 1.2471121508992542, + "grad_norm": 1.3281470644259092, + 
"learning_rate": 6.803351230488967e-06, + "loss": 0.49990004301071167, + "step": 4265 + }, + { + "epoch": 1.2474045913145198, + "grad_norm": 1.6439327542913937, + "learning_rate": 6.798770571048052e-06, + "loss": 0.557829737663269, + "step": 4266 + }, + { + "epoch": 1.247697031729785, + "grad_norm": 1.6838717466364301, + "learning_rate": 6.794190659875052e-06, + "loss": 0.4784187078475952, + "step": 4267 + }, + { + "epoch": 1.2479894721450504, + "grad_norm": 1.6243877795123443, + "learning_rate": 6.789611498040492e-06, + "loss": 0.4795057773590088, + "step": 4268 + }, + { + "epoch": 1.2482819125603157, + "grad_norm": 1.4149752899303223, + "learning_rate": 6.785033086614725e-06, + "loss": 0.415715754032135, + "step": 4269 + }, + { + "epoch": 1.2485743529755813, + "grad_norm": 1.4478921102692126, + "learning_rate": 6.7804554266679266e-06, + "loss": 0.49056607484817505, + "step": 4270 + }, + { + "epoch": 1.2488667933908466, + "grad_norm": 1.8227279880342706, + "learning_rate": 6.775878519270098e-06, + "loss": 0.5268200039863586, + "step": 4271 + }, + { + "epoch": 1.249159233806112, + "grad_norm": 1.5664194732567784, + "learning_rate": 6.771302365491064e-06, + "loss": 0.6250356435775757, + "step": 4272 + }, + { + "epoch": 1.2494516742213775, + "grad_norm": 1.5152208337758115, + "learning_rate": 6.76672696640047e-06, + "loss": 0.5403029918670654, + "step": 4273 + }, + { + "epoch": 1.2497441146366428, + "grad_norm": 1.6699524807174595, + "learning_rate": 6.762152323067787e-06, + "loss": 0.47006577253341675, + "step": 4274 + }, + { + "epoch": 1.2500365550519081, + "grad_norm": 1.7406248179582138, + "learning_rate": 6.7575784365623134e-06, + "loss": 0.5088232755661011, + "step": 4275 + }, + { + "epoch": 1.2503289954671737, + "grad_norm": 1.7598214720338152, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.5438642501831055, + "step": 4276 + }, + { + "epoch": 1.250621435882439, + "grad_norm": 1.4316922317447767, + "learning_rate": 6.748432938309286e-06, + "loss": 
0.45436567068099976, + "step": 4277 + }, + { + "epoch": 1.2509138762977043, + "grad_norm": 1.5793052704561465, + "learning_rate": 6.743861328699438e-06, + "loss": 0.5298944115638733, + "step": 4278 + }, + { + "epoch": 1.2512063167129697, + "grad_norm": 1.3504092629468785, + "learning_rate": 6.7392904801922055e-06, + "loss": 0.49393707513809204, + "step": 4279 + }, + { + "epoch": 1.251498757128235, + "grad_norm": 1.4852717426676887, + "learning_rate": 6.734720393855998e-06, + "loss": 0.5540947318077087, + "step": 4280 + }, + { + "epoch": 1.2517911975435005, + "grad_norm": 1.4330918355062934, + "learning_rate": 6.730151070759043e-06, + "loss": 0.47406166791915894, + "step": 4281 + }, + { + "epoch": 1.2520836379587659, + "grad_norm": 1.5653956712736337, + "learning_rate": 6.725582511969397e-06, + "loss": 0.46885907649993896, + "step": 4282 + }, + { + "epoch": 1.2523760783740312, + "grad_norm": 1.7710771095422673, + "learning_rate": 6.721014718554931e-06, + "loss": 0.537517786026001, + "step": 4283 + }, + { + "epoch": 1.2526685187892967, + "grad_norm": 1.5323701554592244, + "learning_rate": 6.716447691583336e-06, + "loss": 0.514340341091156, + "step": 4284 + }, + { + "epoch": 1.252960959204562, + "grad_norm": 1.6716715067641383, + "learning_rate": 6.711881432122129e-06, + "loss": 0.5696117281913757, + "step": 4285 + }, + { + "epoch": 1.2532533996198274, + "grad_norm": 1.632492076185155, + "learning_rate": 6.707315941238645e-06, + "loss": 0.5620799660682678, + "step": 4286 + }, + { + "epoch": 1.253545840035093, + "grad_norm": 1.7721487037647632, + "learning_rate": 6.702751220000039e-06, + "loss": 0.4832923412322998, + "step": 4287 + }, + { + "epoch": 1.2538382804503583, + "grad_norm": 1.7195688873272827, + "learning_rate": 6.698187269473289e-06, + "loss": 0.6608176231384277, + "step": 4288 + }, + { + "epoch": 1.2541307208656236, + "grad_norm": 1.67536250359078, + "learning_rate": 6.69362409072519e-06, + "loss": 0.6002779006958008, + "step": 4289 + }, + { + "epoch": 
1.2544231612808892, + "grad_norm": 1.5859756058231869, + "learning_rate": 6.689061684822357e-06, + "loss": 0.49898988008499146, + "step": 4290 + }, + { + "epoch": 1.2547156016961545, + "grad_norm": 1.908707186131175, + "learning_rate": 6.684500052831222e-06, + "loss": 0.5887055397033691, + "step": 4291 + }, + { + "epoch": 1.2550080421114198, + "grad_norm": 1.7680049519728702, + "learning_rate": 6.679939195818043e-06, + "loss": 0.6494714617729187, + "step": 4292 + }, + { + "epoch": 1.2553004825266851, + "grad_norm": 2.320887096811341, + "learning_rate": 6.67537911484889e-06, + "loss": 0.5708397626876831, + "step": 4293 + }, + { + "epoch": 1.2555929229419505, + "grad_norm": 1.4472817266256797, + "learning_rate": 6.670819810989656e-06, + "loss": 0.40412014722824097, + "step": 4294 + }, + { + "epoch": 1.255885363357216, + "grad_norm": 1.675200347061479, + "learning_rate": 6.666261285306048e-06, + "loss": 0.5141078233718872, + "step": 4295 + }, + { + "epoch": 1.2561778037724813, + "grad_norm": 1.8039877813287382, + "learning_rate": 6.661703538863595e-06, + "loss": 0.6463406085968018, + "step": 4296 + }, + { + "epoch": 1.2564702441877467, + "grad_norm": 1.5123528456732447, + "learning_rate": 6.657146572727643e-06, + "loss": 0.5809177160263062, + "step": 4297 + }, + { + "epoch": 1.2567626846030122, + "grad_norm": 1.6628802038143384, + "learning_rate": 6.652590387963354e-06, + "loss": 0.5124412775039673, + "step": 4298 + }, + { + "epoch": 1.2570551250182775, + "grad_norm": 1.8011842610745197, + "learning_rate": 6.64803498563571e-06, + "loss": 0.5399736762046814, + "step": 4299 + }, + { + "epoch": 1.2573475654335429, + "grad_norm": 1.4403786785249715, + "learning_rate": 6.6434803668095095e-06, + "loss": 0.548133373260498, + "step": 4300 + }, + { + "epoch": 1.2576400058488084, + "grad_norm": 1.7736401224051406, + "learning_rate": 6.638926532549364e-06, + "loss": 0.45056310296058655, + "step": 4301 + }, + { + "epoch": 1.2579324462640737, + "grad_norm": 2.434184879977136, + 
"learning_rate": 6.634373483919705e-06, + "loss": 0.5191814303398132, + "step": 4302 + }, + { + "epoch": 1.258224886679339, + "grad_norm": 1.4188278481806091, + "learning_rate": 6.62982122198478e-06, + "loss": 0.41939109563827515, + "step": 4303 + }, + { + "epoch": 1.2585173270946044, + "grad_norm": 1.6631261031278954, + "learning_rate": 6.625269747808655e-06, + "loss": 0.6535190939903259, + "step": 4304 + }, + { + "epoch": 1.2588097675098697, + "grad_norm": 1.7210614964326925, + "learning_rate": 6.620719062455207e-06, + "loss": 0.6282539367675781, + "step": 4305 + }, + { + "epoch": 1.2591022079251353, + "grad_norm": 1.5686327106153548, + "learning_rate": 6.616169166988133e-06, + "loss": 0.5378686189651489, + "step": 4306 + }, + { + "epoch": 1.2593946483404006, + "grad_norm": 1.80292094791683, + "learning_rate": 6.611620062470942e-06, + "loss": 0.5278643369674683, + "step": 4307 + }, + { + "epoch": 1.259687088755666, + "grad_norm": 1.5211478183195457, + "learning_rate": 6.607071749966958e-06, + "loss": 0.5578285455703735, + "step": 4308 + }, + { + "epoch": 1.2599795291709315, + "grad_norm": 1.7646090466366875, + "learning_rate": 6.602524230539324e-06, + "loss": 0.6452580094337463, + "step": 4309 + }, + { + "epoch": 1.2602719695861968, + "grad_norm": 1.7812547970338353, + "learning_rate": 6.597977505250992e-06, + "loss": 0.6133028268814087, + "step": 4310 + }, + { + "epoch": 1.2605644100014621, + "grad_norm": 1.552230597230507, + "learning_rate": 6.5934315751647345e-06, + "loss": 0.4930221140384674, + "step": 4311 + }, + { + "epoch": 1.2608568504167277, + "grad_norm": 2.197359143106273, + "learning_rate": 6.588886441343136e-06, + "loss": 0.48653531074523926, + "step": 4312 + }, + { + "epoch": 1.261149290831993, + "grad_norm": 1.851387133095935, + "learning_rate": 6.5843421048485915e-06, + "loss": 0.6594399213790894, + "step": 4313 + }, + { + "epoch": 1.2614417312472583, + "grad_norm": 1.66909694599425, + "learning_rate": 6.579798566743314e-06, + "loss": 
0.5164401531219482, + "step": 4314 + }, + { + "epoch": 1.2617341716625239, + "grad_norm": 1.7484363064869977, + "learning_rate": 6.5752558280893245e-06, + "loss": 0.6338971853256226, + "step": 4315 + }, + { + "epoch": 1.2620266120777892, + "grad_norm": 1.7526913055276123, + "learning_rate": 6.570713889948461e-06, + "loss": 0.5301859974861145, + "step": 4316 + }, + { + "epoch": 1.2623190524930545, + "grad_norm": 1.5016995868339762, + "learning_rate": 6.566172753382376e-06, + "loss": 0.4572887420654297, + "step": 4317 + }, + { + "epoch": 1.2626114929083199, + "grad_norm": 1.5874066468532555, + "learning_rate": 6.561632419452532e-06, + "loss": 0.5235984325408936, + "step": 4318 + }, + { + "epoch": 1.2629039333235852, + "grad_norm": 1.5456604836068861, + "learning_rate": 6.557092889220206e-06, + "loss": 0.586036205291748, + "step": 4319 + }, + { + "epoch": 1.2631963737388507, + "grad_norm": 1.6865403223453492, + "learning_rate": 6.5525541637464855e-06, + "loss": 0.4728356599807739, + "step": 4320 + }, + { + "epoch": 1.263488814154116, + "grad_norm": 1.5435862254535146, + "learning_rate": 6.548016244092265e-06, + "loss": 0.4932190179824829, + "step": 4321 + }, + { + "epoch": 1.2637812545693814, + "grad_norm": 1.6817765339416926, + "learning_rate": 6.543479131318259e-06, + "loss": 0.525676429271698, + "step": 4322 + }, + { + "epoch": 1.264073694984647, + "grad_norm": 1.4602981048339732, + "learning_rate": 6.538942826484991e-06, + "loss": 0.5462610721588135, + "step": 4323 + }, + { + "epoch": 1.2643661353999123, + "grad_norm": 1.6170865165049584, + "learning_rate": 6.534407330652792e-06, + "loss": 0.5391229391098022, + "step": 4324 + }, + { + "epoch": 1.2646585758151776, + "grad_norm": 1.7047610503615187, + "learning_rate": 6.529872644881811e-06, + "loss": 0.5361309051513672, + "step": 4325 + }, + { + "epoch": 1.2649510162304431, + "grad_norm": 1.7296167923882715, + "learning_rate": 6.525338770232001e-06, + "loss": 0.5692390203475952, + "step": 4326 + }, + { + "epoch": 
1.2652434566457085, + "grad_norm": 1.7314833561159049, + "learning_rate": 6.520805707763125e-06, + "loss": 0.5337555408477783, + "step": 4327 + }, + { + "epoch": 1.2655358970609738, + "grad_norm": 1.5538338127930955, + "learning_rate": 6.5162734585347605e-06, + "loss": 0.604168176651001, + "step": 4328 + }, + { + "epoch": 1.2658283374762394, + "grad_norm": 1.621069176676038, + "learning_rate": 6.5117420236062955e-06, + "loss": 0.5404821038246155, + "step": 4329 + }, + { + "epoch": 1.2661207778915047, + "grad_norm": 1.8779165644410452, + "learning_rate": 6.507211404036922e-06, + "loss": 0.6097038388252258, + "step": 4330 + }, + { + "epoch": 1.26641321830677, + "grad_norm": 1.41106750899854, + "learning_rate": 6.50268160088565e-06, + "loss": 0.44309180974960327, + "step": 4331 + }, + { + "epoch": 1.2667056587220353, + "grad_norm": 1.633689199912191, + "learning_rate": 6.498152615211286e-06, + "loss": 0.5703015923500061, + "step": 4332 + }, + { + "epoch": 1.2669980991373007, + "grad_norm": 1.9239494523704173, + "learning_rate": 6.4936244480724575e-06, + "loss": 0.5745347738265991, + "step": 4333 + }, + { + "epoch": 1.2672905395525662, + "grad_norm": 1.7558467932702122, + "learning_rate": 6.489097100527595e-06, + "loss": 0.6611922979354858, + "step": 4334 + }, + { + "epoch": 1.2675829799678315, + "grad_norm": 1.373367301388142, + "learning_rate": 6.484570573634939e-06, + "loss": 0.4560534358024597, + "step": 4335 + }, + { + "epoch": 1.2678754203830969, + "grad_norm": 1.3735982195225196, + "learning_rate": 6.480044868452535e-06, + "loss": 0.3765673041343689, + "step": 4336 + }, + { + "epoch": 1.2681678607983624, + "grad_norm": 1.631255659187599, + "learning_rate": 6.475519986038246e-06, + "loss": 0.6471004486083984, + "step": 4337 + }, + { + "epoch": 1.2684603012136277, + "grad_norm": 1.6199016829966775, + "learning_rate": 6.4709959274497284e-06, + "loss": 0.5639084577560425, + "step": 4338 + }, + { + "epoch": 1.268752741628893, + "grad_norm": 1.6880087227037737, + 
"learning_rate": 6.4664726937444545e-06, + "loss": 0.6367507576942444, + "step": 4339 + }, + { + "epoch": 1.2690451820441586, + "grad_norm": 2.0302420653268958, + "learning_rate": 6.4619502859797055e-06, + "loss": 0.6803586483001709, + "step": 4340 + }, + { + "epoch": 1.269337622459424, + "grad_norm": 1.7398101139995543, + "learning_rate": 6.457428705212565e-06, + "loss": 0.49068397283554077, + "step": 4341 + }, + { + "epoch": 1.2696300628746893, + "grad_norm": 1.8759736386903334, + "learning_rate": 6.4529079524999296e-06, + "loss": 0.616880476474762, + "step": 4342 + }, + { + "epoch": 1.2699225032899546, + "grad_norm": 1.3483643409763457, + "learning_rate": 6.448388028898489e-06, + "loss": 0.45614945888519287, + "step": 4343 + }, + { + "epoch": 1.27021494370522, + "grad_norm": 1.4554785032074153, + "learning_rate": 6.443868935464754e-06, + "loss": 0.49267178773880005, + "step": 4344 + }, + { + "epoch": 1.2705073841204855, + "grad_norm": 1.6269409722468795, + "learning_rate": 6.439350673255033e-06, + "loss": 0.5169225335121155, + "step": 4345 + }, + { + "epoch": 1.2707998245357508, + "grad_norm": 1.4955295461512919, + "learning_rate": 6.434833243325442e-06, + "loss": 0.4999169111251831, + "step": 4346 + }, + { + "epoch": 1.2710922649510161, + "grad_norm": 1.6243334237328435, + "learning_rate": 6.430316646731906e-06, + "loss": 0.6282567977905273, + "step": 4347 + }, + { + "epoch": 1.2713847053662817, + "grad_norm": 1.6085299245102849, + "learning_rate": 6.425800884530151e-06, + "loss": 0.5007494688034058, + "step": 4348 + }, + { + "epoch": 1.271677145781547, + "grad_norm": 1.656568917278449, + "learning_rate": 6.421285957775705e-06, + "loss": 0.5178118944168091, + "step": 4349 + }, + { + "epoch": 1.2719695861968123, + "grad_norm": 1.560370266514351, + "learning_rate": 6.4167718675239075e-06, + "loss": 0.5473636388778687, + "step": 4350 + }, + { + "epoch": 1.2722620266120779, + "grad_norm": 1.6953423126666767, + "learning_rate": 6.4122586148299004e-06, + "loss": 
0.5863620042800903, + "step": 4351 + }, + { + "epoch": 1.2725544670273432, + "grad_norm": 1.8607908969719156, + "learning_rate": 6.407746200748628e-06, + "loss": 0.5301654934883118, + "step": 4352 + }, + { + "epoch": 1.2728469074426085, + "grad_norm": 1.6932378497792755, + "learning_rate": 6.403234626334842e-06, + "loss": 0.5856075286865234, + "step": 4353 + }, + { + "epoch": 1.273139347857874, + "grad_norm": 1.678003179838639, + "learning_rate": 6.39872389264309e-06, + "loss": 0.49686455726623535, + "step": 4354 + }, + { + "epoch": 1.2734317882731394, + "grad_norm": 1.4854139308295418, + "learning_rate": 6.394214000727734e-06, + "loss": 0.5032684803009033, + "step": 4355 + }, + { + "epoch": 1.2737242286884047, + "grad_norm": 1.8801294667488437, + "learning_rate": 6.389704951642931e-06, + "loss": 0.6855330467224121, + "step": 4356 + }, + { + "epoch": 1.27401666910367, + "grad_norm": 1.479367610859775, + "learning_rate": 6.385196746442644e-06, + "loss": 0.5333864688873291, + "step": 4357 + }, + { + "epoch": 1.2743091095189354, + "grad_norm": 1.5944305875728124, + "learning_rate": 6.380689386180641e-06, + "loss": 0.5597629547119141, + "step": 4358 + }, + { + "epoch": 1.274601549934201, + "grad_norm": 1.467403558865203, + "learning_rate": 6.376182871910488e-06, + "loss": 0.4576488137245178, + "step": 4359 + }, + { + "epoch": 1.2748939903494663, + "grad_norm": 1.7247772731373485, + "learning_rate": 6.371677204685555e-06, + "loss": 0.45165061950683594, + "step": 4360 + }, + { + "epoch": 1.2751864307647316, + "grad_norm": 1.5415632861050979, + "learning_rate": 6.367172385559014e-06, + "loss": 0.5451514720916748, + "step": 4361 + }, + { + "epoch": 1.2754788711799971, + "grad_norm": 1.874618224476165, + "learning_rate": 6.362668415583841e-06, + "loss": 0.6141163110733032, + "step": 4362 + }, + { + "epoch": 1.2757713115952625, + "grad_norm": 1.6869879622469415, + "learning_rate": 6.358165295812809e-06, + "loss": 0.5156669020652771, + "step": 4363 + }, + { + "epoch": 
1.2760637520105278, + "grad_norm": 1.8328178355603366, + "learning_rate": 6.3536630272984974e-06, + "loss": 0.41485118865966797, + "step": 4364 + }, + { + "epoch": 1.2763561924257933, + "grad_norm": 1.546563271256682, + "learning_rate": 6.3491616110932845e-06, + "loss": 0.386514276266098, + "step": 4365 + }, + { + "epoch": 1.2766486328410587, + "grad_norm": 1.472426766767245, + "learning_rate": 6.344661048249345e-06, + "loss": 0.5620483160018921, + "step": 4366 + }, + { + "epoch": 1.276941073256324, + "grad_norm": 1.6328857080628636, + "learning_rate": 6.340161339818662e-06, + "loss": 0.4910007119178772, + "step": 4367 + }, + { + "epoch": 1.2772335136715895, + "grad_norm": 1.3312787841228058, + "learning_rate": 6.335662486853014e-06, + "loss": 0.4628123939037323, + "step": 4368 + }, + { + "epoch": 1.2775259540868549, + "grad_norm": 1.7576669653081538, + "learning_rate": 6.331164490403978e-06, + "loss": 0.5129125118255615, + "step": 4369 + }, + { + "epoch": 1.2778183945021202, + "grad_norm": 1.3282548492081792, + "learning_rate": 6.326667351522939e-06, + "loss": 0.45091521739959717, + "step": 4370 + }, + { + "epoch": 1.2781108349173855, + "grad_norm": 1.4312089210542207, + "learning_rate": 6.322171071261071e-06, + "loss": 0.4914324879646301, + "step": 4371 + }, + { + "epoch": 1.2784032753326509, + "grad_norm": 1.7409991660962885, + "learning_rate": 6.317675650669353e-06, + "loss": 0.6361461877822876, + "step": 4372 + }, + { + "epoch": 1.2786957157479164, + "grad_norm": 1.6196651007639755, + "learning_rate": 6.313181090798561e-06, + "loss": 0.4251636564731598, + "step": 4373 + }, + { + "epoch": 1.2789881561631817, + "grad_norm": 1.7204832108380748, + "learning_rate": 6.308687392699275e-06, + "loss": 0.5605714321136475, + "step": 4374 + }, + { + "epoch": 1.279280596578447, + "grad_norm": 1.5898129202606366, + "learning_rate": 6.304194557421867e-06, + "loss": 0.5366392731666565, + "step": 4375 + }, + { + "epoch": 1.2795730369937126, + "grad_norm": 1.9084263306328586, + 
"learning_rate": 6.299702586016512e-06, + "loss": 0.5501587986946106, + "step": 4376 + }, + { + "epoch": 1.279865477408978, + "grad_norm": 1.856477952130892, + "learning_rate": 6.295211479533177e-06, + "loss": 0.6145694851875305, + "step": 4377 + }, + { + "epoch": 1.2801579178242433, + "grad_norm": 1.9271512769721166, + "learning_rate": 6.2907212390216335e-06, + "loss": 0.5921984910964966, + "step": 4378 + }, + { + "epoch": 1.2804503582395088, + "grad_norm": 1.5061577707687395, + "learning_rate": 6.286231865531447e-06, + "loss": 0.4376833140850067, + "step": 4379 + }, + { + "epoch": 1.2807427986547741, + "grad_norm": 1.5348932565255202, + "learning_rate": 6.281743360111983e-06, + "loss": 0.5141662955284119, + "step": 4380 + }, + { + "epoch": 1.2810352390700395, + "grad_norm": 1.700541758244486, + "learning_rate": 6.2772557238124025e-06, + "loss": 0.7065848112106323, + "step": 4381 + }, + { + "epoch": 1.2813276794853048, + "grad_norm": 1.500203661604044, + "learning_rate": 6.272768957681659e-06, + "loss": 0.5662813186645508, + "step": 4382 + }, + { + "epoch": 1.2816201199005701, + "grad_norm": 1.5006210101215816, + "learning_rate": 6.268283062768512e-06, + "loss": 0.46340662240982056, + "step": 4383 + }, + { + "epoch": 1.2819125603158357, + "grad_norm": 1.5406586553103667, + "learning_rate": 6.263798040121508e-06, + "loss": 0.5258422493934631, + "step": 4384 + }, + { + "epoch": 1.282205000731101, + "grad_norm": 1.8313859097442655, + "learning_rate": 6.2593138907889965e-06, + "loss": 0.5586943030357361, + "step": 4385 + }, + { + "epoch": 1.2824974411463663, + "grad_norm": 1.707661958872181, + "learning_rate": 6.254830615819116e-06, + "loss": 0.5224723815917969, + "step": 4386 + }, + { + "epoch": 1.2827898815616319, + "grad_norm": 1.8755820352841006, + "learning_rate": 6.250348216259812e-06, + "loss": 0.6092125177383423, + "step": 4387 + }, + { + "epoch": 1.2830823219768972, + "grad_norm": 1.6601692047393128, + "learning_rate": 6.245866693158813e-06, + "loss": 
0.5582839250564575, + "step": 4388 + }, + { + "epoch": 1.2833747623921625, + "grad_norm": 1.529218817283274, + "learning_rate": 6.241386047563649e-06, + "loss": 0.6074620485305786, + "step": 4389 + }, + { + "epoch": 1.283667202807428, + "grad_norm": 1.3747332990929297, + "learning_rate": 6.236906280521646e-06, + "loss": 0.6247550845146179, + "step": 4390 + }, + { + "epoch": 1.2839596432226934, + "grad_norm": 1.6645308511195784, + "learning_rate": 6.232427393079919e-06, + "loss": 0.5325940847396851, + "step": 4391 + }, + { + "epoch": 1.2842520836379587, + "grad_norm": 1.5279900789464966, + "learning_rate": 6.227949386285379e-06, + "loss": 0.5082288980484009, + "step": 4392 + }, + { + "epoch": 1.2845445240532243, + "grad_norm": 1.587332587045442, + "learning_rate": 6.223472261184738e-06, + "loss": 0.5704036355018616, + "step": 4393 + }, + { + "epoch": 1.2848369644684896, + "grad_norm": 1.7646477307813349, + "learning_rate": 6.218996018824492e-06, + "loss": 0.5301543474197388, + "step": 4394 + }, + { + "epoch": 1.285129404883755, + "grad_norm": 1.6829663682000435, + "learning_rate": 6.21452066025094e-06, + "loss": 0.48660725355148315, + "step": 4395 + }, + { + "epoch": 1.2854218452990203, + "grad_norm": 1.7324467857194032, + "learning_rate": 6.210046186510168e-06, + "loss": 0.5744560956954956, + "step": 4396 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.6645302463411007, + "learning_rate": 6.205572598648055e-06, + "loss": 0.5714898109436035, + "step": 4397 + }, + { + "epoch": 1.2860067261295511, + "grad_norm": 1.8166911532739076, + "learning_rate": 6.201099897710277e-06, + "loss": 0.6616571545600891, + "step": 4398 + }, + { + "epoch": 1.2862991665448165, + "grad_norm": 1.75450880953695, + "learning_rate": 6.1966280847423e-06, + "loss": 0.5552959442138672, + "step": 4399 + }, + { + "epoch": 1.2865916069600818, + "grad_norm": 1.6738534376194054, + "learning_rate": 6.192157160789382e-06, + "loss": 0.5544919967651367, + "step": 4400 + }, + { + "epoch": 
1.2868840473753473, + "grad_norm": 1.6448049553355306, + "learning_rate": 6.18768712689658e-06, + "loss": 0.5914726853370667, + "step": 4401 + }, + { + "epoch": 1.2871764877906127, + "grad_norm": 1.76025336575331, + "learning_rate": 6.183217984108729e-06, + "loss": 0.47191259264945984, + "step": 4402 + }, + { + "epoch": 1.287468928205878, + "grad_norm": 1.690038062727397, + "learning_rate": 6.178749733470468e-06, + "loss": 0.6479181051254272, + "step": 4403 + }, + { + "epoch": 1.2877613686211435, + "grad_norm": 1.5093061541159978, + "learning_rate": 6.174282376026225e-06, + "loss": 0.42491137981414795, + "step": 4404 + }, + { + "epoch": 1.2880538090364089, + "grad_norm": 1.5952968160469727, + "learning_rate": 6.169815912820214e-06, + "loss": 0.6037728786468506, + "step": 4405 + }, + { + "epoch": 1.2883462494516742, + "grad_norm": 1.6035701682484467, + "learning_rate": 6.165350344896446e-06, + "loss": 0.4979787766933441, + "step": 4406 + }, + { + "epoch": 1.2886386898669397, + "grad_norm": 1.800062229580063, + "learning_rate": 6.160885673298722e-06, + "loss": 0.5863564014434814, + "step": 4407 + }, + { + "epoch": 1.288931130282205, + "grad_norm": 1.735193401842224, + "learning_rate": 6.156421899070628e-06, + "loss": 0.6516878008842468, + "step": 4408 + }, + { + "epoch": 1.2892235706974704, + "grad_norm": 1.3644068122534347, + "learning_rate": 6.151959023255545e-06, + "loss": 0.45655903220176697, + "step": 4409 + }, + { + "epoch": 1.2895160111127357, + "grad_norm": 1.5401566996811273, + "learning_rate": 6.147497046896644e-06, + "loss": 0.4751289486885071, + "step": 4410 + }, + { + "epoch": 1.289808451528001, + "grad_norm": 1.6902527178920421, + "learning_rate": 6.1430359710368845e-06, + "loss": 0.48472684621810913, + "step": 4411 + }, + { + "epoch": 1.2901008919432666, + "grad_norm": 1.5473669029252384, + "learning_rate": 6.138575796719017e-06, + "loss": 0.5014214515686035, + "step": 4412 + }, + { + "epoch": 1.290393332358532, + "grad_norm": 1.7827106404845192, + 
"learning_rate": 6.134116524985581e-06, + "loss": 0.5979991555213928, + "step": 4413 + }, + { + "epoch": 1.2906857727737973, + "grad_norm": 1.325839826079579, + "learning_rate": 6.129658156878899e-06, + "loss": 0.4651130437850952, + "step": 4414 + }, + { + "epoch": 1.2909782131890628, + "grad_norm": 1.7806648175874917, + "learning_rate": 6.125200693441092e-06, + "loss": 0.5938215255737305, + "step": 4415 + }, + { + "epoch": 1.2912706536043281, + "grad_norm": 1.5490961027602033, + "learning_rate": 6.1207441357140626e-06, + "loss": 0.4893927574157715, + "step": 4416 + }, + { + "epoch": 1.2915630940195935, + "grad_norm": 1.7524993955466766, + "learning_rate": 6.116288484739507e-06, + "loss": 0.5546435713768005, + "step": 4417 + }, + { + "epoch": 1.291855534434859, + "grad_norm": 1.8413981048239587, + "learning_rate": 6.111833741558905e-06, + "loss": 0.545367419719696, + "step": 4418 + }, + { + "epoch": 1.2921479748501243, + "grad_norm": 1.4120684443774227, + "learning_rate": 6.1073799072135245e-06, + "loss": 0.47479283809661865, + "step": 4419 + }, + { + "epoch": 1.2924404152653897, + "grad_norm": 1.6721044710471762, + "learning_rate": 6.102926982744423e-06, + "loss": 0.5109270215034485, + "step": 4420 + }, + { + "epoch": 1.292732855680655, + "grad_norm": 1.774842272860347, + "learning_rate": 6.098474969192445e-06, + "loss": 0.5862404108047485, + "step": 4421 + }, + { + "epoch": 1.2930252960959203, + "grad_norm": 1.5821200459355214, + "learning_rate": 6.09402386759822e-06, + "loss": 0.5031660795211792, + "step": 4422 + }, + { + "epoch": 1.2933177365111859, + "grad_norm": 1.7397846198854208, + "learning_rate": 6.089573679002168e-06, + "loss": 0.47179776430130005, + "step": 4423 + }, + { + "epoch": 1.2936101769264512, + "grad_norm": 1.5340233803824985, + "learning_rate": 6.085124404444495e-06, + "loss": 0.45889902114868164, + "step": 4424 + }, + { + "epoch": 1.2939026173417165, + "grad_norm": 1.5550814946749143, + "learning_rate": 6.080676044965188e-06, + "loss": 
0.49759042263031006, + "step": 4425 + }, + { + "epoch": 1.294195057756982, + "grad_norm": 1.9841525065569887, + "learning_rate": 6.076228601604024e-06, + "loss": 0.5980732440948486, + "step": 4426 + }, + { + "epoch": 1.2944874981722474, + "grad_norm": 1.6256180215634828, + "learning_rate": 6.07178207540057e-06, + "loss": 0.6167548894882202, + "step": 4427 + }, + { + "epoch": 1.2947799385875127, + "grad_norm": 1.7343822678821683, + "learning_rate": 6.067336467394169e-06, + "loss": 0.5632568597793579, + "step": 4428 + }, + { + "epoch": 1.2950723790027783, + "grad_norm": 1.713926568632917, + "learning_rate": 6.062891778623961e-06, + "loss": 0.5521456003189087, + "step": 4429 + }, + { + "epoch": 1.2953648194180436, + "grad_norm": 1.4514202434870498, + "learning_rate": 6.058448010128861e-06, + "loss": 0.5916576385498047, + "step": 4430 + }, + { + "epoch": 1.295657259833309, + "grad_norm": 1.4200773171635346, + "learning_rate": 6.054005162947571e-06, + "loss": 0.546825647354126, + "step": 4431 + }, + { + "epoch": 1.2959497002485745, + "grad_norm": 1.903586469303659, + "learning_rate": 6.049563238118584e-06, + "loss": 0.5704302787780762, + "step": 4432 + }, + { + "epoch": 1.2962421406638398, + "grad_norm": 1.6923235048512564, + "learning_rate": 6.0451222366801706e-06, + "loss": 0.5791710615158081, + "step": 4433 + }, + { + "epoch": 1.2965345810791051, + "grad_norm": 1.5242567102891653, + "learning_rate": 6.040682159670389e-06, + "loss": 0.41179752349853516, + "step": 4434 + }, + { + "epoch": 1.2968270214943705, + "grad_norm": 1.7120079687188825, + "learning_rate": 6.03624300812708e-06, + "loss": 0.5213680267333984, + "step": 4435 + }, + { + "epoch": 1.2971194619096358, + "grad_norm": 1.6198208396506975, + "learning_rate": 6.0318047830878675e-06, + "loss": 0.4917318522930145, + "step": 4436 + }, + { + "epoch": 1.2974119023249013, + "grad_norm": 1.9301576881874427, + "learning_rate": 6.027367485590159e-06, + "loss": 0.6347956657409668, + "step": 4437 + }, + { + "epoch": 
1.2977043427401667, + "grad_norm": 1.454096730257314, + "learning_rate": 6.022931116671147e-06, + "loss": 0.5263427495956421, + "step": 4438 + }, + { + "epoch": 1.297996783155432, + "grad_norm": 1.3982615348649814, + "learning_rate": 6.018495677367806e-06, + "loss": 0.5686784982681274, + "step": 4439 + }, + { + "epoch": 1.2982892235706975, + "grad_norm": 1.6986790860575087, + "learning_rate": 6.0140611687168934e-06, + "loss": 0.576974630355835, + "step": 4440 + }, + { + "epoch": 1.2985816639859629, + "grad_norm": 1.7183954732732796, + "learning_rate": 6.009627591754946e-06, + "loss": 0.5375877618789673, + "step": 4441 + }, + { + "epoch": 1.2988741044012282, + "grad_norm": 1.7026702794952187, + "learning_rate": 6.005194947518287e-06, + "loss": 0.6106576919555664, + "step": 4442 + }, + { + "epoch": 1.2991665448164937, + "grad_norm": 1.6076086367802058, + "learning_rate": 6.000763237043021e-06, + "loss": 0.475483238697052, + "step": 4443 + }, + { + "epoch": 1.299458985231759, + "grad_norm": 1.7568326021636087, + "learning_rate": 5.9963324613650335e-06, + "loss": 0.5819226503372192, + "step": 4444 + }, + { + "epoch": 1.2997514256470244, + "grad_norm": 1.6384408260054233, + "learning_rate": 5.991902621519988e-06, + "loss": 0.6394410133361816, + "step": 4445 + }, + { + "epoch": 1.30004386606229, + "grad_norm": 1.653615111391099, + "learning_rate": 5.987473718543338e-06, + "loss": 0.48502016067504883, + "step": 4446 + }, + { + "epoch": 1.3003363064775553, + "grad_norm": 1.5217151928427126, + "learning_rate": 5.983045753470308e-06, + "loss": 0.5782333612442017, + "step": 4447 + }, + { + "epoch": 1.3006287468928206, + "grad_norm": 1.8358895387455052, + "learning_rate": 5.97861872733591e-06, + "loss": 0.5498893857002258, + "step": 4448 + }, + { + "epoch": 1.300921187308086, + "grad_norm": 1.5773905938706185, + "learning_rate": 5.974192641174934e-06, + "loss": 0.47757571935653687, + "step": 4449 + }, + { + "epoch": 1.3012136277233513, + "grad_norm": 1.751650457738534, + 
"learning_rate": 5.96976749602195e-06, + "loss": 0.5401994585990906, + "step": 4450 + }, + { + "epoch": 1.3015060681386168, + "grad_norm": 1.7445816604225337, + "learning_rate": 5.965343292911309e-06, + "loss": 0.5818814635276794, + "step": 4451 + }, + { + "epoch": 1.3017985085538821, + "grad_norm": 1.890298335476633, + "learning_rate": 5.9609200328771465e-06, + "loss": 0.524645984172821, + "step": 4452 + }, + { + "epoch": 1.3020909489691475, + "grad_norm": 1.6124004265504417, + "learning_rate": 5.956497716953365e-06, + "loss": 0.46523183584213257, + "step": 4453 + }, + { + "epoch": 1.302383389384413, + "grad_norm": 1.6328139064911342, + "learning_rate": 5.952076346173657e-06, + "loss": 0.6066159009933472, + "step": 4454 + }, + { + "epoch": 1.3026758297996783, + "grad_norm": 1.5743831575113747, + "learning_rate": 5.947655921571491e-06, + "loss": 0.48635774850845337, + "step": 4455 + }, + { + "epoch": 1.3029682702149437, + "grad_norm": 1.7296441740948125, + "learning_rate": 5.943236444180116e-06, + "loss": 0.5159435868263245, + "step": 4456 + }, + { + "epoch": 1.3032607106302092, + "grad_norm": 1.43545214825073, + "learning_rate": 5.938817915032558e-06, + "loss": 0.5566878914833069, + "step": 4457 + }, + { + "epoch": 1.3035531510454745, + "grad_norm": 1.5736652583628634, + "learning_rate": 5.934400335161618e-06, + "loss": 0.46998029947280884, + "step": 4458 + }, + { + "epoch": 1.3038455914607399, + "grad_norm": 1.7808256717613173, + "learning_rate": 5.92998370559988e-06, + "loss": 0.5554553270339966, + "step": 4459 + }, + { + "epoch": 1.3041380318760052, + "grad_norm": 1.7335497855414168, + "learning_rate": 5.925568027379704e-06, + "loss": 0.5659651756286621, + "step": 4460 + }, + { + "epoch": 1.3044304722912705, + "grad_norm": 1.4784849199972236, + "learning_rate": 5.921153301533229e-06, + "loss": 0.5105445981025696, + "step": 4461 + }, + { + "epoch": 1.304722912706536, + "grad_norm": 1.6833489269681376, + "learning_rate": 5.91673952909237e-06, + "loss": 
0.5255740284919739, + "step": 4462 + }, + { + "epoch": 1.3050153531218014, + "grad_norm": 1.6388447853221406, + "learning_rate": 5.912326711088821e-06, + "loss": 0.5691270232200623, + "step": 4463 + }, + { + "epoch": 1.3053077935370667, + "grad_norm": 1.64945916767282, + "learning_rate": 5.907914848554048e-06, + "loss": 0.5783474445343018, + "step": 4464 + }, + { + "epoch": 1.3056002339523323, + "grad_norm": 1.631334603802349, + "learning_rate": 5.903503942519299e-06, + "loss": 0.6305002570152283, + "step": 4465 + }, + { + "epoch": 1.3058926743675976, + "grad_norm": 1.9357776829199835, + "learning_rate": 5.8990939940156e-06, + "loss": 0.6465631723403931, + "step": 4466 + }, + { + "epoch": 1.306185114782863, + "grad_norm": 1.8264406193491898, + "learning_rate": 5.8946850040737434e-06, + "loss": 0.4883456230163574, + "step": 4467 + }, + { + "epoch": 1.3064775551981285, + "grad_norm": 1.3902013367704193, + "learning_rate": 5.890276973724305e-06, + "loss": 0.4896056056022644, + "step": 4468 + }, + { + "epoch": 1.3067699956133938, + "grad_norm": 1.6292986861573446, + "learning_rate": 5.885869903997638e-06, + "loss": 0.603757917881012, + "step": 4469 + }, + { + "epoch": 1.3070624360286591, + "grad_norm": 1.6368879465310389, + "learning_rate": 5.881463795923866e-06, + "loss": 0.5412129163742065, + "step": 4470 + }, + { + "epoch": 1.3073548764439247, + "grad_norm": 1.576979548849775, + "learning_rate": 5.877058650532891e-06, + "loss": 0.5255335569381714, + "step": 4471 + }, + { + "epoch": 1.30764731685919, + "grad_norm": 1.861250264495057, + "learning_rate": 5.87265446885439e-06, + "loss": 0.5855039358139038, + "step": 4472 + }, + { + "epoch": 1.3079397572744553, + "grad_norm": 1.7387082626664492, + "learning_rate": 5.868251251917811e-06, + "loss": 0.5763603448867798, + "step": 4473 + }, + { + "epoch": 1.3082321976897207, + "grad_norm": 1.7494976398773932, + "learning_rate": 5.86384900075238e-06, + "loss": 0.5148910880088806, + "step": 4474 + }, + { + "epoch": 
1.308524638104986, + "grad_norm": 1.726220320494232, + "learning_rate": 5.859447716387097e-06, + "loss": 0.6387143135070801, + "step": 4475 + }, + { + "epoch": 1.3088170785202515, + "grad_norm": 1.6421362434800872, + "learning_rate": 5.855047399850735e-06, + "loss": 0.5492211580276489, + "step": 4476 + }, + { + "epoch": 1.3091095189355169, + "grad_norm": 1.748321310864673, + "learning_rate": 5.850648052171843e-06, + "loss": 0.5715115070343018, + "step": 4477 + }, + { + "epoch": 1.3094019593507822, + "grad_norm": 1.8948603499593957, + "learning_rate": 5.8462496743787385e-06, + "loss": 0.6295989155769348, + "step": 4478 + }, + { + "epoch": 1.3096943997660477, + "grad_norm": 1.6169983680834699, + "learning_rate": 5.841852267499518e-06, + "loss": 0.5843105316162109, + "step": 4479 + }, + { + "epoch": 1.309986840181313, + "grad_norm": 1.443044009123256, + "learning_rate": 5.837455832562049e-06, + "loss": 0.43283605575561523, + "step": 4480 + }, + { + "epoch": 1.3102792805965784, + "grad_norm": 1.6217104179487012, + "learning_rate": 5.8330603705939684e-06, + "loss": 0.6115404367446899, + "step": 4481 + }, + { + "epoch": 1.310571721011844, + "grad_norm": 1.2325386929467517, + "learning_rate": 5.828665882622692e-06, + "loss": 0.4274179935455322, + "step": 4482 + }, + { + "epoch": 1.3108641614271093, + "grad_norm": 1.3722363792161896, + "learning_rate": 5.824272369675403e-06, + "loss": 0.4385778307914734, + "step": 4483 + }, + { + "epoch": 1.3111566018423746, + "grad_norm": 1.939305382555819, + "learning_rate": 5.819879832779058e-06, + "loss": 0.6310205459594727, + "step": 4484 + }, + { + "epoch": 1.3114490422576401, + "grad_norm": 1.5511013635003787, + "learning_rate": 5.815488272960388e-06, + "loss": 0.6309192180633545, + "step": 4485 + }, + { + "epoch": 1.3117414826729055, + "grad_norm": 1.8051032087296774, + "learning_rate": 5.811097691245895e-06, + "loss": 0.4751497507095337, + "step": 4486 + }, + { + "epoch": 1.3120339230881708, + "grad_norm": 1.5897893613027336, + 
"learning_rate": 5.806708088661846e-06, + "loss": 0.5540175437927246, + "step": 4487 + }, + { + "epoch": 1.3123263635034361, + "grad_norm": 1.924801228279098, + "learning_rate": 5.802319466234283e-06, + "loss": 0.5533273816108704, + "step": 4488 + }, + { + "epoch": 1.3126188039187014, + "grad_norm": 1.5486991099512135, + "learning_rate": 5.797931824989023e-06, + "loss": 0.463643878698349, + "step": 4489 + }, + { + "epoch": 1.312911244333967, + "grad_norm": 1.9073169839874196, + "learning_rate": 5.79354516595165e-06, + "loss": 0.5990232229232788, + "step": 4490 + }, + { + "epoch": 1.3132036847492323, + "grad_norm": 1.7681103257151853, + "learning_rate": 5.789159490147518e-06, + "loss": 0.5569760799407959, + "step": 4491 + }, + { + "epoch": 1.3134961251644977, + "grad_norm": 1.598897244778613, + "learning_rate": 5.784774798601755e-06, + "loss": 0.5016749501228333, + "step": 4492 + }, + { + "epoch": 1.3137885655797632, + "grad_norm": 1.8830720070455038, + "learning_rate": 5.780391092339253e-06, + "loss": 0.5624934434890747, + "step": 4493 + }, + { + "epoch": 1.3140810059950285, + "grad_norm": 2.146444811832683, + "learning_rate": 5.776008372384676e-06, + "loss": 0.7445797920227051, + "step": 4494 + }, + { + "epoch": 1.3143734464102939, + "grad_norm": 1.9276650555591395, + "learning_rate": 5.771626639762461e-06, + "loss": 0.5849495530128479, + "step": 4495 + }, + { + "epoch": 1.3146658868255594, + "grad_norm": 1.6679644602081254, + "learning_rate": 5.767245895496809e-06, + "loss": 0.5672163367271423, + "step": 4496 + }, + { + "epoch": 1.3149583272408247, + "grad_norm": 1.4482015307125622, + "learning_rate": 5.762866140611698e-06, + "loss": 0.5278276801109314, + "step": 4497 + }, + { + "epoch": 1.31525076765609, + "grad_norm": 1.8273800354421317, + "learning_rate": 5.7584873761308615e-06, + "loss": 0.54908686876297, + "step": 4498 + }, + { + "epoch": 1.3155432080713554, + "grad_norm": 1.7592605115208164, + "learning_rate": 5.754109603077811e-06, + "loss": 
0.5257589817047119, + "step": 4499 + }, + { + "epoch": 1.3158356484866207, + "grad_norm": 1.4910358958486878, + "learning_rate": 5.749732822475825e-06, + "loss": 0.5744988918304443, + "step": 4500 + }, + { + "epoch": 1.3161280889018863, + "grad_norm": 1.4827754689170145, + "learning_rate": 5.74535703534795e-06, + "loss": 0.5186365246772766, + "step": 4501 + }, + { + "epoch": 1.3164205293171516, + "grad_norm": 1.6539527720112557, + "learning_rate": 5.740982242716999e-06, + "loss": 0.53574538230896, + "step": 4502 + }, + { + "epoch": 1.316712969732417, + "grad_norm": 1.5347054109635063, + "learning_rate": 5.736608445605555e-06, + "loss": 0.6087717413902283, + "step": 4503 + }, + { + "epoch": 1.3170054101476825, + "grad_norm": 1.5413257189374059, + "learning_rate": 5.732235645035964e-06, + "loss": 0.5132769346237183, + "step": 4504 + }, + { + "epoch": 1.3172978505629478, + "grad_norm": 1.6361856291197476, + "learning_rate": 5.727863842030342e-06, + "loss": 0.588458776473999, + "step": 4505 + }, + { + "epoch": 1.3175902909782131, + "grad_norm": 1.6129388653597692, + "learning_rate": 5.723493037610572e-06, + "loss": 0.5154894590377808, + "step": 4506 + }, + { + "epoch": 1.3178827313934787, + "grad_norm": 1.5507002889867831, + "learning_rate": 5.719123232798304e-06, + "loss": 0.586688220500946, + "step": 4507 + }, + { + "epoch": 1.318175171808744, + "grad_norm": 1.8125403251714918, + "learning_rate": 5.714754428614956e-06, + "loss": 0.4948856830596924, + "step": 4508 + }, + { + "epoch": 1.3184676122240093, + "grad_norm": 1.5128350944665496, + "learning_rate": 5.7103866260817005e-06, + "loss": 0.6179821491241455, + "step": 4509 + }, + { + "epoch": 1.3187600526392749, + "grad_norm": 1.876290206668384, + "learning_rate": 5.7060198262194914e-06, + "loss": 0.5865011811256409, + "step": 4510 + }, + { + "epoch": 1.3190524930545402, + "grad_norm": 1.660419141577327, + "learning_rate": 5.701654030049038e-06, + "loss": 0.519783079624176, + "step": 4511 + }, + { + "epoch": 
1.3193449334698055, + "grad_norm": 1.5035780556155738, + "learning_rate": 5.697289238590822e-06, + "loss": 0.4238147437572479, + "step": 4512 + }, + { + "epoch": 1.3196373738850709, + "grad_norm": 1.6350345014151721, + "learning_rate": 5.6929254528650855e-06, + "loss": 0.5931107997894287, + "step": 4513 + }, + { + "epoch": 1.3199298143003362, + "grad_norm": 1.7485415603348589, + "learning_rate": 5.688562673891837e-06, + "loss": 0.7454524040222168, + "step": 4514 + }, + { + "epoch": 1.3202222547156017, + "grad_norm": 1.6756127294636487, + "learning_rate": 5.684200902690848e-06, + "loss": 0.5909554362297058, + "step": 4515 + }, + { + "epoch": 1.320514695130867, + "grad_norm": 1.449068353866628, + "learning_rate": 5.67984014028166e-06, + "loss": 0.5059943199157715, + "step": 4516 + }, + { + "epoch": 1.3208071355461324, + "grad_norm": 1.3855018310443914, + "learning_rate": 5.675480387683572e-06, + "loss": 0.4387373924255371, + "step": 4517 + }, + { + "epoch": 1.321099575961398, + "grad_norm": 1.6368288915875209, + "learning_rate": 5.671121645915648e-06, + "loss": 0.6452310681343079, + "step": 4518 + }, + { + "epoch": 1.3213920163766633, + "grad_norm": 1.4569471180570228, + "learning_rate": 5.666763915996725e-06, + "loss": 0.5629088282585144, + "step": 4519 + }, + { + "epoch": 1.3216844567919286, + "grad_norm": 1.6108062624448902, + "learning_rate": 5.662407198945386e-06, + "loss": 0.6442849636077881, + "step": 4520 + }, + { + "epoch": 1.3219768972071941, + "grad_norm": 1.4707356833436183, + "learning_rate": 5.6580514957799894e-06, + "loss": 0.5330031514167786, + "step": 4521 + }, + { + "epoch": 1.3222693376224595, + "grad_norm": 1.4396348923376052, + "learning_rate": 5.6536968075186575e-06, + "loss": 0.471035361289978, + "step": 4522 + }, + { + "epoch": 1.3225617780377248, + "grad_norm": 1.5589169874424196, + "learning_rate": 5.649343135179271e-06, + "loss": 0.5675650835037231, + "step": 4523 + }, + { + "epoch": 1.3228542184529903, + "grad_norm": 1.6961906881686575, + 
"learning_rate": 5.644990479779473e-06, + "loss": 0.5458093881607056, + "step": 4524 + }, + { + "epoch": 1.3231466588682557, + "grad_norm": 1.5690712646364733, + "learning_rate": 5.640638842336672e-06, + "loss": 0.5625189542770386, + "step": 4525 + }, + { + "epoch": 1.323439099283521, + "grad_norm": 1.778677748743509, + "learning_rate": 5.636288223868038e-06, + "loss": 0.5868214964866638, + "step": 4526 + }, + { + "epoch": 1.3237315396987863, + "grad_norm": 1.6502123203157841, + "learning_rate": 5.631938625390498e-06, + "loss": 0.5340765714645386, + "step": 4527 + }, + { + "epoch": 1.3240239801140516, + "grad_norm": 1.4463169385647288, + "learning_rate": 5.627590047920747e-06, + "loss": 0.4487069845199585, + "step": 4528 + }, + { + "epoch": 1.3243164205293172, + "grad_norm": 1.5750183859940412, + "learning_rate": 5.623242492475237e-06, + "loss": 0.4246913194656372, + "step": 4529 + }, + { + "epoch": 1.3246088609445825, + "grad_norm": 1.6537085849345186, + "learning_rate": 5.618895960070188e-06, + "loss": 0.49904564023017883, + "step": 4530 + }, + { + "epoch": 1.3249013013598478, + "grad_norm": 1.6201874773916152, + "learning_rate": 5.614550451721566e-06, + "loss": 0.5506085157394409, + "step": 4531 + }, + { + "epoch": 1.3251937417751134, + "grad_norm": 1.6929750939693964, + "learning_rate": 5.610205968445111e-06, + "loss": 0.4861884117126465, + "step": 4532 + }, + { + "epoch": 1.3254861821903787, + "grad_norm": 1.5616728357477914, + "learning_rate": 5.605862511256322e-06, + "loss": 0.5639146566390991, + "step": 4533 + }, + { + "epoch": 1.325778622605644, + "grad_norm": 1.3747626231277423, + "learning_rate": 5.601520081170455e-06, + "loss": 0.43305879831314087, + "step": 4534 + }, + { + "epoch": 1.3260710630209096, + "grad_norm": 1.4728588464752952, + "learning_rate": 5.597178679202524e-06, + "loss": 0.4820408821105957, + "step": 4535 + }, + { + "epoch": 1.326363503436175, + "grad_norm": 2.092875019342334, + "learning_rate": 5.592838306367307e-06, + "loss": 
0.5601707100868225, + "step": 4536 + }, + { + "epoch": 1.3266559438514403, + "grad_norm": 1.6269012393440097, + "learning_rate": 5.588498963679339e-06, + "loss": 0.5655055046081543, + "step": 4537 + }, + { + "epoch": 1.3269483842667056, + "grad_norm": 1.871556737283143, + "learning_rate": 5.584160652152917e-06, + "loss": 0.5425975322723389, + "step": 4538 + }, + { + "epoch": 1.327240824681971, + "grad_norm": 1.5388263554547548, + "learning_rate": 5.579823372802098e-06, + "loss": 0.607103168964386, + "step": 4539 + }, + { + "epoch": 1.3275332650972365, + "grad_norm": 1.6396827179367406, + "learning_rate": 5.575487126640686e-06, + "loss": 0.6011538505554199, + "step": 4540 + }, + { + "epoch": 1.3278257055125018, + "grad_norm": 1.6364470669862505, + "learning_rate": 5.571151914682258e-06, + "loss": 0.5333601236343384, + "step": 4541 + }, + { + "epoch": 1.3281181459277671, + "grad_norm": 1.7756177203838306, + "learning_rate": 5.566817737940142e-06, + "loss": 0.576410174369812, + "step": 4542 + }, + { + "epoch": 1.3284105863430327, + "grad_norm": 1.8060302167235907, + "learning_rate": 5.562484597427425e-06, + "loss": 0.506458044052124, + "step": 4543 + }, + { + "epoch": 1.328703026758298, + "grad_norm": 2.0174061298696975, + "learning_rate": 5.558152494156955e-06, + "loss": 0.5893718004226685, + "step": 4544 + }, + { + "epoch": 1.3289954671735633, + "grad_norm": 1.6979483029237916, + "learning_rate": 5.55382142914133e-06, + "loss": 0.508120059967041, + "step": 4545 + }, + { + "epoch": 1.3292879075888289, + "grad_norm": 1.5737735987577735, + "learning_rate": 5.5494914033929126e-06, + "loss": 0.6103616952896118, + "step": 4546 + }, + { + "epoch": 1.3295803480040942, + "grad_norm": 1.7304904972315491, + "learning_rate": 5.545162417923822e-06, + "loss": 0.5290235280990601, + "step": 4547 + }, + { + "epoch": 1.3298727884193595, + "grad_norm": 1.5350904839753017, + "learning_rate": 5.540834473745929e-06, + "loss": 0.5729631185531616, + "step": 4548 + }, + { + "epoch": 
1.330165228834625, + "grad_norm": 1.5574358916011883, + "learning_rate": 5.536507571870866e-06, + "loss": 0.48720547556877136, + "step": 4549 + }, + { + "epoch": 1.3304576692498904, + "grad_norm": 1.5393587740053045, + "learning_rate": 5.532181713310023e-06, + "loss": 0.4987955689430237, + "step": 4550 + }, + { + "epoch": 1.3307501096651557, + "grad_norm": 1.3126988702980638, + "learning_rate": 5.527856899074536e-06, + "loss": 0.4002467393875122, + "step": 4551 + }, + { + "epoch": 1.331042550080421, + "grad_norm": 2.0947575410388866, + "learning_rate": 5.523533130175308e-06, + "loss": 0.7435724139213562, + "step": 4552 + }, + { + "epoch": 1.3313349904956864, + "grad_norm": 1.541726198150986, + "learning_rate": 5.519210407622993e-06, + "loss": 0.34711340069770813, + "step": 4553 + }, + { + "epoch": 1.331627430910952, + "grad_norm": 1.6396721749099359, + "learning_rate": 5.514888732428003e-06, + "loss": 0.4749720096588135, + "step": 4554 + }, + { + "epoch": 1.3319198713262173, + "grad_norm": 1.7586628740577253, + "learning_rate": 5.5105681056005e-06, + "loss": 0.5818741321563721, + "step": 4555 + }, + { + "epoch": 1.3322123117414826, + "grad_norm": 1.7095504305078453, + "learning_rate": 5.506248528150407e-06, + "loss": 0.5715004801750183, + "step": 4556 + }, + { + "epoch": 1.3325047521567481, + "grad_norm": 1.7722621684818736, + "learning_rate": 5.501930001087399e-06, + "loss": 0.5465661287307739, + "step": 4557 + }, + { + "epoch": 1.3327971925720135, + "grad_norm": 1.7620411170921917, + "learning_rate": 5.4976125254209035e-06, + "loss": 0.6324847936630249, + "step": 4558 + }, + { + "epoch": 1.3330896329872788, + "grad_norm": 1.4165701736936904, + "learning_rate": 5.493296102160105e-06, + "loss": 0.4616294503211975, + "step": 4559 + }, + { + "epoch": 1.3333820734025443, + "grad_norm": 1.6922106714814378, + "learning_rate": 5.488980732313942e-06, + "loss": 0.5187079310417175, + "step": 4560 + }, + { + "epoch": 1.3336745138178097, + "grad_norm": 1.8396067182286635, + 
"learning_rate": 5.484666416891109e-06, + "loss": 0.6120654344558716, + "step": 4561 + }, + { + "epoch": 1.333966954233075, + "grad_norm": 1.6878860661661148, + "learning_rate": 5.480353156900044e-06, + "loss": 0.6171379685401917, + "step": 4562 + }, + { + "epoch": 1.3342593946483405, + "grad_norm": 1.510636167770684, + "learning_rate": 5.4760409533489475e-06, + "loss": 0.4690072536468506, + "step": 4563 + }, + { + "epoch": 1.3345518350636059, + "grad_norm": 1.5961764389633983, + "learning_rate": 5.471729807245773e-06, + "loss": 0.511309802532196, + "step": 4564 + }, + { + "epoch": 1.3348442754788712, + "grad_norm": 1.6355911684199975, + "learning_rate": 5.467419719598223e-06, + "loss": 0.5657862424850464, + "step": 4565 + }, + { + "epoch": 1.3351367158941365, + "grad_norm": 1.7641189489668823, + "learning_rate": 5.4631106914137555e-06, + "loss": 0.4263400733470917, + "step": 4566 + }, + { + "epoch": 1.3354291563094018, + "grad_norm": 1.8179548841156754, + "learning_rate": 5.458802723699579e-06, + "loss": 0.6275177001953125, + "step": 4567 + }, + { + "epoch": 1.3357215967246674, + "grad_norm": 1.6668120373290058, + "learning_rate": 5.454495817462655e-06, + "loss": 0.3857421278953552, + "step": 4568 + }, + { + "epoch": 1.3360140371399327, + "grad_norm": 1.7165178528012586, + "learning_rate": 5.450189973709697e-06, + "loss": 0.5834560394287109, + "step": 4569 + }, + { + "epoch": 1.336306477555198, + "grad_norm": 1.6632572235317495, + "learning_rate": 5.445885193447169e-06, + "loss": 0.6165010929107666, + "step": 4570 + }, + { + "epoch": 1.3365989179704636, + "grad_norm": 1.7470412065212853, + "learning_rate": 5.441581477681288e-06, + "loss": 0.6034595966339111, + "step": 4571 + }, + { + "epoch": 1.336891358385729, + "grad_norm": 1.740024112758077, + "learning_rate": 5.43727882741802e-06, + "loss": 0.570164144039154, + "step": 4572 + }, + { + "epoch": 1.3371837988009942, + "grad_norm": 1.4917354928366209, + "learning_rate": 5.432977243663089e-06, + "loss": 
0.5369169116020203, + "step": 4573 + }, + { + "epoch": 1.3374762392162598, + "grad_norm": 1.7875464183853407, + "learning_rate": 5.428676727421954e-06, + "loss": 0.5624364614486694, + "step": 4574 + }, + { + "epoch": 1.3377686796315251, + "grad_norm": 1.517348885410251, + "learning_rate": 5.424377279699842e-06, + "loss": 0.5002127885818481, + "step": 4575 + }, + { + "epoch": 1.3380611200467905, + "grad_norm": 1.7071888960959534, + "learning_rate": 5.42007890150172e-06, + "loss": 0.5998499393463135, + "step": 4576 + }, + { + "epoch": 1.3383535604620558, + "grad_norm": 1.7074905497433162, + "learning_rate": 5.415781593832307e-06, + "loss": 0.5988572835922241, + "step": 4577 + }, + { + "epoch": 1.338646000877321, + "grad_norm": 1.6551550553396004, + "learning_rate": 5.411485357696075e-06, + "loss": 0.5202064514160156, + "step": 4578 + }, + { + "epoch": 1.3389384412925867, + "grad_norm": 2.519364812628366, + "learning_rate": 5.407190194097241e-06, + "loss": 0.5246714949607849, + "step": 4579 + }, + { + "epoch": 1.339230881707852, + "grad_norm": 1.5907571805696734, + "learning_rate": 5.4028961040397765e-06, + "loss": 0.5998588800430298, + "step": 4580 + }, + { + "epoch": 1.3395233221231173, + "grad_norm": 1.7851321190756844, + "learning_rate": 5.3986030885273945e-06, + "loss": 0.5971418023109436, + "step": 4581 + }, + { + "epoch": 1.3398157625383829, + "grad_norm": 1.5857061971181772, + "learning_rate": 5.3943111485635644e-06, + "loss": 0.4638952910900116, + "step": 4582 + }, + { + "epoch": 1.3401082029536482, + "grad_norm": 1.5981773831835344, + "learning_rate": 5.390020285151502e-06, + "loss": 0.5007182955741882, + "step": 4583 + }, + { + "epoch": 1.3404006433689135, + "grad_norm": 1.610643010141743, + "learning_rate": 5.385730499294171e-06, + "loss": 0.5013964772224426, + "step": 4584 + }, + { + "epoch": 1.340693083784179, + "grad_norm": 1.6360724667305655, + "learning_rate": 5.381441791994276e-06, + "loss": 0.5699980854988098, + "step": 4585 + }, + { + "epoch": 
1.3409855241994444, + "grad_norm": 1.6423818252193456, + "learning_rate": 5.377154164254283e-06, + "loss": 0.5326210260391235, + "step": 4586 + }, + { + "epoch": 1.3412779646147097, + "grad_norm": 1.5111806674915849, + "learning_rate": 5.372867617076395e-06, + "loss": 0.6065158843994141, + "step": 4587 + }, + { + "epoch": 1.3415704050299753, + "grad_norm": 1.356022290658006, + "learning_rate": 5.368582151462569e-06, + "loss": 0.48427143692970276, + "step": 4588 + }, + { + "epoch": 1.3418628454452406, + "grad_norm": 1.4868111001385538, + "learning_rate": 5.364297768414505e-06, + "loss": 0.5755994915962219, + "step": 4589 + }, + { + "epoch": 1.342155285860506, + "grad_norm": 1.4690268021295017, + "learning_rate": 5.360014468933652e-06, + "loss": 0.4959644377231598, + "step": 4590 + }, + { + "epoch": 1.3424477262757712, + "grad_norm": 1.5383458553689457, + "learning_rate": 5.355732254021205e-06, + "loss": 0.5374274253845215, + "step": 4591 + }, + { + "epoch": 1.3427401666910366, + "grad_norm": 1.6286753609495908, + "learning_rate": 5.351451124678106e-06, + "loss": 0.5875111818313599, + "step": 4592 + }, + { + "epoch": 1.3430326071063021, + "grad_norm": 1.7964496178319949, + "learning_rate": 5.347171081905045e-06, + "loss": 0.5230692028999329, + "step": 4593 + }, + { + "epoch": 1.3433250475215675, + "grad_norm": 1.424672908012482, + "learning_rate": 5.342892126702453e-06, + "loss": 0.4624518156051636, + "step": 4594 + }, + { + "epoch": 1.3436174879368328, + "grad_norm": 1.9140370650793175, + "learning_rate": 5.3386142600705134e-06, + "loss": 0.5141074061393738, + "step": 4595 + }, + { + "epoch": 1.3439099283520983, + "grad_norm": 1.6249918744835086, + "learning_rate": 5.334337483009147e-06, + "loss": 0.4655565023422241, + "step": 4596 + }, + { + "epoch": 1.3442023687673637, + "grad_norm": 1.6516547156710706, + "learning_rate": 5.330061796518025e-06, + "loss": 0.6135094165802002, + "step": 4597 + }, + { + "epoch": 1.344494809182629, + "grad_norm": 1.595543646054287, + 
"learning_rate": 5.325787201596563e-06, + "loss": 0.5865254402160645, + "step": 4598 + }, + { + "epoch": 1.3447872495978945, + "grad_norm": 1.8032344885262006, + "learning_rate": 5.321513699243924e-06, + "loss": 0.5290840268135071, + "step": 4599 + }, + { + "epoch": 1.3450796900131599, + "grad_norm": 1.5294052976370318, + "learning_rate": 5.317241290459012e-06, + "loss": 0.554675817489624, + "step": 4600 + }, + { + "epoch": 1.3453721304284252, + "grad_norm": 1.499219614332531, + "learning_rate": 5.312969976240479e-06, + "loss": 0.5033853650093079, + "step": 4601 + }, + { + "epoch": 1.3456645708436907, + "grad_norm": 1.8108264508032192, + "learning_rate": 5.308699757586713e-06, + "loss": 0.44666093587875366, + "step": 4602 + }, + { + "epoch": 1.345957011258956, + "grad_norm": 1.5332559280539126, + "learning_rate": 5.304430635495856e-06, + "loss": 0.5447900891304016, + "step": 4603 + }, + { + "epoch": 1.3462494516742214, + "grad_norm": 1.507503116151542, + "learning_rate": 5.30016261096579e-06, + "loss": 0.4425917863845825, + "step": 4604 + }, + { + "epoch": 1.3465418920894867, + "grad_norm": 1.508411296889156, + "learning_rate": 5.295895684994137e-06, + "loss": 0.4411497712135315, + "step": 4605 + }, + { + "epoch": 1.346834332504752, + "grad_norm": 1.537668383754579, + "learning_rate": 5.291629858578271e-06, + "loss": 0.5577414631843567, + "step": 4606 + }, + { + "epoch": 1.3471267729200176, + "grad_norm": 1.7128549715372505, + "learning_rate": 5.287365132715293e-06, + "loss": 0.4754186272621155, + "step": 4607 + }, + { + "epoch": 1.347419213335283, + "grad_norm": 1.6521724702121328, + "learning_rate": 5.283101508402063e-06, + "loss": 0.5582431554794312, + "step": 4608 + }, + { + "epoch": 1.3477116537505482, + "grad_norm": 1.7476811492664892, + "learning_rate": 5.2788389866351755e-06, + "loss": 0.5552654266357422, + "step": 4609 + }, + { + "epoch": 1.3480040941658138, + "grad_norm": 1.8662632335270106, + "learning_rate": 5.2745775684109705e-06, + "loss": 
0.5776556730270386, + "step": 4610 + }, + { + "epoch": 1.3482965345810791, + "grad_norm": 1.7735552141557176, + "learning_rate": 5.270317254725528e-06, + "loss": 0.5859286785125732, + "step": 4611 + }, + { + "epoch": 1.3485889749963444, + "grad_norm": 1.5182169678473143, + "learning_rate": 5.2660580465746694e-06, + "loss": 0.5914887189865112, + "step": 4612 + }, + { + "epoch": 1.34888141541161, + "grad_norm": 1.6371325039607922, + "learning_rate": 5.261799944953956e-06, + "loss": 0.43669426441192627, + "step": 4613 + }, + { + "epoch": 1.3491738558268753, + "grad_norm": 1.718792113074269, + "learning_rate": 5.2575429508587e-06, + "loss": 0.473773717880249, + "step": 4614 + }, + { + "epoch": 1.3494662962421407, + "grad_norm": 1.7451807781202082, + "learning_rate": 5.253287065283949e-06, + "loss": 0.5011228919029236, + "step": 4615 + }, + { + "epoch": 1.349758736657406, + "grad_norm": 1.6598931266775088, + "learning_rate": 5.249032289224483e-06, + "loss": 0.5839254856109619, + "step": 4616 + }, + { + "epoch": 1.3500511770726713, + "grad_norm": 1.7262514320572941, + "learning_rate": 5.244778623674831e-06, + "loss": 0.5375077128410339, + "step": 4617 + }, + { + "epoch": 1.3503436174879369, + "grad_norm": 1.4572654878782452, + "learning_rate": 5.240526069629265e-06, + "loss": 0.49445679783821106, + "step": 4618 + }, + { + "epoch": 1.3506360579032022, + "grad_norm": 1.5263979209526246, + "learning_rate": 5.236274628081792e-06, + "loss": 0.5369694828987122, + "step": 4619 + }, + { + "epoch": 1.3509284983184675, + "grad_norm": 1.8018674546255473, + "learning_rate": 5.23202430002616e-06, + "loss": 0.6017554402351379, + "step": 4620 + }, + { + "epoch": 1.351220938733733, + "grad_norm": 1.9428924144840352, + "learning_rate": 5.227775086455859e-06, + "loss": 0.5380403995513916, + "step": 4621 + }, + { + "epoch": 1.3515133791489984, + "grad_norm": 1.6665289001084298, + "learning_rate": 5.223526988364116e-06, + "loss": 0.5650593042373657, + "step": 4622 + }, + { + "epoch": 
1.3518058195642637, + "grad_norm": 1.5672489406384107, + "learning_rate": 5.219280006743897e-06, + "loss": 0.5572884678840637, + "step": 4623 + }, + { + "epoch": 1.3520982599795293, + "grad_norm": 1.839257774768153, + "learning_rate": 5.21503414258791e-06, + "loss": 0.5304458141326904, + "step": 4624 + }, + { + "epoch": 1.3523907003947946, + "grad_norm": 1.8264084905380675, + "learning_rate": 5.2107893968886005e-06, + "loss": 0.6702588796615601, + "step": 4625 + }, + { + "epoch": 1.35268314081006, + "grad_norm": 1.5301776431109881, + "learning_rate": 5.206545770638152e-06, + "loss": 0.4607279300689697, + "step": 4626 + }, + { + "epoch": 1.3529755812253255, + "grad_norm": 1.4702386368708713, + "learning_rate": 5.202303264828482e-06, + "loss": 0.5759040713310242, + "step": 4627 + }, + { + "epoch": 1.3532680216405908, + "grad_norm": 1.6340224609334149, + "learning_rate": 5.198061880451253e-06, + "loss": 0.446469783782959, + "step": 4628 + }, + { + "epoch": 1.3535604620558561, + "grad_norm": 1.6416831158378962, + "learning_rate": 5.193821618497864e-06, + "loss": 0.4869040846824646, + "step": 4629 + }, + { + "epoch": 1.3538529024711214, + "grad_norm": 1.59588454548975, + "learning_rate": 5.189582479959449e-06, + "loss": 0.5153477191925049, + "step": 4630 + }, + { + "epoch": 1.3541453428863868, + "grad_norm": 1.6964185114911852, + "learning_rate": 5.185344465826883e-06, + "loss": 0.4958652853965759, + "step": 4631 + }, + { + "epoch": 1.3544377833016523, + "grad_norm": 1.544404184800908, + "learning_rate": 5.1811075770907715e-06, + "loss": 0.5314347743988037, + "step": 4632 + }, + { + "epoch": 1.3547302237169176, + "grad_norm": 1.6488125019330604, + "learning_rate": 5.176871814741466e-06, + "loss": 0.5366088151931763, + "step": 4633 + }, + { + "epoch": 1.355022664132183, + "grad_norm": 1.7011582339400138, + "learning_rate": 5.172637179769049e-06, + "loss": 0.6239185929298401, + "step": 4634 + }, + { + "epoch": 1.3553151045474485, + "grad_norm": 1.8789833552926098, + 
"learning_rate": 5.168403673163341e-06, + "loss": 0.5516507625579834, + "step": 4635 + }, + { + "epoch": 1.3556075449627139, + "grad_norm": 1.6420696506744512, + "learning_rate": 5.164171295913898e-06, + "loss": 0.5859683156013489, + "step": 4636 + }, + { + "epoch": 1.3558999853779792, + "grad_norm": 1.6138084463921514, + "learning_rate": 5.159940049010015e-06, + "loss": 0.5913225412368774, + "step": 4637 + }, + { + "epoch": 1.3561924257932447, + "grad_norm": 1.690951404825549, + "learning_rate": 5.155709933440714e-06, + "loss": 0.650983989238739, + "step": 4638 + }, + { + "epoch": 1.35648486620851, + "grad_norm": 1.7360324268029201, + "learning_rate": 5.151480950194762e-06, + "loss": 0.5631625652313232, + "step": 4639 + }, + { + "epoch": 1.3567773066237754, + "grad_norm": 1.9305214623229574, + "learning_rate": 5.147253100260659e-06, + "loss": 0.48153650760650635, + "step": 4640 + }, + { + "epoch": 1.357069747039041, + "grad_norm": 1.382159174171422, + "learning_rate": 5.143026384626637e-06, + "loss": 0.43598422408103943, + "step": 4641 + }, + { + "epoch": 1.3573621874543063, + "grad_norm": 1.5586949144187017, + "learning_rate": 5.138800804280668e-06, + "loss": 0.5323987007141113, + "step": 4642 + }, + { + "epoch": 1.3576546278695716, + "grad_norm": 1.739858834969472, + "learning_rate": 5.134576360210454e-06, + "loss": 0.5386587977409363, + "step": 4643 + }, + { + "epoch": 1.357947068284837, + "grad_norm": 1.7229356194902612, + "learning_rate": 5.130353053403434e-06, + "loss": 0.4913867115974426, + "step": 4644 + }, + { + "epoch": 1.3582395087001022, + "grad_norm": 2.681042611993396, + "learning_rate": 5.12613088484678e-06, + "loss": 0.6516048908233643, + "step": 4645 + }, + { + "epoch": 1.3585319491153678, + "grad_norm": 1.7863407962771196, + "learning_rate": 5.121909855527398e-06, + "loss": 0.5290599465370178, + "step": 4646 + }, + { + "epoch": 1.3588243895306331, + "grad_norm": 1.992281323100596, + "learning_rate": 5.117689966431927e-06, + "loss": 
0.7909928560256958, + "step": 4647 + }, + { + "epoch": 1.3591168299458984, + "grad_norm": 1.7798386890797042, + "learning_rate": 5.113471218546746e-06, + "loss": 0.4751276969909668, + "step": 4648 + }, + { + "epoch": 1.359409270361164, + "grad_norm": 1.3934486662021524, + "learning_rate": 5.109253612857954e-06, + "loss": 0.4542301893234253, + "step": 4649 + }, + { + "epoch": 1.3597017107764293, + "grad_norm": 1.6724566490890436, + "learning_rate": 5.105037150351393e-06, + "loss": 0.5355349779129028, + "step": 4650 + }, + { + "epoch": 1.3599941511916946, + "grad_norm": 1.7131391763754547, + "learning_rate": 5.100821832012637e-06, + "loss": 0.4994719326496124, + "step": 4651 + }, + { + "epoch": 1.3602865916069602, + "grad_norm": 1.7061763475820229, + "learning_rate": 5.096607658826989e-06, + "loss": 0.6171674728393555, + "step": 4652 + }, + { + "epoch": 1.3605790320222255, + "grad_norm": 1.6851325839422124, + "learning_rate": 5.092394631779487e-06, + "loss": 0.5386878252029419, + "step": 4653 + }, + { + "epoch": 1.3608714724374908, + "grad_norm": 1.4863597978488459, + "learning_rate": 5.088182751854903e-06, + "loss": 0.4495810270309448, + "step": 4654 + }, + { + "epoch": 1.3611639128527562, + "grad_norm": 1.560829764762291, + "learning_rate": 5.083972020037735e-06, + "loss": 0.5540642142295837, + "step": 4655 + }, + { + "epoch": 1.3614563532680215, + "grad_norm": 1.7743988570673719, + "learning_rate": 5.079762437312219e-06, + "loss": 0.6020554900169373, + "step": 4656 + }, + { + "epoch": 1.361748793683287, + "grad_norm": 1.5410143370370128, + "learning_rate": 5.075554004662316e-06, + "loss": 0.47981250286102295, + "step": 4657 + }, + { + "epoch": 1.3620412340985524, + "grad_norm": 1.6809006565320033, + "learning_rate": 5.071346723071724e-06, + "loss": 0.6206443905830383, + "step": 4658 + }, + { + "epoch": 1.3623336745138177, + "grad_norm": 1.2946163710464256, + "learning_rate": 5.067140593523869e-06, + "loss": 0.46899446845054626, + "step": 4659 + }, + { + "epoch": 
1.3626261149290833, + "grad_norm": 1.3692435027739418, + "learning_rate": 5.062935617001912e-06, + "loss": 0.5695985555648804, + "step": 4660 + }, + { + "epoch": 1.3629185553443486, + "grad_norm": 1.5567765237338644, + "learning_rate": 5.058731794488732e-06, + "loss": 0.5524671077728271, + "step": 4661 + }, + { + "epoch": 1.363210995759614, + "grad_norm": 1.5953543121744755, + "learning_rate": 5.054529126966953e-06, + "loss": 0.4655245244503021, + "step": 4662 + }, + { + "epoch": 1.3635034361748795, + "grad_norm": 1.6197588686677031, + "learning_rate": 5.050327615418921e-06, + "loss": 0.5617693662643433, + "step": 4663 + }, + { + "epoch": 1.3637958765901448, + "grad_norm": 1.515126796303483, + "learning_rate": 5.046127260826714e-06, + "loss": 0.52044677734375, + "step": 4664 + }, + { + "epoch": 1.3640883170054101, + "grad_norm": 1.6797173356320934, + "learning_rate": 5.041928064172139e-06, + "loss": 0.4567520022392273, + "step": 4665 + }, + { + "epoch": 1.3643807574206757, + "grad_norm": 1.5794296901996336, + "learning_rate": 5.037730026436736e-06, + "loss": 0.5942729711532593, + "step": 4666 + }, + { + "epoch": 1.364673197835941, + "grad_norm": 1.6501244665537385, + "learning_rate": 5.033533148601766e-06, + "loss": 0.3824811279773712, + "step": 4667 + }, + { + "epoch": 1.3649656382512063, + "grad_norm": 1.4770402468740385, + "learning_rate": 5.029337431648227e-06, + "loss": 0.4710771441459656, + "step": 4668 + }, + { + "epoch": 1.3652580786664716, + "grad_norm": 1.5059979846835174, + "learning_rate": 5.02514287655684e-06, + "loss": 0.6617978811264038, + "step": 4669 + }, + { + "epoch": 1.365550519081737, + "grad_norm": 1.5829629132621983, + "learning_rate": 5.020949484308058e-06, + "loss": 0.5237355828285217, + "step": 4670 + }, + { + "epoch": 1.3658429594970025, + "grad_norm": 1.4158253094169178, + "learning_rate": 5.016757255882065e-06, + "loss": 0.4544803500175476, + "step": 4671 + }, + { + "epoch": 1.3661353999122678, + "grad_norm": 1.8761810485620272, + 
"learning_rate": 5.012566192258763e-06, + "loss": 0.5854490399360657, + "step": 4672 + }, + { + "epoch": 1.3664278403275332, + "grad_norm": 1.902502544434852, + "learning_rate": 5.008376294417787e-06, + "loss": 0.6275635361671448, + "step": 4673 + }, + { + "epoch": 1.3667202807427987, + "grad_norm": 1.6133596882151136, + "learning_rate": 5.004187563338504e-06, + "loss": 0.5160082578659058, + "step": 4674 + }, + { + "epoch": 1.367012721158064, + "grad_norm": 1.439845673979846, + "learning_rate": 5.000000000000003e-06, + "loss": 0.5203640460968018, + "step": 4675 + }, + { + "epoch": 1.3673051615733294, + "grad_norm": 2.025079516078861, + "learning_rate": 4.9958136053811e-06, + "loss": 0.6836066246032715, + "step": 4676 + }, + { + "epoch": 1.367597601988595, + "grad_norm": 1.5727820508513324, + "learning_rate": 4.991628380460343e-06, + "loss": 0.5566641092300415, + "step": 4677 + }, + { + "epoch": 1.3678900424038603, + "grad_norm": 1.643119627925769, + "learning_rate": 4.9874443262159984e-06, + "loss": 0.5618000030517578, + "step": 4678 + }, + { + "epoch": 1.3681824828191256, + "grad_norm": 1.4054605482949574, + "learning_rate": 4.983261443626068e-06, + "loss": 0.4605063796043396, + "step": 4679 + }, + { + "epoch": 1.3684749232343911, + "grad_norm": 1.7557732951775291, + "learning_rate": 4.97907973366827e-06, + "loss": 0.48282021284103394, + "step": 4680 + }, + { + "epoch": 1.3687673636496565, + "grad_norm": 1.467194830130128, + "learning_rate": 4.974899197320059e-06, + "loss": 0.42356133460998535, + "step": 4681 + }, + { + "epoch": 1.3690598040649218, + "grad_norm": 1.3266470239270218, + "learning_rate": 4.97071983555861e-06, + "loss": 0.459377646446228, + "step": 4682 + }, + { + "epoch": 1.369352244480187, + "grad_norm": 1.9278413810039654, + "learning_rate": 4.966541649360819e-06, + "loss": 0.5539775490760803, + "step": 4683 + }, + { + "epoch": 1.3696446848954524, + "grad_norm": 1.7014699336581571, + "learning_rate": 4.962364639703311e-06, + "loss": 
0.5593239068984985, + "step": 4684 + }, + { + "epoch": 1.369937125310718, + "grad_norm": 1.8333805174527635, + "learning_rate": 4.958188807562441e-06, + "loss": 0.5425251722335815, + "step": 4685 + }, + { + "epoch": 1.3702295657259833, + "grad_norm": 1.564182289934299, + "learning_rate": 4.954014153914282e-06, + "loss": 0.5183289051055908, + "step": 4686 + }, + { + "epoch": 1.3705220061412486, + "grad_norm": 1.6834251116472225, + "learning_rate": 4.9498406797346345e-06, + "loss": 0.5278980731964111, + "step": 4687 + }, + { + "epoch": 1.3708144465565142, + "grad_norm": 1.6861784833580373, + "learning_rate": 4.9456683859990185e-06, + "loss": 0.4857858419418335, + "step": 4688 + }, + { + "epoch": 1.3711068869717795, + "grad_norm": 1.4955733852507764, + "learning_rate": 4.94149727368269e-06, + "loss": 0.4889591336250305, + "step": 4689 + }, + { + "epoch": 1.3713993273870448, + "grad_norm": 2.1119376280699105, + "learning_rate": 4.937327343760617e-06, + "loss": 0.5475220680236816, + "step": 4690 + }, + { + "epoch": 1.3716917678023104, + "grad_norm": 1.8065068083746048, + "learning_rate": 4.933158597207501e-06, + "loss": 0.5794380903244019, + "step": 4691 + }, + { + "epoch": 1.3719842082175757, + "grad_norm": 1.5916906211687458, + "learning_rate": 4.928991034997752e-06, + "loss": 0.42212024331092834, + "step": 4692 + }, + { + "epoch": 1.372276648632841, + "grad_norm": 1.8447627986814241, + "learning_rate": 4.924824658105516e-06, + "loss": 0.6091631054878235, + "step": 4693 + }, + { + "epoch": 1.3725690890481064, + "grad_norm": 1.8839419484958528, + "learning_rate": 4.9206594675046595e-06, + "loss": 0.544279158115387, + "step": 4694 + }, + { + "epoch": 1.3728615294633717, + "grad_norm": 1.4361678658463186, + "learning_rate": 4.916495464168768e-06, + "loss": 0.46237099170684814, + "step": 4695 + }, + { + "epoch": 1.3731539698786372, + "grad_norm": 1.5990237040506552, + "learning_rate": 4.912332649071154e-06, + "loss": 0.5615352392196655, + "step": 4696 + }, + { + "epoch": 
1.3734464102939026, + "grad_norm": 1.7554295249178744, + "learning_rate": 4.90817102318485e-06, + "loss": 0.5552200078964233, + "step": 4697 + }, + { + "epoch": 1.373738850709168, + "grad_norm": 1.798510214490848, + "learning_rate": 4.904010587482612e-06, + "loss": 0.5466557741165161, + "step": 4698 + }, + { + "epoch": 1.3740312911244335, + "grad_norm": 1.8536275815794498, + "learning_rate": 4.8998513429369135e-06, + "loss": 0.6131544709205627, + "step": 4699 + }, + { + "epoch": 1.3743237315396988, + "grad_norm": 1.7671899353023186, + "learning_rate": 4.895693290519954e-06, + "loss": 0.5264796018600464, + "step": 4700 + }, + { + "epoch": 1.374616171954964, + "grad_norm": 1.6582809024037055, + "learning_rate": 4.891536431203653e-06, + "loss": 0.5179097652435303, + "step": 4701 + }, + { + "epoch": 1.3749086123702297, + "grad_norm": 1.7203915102871608, + "learning_rate": 4.887380765959655e-06, + "loss": 0.46007782220840454, + "step": 4702 + }, + { + "epoch": 1.375201052785495, + "grad_norm": 1.3949646851760964, + "learning_rate": 4.8832262957593145e-06, + "loss": 0.48182815313339233, + "step": 4703 + }, + { + "epoch": 1.3754934932007603, + "grad_norm": 1.6488295590740498, + "learning_rate": 4.879073021573717e-06, + "loss": 0.5334529280662537, + "step": 4704 + }, + { + "epoch": 1.3757859336160259, + "grad_norm": 1.824410831192183, + "learning_rate": 4.874920944373665e-06, + "loss": 0.5984899997711182, + "step": 4705 + }, + { + "epoch": 1.3760783740312912, + "grad_norm": 1.633539262172952, + "learning_rate": 4.870770065129681e-06, + "loss": 0.46676474809646606, + "step": 4706 + }, + { + "epoch": 1.3763708144465565, + "grad_norm": 1.6766360321424407, + "learning_rate": 4.866620384812008e-06, + "loss": 0.4608241617679596, + "step": 4707 + }, + { + "epoch": 1.3766632548618218, + "grad_norm": 1.6783484732888503, + "learning_rate": 4.862471904390609e-06, + "loss": 0.5877207517623901, + "step": 4708 + }, + { + "epoch": 1.3769556952770872, + "grad_norm": 1.9194747868225221, + 
"learning_rate": 4.858324624835164e-06, + "loss": 0.5243252515792847, + "step": 4709 + }, + { + "epoch": 1.3772481356923527, + "grad_norm": 1.7326979192308607, + "learning_rate": 4.854178547115078e-06, + "loss": 0.528606653213501, + "step": 4710 + }, + { + "epoch": 1.377540576107618, + "grad_norm": 1.761919042167513, + "learning_rate": 4.850033672199469e-06, + "loss": 0.46468549966812134, + "step": 4711 + }, + { + "epoch": 1.3778330165228834, + "grad_norm": 1.5919653348557072, + "learning_rate": 4.8458900010571765e-06, + "loss": 0.5368300676345825, + "step": 4712 + }, + { + "epoch": 1.378125456938149, + "grad_norm": 1.6462148743894651, + "learning_rate": 4.8417475346567635e-06, + "loss": 0.5156906843185425, + "step": 4713 + }, + { + "epoch": 1.3784178973534142, + "grad_norm": 1.718628393460986, + "learning_rate": 4.837606273966496e-06, + "loss": 0.5899196863174438, + "step": 4714 + }, + { + "epoch": 1.3787103377686796, + "grad_norm": 1.6725614455419595, + "learning_rate": 4.833466219954376e-06, + "loss": 0.5820844769477844, + "step": 4715 + }, + { + "epoch": 1.3790027781839451, + "grad_norm": 1.5883271974734077, + "learning_rate": 4.829327373588113e-06, + "loss": 0.4926246404647827, + "step": 4716 + }, + { + "epoch": 1.3792952185992104, + "grad_norm": 1.5404696535835014, + "learning_rate": 4.825189735835138e-06, + "loss": 0.5417006611824036, + "step": 4717 + }, + { + "epoch": 1.3795876590144758, + "grad_norm": 1.5296186550545692, + "learning_rate": 4.821053307662599e-06, + "loss": 0.4130229949951172, + "step": 4718 + }, + { + "epoch": 1.3798800994297413, + "grad_norm": 1.279729123751172, + "learning_rate": 4.8169180900373615e-06, + "loss": 0.4553627371788025, + "step": 4719 + }, + { + "epoch": 1.3801725398450067, + "grad_norm": 1.3535233614920503, + "learning_rate": 4.812784083926005e-06, + "loss": 0.523567259311676, + "step": 4720 + }, + { + "epoch": 1.380464980260272, + "grad_norm": 1.585136917164004, + "learning_rate": 4.808651290294832e-06, + "loss": 
0.4643239378929138, + "step": 4721 + }, + { + "epoch": 1.3807574206755373, + "grad_norm": 1.4443352165881056, + "learning_rate": 4.804519710109856e-06, + "loss": 0.4631537199020386, + "step": 4722 + }, + { + "epoch": 1.3810498610908026, + "grad_norm": 1.9168786498716517, + "learning_rate": 4.8003893443368075e-06, + "loss": 0.5304736495018005, + "step": 4723 + }, + { + "epoch": 1.3813423015060682, + "grad_norm": 1.7679231174871453, + "learning_rate": 4.79626019394114e-06, + "loss": 0.4357796907424927, + "step": 4724 + }, + { + "epoch": 1.3816347419213335, + "grad_norm": 1.9313439900637919, + "learning_rate": 4.7921322598880095e-06, + "loss": 0.6693407297134399, + "step": 4725 + }, + { + "epoch": 1.3819271823365988, + "grad_norm": 1.614277655310262, + "learning_rate": 4.788005543142299e-06, + "loss": 0.5333320498466492, + "step": 4726 + }, + { + "epoch": 1.3822196227518644, + "grad_norm": 1.900002017358812, + "learning_rate": 4.783880044668603e-06, + "loss": 0.5782167911529541, + "step": 4727 + }, + { + "epoch": 1.3825120631671297, + "grad_norm": 1.8216810622231216, + "learning_rate": 4.779755765431231e-06, + "loss": 0.581318199634552, + "step": 4728 + }, + { + "epoch": 1.382804503582395, + "grad_norm": 1.6899321824779212, + "learning_rate": 4.775632706394211e-06, + "loss": 0.5812945365905762, + "step": 4729 + }, + { + "epoch": 1.3830969439976606, + "grad_norm": 1.7981132988330288, + "learning_rate": 4.771510868521279e-06, + "loss": 0.460615873336792, + "step": 4730 + }, + { + "epoch": 1.383389384412926, + "grad_norm": 1.8316112888726737, + "learning_rate": 4.767390252775894e-06, + "loss": 0.5934186577796936, + "step": 4731 + }, + { + "epoch": 1.3836818248281912, + "grad_norm": 1.6355522234245776, + "learning_rate": 4.763270860121222e-06, + "loss": 0.4928584098815918, + "step": 4732 + }, + { + "epoch": 1.3839742652434566, + "grad_norm": 1.6231538800234695, + "learning_rate": 4.759152691520146e-06, + "loss": 0.505489706993103, + "step": 4733 + }, + { + "epoch": 
1.3842667056587221, + "grad_norm": 1.5771553081820557, + "learning_rate": 4.755035747935264e-06, + "loss": 0.5679354667663574, + "step": 4734 + }, + { + "epoch": 1.3845591460739874, + "grad_norm": 1.7096467723863036, + "learning_rate": 4.750920030328889e-06, + "loss": 0.5744746923446655, + "step": 4735 + }, + { + "epoch": 1.3848515864892528, + "grad_norm": 1.6483531613381477, + "learning_rate": 4.7468055396630395e-06, + "loss": 0.4953685402870178, + "step": 4736 + }, + { + "epoch": 1.385144026904518, + "grad_norm": 1.8803927120396235, + "learning_rate": 4.742692276899454e-06, + "loss": 0.6083461046218872, + "step": 4737 + }, + { + "epoch": 1.3854364673197836, + "grad_norm": 1.5633925902592396, + "learning_rate": 4.738580242999584e-06, + "loss": 0.4980735778808594, + "step": 4738 + }, + { + "epoch": 1.385728907735049, + "grad_norm": 1.4499409145464446, + "learning_rate": 4.734469438924594e-06, + "loss": 0.46363019943237305, + "step": 4739 + }, + { + "epoch": 1.3860213481503143, + "grad_norm": 1.818813219831182, + "learning_rate": 4.730359865635355e-06, + "loss": 0.5946298837661743, + "step": 4740 + }, + { + "epoch": 1.3863137885655799, + "grad_norm": 1.6327330611392554, + "learning_rate": 4.726251524092459e-06, + "loss": 0.5630123615264893, + "step": 4741 + }, + { + "epoch": 1.3866062289808452, + "grad_norm": 1.5382056004014089, + "learning_rate": 4.7221444152562045e-06, + "loss": 0.5353481769561768, + "step": 4742 + }, + { + "epoch": 1.3868986693961105, + "grad_norm": 1.7585652476725264, + "learning_rate": 4.718038540086602e-06, + "loss": 0.5170711874961853, + "step": 4743 + }, + { + "epoch": 1.387191109811376, + "grad_norm": 1.8043747351160766, + "learning_rate": 4.713933899543377e-06, + "loss": 0.600492000579834, + "step": 4744 + }, + { + "epoch": 1.3874835502266414, + "grad_norm": 1.5446435468278237, + "learning_rate": 4.709830494585962e-06, + "loss": 0.5291938781738281, + "step": 4745 + }, + { + "epoch": 1.3877759906419067, + "grad_norm": 1.658022225410227, + 
"learning_rate": 4.7057283261735055e-06, + "loss": 0.5664317011833191, + "step": 4746 + }, + { + "epoch": 1.388068431057172, + "grad_norm": 1.8477945736694077, + "learning_rate": 4.701627395264866e-06, + "loss": 0.606655478477478, + "step": 4747 + }, + { + "epoch": 1.3883608714724374, + "grad_norm": 1.5930247770190467, + "learning_rate": 4.697527702818604e-06, + "loss": 0.6160893440246582, + "step": 4748 + }, + { + "epoch": 1.388653311887703, + "grad_norm": 1.510283707012234, + "learning_rate": 4.693429249793002e-06, + "loss": 0.45944249629974365, + "step": 4749 + }, + { + "epoch": 1.3889457523029682, + "grad_norm": 1.7369442621234958, + "learning_rate": 4.689332037146049e-06, + "loss": 0.5737302303314209, + "step": 4750 + }, + { + "epoch": 1.3892381927182336, + "grad_norm": 1.7885159565933124, + "learning_rate": 4.685236065835443e-06, + "loss": 0.4075150787830353, + "step": 4751 + }, + { + "epoch": 1.3895306331334991, + "grad_norm": 1.7699683741602097, + "learning_rate": 4.681141336818592e-06, + "loss": 0.5832744836807251, + "step": 4752 + }, + { + "epoch": 1.3898230735487644, + "grad_norm": 1.6617741591328279, + "learning_rate": 4.6770478510526155e-06, + "loss": 0.5444560647010803, + "step": 4753 + }, + { + "epoch": 1.3901155139640298, + "grad_norm": 1.5343212819990357, + "learning_rate": 4.672955609494339e-06, + "loss": 0.6087433695793152, + "step": 4754 + }, + { + "epoch": 1.3904079543792953, + "grad_norm": 1.3783003966189016, + "learning_rate": 4.6688646131002995e-06, + "loss": 0.3781468868255615, + "step": 4755 + }, + { + "epoch": 1.3907003947945606, + "grad_norm": 2.0008130334792953, + "learning_rate": 4.664774862826742e-06, + "loss": 0.43719804286956787, + "step": 4756 + }, + { + "epoch": 1.390992835209826, + "grad_norm": 1.7926138812382992, + "learning_rate": 4.660686359629623e-06, + "loss": 0.550011932849884, + "step": 4757 + }, + { + "epoch": 1.3912852756250915, + "grad_norm": 1.670816081047031, + "learning_rate": 4.656599104464607e-06, + "loss": 
0.6060909032821655, + "step": 4758 + }, + { + "epoch": 1.3915777160403568, + "grad_norm": 1.727898538684726, + "learning_rate": 4.652513098287058e-06, + "loss": 0.5169791579246521, + "step": 4759 + }, + { + "epoch": 1.3918701564556222, + "grad_norm": 1.667801698839589, + "learning_rate": 4.6484283420520594e-06, + "loss": 0.43063026666641235, + "step": 4760 + }, + { + "epoch": 1.3921625968708875, + "grad_norm": 1.6770983664766483, + "learning_rate": 4.644344836714397e-06, + "loss": 0.5426993370056152, + "step": 4761 + }, + { + "epoch": 1.3924550372861528, + "grad_norm": 1.7220159777866155, + "learning_rate": 4.6402625832285665e-06, + "loss": 0.5260995030403137, + "step": 4762 + }, + { + "epoch": 1.3927474777014184, + "grad_norm": 1.791130103339175, + "learning_rate": 4.63618158254877e-06, + "loss": 0.5206680297851562, + "step": 4763 + }, + { + "epoch": 1.3930399181166837, + "grad_norm": 1.8800757395074672, + "learning_rate": 4.632101835628912e-06, + "loss": 0.5250430703163147, + "step": 4764 + }, + { + "epoch": 1.393332358531949, + "grad_norm": 1.5663601185417966, + "learning_rate": 4.628023343422616e-06, + "loss": 0.5409445762634277, + "step": 4765 + }, + { + "epoch": 1.3936247989472146, + "grad_norm": 1.6199099812994435, + "learning_rate": 4.6239461068832056e-06, + "loss": 0.4676284193992615, + "step": 4766 + }, + { + "epoch": 1.39391723936248, + "grad_norm": 1.6644750420264167, + "learning_rate": 4.6198701269637014e-06, + "loss": 0.6019079089164734, + "step": 4767 + }, + { + "epoch": 1.3942096797777452, + "grad_norm": 1.6721679687151758, + "learning_rate": 4.615795404616844e-06, + "loss": 0.5434615612030029, + "step": 4768 + }, + { + "epoch": 1.3945021201930108, + "grad_norm": 1.8615818009836036, + "learning_rate": 4.611721940795074e-06, + "loss": 0.5817157030105591, + "step": 4769 + }, + { + "epoch": 1.3947945606082761, + "grad_norm": 1.7318982025014367, + "learning_rate": 4.607649736450539e-06, + "loss": 0.5601100921630859, + "step": 4770 + }, + { + "epoch": 
1.3950870010235414, + "grad_norm": 1.8105361405271991, + "learning_rate": 4.6035787925350915e-06, + "loss": 0.5955039262771606, + "step": 4771 + }, + { + "epoch": 1.3953794414388068, + "grad_norm": 1.735716832820506, + "learning_rate": 4.5995091100002905e-06, + "loss": 0.47491732239723206, + "step": 4772 + }, + { + "epoch": 1.3956718818540723, + "grad_norm": 1.7916635810918338, + "learning_rate": 4.595440689797402e-06, + "loss": 0.5451281070709229, + "step": 4773 + }, + { + "epoch": 1.3959643222693376, + "grad_norm": 1.5652511418689858, + "learning_rate": 4.591373532877389e-06, + "loss": 0.3973035514354706, + "step": 4774 + }, + { + "epoch": 1.396256762684603, + "grad_norm": 1.6712606601404056, + "learning_rate": 4.587307640190929e-06, + "loss": 0.604694128036499, + "step": 4775 + }, + { + "epoch": 1.3965492030998683, + "grad_norm": 1.3684363761943823, + "learning_rate": 4.583243012688397e-06, + "loss": 0.4120032489299774, + "step": 4776 + }, + { + "epoch": 1.3968416435151338, + "grad_norm": 1.5200379644064634, + "learning_rate": 4.579179651319878e-06, + "loss": 0.4864089787006378, + "step": 4777 + }, + { + "epoch": 1.3971340839303992, + "grad_norm": 1.7660999886821023, + "learning_rate": 4.57511755703516e-06, + "loss": 0.5774982571601868, + "step": 4778 + }, + { + "epoch": 1.3974265243456645, + "grad_norm": 1.7243096372475708, + "learning_rate": 4.571056730783725e-06, + "loss": 0.48220688104629517, + "step": 4779 + }, + { + "epoch": 1.39771896476093, + "grad_norm": 1.4235878512993427, + "learning_rate": 4.566997173514771e-06, + "loss": 0.4636304974555969, + "step": 4780 + }, + { + "epoch": 1.3980114051761954, + "grad_norm": 1.3469561341500977, + "learning_rate": 4.562938886177194e-06, + "loss": 0.500522792339325, + "step": 4781 + }, + { + "epoch": 1.3983038455914607, + "grad_norm": 1.8391525606302594, + "learning_rate": 4.558881869719595e-06, + "loss": 0.5322657823562622, + "step": 4782 + }, + { + "epoch": 1.3985962860067263, + "grad_norm": 1.8673725266705359, + 
"learning_rate": 4.554826125090276e-06, + "loss": 0.5013759136199951, + "step": 4783 + }, + { + "epoch": 1.3988887264219916, + "grad_norm": 1.5888002392216285, + "learning_rate": 4.550771653237242e-06, + "loss": 0.4261836111545563, + "step": 4784 + }, + { + "epoch": 1.399181166837257, + "grad_norm": 1.6811392186782483, + "learning_rate": 4.546718455108205e-06, + "loss": 0.6181522607803345, + "step": 4785 + }, + { + "epoch": 1.3994736072525222, + "grad_norm": 1.7420663714537028, + "learning_rate": 4.54266653165057e-06, + "loss": 0.6267478466033936, + "step": 4786 + }, + { + "epoch": 1.3997660476677876, + "grad_norm": 1.841391700351839, + "learning_rate": 4.5386158838114535e-06, + "loss": 0.5382452607154846, + "step": 4787 + }, + { + "epoch": 1.400058488083053, + "grad_norm": 1.5361116059310378, + "learning_rate": 4.534566512537668e-06, + "loss": 0.5973625183105469, + "step": 4788 + }, + { + "epoch": 1.4003509284983184, + "grad_norm": 1.7115299901221885, + "learning_rate": 4.530518418775734e-06, + "loss": 0.57401442527771, + "step": 4789 + }, + { + "epoch": 1.4006433689135838, + "grad_norm": 1.7539136213830773, + "learning_rate": 4.52647160347186e-06, + "loss": 0.5712965726852417, + "step": 4790 + }, + { + "epoch": 1.4009358093288493, + "grad_norm": 1.7324506482257287, + "learning_rate": 4.52242606757197e-06, + "loss": 0.5678268671035767, + "step": 4791 + }, + { + "epoch": 1.4012282497441146, + "grad_norm": 1.8696367540913243, + "learning_rate": 4.518381812021682e-06, + "loss": 0.4798399806022644, + "step": 4792 + }, + { + "epoch": 1.40152069015938, + "grad_norm": 1.570253187142898, + "learning_rate": 4.514338837766317e-06, + "loss": 0.48918360471725464, + "step": 4793 + }, + { + "epoch": 1.4018131305746455, + "grad_norm": 1.4711408699123494, + "learning_rate": 4.510297145750894e-06, + "loss": 0.47836846113204956, + "step": 4794 + }, + { + "epoch": 1.4021055709899108, + "grad_norm": 1.6409652265079098, + "learning_rate": 4.506256736920136e-06, + "loss": 
0.4956067204475403, + "step": 4795 + }, + { + "epoch": 1.4023980114051762, + "grad_norm": 1.6571409914414528, + "learning_rate": 4.502217612218463e-06, + "loss": 0.39146924018859863, + "step": 4796 + }, + { + "epoch": 1.4026904518204417, + "grad_norm": 1.6190957574837974, + "learning_rate": 4.498179772589998e-06, + "loss": 0.46657800674438477, + "step": 4797 + }, + { + "epoch": 1.402982892235707, + "grad_norm": 1.5760103505209448, + "learning_rate": 4.4941432189785574e-06, + "loss": 0.4949738383293152, + "step": 4798 + }, + { + "epoch": 1.4032753326509724, + "grad_norm": 1.882895838026707, + "learning_rate": 4.490107952327663e-06, + "loss": 0.5256912708282471, + "step": 4799 + }, + { + "epoch": 1.4035677730662377, + "grad_norm": 1.7128737744359326, + "learning_rate": 4.486073973580539e-06, + "loss": 0.38139551877975464, + "step": 4800 + }, + { + "epoch": 1.403860213481503, + "grad_norm": 1.8140605273544137, + "learning_rate": 4.482041283680095e-06, + "loss": 0.5014597177505493, + "step": 4801 + }, + { + "epoch": 1.4041526538967686, + "grad_norm": 1.8595922924331247, + "learning_rate": 4.478009883568951e-06, + "loss": 0.5497276186943054, + "step": 4802 + }, + { + "epoch": 1.404445094312034, + "grad_norm": 2.0532585085438524, + "learning_rate": 4.473979774189422e-06, + "loss": 0.6098340749740601, + "step": 4803 + }, + { + "epoch": 1.4047375347272992, + "grad_norm": 1.7416135071315817, + "learning_rate": 4.469950956483522e-06, + "loss": 0.40206801891326904, + "step": 4804 + }, + { + "epoch": 1.4050299751425648, + "grad_norm": 1.5567497019384768, + "learning_rate": 4.465923431392962e-06, + "loss": 0.5362050533294678, + "step": 4805 + }, + { + "epoch": 1.40532241555783, + "grad_norm": 1.6896555289921489, + "learning_rate": 4.461897199859153e-06, + "loss": 0.5688962936401367, + "step": 4806 + }, + { + "epoch": 1.4056148559730954, + "grad_norm": 2.0519988466480723, + "learning_rate": 4.457872262823202e-06, + "loss": 0.5270779132843018, + "step": 4807 + }, + { + "epoch": 
1.405907296388361, + "grad_norm": 1.9613398978608871, + "learning_rate": 4.453848621225913e-06, + "loss": 0.5656974911689758, + "step": 4808 + }, + { + "epoch": 1.4061997368036263, + "grad_norm": 1.517853308784437, + "learning_rate": 4.449826276007786e-06, + "loss": 0.44072896242141724, + "step": 4809 + }, + { + "epoch": 1.4064921772188916, + "grad_norm": 1.642033723460973, + "learning_rate": 4.445805228109022e-06, + "loss": 0.5851765871047974, + "step": 4810 + }, + { + "epoch": 1.406784617634157, + "grad_norm": 1.71031586004946, + "learning_rate": 4.441785478469519e-06, + "loss": 0.6174030303955078, + "step": 4811 + }, + { + "epoch": 1.4070770580494225, + "grad_norm": 1.5609662983326855, + "learning_rate": 4.437767028028863e-06, + "loss": 0.542346715927124, + "step": 4812 + }, + { + "epoch": 1.4073694984646878, + "grad_norm": 1.855237193625426, + "learning_rate": 4.433749877726345e-06, + "loss": 0.4964073598384857, + "step": 4813 + }, + { + "epoch": 1.4076619388799532, + "grad_norm": 1.798693836443108, + "learning_rate": 4.429734028500951e-06, + "loss": 0.5309566259384155, + "step": 4814 + }, + { + "epoch": 1.4079543792952185, + "grad_norm": 1.7569401782763947, + "learning_rate": 4.425719481291359e-06, + "loss": 0.5799233913421631, + "step": 4815 + }, + { + "epoch": 1.408246819710484, + "grad_norm": 1.6640340310451727, + "learning_rate": 4.4217062370359456e-06, + "loss": 0.37344229221343994, + "step": 4816 + }, + { + "epoch": 1.4085392601257494, + "grad_norm": 1.9633336456325348, + "learning_rate": 4.417694296672783e-06, + "loss": 0.5752555727958679, + "step": 4817 + }, + { + "epoch": 1.4088317005410147, + "grad_norm": 1.8625982582112681, + "learning_rate": 4.413683661139638e-06, + "loss": 0.61701500415802, + "step": 4818 + }, + { + "epoch": 1.4091241409562802, + "grad_norm": 1.6641617857653193, + "learning_rate": 4.409674331373972e-06, + "loss": 0.4163259267807007, + "step": 4819 + }, + { + "epoch": 1.4094165813715456, + "grad_norm": 1.4025408210631873, + 
"learning_rate": 4.40566630831294e-06, + "loss": 0.46583253145217896, + "step": 4820 + }, + { + "epoch": 1.409709021786811, + "grad_norm": 1.739036857290848, + "learning_rate": 4.401659592893396e-06, + "loss": 0.5230617523193359, + "step": 4821 + }, + { + "epoch": 1.4100014622020764, + "grad_norm": 1.7435910389535008, + "learning_rate": 4.397654186051887e-06, + "loss": 0.6351375579833984, + "step": 4822 + }, + { + "epoch": 1.4102939026173418, + "grad_norm": 1.6526547277716674, + "learning_rate": 4.3936500887246445e-06, + "loss": 0.5895766615867615, + "step": 4823 + }, + { + "epoch": 1.410586343032607, + "grad_norm": 1.7357556256264726, + "learning_rate": 4.389647301847607e-06, + "loss": 0.49772539734840393, + "step": 4824 + }, + { + "epoch": 1.4108787834478724, + "grad_norm": 1.6867136550948763, + "learning_rate": 4.385645826356402e-06, + "loss": 0.593197226524353, + "step": 4825 + }, + { + "epoch": 1.4111712238631378, + "grad_norm": 1.497358571958903, + "learning_rate": 4.381645663186348e-06, + "loss": 0.4971385598182678, + "step": 4826 + }, + { + "epoch": 1.4114636642784033, + "grad_norm": 1.772016135609381, + "learning_rate": 4.3776468132724605e-06, + "loss": 0.5452263951301575, + "step": 4827 + }, + { + "epoch": 1.4117561046936686, + "grad_norm": 1.9896815505139207, + "learning_rate": 4.373649277549446e-06, + "loss": 0.6085976362228394, + "step": 4828 + }, + { + "epoch": 1.412048545108934, + "grad_norm": 1.4346670326917912, + "learning_rate": 4.369653056951705e-06, + "loss": 0.5594700574874878, + "step": 4829 + }, + { + "epoch": 1.4123409855241995, + "grad_norm": 1.6570477364640872, + "learning_rate": 4.365658152413328e-06, + "loss": 0.5099719166755676, + "step": 4830 + }, + { + "epoch": 1.4126334259394648, + "grad_norm": 1.557110878077197, + "learning_rate": 4.3616645648681e-06, + "loss": 0.5683532953262329, + "step": 4831 + }, + { + "epoch": 1.4129258663547302, + "grad_norm": 1.9307182018155977, + "learning_rate": 4.3576722952495e-06, + "loss": 
0.5311406850814819, + "step": 4832 + }, + { + "epoch": 1.4132183067699957, + "grad_norm": 1.6214149336480879, + "learning_rate": 4.353681344490693e-06, + "loss": 0.5299100875854492, + "step": 4833 + }, + { + "epoch": 1.413510747185261, + "grad_norm": 1.6883675181677418, + "learning_rate": 4.349691713524546e-06, + "loss": 0.5531362891197205, + "step": 4834 + }, + { + "epoch": 1.4138031876005264, + "grad_norm": 1.7469666557337236, + "learning_rate": 4.345703403283603e-06, + "loss": 0.5315259099006653, + "step": 4835 + }, + { + "epoch": 1.414095628015792, + "grad_norm": 2.0019997249517645, + "learning_rate": 4.341716414700112e-06, + "loss": 0.583083987236023, + "step": 4836 + }, + { + "epoch": 1.4143880684310572, + "grad_norm": 1.680867008867613, + "learning_rate": 4.337730748706005e-06, + "loss": 0.5273857116699219, + "step": 4837 + }, + { + "epoch": 1.4146805088463226, + "grad_norm": 1.6688598484210682, + "learning_rate": 4.333746406232908e-06, + "loss": 0.4903373718261719, + "step": 4838 + }, + { + "epoch": 1.414972949261588, + "grad_norm": 1.4926269811940354, + "learning_rate": 4.329763388212134e-06, + "loss": 0.5807479619979858, + "step": 4839 + }, + { + "epoch": 1.4152653896768532, + "grad_norm": 1.6552276273685866, + "learning_rate": 4.325781695574695e-06, + "loss": 0.5613743662834167, + "step": 4840 + }, + { + "epoch": 1.4155578300921188, + "grad_norm": 1.6028157865716284, + "learning_rate": 4.321801329251286e-06, + "loss": 0.5801016092300415, + "step": 4841 + }, + { + "epoch": 1.415850270507384, + "grad_norm": 1.6267997915866552, + "learning_rate": 4.3178222901722956e-06, + "loss": 0.6412584781646729, + "step": 4842 + }, + { + "epoch": 1.4161427109226494, + "grad_norm": 1.7251596479619187, + "learning_rate": 4.313844579267793e-06, + "loss": 0.5687737464904785, + "step": 4843 + }, + { + "epoch": 1.416435151337915, + "grad_norm": 1.6343964176323358, + "learning_rate": 4.309868197467548e-06, + "loss": 0.5668497085571289, + "step": 4844 + }, + { + "epoch": 
1.4167275917531803, + "grad_norm": 1.811368112437045, + "learning_rate": 4.305893145701015e-06, + "loss": 0.5814717411994934, + "step": 4845 + }, + { + "epoch": 1.4170200321684456, + "grad_norm": 1.9246707148702022, + "learning_rate": 4.301919424897339e-06, + "loss": 0.5974467992782593, + "step": 4846 + }, + { + "epoch": 1.4173124725837112, + "grad_norm": 1.5643373795961777, + "learning_rate": 4.297947035985351e-06, + "loss": 0.48333030939102173, + "step": 4847 + }, + { + "epoch": 1.4176049129989765, + "grad_norm": 1.7102352976297683, + "learning_rate": 4.293975979893576e-06, + "loss": 0.5851039886474609, + "step": 4848 + }, + { + "epoch": 1.4178973534142418, + "grad_norm": 1.4778659468844006, + "learning_rate": 4.290006257550221e-06, + "loss": 0.5510480403900146, + "step": 4849 + }, + { + "epoch": 1.4181897938295072, + "grad_norm": 1.6670833236483533, + "learning_rate": 4.286037869883187e-06, + "loss": 0.6053529977798462, + "step": 4850 + }, + { + "epoch": 1.4184822342447727, + "grad_norm": 1.5745047113214952, + "learning_rate": 4.282070817820059e-06, + "loss": 0.471671462059021, + "step": 4851 + }, + { + "epoch": 1.418774674660038, + "grad_norm": 1.6834167266574704, + "learning_rate": 4.278105102288113e-06, + "loss": 0.4864043593406677, + "step": 4852 + }, + { + "epoch": 1.4190671150753034, + "grad_norm": 1.7275065448049989, + "learning_rate": 4.274140724214311e-06, + "loss": 0.6283255815505981, + "step": 4853 + }, + { + "epoch": 1.4193595554905687, + "grad_norm": 1.7634272907173199, + "learning_rate": 4.270177684525299e-06, + "loss": 0.4990651607513428, + "step": 4854 + }, + { + "epoch": 1.4196519959058342, + "grad_norm": 1.6718595783894241, + "learning_rate": 4.2662159841474145e-06, + "loss": 0.6053239703178406, + "step": 4855 + }, + { + "epoch": 1.4199444363210996, + "grad_norm": 1.541217587678611, + "learning_rate": 4.262255624006683e-06, + "loss": 0.45790988206863403, + "step": 4856 + }, + { + "epoch": 1.420236876736365, + "grad_norm": 1.5408074963828202, + 
"learning_rate": 4.2582966050288125e-06, + "loss": 0.49944519996643066, + "step": 4857 + }, + { + "epoch": 1.4205293171516304, + "grad_norm": 1.7145691587216874, + "learning_rate": 4.2543389281392e-06, + "loss": 0.5365482568740845, + "step": 4858 + }, + { + "epoch": 1.4208217575668958, + "grad_norm": 1.709871732141181, + "learning_rate": 4.2503825942629285e-06, + "loss": 0.7763599157333374, + "step": 4859 + }, + { + "epoch": 1.421114197982161, + "grad_norm": 1.6376653647841246, + "learning_rate": 4.246427604324768e-06, + "loss": 0.6125203371047974, + "step": 4860 + }, + { + "epoch": 1.4214066383974266, + "grad_norm": 1.8190946758346407, + "learning_rate": 4.242473959249172e-06, + "loss": 0.6634939312934875, + "step": 4861 + }, + { + "epoch": 1.421699078812692, + "grad_norm": 1.607723662080485, + "learning_rate": 4.238521659960283e-06, + "loss": 0.5117735862731934, + "step": 4862 + }, + { + "epoch": 1.4219915192279573, + "grad_norm": 1.6860730867984624, + "learning_rate": 4.234570707381925e-06, + "loss": 0.5700962543487549, + "step": 4863 + }, + { + "epoch": 1.4222839596432226, + "grad_norm": 1.5634193566609638, + "learning_rate": 4.23062110243761e-06, + "loss": 0.5443791151046753, + "step": 4864 + }, + { + "epoch": 1.422576400058488, + "grad_norm": 1.4504951290152908, + "learning_rate": 4.226672846050538e-06, + "loss": 0.5474614500999451, + "step": 4865 + }, + { + "epoch": 1.4228688404737535, + "grad_norm": 1.9578528314343135, + "learning_rate": 4.222725939143582e-06, + "loss": 0.5938940048217773, + "step": 4866 + }, + { + "epoch": 1.4231612808890188, + "grad_norm": 1.720980371359197, + "learning_rate": 4.21878038263931e-06, + "loss": 0.5010229349136353, + "step": 4867 + }, + { + "epoch": 1.4234537213042842, + "grad_norm": 1.8142108741121714, + "learning_rate": 4.214836177459975e-06, + "loss": 0.5186876058578491, + "step": 4868 + }, + { + "epoch": 1.4237461617195497, + "grad_norm": 1.6608706852165134, + "learning_rate": 4.210893324527507e-06, + "loss": 
0.5998060703277588, + "step": 4869 + }, + { + "epoch": 1.424038602134815, + "grad_norm": 1.9807145100005583, + "learning_rate": 4.206951824763528e-06, + "loss": 0.5127147436141968, + "step": 4870 + }, + { + "epoch": 1.4243310425500804, + "grad_norm": 1.4194980170815183, + "learning_rate": 4.203011679089336e-06, + "loss": 0.5134439468383789, + "step": 4871 + }, + { + "epoch": 1.424623482965346, + "grad_norm": 1.728900083762804, + "learning_rate": 4.199072888425919e-06, + "loss": 0.6244111657142639, + "step": 4872 + }, + { + "epoch": 1.4249159233806112, + "grad_norm": 1.6442803911967188, + "learning_rate": 4.195135453693944e-06, + "loss": 0.4431127905845642, + "step": 4873 + }, + { + "epoch": 1.4252083637958766, + "grad_norm": 1.7030697753848931, + "learning_rate": 4.191199375813761e-06, + "loss": 0.6479794979095459, + "step": 4874 + }, + { + "epoch": 1.4255008042111421, + "grad_norm": 2.04011086867295, + "learning_rate": 4.187264655705407e-06, + "loss": 0.6386070847511292, + "step": 4875 + }, + { + "epoch": 1.4257932446264074, + "grad_norm": 1.6039579455905961, + "learning_rate": 4.183331294288603e-06, + "loss": 0.5201597213745117, + "step": 4876 + }, + { + "epoch": 1.4260856850416728, + "grad_norm": 1.7232164566002766, + "learning_rate": 4.179399292482737e-06, + "loss": 0.46355581283569336, + "step": 4877 + }, + { + "epoch": 1.426378125456938, + "grad_norm": 2.2615584884797975, + "learning_rate": 4.175468651206898e-06, + "loss": 0.5360985398292542, + "step": 4878 + }, + { + "epoch": 1.4266705658722034, + "grad_norm": 1.552480099700309, + "learning_rate": 4.171539371379847e-06, + "loss": 0.5545670390129089, + "step": 4879 + }, + { + "epoch": 1.426963006287469, + "grad_norm": 1.4276797255790008, + "learning_rate": 4.167611453920031e-06, + "loss": 0.445978581905365, + "step": 4880 + }, + { + "epoch": 1.4272554467027343, + "grad_norm": 1.7199888948749738, + "learning_rate": 4.163684899745576e-06, + "loss": 0.5242947340011597, + "step": 4881 + }, + { + "epoch": 
1.4275478871179996, + "grad_norm": 1.7383193525416518, + "learning_rate": 4.15975970977429e-06, + "loss": 0.5544728636741638, + "step": 4882 + }, + { + "epoch": 1.4278403275332652, + "grad_norm": 2.073499174067984, + "learning_rate": 4.1558358849236626e-06, + "loss": 0.5400837063789368, + "step": 4883 + }, + { + "epoch": 1.4281327679485305, + "grad_norm": 1.6385411261569034, + "learning_rate": 4.151913426110864e-06, + "loss": 0.5201395153999329, + "step": 4884 + }, + { + "epoch": 1.4284252083637958, + "grad_norm": 1.7888379069815619, + "learning_rate": 4.147992334252745e-06, + "loss": 0.4414210319519043, + "step": 4885 + }, + { + "epoch": 1.4287176487790614, + "grad_norm": 1.7818076981346203, + "learning_rate": 4.144072610265838e-06, + "loss": 0.6590272188186646, + "step": 4886 + }, + { + "epoch": 1.4290100891943267, + "grad_norm": 1.4800084296243576, + "learning_rate": 4.140154255066356e-06, + "loss": 0.4734429717063904, + "step": 4887 + }, + { + "epoch": 1.429302529609592, + "grad_norm": 1.5398179955798732, + "learning_rate": 4.136237269570186e-06, + "loss": 0.45204073190689087, + "step": 4888 + }, + { + "epoch": 1.4295949700248574, + "grad_norm": 1.6199970278575915, + "learning_rate": 4.132321654692901e-06, + "loss": 0.6570174694061279, + "step": 4889 + }, + { + "epoch": 1.429887410440123, + "grad_norm": 1.7926483421459931, + "learning_rate": 4.128407411349754e-06, + "loss": 0.5159077644348145, + "step": 4890 + }, + { + "epoch": 1.4301798508553882, + "grad_norm": 1.603963849008659, + "learning_rate": 4.124494540455674e-06, + "loss": 0.5778994560241699, + "step": 4891 + }, + { + "epoch": 1.4304722912706536, + "grad_norm": 1.4954754441376699, + "learning_rate": 4.120583042925273e-06, + "loss": 0.4740722179412842, + "step": 4892 + }, + { + "epoch": 1.430764731685919, + "grad_norm": 1.4416066465695618, + "learning_rate": 4.116672919672837e-06, + "loss": 0.5561014413833618, + "step": 4893 + }, + { + "epoch": 1.4310571721011844, + "grad_norm": 1.5040800316270475, + 
"learning_rate": 4.112764171612335e-06, + "loss": 0.4834856688976288, + "step": 4894 + }, + { + "epoch": 1.4313496125164498, + "grad_norm": 1.691313354112802, + "learning_rate": 4.108856799657412e-06, + "loss": 0.5565547943115234, + "step": 4895 + }, + { + "epoch": 1.431642052931715, + "grad_norm": 1.8883359305911547, + "learning_rate": 4.104950804721395e-06, + "loss": 0.5401065349578857, + "step": 4896 + }, + { + "epoch": 1.4319344933469806, + "grad_norm": 1.3793655379788223, + "learning_rate": 4.101046187717284e-06, + "loss": 0.4792686700820923, + "step": 4897 + }, + { + "epoch": 1.432226933762246, + "grad_norm": 1.5922549032476903, + "learning_rate": 4.097142949557764e-06, + "loss": 0.5255981683731079, + "step": 4898 + }, + { + "epoch": 1.4325193741775113, + "grad_norm": 1.614736024187036, + "learning_rate": 4.093241091155187e-06, + "loss": 0.5535293817520142, + "step": 4899 + }, + { + "epoch": 1.4328118145927768, + "grad_norm": 1.8976199736566215, + "learning_rate": 4.089340613421589e-06, + "loss": 0.5235373973846436, + "step": 4900 + }, + { + "epoch": 1.4331042550080422, + "grad_norm": 1.8120415147677507, + "learning_rate": 4.085441517268687e-06, + "loss": 0.5538134574890137, + "step": 4901 + }, + { + "epoch": 1.4333966954233075, + "grad_norm": 1.5442149105119904, + "learning_rate": 4.081543803607869e-06, + "loss": 0.5394395589828491, + "step": 4902 + }, + { + "epoch": 1.4336891358385728, + "grad_norm": 1.6068663887611208, + "learning_rate": 4.077647473350201e-06, + "loss": 0.522742509841919, + "step": 4903 + }, + { + "epoch": 1.4339815762538382, + "grad_norm": 1.6377229499845016, + "learning_rate": 4.073752527406429e-06, + "loss": 0.559830367565155, + "step": 4904 + }, + { + "epoch": 1.4342740166691037, + "grad_norm": 1.7578675965544384, + "learning_rate": 4.069858966686971e-06, + "loss": 0.42535799741744995, + "step": 4905 + }, + { + "epoch": 1.434566457084369, + "grad_norm": 1.7745987719575682, + "learning_rate": 4.065966792101924e-06, + "loss": 
0.6075177192687988, + "step": 4906 + }, + { + "epoch": 1.4348588974996344, + "grad_norm": 1.7444570198074862, + "learning_rate": 4.06207600456106e-06, + "loss": 0.5010570883750916, + "step": 4907 + }, + { + "epoch": 1.4351513379149, + "grad_norm": 1.621587467371749, + "learning_rate": 4.058186604973826e-06, + "loss": 0.571307897567749, + "step": 4908 + }, + { + "epoch": 1.4354437783301652, + "grad_norm": 1.643170818508206, + "learning_rate": 4.0542985942493505e-06, + "loss": 0.4918866455554962, + "step": 4909 + }, + { + "epoch": 1.4357362187454306, + "grad_norm": 1.8933520643034856, + "learning_rate": 4.050411973296425e-06, + "loss": 0.6588176488876343, + "step": 4910 + }, + { + "epoch": 1.436028659160696, + "grad_norm": 1.9180926902562168, + "learning_rate": 4.046526743023526e-06, + "loss": 0.7341527938842773, + "step": 4911 + }, + { + "epoch": 1.4363210995759614, + "grad_norm": 1.7782521784505012, + "learning_rate": 4.042642904338801e-06, + "loss": 0.5233849287033081, + "step": 4912 + }, + { + "epoch": 1.4366135399912268, + "grad_norm": 1.6182742405882007, + "learning_rate": 4.038760458150079e-06, + "loss": 0.5144373178482056, + "step": 4913 + }, + { + "epoch": 1.4369059804064923, + "grad_norm": 1.55901993468911, + "learning_rate": 4.034879405364853e-06, + "loss": 0.4520954489707947, + "step": 4914 + }, + { + "epoch": 1.4371984208217576, + "grad_norm": 1.6208081934978835, + "learning_rate": 4.030999746890295e-06, + "loss": 0.5632743835449219, + "step": 4915 + }, + { + "epoch": 1.437490861237023, + "grad_norm": 1.5950473237167822, + "learning_rate": 4.027121483633257e-06, + "loss": 0.49681180715560913, + "step": 4916 + }, + { + "epoch": 1.4377833016522883, + "grad_norm": 1.684721295445507, + "learning_rate": 4.023244616500257e-06, + "loss": 0.5182398557662964, + "step": 4917 + }, + { + "epoch": 1.4380757420675536, + "grad_norm": 1.6044294787301046, + "learning_rate": 4.019369146397493e-06, + "loss": 0.5686701536178589, + "step": 4918 + }, + { + "epoch": 
1.4383681824828192, + "grad_norm": 1.682926006912085, + "learning_rate": 4.015495074230823e-06, + "loss": 0.5668520927429199, + "step": 4919 + }, + { + "epoch": 1.4386606228980845, + "grad_norm": 1.556828511748538, + "learning_rate": 4.011622400905794e-06, + "loss": 0.4511116147041321, + "step": 4920 + }, + { + "epoch": 1.4389530633133498, + "grad_norm": 1.677757503686359, + "learning_rate": 4.007751127327618e-06, + "loss": 0.4736326336860657, + "step": 4921 + }, + { + "epoch": 1.4392455037286154, + "grad_norm": 1.68287466179835, + "learning_rate": 4.003881254401183e-06, + "loss": 0.5705248117446899, + "step": 4922 + }, + { + "epoch": 1.4395379441438807, + "grad_norm": 1.4732853876066263, + "learning_rate": 4.000012783031047e-06, + "loss": 0.45527490973472595, + "step": 4923 + }, + { + "epoch": 1.439830384559146, + "grad_norm": 1.5504418192282816, + "learning_rate": 3.996145714121444e-06, + "loss": 0.4926735758781433, + "step": 4924 + }, + { + "epoch": 1.4401228249744116, + "grad_norm": 1.523617382800049, + "learning_rate": 3.992280048576276e-06, + "loss": 0.42700374126434326, + "step": 4925 + }, + { + "epoch": 1.440415265389677, + "grad_norm": 1.6783270187790582, + "learning_rate": 3.988415787299118e-06, + "loss": 0.5833145976066589, + "step": 4926 + }, + { + "epoch": 1.4407077058049422, + "grad_norm": 1.70461399954195, + "learning_rate": 3.98455293119322e-06, + "loss": 0.5290282964706421, + "step": 4927 + }, + { + "epoch": 1.4410001462202076, + "grad_norm": 1.9146871710495363, + "learning_rate": 3.9806914811614984e-06, + "loss": 0.4489266872406006, + "step": 4928 + }, + { + "epoch": 1.441292586635473, + "grad_norm": 1.9109717939773812, + "learning_rate": 3.97683143810655e-06, + "loss": 0.5630865097045898, + "step": 4929 + }, + { + "epoch": 1.4415850270507384, + "grad_norm": 1.6030492821452516, + "learning_rate": 3.972972802930627e-06, + "loss": 0.5962105989456177, + "step": 4930 + }, + { + "epoch": 1.4418774674660038, + "grad_norm": 1.789368844700869, + 
"learning_rate": 3.9691155765356674e-06, + "loss": 0.6059410572052002, + "step": 4931 + }, + { + "epoch": 1.442169907881269, + "grad_norm": 1.6894490985884645, + "learning_rate": 3.965259759823272e-06, + "loss": 0.5476605296134949, + "step": 4932 + }, + { + "epoch": 1.4424623482965346, + "grad_norm": 1.7561171676767597, + "learning_rate": 3.961405353694716e-06, + "loss": 0.70278000831604, + "step": 4933 + }, + { + "epoch": 1.4427547887118, + "grad_norm": 1.6884311650773163, + "learning_rate": 3.9575523590509445e-06, + "loss": 0.5838963389396667, + "step": 4934 + }, + { + "epoch": 1.4430472291270653, + "grad_norm": 1.536536052995308, + "learning_rate": 3.95370077679257e-06, + "loss": 0.508273720741272, + "step": 4935 + }, + { + "epoch": 1.4433396695423308, + "grad_norm": 1.4692622152510404, + "learning_rate": 3.949850607819876e-06, + "loss": 0.5053583383560181, + "step": 4936 + }, + { + "epoch": 1.4436321099575962, + "grad_norm": 1.5754477318406401, + "learning_rate": 3.946001853032818e-06, + "loss": 0.5729954242706299, + "step": 4937 + }, + { + "epoch": 1.4439245503728615, + "grad_norm": 1.833619886253515, + "learning_rate": 3.942154513331018e-06, + "loss": 0.5261870622634888, + "step": 4938 + }, + { + "epoch": 1.444216990788127, + "grad_norm": 1.3956467871190747, + "learning_rate": 3.9383085896137675e-06, + "loss": 0.34802311658859253, + "step": 4939 + }, + { + "epoch": 1.4445094312033924, + "grad_norm": 1.8896307306874633, + "learning_rate": 3.934464082780032e-06, + "loss": 0.48302024602890015, + "step": 4940 + }, + { + "epoch": 1.4448018716186577, + "grad_norm": 1.8507631130251807, + "learning_rate": 3.930620993728434e-06, + "loss": 0.6649061441421509, + "step": 4941 + }, + { + "epoch": 1.445094312033923, + "grad_norm": 1.705526500334542, + "learning_rate": 3.926779323357278e-06, + "loss": 0.5945848822593689, + "step": 4942 + }, + { + "epoch": 1.4453867524491884, + "grad_norm": 1.5476382055190478, + "learning_rate": 3.922939072564528e-06, + "loss": 
0.4783032536506653, + "step": 4943 + }, + { + "epoch": 1.445679192864454, + "grad_norm": 1.6453487782833462, + "learning_rate": 3.919100242247821e-06, + "loss": 0.4619516134262085, + "step": 4944 + }, + { + "epoch": 1.4459716332797192, + "grad_norm": 1.5327149597771257, + "learning_rate": 3.915262833304461e-06, + "loss": 0.5652358531951904, + "step": 4945 + }, + { + "epoch": 1.4462640736949846, + "grad_norm": 1.4734419470243802, + "learning_rate": 3.911426846631416e-06, + "loss": 0.4523610472679138, + "step": 4946 + }, + { + "epoch": 1.44655651411025, + "grad_norm": 1.5670101583017915, + "learning_rate": 3.9075922831253276e-06, + "loss": 0.4914482831954956, + "step": 4947 + }, + { + "epoch": 1.4468489545255154, + "grad_norm": 1.7113071980283088, + "learning_rate": 3.9037591436825005e-06, + "loss": 0.4060005247592926, + "step": 4948 + }, + { + "epoch": 1.4471413949407808, + "grad_norm": 1.9320743237560347, + "learning_rate": 3.899927429198908e-06, + "loss": 0.49987125396728516, + "step": 4949 + }, + { + "epoch": 1.4474338353560463, + "grad_norm": 2.0596677045202036, + "learning_rate": 3.896097140570189e-06, + "loss": 0.6205358505249023, + "step": 4950 + }, + { + "epoch": 1.4477262757713116, + "grad_norm": 1.7670476784744638, + "learning_rate": 3.892268278691651e-06, + "loss": 0.5302955508232117, + "step": 4951 + }, + { + "epoch": 1.448018716186577, + "grad_norm": 1.7962585212488547, + "learning_rate": 3.888440844458272e-06, + "loss": 0.5225962400436401, + "step": 4952 + }, + { + "epoch": 1.4483111566018425, + "grad_norm": 1.8247561425410785, + "learning_rate": 3.884614838764682e-06, + "loss": 0.5030089616775513, + "step": 4953 + }, + { + "epoch": 1.4486035970171078, + "grad_norm": 1.8999355010605985, + "learning_rate": 3.880790262505192e-06, + "loss": 0.6060030460357666, + "step": 4954 + }, + { + "epoch": 1.4488960374323732, + "grad_norm": 1.8229751812699673, + "learning_rate": 3.8769671165737725e-06, + "loss": 0.5244846343994141, + "step": 4955 + }, + { + "epoch": 
1.4491884778476385, + "grad_norm": 1.4616444667042836, + "learning_rate": 3.873145401864061e-06, + "loss": 0.46979671716690063, + "step": 4956 + }, + { + "epoch": 1.4494809182629038, + "grad_norm": 1.8452052569073554, + "learning_rate": 3.8693251192693596e-06, + "loss": 0.5201131105422974, + "step": 4957 + }, + { + "epoch": 1.4497733586781694, + "grad_norm": 1.679443447217904, + "learning_rate": 3.865506269682638e-06, + "loss": 0.5124838352203369, + "step": 4958 + }, + { + "epoch": 1.4500657990934347, + "grad_norm": 1.830132365627518, + "learning_rate": 3.861688853996525e-06, + "loss": 0.5613473653793335, + "step": 4959 + }, + { + "epoch": 1.4503582395087, + "grad_norm": 1.5976816836472583, + "learning_rate": 3.857872873103322e-06, + "loss": 0.46196621656417847, + "step": 4960 + }, + { + "epoch": 1.4506506799239656, + "grad_norm": 1.9393165963504067, + "learning_rate": 3.8540583278949905e-06, + "loss": 0.6427509784698486, + "step": 4961 + }, + { + "epoch": 1.450943120339231, + "grad_norm": 1.7485862700938968, + "learning_rate": 3.850245219263157e-06, + "loss": 0.6306381821632385, + "step": 4962 + }, + { + "epoch": 1.4512355607544962, + "grad_norm": 1.5645194602237047, + "learning_rate": 3.846433548099114e-06, + "loss": 0.46638673543930054, + "step": 4963 + }, + { + "epoch": 1.4515280011697618, + "grad_norm": 1.5360842567610604, + "learning_rate": 3.842623315293814e-06, + "loss": 0.4950143098831177, + "step": 4964 + }, + { + "epoch": 1.451820441585027, + "grad_norm": 1.5810107141405056, + "learning_rate": 3.838814521737875e-06, + "loss": 0.45698249340057373, + "step": 4965 + }, + { + "epoch": 1.4521128820002924, + "grad_norm": 1.6457012436395508, + "learning_rate": 3.8350071683215814e-06, + "loss": 0.6068260669708252, + "step": 4966 + }, + { + "epoch": 1.4524053224155578, + "grad_norm": 1.8188775401166803, + "learning_rate": 3.831201255934879e-06, + "loss": 0.5264104008674622, + "step": 4967 + }, + { + "epoch": 1.4526977628308233, + "grad_norm": 1.6372667669239498, 
+ "learning_rate": 3.827396785467375e-06, + "loss": 0.5198315978050232, + "step": 4968 + }, + { + "epoch": 1.4529902032460886, + "grad_norm": 1.6294906688066837, + "learning_rate": 3.823593757808342e-06, + "loss": 0.504194438457489, + "step": 4969 + }, + { + "epoch": 1.453282643661354, + "grad_norm": 1.6016674444230832, + "learning_rate": 3.819792173846717e-06, + "loss": 0.5018986463546753, + "step": 4970 + }, + { + "epoch": 1.4535750840766193, + "grad_norm": 1.6893120935929504, + "learning_rate": 3.8159920344710936e-06, + "loss": 0.4847358465194702, + "step": 4971 + }, + { + "epoch": 1.4538675244918848, + "grad_norm": 1.6703483014148515, + "learning_rate": 3.812193340569733e-06, + "loss": 0.547623872756958, + "step": 4972 + }, + { + "epoch": 1.4541599649071502, + "grad_norm": 2.1389235560975615, + "learning_rate": 3.8083960930305562e-06, + "loss": 0.534354031085968, + "step": 4973 + }, + { + "epoch": 1.4544524053224155, + "grad_norm": 1.788418032061747, + "learning_rate": 3.8046002927411506e-06, + "loss": 0.6123033165931702, + "step": 4974 + }, + { + "epoch": 1.454744845737681, + "grad_norm": 1.6087574153138633, + "learning_rate": 3.8008059405887553e-06, + "loss": 0.5222622752189636, + "step": 4975 + }, + { + "epoch": 1.4550372861529464, + "grad_norm": 1.684901707974216, + "learning_rate": 3.7970130374602785e-06, + "loss": 0.5568759441375732, + "step": 4976 + }, + { + "epoch": 1.4553297265682117, + "grad_norm": 1.7459991230210548, + "learning_rate": 3.7932215842422903e-06, + "loss": 0.5458661317825317, + "step": 4977 + }, + { + "epoch": 1.4556221669834772, + "grad_norm": 1.6216302867008319, + "learning_rate": 3.789431581821019e-06, + "loss": 0.48293566703796387, + "step": 4978 + }, + { + "epoch": 1.4559146073987426, + "grad_norm": 1.893470262052562, + "learning_rate": 3.7856430310823546e-06, + "loss": 0.647431492805481, + "step": 4979 + }, + { + "epoch": 1.456207047814008, + "grad_norm": 1.6735249045743477, + "learning_rate": 3.7818559329118475e-06, + "loss": 
0.48039543628692627, + "step": 4980 + }, + { + "epoch": 1.4564994882292732, + "grad_norm": 1.6704036620696165, + "learning_rate": 3.7780702881947084e-06, + "loss": 0.6705803871154785, + "step": 4981 + }, + { + "epoch": 1.4567919286445385, + "grad_norm": 1.7404901320645014, + "learning_rate": 3.7742860978158103e-06, + "loss": 0.564405083656311, + "step": 4982 + }, + { + "epoch": 1.457084369059804, + "grad_norm": 1.7081222209997355, + "learning_rate": 3.7705033626596844e-06, + "loss": 0.5208612084388733, + "step": 4983 + }, + { + "epoch": 1.4573768094750694, + "grad_norm": 1.909829427679328, + "learning_rate": 3.766722083610521e-06, + "loss": 0.6230732202529907, + "step": 4984 + }, + { + "epoch": 1.4576692498903348, + "grad_norm": 1.6601663066885601, + "learning_rate": 3.7629422615521747e-06, + "loss": 0.5741504430770874, + "step": 4985 + }, + { + "epoch": 1.4579616903056003, + "grad_norm": 1.584208244849031, + "learning_rate": 3.75916389736815e-06, + "loss": 0.5321571826934814, + "step": 4986 + }, + { + "epoch": 1.4582541307208656, + "grad_norm": 1.95685306597155, + "learning_rate": 3.7553869919416186e-06, + "loss": 0.6367009878158569, + "step": 4987 + }, + { + "epoch": 1.458546571136131, + "grad_norm": 1.5904913997392975, + "learning_rate": 3.75161154615541e-06, + "loss": 0.5736235976219177, + "step": 4988 + }, + { + "epoch": 1.4588390115513965, + "grad_norm": 2.0157501917439866, + "learning_rate": 3.7478375608920127e-06, + "loss": 0.5799358487129211, + "step": 4989 + }, + { + "epoch": 1.4591314519666618, + "grad_norm": 1.7515991790236536, + "learning_rate": 3.7440650370335675e-06, + "loss": 0.6065561771392822, + "step": 4990 + }, + { + "epoch": 1.4594238923819272, + "grad_norm": 1.4583944256149548, + "learning_rate": 3.740293975461886e-06, + "loss": 0.5182442665100098, + "step": 4991 + }, + { + "epoch": 1.4597163327971927, + "grad_norm": 1.6877116508095484, + "learning_rate": 3.736524377058429e-06, + "loss": 0.5065605640411377, + "step": 4992 + }, + { + "epoch": 
1.460008773212458, + "grad_norm": 1.5024812411134352, + "learning_rate": 3.7327562427043163e-06, + "loss": 0.44326460361480713, + "step": 4993 + }, + { + "epoch": 1.4603012136277234, + "grad_norm": 1.9166701258714811, + "learning_rate": 3.7289895732803306e-06, + "loss": 0.6192547082901001, + "step": 4994 + }, + { + "epoch": 1.4605936540429887, + "grad_norm": 1.794387571688338, + "learning_rate": 3.725224369666899e-06, + "loss": 0.5487738847732544, + "step": 4995 + }, + { + "epoch": 1.460886094458254, + "grad_norm": 1.922772286834415, + "learning_rate": 3.7214606327441203e-06, + "loss": 0.558982253074646, + "step": 4996 + }, + { + "epoch": 1.4611785348735196, + "grad_norm": 1.770836311904495, + "learning_rate": 3.717698363391744e-06, + "loss": 0.5277853012084961, + "step": 4997 + }, + { + "epoch": 1.461470975288785, + "grad_norm": 1.7748123557502546, + "learning_rate": 3.7139375624891795e-06, + "loss": 0.6561184525489807, + "step": 4998 + }, + { + "epoch": 1.4617634157040502, + "grad_norm": 1.5647900159041126, + "learning_rate": 3.710178230915489e-06, + "loss": 0.46555888652801514, + "step": 4999 + }, + { + "epoch": 1.4620558561193158, + "grad_norm": 1.7414970962586886, + "learning_rate": 3.706420369549394e-06, + "loss": 0.5808060765266418, + "step": 5000 + }, + { + "epoch": 1.462348296534581, + "grad_norm": 1.442227314234909, + "learning_rate": 3.7026639792692722e-06, + "loss": 0.5407893061637878, + "step": 5001 + }, + { + "epoch": 1.4626407369498464, + "grad_norm": 2.580423891920115, + "learning_rate": 3.6989090609531574e-06, + "loss": 0.538393497467041, + "step": 5002 + }, + { + "epoch": 1.462933177365112, + "grad_norm": 1.8751864874321293, + "learning_rate": 3.6951556154787373e-06, + "loss": 0.530704140663147, + "step": 5003 + }, + { + "epoch": 1.4632256177803773, + "grad_norm": 1.4470439364888814, + "learning_rate": 3.691403643723359e-06, + "loss": 0.43352627754211426, + "step": 5004 + }, + { + "epoch": 1.4635180581956426, + "grad_norm": 1.6573279039642985, + 
"learning_rate": 3.687653146564025e-06, + "loss": 0.6047205924987793, + "step": 5005 + }, + { + "epoch": 1.463810498610908, + "grad_norm": 1.6556697002732312, + "learning_rate": 3.6839041248773857e-06, + "loss": 0.44708865880966187, + "step": 5006 + }, + { + "epoch": 1.4641029390261735, + "grad_norm": 1.6445747944839355, + "learning_rate": 3.680156579539753e-06, + "loss": 0.5653451681137085, + "step": 5007 + }, + { + "epoch": 1.4643953794414388, + "grad_norm": 1.750839565103172, + "learning_rate": 3.6764105114270966e-06, + "loss": 0.49293750524520874, + "step": 5008 + }, + { + "epoch": 1.4646878198567042, + "grad_norm": 1.7691390827672615, + "learning_rate": 3.672665921415034e-06, + "loss": 0.5761851072311401, + "step": 5009 + }, + { + "epoch": 1.4649802602719695, + "grad_norm": 1.7025752756263197, + "learning_rate": 3.668922810378841e-06, + "loss": 0.5188437700271606, + "step": 5010 + }, + { + "epoch": 1.465272700687235, + "grad_norm": 1.7765263620108804, + "learning_rate": 3.6651811791934476e-06, + "loss": 0.5159400701522827, + "step": 5011 + }, + { + "epoch": 1.4655651411025004, + "grad_norm": 1.4463295265937102, + "learning_rate": 3.6614410287334377e-06, + "loss": 0.478866845369339, + "step": 5012 + }, + { + "epoch": 1.4658575815177657, + "grad_norm": 1.6006806590634375, + "learning_rate": 3.6577023598730486e-06, + "loss": 0.5509926080703735, + "step": 5013 + }, + { + "epoch": 1.4661500219330312, + "grad_norm": 1.5613591503777215, + "learning_rate": 3.6539651734861705e-06, + "loss": 0.4872981309890747, + "step": 5014 + }, + { + "epoch": 1.4664424623482966, + "grad_norm": 1.4569843282992687, + "learning_rate": 3.6502294704463493e-06, + "loss": 0.47478264570236206, + "step": 5015 + }, + { + "epoch": 1.4667349027635619, + "grad_norm": 1.765955621655722, + "learning_rate": 3.646495251626785e-06, + "loss": 0.5140335559844971, + "step": 5016 + }, + { + "epoch": 1.4670273431788274, + "grad_norm": 1.5785594027919339, + "learning_rate": 3.6427625179003223e-06, + "loss": 
0.41033172607421875, + "step": 5017 + }, + { + "epoch": 1.4673197835940928, + "grad_norm": 1.7731644033346952, + "learning_rate": 3.639031270139468e-06, + "loss": 0.4290558099746704, + "step": 5018 + }, + { + "epoch": 1.467612224009358, + "grad_norm": 1.8964888989060893, + "learning_rate": 3.635301509216379e-06, + "loss": 0.5903435349464417, + "step": 5019 + }, + { + "epoch": 1.4679046644246234, + "grad_norm": 1.7302589846174075, + "learning_rate": 3.6315732360028655e-06, + "loss": 0.6410748958587646, + "step": 5020 + }, + { + "epoch": 1.4681971048398887, + "grad_norm": 1.584781169707585, + "learning_rate": 3.6278464513703858e-06, + "loss": 0.5499910712242126, + "step": 5021 + }, + { + "epoch": 1.4684895452551543, + "grad_norm": 1.4876234400926511, + "learning_rate": 3.624121156190056e-06, + "loss": 0.4980154037475586, + "step": 5022 + }, + { + "epoch": 1.4687819856704196, + "grad_norm": 1.7622618315552074, + "learning_rate": 3.6203973513326395e-06, + "loss": 0.5910995006561279, + "step": 5023 + }, + { + "epoch": 1.469074426085685, + "grad_norm": 1.837302229581672, + "learning_rate": 3.6166750376685534e-06, + "loss": 0.6003058552742004, + "step": 5024 + }, + { + "epoch": 1.4693668665009505, + "grad_norm": 2.0086634437416215, + "learning_rate": 3.6129542160678655e-06, + "loss": 0.5655561685562134, + "step": 5025 + }, + { + "epoch": 1.4696593069162158, + "grad_norm": 1.6720399704395428, + "learning_rate": 3.609234887400297e-06, + "loss": 0.713152289390564, + "step": 5026 + }, + { + "epoch": 1.4699517473314812, + "grad_norm": 1.3619130802184511, + "learning_rate": 3.605517052535219e-06, + "loss": 0.41018784046173096, + "step": 5027 + }, + { + "epoch": 1.4702441877467467, + "grad_norm": 1.7429761856148576, + "learning_rate": 3.6018007123416486e-06, + "loss": 0.5852759480476379, + "step": 5028 + }, + { + "epoch": 1.470536628162012, + "grad_norm": 1.6763203292398523, + "learning_rate": 3.598085867688259e-06, + "loss": 0.5942279696464539, + "step": 5029 + }, + { + 
"epoch": 1.4708290685772774, + "grad_norm": 1.5957062749275768, + "learning_rate": 3.594372519443374e-06, + "loss": 0.6265639662742615, + "step": 5030 + }, + { + "epoch": 1.471121508992543, + "grad_norm": 1.6944518172910965, + "learning_rate": 3.5906606684749668e-06, + "loss": 0.4539163112640381, + "step": 5031 + }, + { + "epoch": 1.4714139494078082, + "grad_norm": 1.8810670575321342, + "learning_rate": 3.586950315650658e-06, + "loss": 0.5682815909385681, + "step": 5032 + }, + { + "epoch": 1.4717063898230736, + "grad_norm": 1.5382985580447415, + "learning_rate": 3.583241461837721e-06, + "loss": 0.5188582539558411, + "step": 5033 + }, + { + "epoch": 1.4719988302383389, + "grad_norm": 1.923705094705072, + "learning_rate": 3.5795341079030777e-06, + "loss": 0.501958966255188, + "step": 5034 + }, + { + "epoch": 1.4722912706536042, + "grad_norm": 1.769758245215022, + "learning_rate": 3.5758282547132995e-06, + "loss": 0.5748735666275024, + "step": 5035 + }, + { + "epoch": 1.4725837110688698, + "grad_norm": 1.720811530645175, + "learning_rate": 3.5721239031346067e-06, + "loss": 0.5796875357627869, + "step": 5036 + }, + { + "epoch": 1.472876151484135, + "grad_norm": 1.7760443740240528, + "learning_rate": 3.56842105403287e-06, + "loss": 0.457103431224823, + "step": 5037 + }, + { + "epoch": 1.4731685918994004, + "grad_norm": 1.607843165834991, + "learning_rate": 3.564719708273607e-06, + "loss": 0.5300487875938416, + "step": 5038 + }, + { + "epoch": 1.473461032314666, + "grad_norm": 1.7877129065541937, + "learning_rate": 3.5610198667219886e-06, + "loss": 0.48143619298934937, + "step": 5039 + }, + { + "epoch": 1.4737534727299313, + "grad_norm": 1.9171325817627416, + "learning_rate": 3.557321530242824e-06, + "loss": 0.5523685216903687, + "step": 5040 + }, + { + "epoch": 1.4740459131451966, + "grad_norm": 1.7367077785146405, + "learning_rate": 3.5536246997005785e-06, + "loss": 0.5820931196212769, + "step": 5041 + }, + { + "epoch": 1.4743383535604622, + "grad_norm": 
1.6717570524697325, + "learning_rate": 3.5499293759593656e-06, + "loss": 0.6287394762039185, + "step": 5042 + }, + { + "epoch": 1.4746307939757275, + "grad_norm": 1.737914835396703, + "learning_rate": 3.5462355598829433e-06, + "loss": 0.4621254801750183, + "step": 5043 + }, + { + "epoch": 1.4749232343909928, + "grad_norm": 1.687652415457897, + "learning_rate": 3.5425432523347205e-06, + "loss": 0.5571160316467285, + "step": 5044 + }, + { + "epoch": 1.4752156748062581, + "grad_norm": 1.716802557057107, + "learning_rate": 3.5388524541777492e-06, + "loss": 0.4135715365409851, + "step": 5045 + }, + { + "epoch": 1.4755081152215237, + "grad_norm": 1.868527213017395, + "learning_rate": 3.535163166274733e-06, + "loss": 0.524153470993042, + "step": 5046 + }, + { + "epoch": 1.475800555636789, + "grad_norm": 1.9441558365554423, + "learning_rate": 3.5314753894880205e-06, + "loss": 0.6330267786979675, + "step": 5047 + }, + { + "epoch": 1.4760929960520544, + "grad_norm": 1.7270524835767156, + "learning_rate": 3.527789124679605e-06, + "loss": 0.46210330724716187, + "step": 5048 + }, + { + "epoch": 1.4763854364673197, + "grad_norm": 1.8799684878196978, + "learning_rate": 3.524104372711131e-06, + "loss": 0.49293309450149536, + "step": 5049 + }, + { + "epoch": 1.4766778768825852, + "grad_norm": 1.7601042593478657, + "learning_rate": 3.520421134443889e-06, + "loss": 0.6196815967559814, + "step": 5050 + }, + { + "epoch": 1.4769703172978506, + "grad_norm": 1.568738566408146, + "learning_rate": 3.5167394107388064e-06, + "loss": 0.42622530460357666, + "step": 5051 + }, + { + "epoch": 1.4772627577131159, + "grad_norm": 1.6087834768838942, + "learning_rate": 3.513059202456468e-06, + "loss": 0.4475107491016388, + "step": 5052 + }, + { + "epoch": 1.4775551981283814, + "grad_norm": 1.549049360877832, + "learning_rate": 3.5093805104571e-06, + "loss": 0.4295683205127716, + "step": 5053 + }, + { + "epoch": 1.4778476385436468, + "grad_norm": 1.512499491264911, + "learning_rate": 
3.505703335600573e-06, + "loss": 0.5331642627716064, + "step": 5054 + }, + { + "epoch": 1.478140078958912, + "grad_norm": 1.7125050045051866, + "learning_rate": 3.5020276787464058e-06, + "loss": 0.5615599155426025, + "step": 5055 + }, + { + "epoch": 1.4784325193741776, + "grad_norm": 1.470462641632426, + "learning_rate": 3.4983535407537618e-06, + "loss": 0.5611366033554077, + "step": 5056 + }, + { + "epoch": 1.478724959789443, + "grad_norm": 2.0861134690908325, + "learning_rate": 3.494680922481445e-06, + "loss": 0.5891577005386353, + "step": 5057 + }, + { + "epoch": 1.4790174002047083, + "grad_norm": 1.981139638659905, + "learning_rate": 3.491009824787911e-06, + "loss": 0.5583761930465698, + "step": 5058 + }, + { + "epoch": 1.4793098406199736, + "grad_norm": 1.5020288470897978, + "learning_rate": 3.4873402485312548e-06, + "loss": 0.5001339912414551, + "step": 5059 + }, + { + "epoch": 1.479602281035239, + "grad_norm": 1.445341864944132, + "learning_rate": 3.4836721945692175e-06, + "loss": 0.5050641894340515, + "step": 5060 + }, + { + "epoch": 1.4798947214505045, + "grad_norm": 1.5825314066620513, + "learning_rate": 3.4800056637591885e-06, + "loss": 0.5377815365791321, + "step": 5061 + }, + { + "epoch": 1.4801871618657698, + "grad_norm": 1.6490614330323619, + "learning_rate": 3.4763406569581892e-06, + "loss": 0.5517662763595581, + "step": 5062 + }, + { + "epoch": 1.4804796022810351, + "grad_norm": 1.7535356829599726, + "learning_rate": 3.4726771750228984e-06, + "loss": 0.5908320546150208, + "step": 5063 + }, + { + "epoch": 1.4807720426963007, + "grad_norm": 1.640782634903257, + "learning_rate": 3.4690152188096293e-06, + "loss": 0.5169299840927124, + "step": 5064 + }, + { + "epoch": 1.481064483111566, + "grad_norm": 1.5566091974805318, + "learning_rate": 3.4653547891743457e-06, + "loss": 0.6198064088821411, + "step": 5065 + }, + { + "epoch": 1.4813569235268313, + "grad_norm": 1.7822104060368598, + "learning_rate": 3.4616958869726436e-06, + "loss": 0.4971558153629303, 
+ "step": 5066 + }, + { + "epoch": 1.481649363942097, + "grad_norm": 1.8117473020924466, + "learning_rate": 3.4580385130597794e-06, + "loss": 0.556640088558197, + "step": 5067 + }, + { + "epoch": 1.4819418043573622, + "grad_norm": 1.7297037385384992, + "learning_rate": 3.4543826682906358e-06, + "loss": 0.5336956977844238, + "step": 5068 + }, + { + "epoch": 1.4822342447726276, + "grad_norm": 1.8723627634024749, + "learning_rate": 3.4507283535197454e-06, + "loss": 0.5185145735740662, + "step": 5069 + }, + { + "epoch": 1.482526685187893, + "grad_norm": 1.5962927751585108, + "learning_rate": 3.447075569601287e-06, + "loss": 0.5460748672485352, + "step": 5070 + }, + { + "epoch": 1.4828191256031584, + "grad_norm": 1.7486536420516579, + "learning_rate": 3.4434243173890667e-06, + "loss": 0.5860699415206909, + "step": 5071 + }, + { + "epoch": 1.4831115660184238, + "grad_norm": 1.5377337582646984, + "learning_rate": 3.4397745977365482e-06, + "loss": 0.5818450450897217, + "step": 5072 + }, + { + "epoch": 1.483404006433689, + "grad_norm": 1.6591511763241749, + "learning_rate": 3.4361264114968316e-06, + "loss": 0.4205876588821411, + "step": 5073 + }, + { + "epoch": 1.4836964468489544, + "grad_norm": 1.6097740909701606, + "learning_rate": 3.4324797595226567e-06, + "loss": 0.5503501892089844, + "step": 5074 + }, + { + "epoch": 1.48398888726422, + "grad_norm": 1.7613851561474803, + "learning_rate": 3.4288346426664063e-06, + "loss": 0.5388503074645996, + "step": 5075 + }, + { + "epoch": 1.4842813276794853, + "grad_norm": 1.5726280695427581, + "learning_rate": 3.4251910617801054e-06, + "loss": 0.5866841673851013, + "step": 5076 + }, + { + "epoch": 1.4845737680947506, + "grad_norm": 1.7063663913828162, + "learning_rate": 3.4215490177154176e-06, + "loss": 0.5377970337867737, + "step": 5077 + }, + { + "epoch": 1.4848662085100162, + "grad_norm": 2.013961516297246, + "learning_rate": 3.41790851132365e-06, + "loss": 0.6311028003692627, + "step": 5078 + }, + { + "epoch": 
1.4851586489252815, + "grad_norm": 1.7100175604987324, + "learning_rate": 3.414269543455747e-06, + "loss": 0.5226441621780396, + "step": 5079 + }, + { + "epoch": 1.4854510893405468, + "grad_norm": 1.73285658375087, + "learning_rate": 3.410632114962298e-06, + "loss": 0.6306775212287903, + "step": 5080 + }, + { + "epoch": 1.4857435297558124, + "grad_norm": 1.8061194998201888, + "learning_rate": 3.406996226693531e-06, + "loss": 0.5432136058807373, + "step": 5081 + }, + { + "epoch": 1.4860359701710777, + "grad_norm": 1.564250952291821, + "learning_rate": 3.403361879499305e-06, + "loss": 0.4218754470348358, + "step": 5082 + }, + { + "epoch": 1.486328410586343, + "grad_norm": 1.7436245532279955, + "learning_rate": 3.3997290742291335e-06, + "loss": 0.5121650099754333, + "step": 5083 + }, + { + "epoch": 1.4866208510016083, + "grad_norm": 1.713174617853516, + "learning_rate": 3.39609781173216e-06, + "loss": 0.5489382743835449, + "step": 5084 + }, + { + "epoch": 1.486913291416874, + "grad_norm": 1.7492646537049668, + "learning_rate": 3.3924680928571694e-06, + "loss": 0.4190993309020996, + "step": 5085 + }, + { + "epoch": 1.4872057318321392, + "grad_norm": 2.012504952292692, + "learning_rate": 3.388839918452589e-06, + "loss": 0.5927796363830566, + "step": 5086 + }, + { + "epoch": 1.4874981722474045, + "grad_norm": 1.5385674447124333, + "learning_rate": 3.3852132893664803e-06, + "loss": 0.43746429681777954, + "step": 5087 + }, + { + "epoch": 1.4877906126626699, + "grad_norm": 1.592965785800762, + "learning_rate": 3.381588206446548e-06, + "loss": 0.41599413752555847, + "step": 5088 + }, + { + "epoch": 1.4880830530779354, + "grad_norm": 1.640030018717508, + "learning_rate": 3.3779646705401305e-06, + "loss": 0.5803484320640564, + "step": 5089 + }, + { + "epoch": 1.4883754934932008, + "grad_norm": 1.6162932555816476, + "learning_rate": 3.3743426824942082e-06, + "loss": 0.5277384519577026, + "step": 5090 + }, + { + "epoch": 1.488667933908466, + "grad_norm": 1.5149011711130314, + 
"learning_rate": 3.370722243155401e-06, + "loss": 0.5842317342758179, + "step": 5091 + }, + { + "epoch": 1.4889603743237316, + "grad_norm": 1.8602157485440332, + "learning_rate": 3.367103353369965e-06, + "loss": 0.5394416451454163, + "step": 5092 + }, + { + "epoch": 1.489252814738997, + "grad_norm": 1.6652727466684587, + "learning_rate": 3.3634860139837877e-06, + "loss": 0.5457144975662231, + "step": 5093 + }, + { + "epoch": 1.4895452551542623, + "grad_norm": 1.6270719194791377, + "learning_rate": 3.3598702258424044e-06, + "loss": 0.49552473425865173, + "step": 5094 + }, + { + "epoch": 1.4898376955695278, + "grad_norm": 1.8756044563450258, + "learning_rate": 3.3562559897909842e-06, + "loss": 0.5922214984893799, + "step": 5095 + }, + { + "epoch": 1.4901301359847932, + "grad_norm": 1.6902952443841357, + "learning_rate": 3.35264330667433e-06, + "loss": 0.5844507217407227, + "step": 5096 + }, + { + "epoch": 1.4904225764000585, + "grad_norm": 1.6441848915551236, + "learning_rate": 3.3490321773368872e-06, + "loss": 0.5096029043197632, + "step": 5097 + }, + { + "epoch": 1.4907150168153238, + "grad_norm": 1.8296617417124132, + "learning_rate": 3.345422602622734e-06, + "loss": 0.6343984603881836, + "step": 5098 + }, + { + "epoch": 1.4910074572305891, + "grad_norm": 1.7032992920741425, + "learning_rate": 3.3418145833755875e-06, + "loss": 0.5319832563400269, + "step": 5099 + }, + { + "epoch": 1.4912998976458547, + "grad_norm": 1.8127365107062148, + "learning_rate": 3.3382081204388006e-06, + "loss": 0.6453676819801331, + "step": 5100 + }, + { + "epoch": 1.49159233806112, + "grad_norm": 1.7068058578414038, + "learning_rate": 3.33460321465536e-06, + "loss": 0.5129305720329285, + "step": 5101 + }, + { + "epoch": 1.4918847784763853, + "grad_norm": 1.7103748262888143, + "learning_rate": 3.3309998668678912e-06, + "loss": 0.5680958032608032, + "step": 5102 + }, + { + "epoch": 1.492177218891651, + "grad_norm": 1.654140366409291, + "learning_rate": 3.32739807791866e-06, + "loss": 
0.5959445834159851, + "step": 5103 + }, + { + "epoch": 1.4924696593069162, + "grad_norm": 1.5546485584978795, + "learning_rate": 3.3237978486495536e-06, + "loss": 0.5549102425575256, + "step": 5104 + }, + { + "epoch": 1.4927620997221815, + "grad_norm": 1.5522771682213525, + "learning_rate": 3.3201991799021084e-06, + "loss": 0.4219816327095032, + "step": 5105 + }, + { + "epoch": 1.493054540137447, + "grad_norm": 1.8150814493123832, + "learning_rate": 3.3166020725174906e-06, + "loss": 0.46013522148132324, + "step": 5106 + }, + { + "epoch": 1.4933469805527124, + "grad_norm": 2.2057259724068885, + "learning_rate": 3.3130065273365033e-06, + "loss": 0.6013174057006836, + "step": 5107 + }, + { + "epoch": 1.4936394209679777, + "grad_norm": 1.9081850485789635, + "learning_rate": 3.3094125451995827e-06, + "loss": 0.7097996473312378, + "step": 5108 + }, + { + "epoch": 1.4939318613832433, + "grad_norm": 1.6725604100107134, + "learning_rate": 3.305820126946799e-06, + "loss": 0.6704884767532349, + "step": 5109 + }, + { + "epoch": 1.4942243017985086, + "grad_norm": 1.735486744932862, + "learning_rate": 3.3022292734178605e-06, + "loss": 0.5211119651794434, + "step": 5110 + }, + { + "epoch": 1.494516742213774, + "grad_norm": 1.7718418689676594, + "learning_rate": 3.2986399854521065e-06, + "loss": 0.5830427408218384, + "step": 5111 + }, + { + "epoch": 1.4948091826290393, + "grad_norm": 1.574048881929475, + "learning_rate": 3.2950522638885106e-06, + "loss": 0.5647883415222168, + "step": 5112 + }, + { + "epoch": 1.4951016230443046, + "grad_norm": 1.3783682279274316, + "learning_rate": 3.2914661095656807e-06, + "loss": 0.46678125858306885, + "step": 5113 + }, + { + "epoch": 1.4953940634595702, + "grad_norm": 1.768460226758459, + "learning_rate": 3.287881523321863e-06, + "loss": 0.5391934514045715, + "step": 5114 + }, + { + "epoch": 1.4956865038748355, + "grad_norm": 1.532723290545503, + "learning_rate": 3.284298505994926e-06, + "loss": 0.4039243459701538, + "step": 5115 + }, + { + 
"epoch": 1.4959789442901008, + "grad_norm": 1.8718379114919181, + "learning_rate": 3.2807170584223802e-06, + "loss": 0.6187412738800049, + "step": 5116 + }, + { + "epoch": 1.4962713847053664, + "grad_norm": 1.730072311160077, + "learning_rate": 3.277137181441369e-06, + "loss": 0.5165137648582458, + "step": 5117 + }, + { + "epoch": 1.4965638251206317, + "grad_norm": 1.7402216150888872, + "learning_rate": 3.273558875888665e-06, + "loss": 0.6315420866012573, + "step": 5118 + }, + { + "epoch": 1.496856265535897, + "grad_norm": 1.6811341442796868, + "learning_rate": 3.269982142600677e-06, + "loss": 0.5522993206977844, + "step": 5119 + }, + { + "epoch": 1.4971487059511626, + "grad_norm": 1.8103742244487522, + "learning_rate": 3.266406982413444e-06, + "loss": 0.5751636028289795, + "step": 5120 + }, + { + "epoch": 1.4974411463664279, + "grad_norm": 1.8346826868047423, + "learning_rate": 3.262833396162637e-06, + "loss": 0.5552358031272888, + "step": 5121 + }, + { + "epoch": 1.4977335867816932, + "grad_norm": 1.4553347230926987, + "learning_rate": 3.259261384683562e-06, + "loss": 0.4971257150173187, + "step": 5122 + }, + { + "epoch": 1.4980260271969585, + "grad_norm": 1.7328825599332134, + "learning_rate": 3.2556909488111533e-06, + "loss": 0.3803454637527466, + "step": 5123 + }, + { + "epoch": 1.498318467612224, + "grad_norm": 1.7448185442015292, + "learning_rate": 3.25212208937998e-06, + "loss": 0.45348531007766724, + "step": 5124 + }, + { + "epoch": 1.4986109080274894, + "grad_norm": 1.6593501166731528, + "learning_rate": 3.2485548072242403e-06, + "loss": 0.4839708209037781, + "step": 5125 + }, + { + "epoch": 1.4989033484427547, + "grad_norm": 1.7004886969570365, + "learning_rate": 3.244989103177768e-06, + "loss": 0.4743500351905823, + "step": 5126 + }, + { + "epoch": 1.49919578885802, + "grad_norm": 1.7042585723205583, + "learning_rate": 3.241424978074018e-06, + "loss": 0.558182954788208, + "step": 5127 + }, + { + "epoch": 1.4994882292732856, + "grad_norm": 
1.5886443982701122, + "learning_rate": 3.2378624327460874e-06, + "loss": 0.41309911012649536, + "step": 5128 + }, + { + "epoch": 1.499780669688551, + "grad_norm": 1.7452725700601364, + "learning_rate": 3.2343014680266984e-06, + "loss": 0.5627751350402832, + "step": 5129 + }, + { + "epoch": 1.5000731101038163, + "grad_norm": 1.8911076385977756, + "learning_rate": 3.230742084748204e-06, + "loss": 0.5374714732170105, + "step": 5130 + }, + { + "epoch": 1.5003655505190818, + "grad_norm": 1.7659792305895352, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.4264039993286133, + "step": 5131 + }, + { + "epoch": 1.5006579909343472, + "grad_norm": 1.8312136055327797, + "learning_rate": 3.223628065841472e-06, + "loss": 0.44204217195510864, + "step": 5132 + }, + { + "epoch": 1.5009504313496125, + "grad_norm": 1.6892686547824762, + "learning_rate": 3.220073431876092e-06, + "loss": 0.5322041511535645, + "step": 5133 + }, + { + "epoch": 1.501242871764878, + "grad_norm": 1.6801975106342348, + "learning_rate": 3.216520382677324e-06, + "loss": 0.4741417169570923, + "step": 5134 + }, + { + "epoch": 1.5015353121801431, + "grad_norm": 1.9712166683153383, + "learning_rate": 3.212968919075672e-06, + "loss": 0.7069851756095886, + "step": 5135 + }, + { + "epoch": 1.5018277525954087, + "grad_norm": 1.6644566597906936, + "learning_rate": 3.2094190419012694e-06, + "loss": 0.6049044132232666, + "step": 5136 + }, + { + "epoch": 1.5021201930106742, + "grad_norm": 1.6420500389509403, + "learning_rate": 3.2058707519838817e-06, + "loss": 0.556586503982544, + "step": 5137 + }, + { + "epoch": 1.5024126334259393, + "grad_norm": 1.4612168804015682, + "learning_rate": 3.202324050152894e-06, + "loss": 0.46489936113357544, + "step": 5138 + }, + { + "epoch": 1.5027050738412049, + "grad_norm": 1.6808104719845611, + "learning_rate": 3.1987789372373292e-06, + "loss": 0.5332333445549011, + "step": 5139 + }, + { + "epoch": 1.5029975142564702, + "grad_norm": 1.5897163584111842, + "learning_rate": 
3.1952354140658346e-06, + "loss": 0.5547586679458618, + "step": 5140 + }, + { + "epoch": 1.5032899546717355, + "grad_norm": 1.7343008366786887, + "learning_rate": 3.1916934814666858e-06, + "loss": 0.5500372648239136, + "step": 5141 + }, + { + "epoch": 1.503582395087001, + "grad_norm": 1.6657659858957796, + "learning_rate": 3.1881531402677934e-06, + "loss": 0.5065571069717407, + "step": 5142 + }, + { + "epoch": 1.5038748355022664, + "grad_norm": 2.106659003681642, + "learning_rate": 3.1846143912966887e-06, + "loss": 0.5942833423614502, + "step": 5143 + }, + { + "epoch": 1.5041672759175317, + "grad_norm": 1.5318136638727409, + "learning_rate": 3.181077235380531e-06, + "loss": 0.4089720547199249, + "step": 5144 + }, + { + "epoch": 1.5044597163327973, + "grad_norm": 1.959628279475518, + "learning_rate": 3.1775416733461107e-06, + "loss": 0.5360317230224609, + "step": 5145 + }, + { + "epoch": 1.5047521567480626, + "grad_norm": 1.8497642502339247, + "learning_rate": 3.174007706019845e-06, + "loss": 0.5403856635093689, + "step": 5146 + }, + { + "epoch": 1.505044597163328, + "grad_norm": 1.583723666722825, + "learning_rate": 3.1704753342277727e-06, + "loss": 0.5377147197723389, + "step": 5147 + }, + { + "epoch": 1.5053370375785935, + "grad_norm": 1.908833197627838, + "learning_rate": 3.166944558795567e-06, + "loss": 0.49888312816619873, + "step": 5148 + }, + { + "epoch": 1.5056294779938586, + "grad_norm": 1.604723023798687, + "learning_rate": 3.1634153805485245e-06, + "loss": 0.5105957984924316, + "step": 5149 + }, + { + "epoch": 1.5059219184091241, + "grad_norm": 1.530550544138999, + "learning_rate": 3.1598878003115694e-06, + "loss": 0.5653882026672363, + "step": 5150 + }, + { + "epoch": 1.5062143588243895, + "grad_norm": 1.7528922447010231, + "learning_rate": 3.1563618189092536e-06, + "loss": 0.5293145179748535, + "step": 5151 + }, + { + "epoch": 1.5065067992396548, + "grad_norm": 1.7049104339852403, + "learning_rate": 3.1528374371657524e-06, + "loss": 0.5852463841438293, 
+ "step": 5152 + }, + { + "epoch": 1.5067992396549204, + "grad_norm": 1.7074372465536334, + "learning_rate": 3.1493146559048683e-06, + "loss": 0.5986759662628174, + "step": 5153 + }, + { + "epoch": 1.5070916800701857, + "grad_norm": 1.8410699226798701, + "learning_rate": 3.1457934759500298e-06, + "loss": 0.6363133788108826, + "step": 5154 + }, + { + "epoch": 1.507384120485451, + "grad_norm": 1.7703119171725752, + "learning_rate": 3.1422738981242927e-06, + "loss": 0.4757901430130005, + "step": 5155 + }, + { + "epoch": 1.5076765609007166, + "grad_norm": 1.8042941675603332, + "learning_rate": 3.1387559232503374e-06, + "loss": 0.7614980936050415, + "step": 5156 + }, + { + "epoch": 1.5079690013159819, + "grad_norm": 1.8353916940267578, + "learning_rate": 3.13523955215047e-06, + "loss": 0.5739883184432983, + "step": 5157 + }, + { + "epoch": 1.5082614417312472, + "grad_norm": 1.6405466984899346, + "learning_rate": 3.131724785646616e-06, + "loss": 0.5893388390541077, + "step": 5158 + }, + { + "epoch": 1.5085538821465128, + "grad_norm": 1.4613031069188664, + "learning_rate": 3.1282116245603333e-06, + "loss": 0.5809957981109619, + "step": 5159 + }, + { + "epoch": 1.508846322561778, + "grad_norm": 1.558509757762028, + "learning_rate": 3.124700069712803e-06, + "loss": 0.5651090741157532, + "step": 5160 + }, + { + "epoch": 1.5091387629770434, + "grad_norm": 1.5870160926102073, + "learning_rate": 3.1211901219248273e-06, + "loss": 0.3736303448677063, + "step": 5161 + }, + { + "epoch": 1.509431203392309, + "grad_norm": 1.744264206007829, + "learning_rate": 3.117681782016838e-06, + "loss": 0.5501068234443665, + "step": 5162 + }, + { + "epoch": 1.509723643807574, + "grad_norm": 1.7377852819958348, + "learning_rate": 3.1141750508088865e-06, + "loss": 0.6210630536079407, + "step": 5163 + }, + { + "epoch": 1.5100160842228396, + "grad_norm": 1.5741938339988393, + "learning_rate": 3.110669929120651e-06, + "loss": 0.5722042322158813, + "step": 5164 + }, + { + "epoch": 1.510308524638105, + 
"grad_norm": 1.617906406413033, + "learning_rate": 3.107166417771431e-06, + "loss": 0.5813776254653931, + "step": 5165 + }, + { + "epoch": 1.5106009650533703, + "grad_norm": 1.5816945478856634, + "learning_rate": 3.1036645175801515e-06, + "loss": 0.4911368787288666, + "step": 5166 + }, + { + "epoch": 1.5108934054686358, + "grad_norm": 1.5812988749732655, + "learning_rate": 3.100164229365361e-06, + "loss": 0.5136172771453857, + "step": 5167 + }, + { + "epoch": 1.5111858458839011, + "grad_norm": 1.7202185949801794, + "learning_rate": 3.096665553945234e-06, + "loss": 0.5746543407440186, + "step": 5168 + }, + { + "epoch": 1.5114782862991665, + "grad_norm": 1.8577610332100818, + "learning_rate": 3.0931684921375572e-06, + "loss": 0.4949193000793457, + "step": 5169 + }, + { + "epoch": 1.511770726714432, + "grad_norm": 1.6744220879324234, + "learning_rate": 3.089673044759751e-06, + "loss": 0.5732932090759277, + "step": 5170 + }, + { + "epoch": 1.5120631671296973, + "grad_norm": 1.5865659073822531, + "learning_rate": 3.086179212628855e-06, + "loss": 0.5329696536064148, + "step": 5171 + }, + { + "epoch": 1.5123556075449627, + "grad_norm": 1.7970382860153173, + "learning_rate": 3.082686996561531e-06, + "loss": 0.631770670413971, + "step": 5172 + }, + { + "epoch": 1.5126480479602282, + "grad_norm": 1.5998021767601671, + "learning_rate": 3.0791963973740646e-06, + "loss": 0.5183405876159668, + "step": 5173 + }, + { + "epoch": 1.5129404883754933, + "grad_norm": 1.7133603210505308, + "learning_rate": 3.075707415882361e-06, + "loss": 0.5616034269332886, + "step": 5174 + }, + { + "epoch": 1.5132329287907589, + "grad_norm": 1.5912245556380846, + "learning_rate": 3.0722200529019477e-06, + "loss": 0.48513877391815186, + "step": 5175 + }, + { + "epoch": 1.5135253692060244, + "grad_norm": 1.574805808870548, + "learning_rate": 3.068734309247976e-06, + "loss": 0.5226399898529053, + "step": 5176 + }, + { + "epoch": 1.5138178096212895, + "grad_norm": 1.592402045128277, + "learning_rate": 
3.0652501857352167e-06, + "loss": 0.48817533254623413, + "step": 5177 + }, + { + "epoch": 1.514110250036555, + "grad_norm": 1.5523305292465257, + "learning_rate": 3.061767683178063e-06, + "loss": 0.4163327217102051, + "step": 5178 + }, + { + "epoch": 1.5144026904518204, + "grad_norm": 1.6254224030737643, + "learning_rate": 3.058286802390531e-06, + "loss": 0.5984256267547607, + "step": 5179 + }, + { + "epoch": 1.5146951308670857, + "grad_norm": 1.8006518354372911, + "learning_rate": 3.054807544186249e-06, + "loss": 0.47233515977859497, + "step": 5180 + }, + { + "epoch": 1.5149875712823513, + "grad_norm": 1.6896342506826862, + "learning_rate": 3.0513299093784766e-06, + "loss": 0.5545482635498047, + "step": 5181 + }, + { + "epoch": 1.5152800116976166, + "grad_norm": 1.5925171354605219, + "learning_rate": 3.047853898780089e-06, + "loss": 0.46200019121170044, + "step": 5182 + }, + { + "epoch": 1.515572452112882, + "grad_norm": 1.7986358499610187, + "learning_rate": 3.0443795132035824e-06, + "loss": 0.6146235466003418, + "step": 5183 + }, + { + "epoch": 1.5158648925281475, + "grad_norm": 1.6180210942837954, + "learning_rate": 3.040906753461075e-06, + "loss": 0.5653461217880249, + "step": 5184 + }, + { + "epoch": 1.5161573329434128, + "grad_norm": 1.7782122645526974, + "learning_rate": 3.0374356203643008e-06, + "loss": 0.6514929533004761, + "step": 5185 + }, + { + "epoch": 1.5164497733586781, + "grad_norm": 1.6488410817366923, + "learning_rate": 3.033966114724618e-06, + "loss": 0.48213401436805725, + "step": 5186 + }, + { + "epoch": 1.5167422137739437, + "grad_norm": 1.8810893536328739, + "learning_rate": 3.0304982373530013e-06, + "loss": 0.4935530424118042, + "step": 5187 + }, + { + "epoch": 1.5170346541892088, + "grad_norm": 1.9406636249591702, + "learning_rate": 3.0270319890600465e-06, + "loss": 0.6435343027114868, + "step": 5188 + }, + { + "epoch": 1.5173270946044743, + "grad_norm": 1.4722259236044228, + "learning_rate": 3.0235673706559675e-06, + "loss": 
0.49350717663764954, + "step": 5189 + }, + { + "epoch": 1.5176195350197397, + "grad_norm": 1.636152242750681, + "learning_rate": 3.0201043829506015e-06, + "loss": 0.4745938777923584, + "step": 5190 + }, + { + "epoch": 1.517911975435005, + "grad_norm": 1.747247707841839, + "learning_rate": 3.0166430267533944e-06, + "loss": 0.5867031812667847, + "step": 5191 + }, + { + "epoch": 1.5182044158502705, + "grad_norm": 2.0836038611604275, + "learning_rate": 3.01318330287342e-06, + "loss": 0.5477231740951538, + "step": 5192 + }, + { + "epoch": 1.5184968562655359, + "grad_norm": 1.5825293698408722, + "learning_rate": 3.0097252121193687e-06, + "loss": 0.5788818597793579, + "step": 5193 + }, + { + "epoch": 1.5187892966808012, + "grad_norm": 1.5819522244244852, + "learning_rate": 3.0062687552995475e-06, + "loss": 0.4967714548110962, + "step": 5194 + }, + { + "epoch": 1.5190817370960668, + "grad_norm": 1.810354148695448, + "learning_rate": 3.002813933221882e-06, + "loss": 0.6427319645881653, + "step": 5195 + }, + { + "epoch": 1.519374177511332, + "grad_norm": 1.7324283900525337, + "learning_rate": 2.999360746693916e-06, + "loss": 0.5615307688713074, + "step": 5196 + }, + { + "epoch": 1.5196666179265974, + "grad_norm": 1.8017068269121923, + "learning_rate": 2.9959091965228102e-06, + "loss": 0.6646313667297363, + "step": 5197 + }, + { + "epoch": 1.519959058341863, + "grad_norm": 1.4648905848591907, + "learning_rate": 2.9924592835153454e-06, + "loss": 0.47536247968673706, + "step": 5198 + }, + { + "epoch": 1.5202514987571283, + "grad_norm": 1.701001149097395, + "learning_rate": 2.9890110084779157e-06, + "loss": 0.5850256681442261, + "step": 5199 + }, + { + "epoch": 1.5205439391723936, + "grad_norm": 1.6650942638342863, + "learning_rate": 2.985564372216536e-06, + "loss": 0.5724887251853943, + "step": 5200 + }, + { + "epoch": 1.5208363795876592, + "grad_norm": 1.6379341688791944, + "learning_rate": 2.9821193755368383e-06, + "loss": 0.5052510499954224, + "step": 5201 + }, + { + 
"epoch": 1.5211288200029243, + "grad_norm": 1.5270508750040293, + "learning_rate": 2.9786760192440644e-06, + "loss": 0.439144492149353, + "step": 5202 + }, + { + "epoch": 1.5214212604181898, + "grad_norm": 1.624134940512823, + "learning_rate": 2.97523430414308e-06, + "loss": 0.4560511112213135, + "step": 5203 + }, + { + "epoch": 1.5217137008334551, + "grad_norm": 1.9447169329839864, + "learning_rate": 2.9717942310383664e-06, + "loss": 0.6848068237304688, + "step": 5204 + }, + { + "epoch": 1.5220061412487205, + "grad_norm": 1.5338251170475576, + "learning_rate": 2.9683558007340184e-06, + "loss": 0.5541313886642456, + "step": 5205 + }, + { + "epoch": 1.522298581663986, + "grad_norm": 1.4921475223936211, + "learning_rate": 2.964919014033749e-06, + "loss": 0.5117338299751282, + "step": 5206 + }, + { + "epoch": 1.5225910220792513, + "grad_norm": 1.8454970950489444, + "learning_rate": 2.9614838717408866e-06, + "loss": 0.5164151191711426, + "step": 5207 + }, + { + "epoch": 1.5228834624945167, + "grad_norm": 1.6612213438595136, + "learning_rate": 2.9580503746583744e-06, + "loss": 0.5461020469665527, + "step": 5208 + }, + { + "epoch": 1.5231759029097822, + "grad_norm": 1.580589085309813, + "learning_rate": 2.9546185235887705e-06, + "loss": 0.4265401065349579, + "step": 5209 + }, + { + "epoch": 1.5234683433250475, + "grad_norm": 1.822483254200033, + "learning_rate": 2.9511883193342505e-06, + "loss": 0.47372496128082275, + "step": 5210 + }, + { + "epoch": 1.5237607837403129, + "grad_norm": 1.5409548150660597, + "learning_rate": 2.9477597626966036e-06, + "loss": 0.43951019644737244, + "step": 5211 + }, + { + "epoch": 1.5240532241555784, + "grad_norm": 2.1038432849237862, + "learning_rate": 2.9443328544772343e-06, + "loss": 0.6514073610305786, + "step": 5212 + }, + { + "epoch": 1.5243456645708435, + "grad_norm": 1.6794879789857167, + "learning_rate": 2.940907595477164e-06, + "loss": 0.523013710975647, + "step": 5213 + }, + { + "epoch": 1.524638104986109, + "grad_norm": 
1.6399154124434079, + "learning_rate": 2.9374839864970194e-06, + "loss": 0.4945281744003296, + "step": 5214 + }, + { + "epoch": 1.5249305454013746, + "grad_norm": 1.83414324289986, + "learning_rate": 2.9340620283370525e-06, + "loss": 0.5768609046936035, + "step": 5215 + }, + { + "epoch": 1.5252229858166397, + "grad_norm": 1.7611799606025424, + "learning_rate": 2.930641721797125e-06, + "loss": 0.45644205808639526, + "step": 5216 + }, + { + "epoch": 1.5255154262319053, + "grad_norm": 1.5932175762441756, + "learning_rate": 2.92722306767671e-06, + "loss": 0.590227484703064, + "step": 5217 + }, + { + "epoch": 1.5258078666471706, + "grad_norm": 1.8078838529845034, + "learning_rate": 2.9238060667749014e-06, + "loss": 0.5618122816085815, + "step": 5218 + }, + { + "epoch": 1.526100307062436, + "grad_norm": 1.9135498575527394, + "learning_rate": 2.9203907198904027e-06, + "loss": 0.6431877613067627, + "step": 5219 + }, + { + "epoch": 1.5263927474777015, + "grad_norm": 1.5548470750003383, + "learning_rate": 2.916977027821527e-06, + "loss": 0.5019941329956055, + "step": 5220 + }, + { + "epoch": 1.5266851878929668, + "grad_norm": 1.9013308084843434, + "learning_rate": 2.913564991366209e-06, + "loss": 0.5413016080856323, + "step": 5221 + }, + { + "epoch": 1.5269776283082321, + "grad_norm": 1.6880920277336984, + "learning_rate": 2.9101546113219846e-06, + "loss": 0.6546905636787415, + "step": 5222 + }, + { + "epoch": 1.5272700687234977, + "grad_norm": 1.7013707157233615, + "learning_rate": 2.906745888486013e-06, + "loss": 0.5689815878868103, + "step": 5223 + }, + { + "epoch": 1.527562509138763, + "grad_norm": 1.8369848799419313, + "learning_rate": 2.9033388236550632e-06, + "loss": 0.5134810209274292, + "step": 5224 + }, + { + "epoch": 1.5278549495540283, + "grad_norm": 1.4280052174004847, + "learning_rate": 2.8999334176255143e-06, + "loss": 0.4880787134170532, + "step": 5225 + }, + { + "epoch": 1.528147389969294, + "grad_norm": 1.8292283637694566, + "learning_rate": 
2.89652967119336e-06, + "loss": 0.4345950782299042, + "step": 5226 + }, + { + "epoch": 1.528439830384559, + "grad_norm": 1.724451812949585, + "learning_rate": 2.893127585154205e-06, + "loss": 0.43327242136001587, + "step": 5227 + }, + { + "epoch": 1.5287322707998245, + "grad_norm": 1.780345207484487, + "learning_rate": 2.889727160303266e-06, + "loss": 0.6423674821853638, + "step": 5228 + }, + { + "epoch": 1.5290247112150899, + "grad_norm": 1.5540524492201802, + "learning_rate": 2.886328397435374e-06, + "loss": 0.5263554453849792, + "step": 5229 + }, + { + "epoch": 1.5293171516303552, + "grad_norm": 1.6433428703006638, + "learning_rate": 2.882931297344965e-06, + "loss": 0.4111948013305664, + "step": 5230 + }, + { + "epoch": 1.5296095920456207, + "grad_norm": 1.804627326985323, + "learning_rate": 2.8795358608260936e-06, + "loss": 0.43803131580352783, + "step": 5231 + }, + { + "epoch": 1.529902032460886, + "grad_norm": 1.5504311785369362, + "learning_rate": 2.8761420886724223e-06, + "loss": 0.4708956778049469, + "step": 5232 + }, + { + "epoch": 1.5301944728761514, + "grad_norm": 1.7185936460565197, + "learning_rate": 2.8727499816772265e-06, + "loss": 0.5268635749816895, + "step": 5233 + }, + { + "epoch": 1.530486913291417, + "grad_norm": 1.6977720322438927, + "learning_rate": 2.869359540633385e-06, + "loss": 0.5092788934707642, + "step": 5234 + }, + { + "epoch": 1.5307793537066823, + "grad_norm": 1.630735809850627, + "learning_rate": 2.8659707663333958e-06, + "loss": 0.4603293836116791, + "step": 5235 + }, + { + "epoch": 1.5310717941219476, + "grad_norm": 1.7857705195277582, + "learning_rate": 2.8625836595693646e-06, + "loss": 0.545462965965271, + "step": 5236 + }, + { + "epoch": 1.5313642345372132, + "grad_norm": 1.6146415057105645, + "learning_rate": 2.8591982211330073e-06, + "loss": 0.511603832244873, + "step": 5237 + }, + { + "epoch": 1.5316566749524785, + "grad_norm": 1.7935851159627383, + "learning_rate": 2.8558144518156485e-06, + "loss": 0.5076707601547241, + 
"step": 5238 + }, + { + "epoch": 1.5319491153677438, + "grad_norm": 1.7012818042378361, + "learning_rate": 2.852432352408224e-06, + "loss": 0.5923745632171631, + "step": 5239 + }, + { + "epoch": 1.5322415557830094, + "grad_norm": 1.8251553548092714, + "learning_rate": 2.849051923701279e-06, + "loss": 0.5588465332984924, + "step": 5240 + }, + { + "epoch": 1.5325339961982745, + "grad_norm": 1.6493521356208132, + "learning_rate": 2.845673166484969e-06, + "loss": 0.6681923270225525, + "step": 5241 + }, + { + "epoch": 1.53282643661354, + "grad_norm": 1.8683876960783266, + "learning_rate": 2.8422960815490564e-06, + "loss": 0.5702543258666992, + "step": 5242 + }, + { + "epoch": 1.5331188770288053, + "grad_norm": 1.8090012581479555, + "learning_rate": 2.8389206696829165e-06, + "loss": 0.5401744842529297, + "step": 5243 + }, + { + "epoch": 1.5334113174440707, + "grad_norm": 1.6641276436242072, + "learning_rate": 2.8355469316755324e-06, + "loss": 0.43371906876564026, + "step": 5244 + }, + { + "epoch": 1.5337037578593362, + "grad_norm": 1.6323739542625777, + "learning_rate": 2.8321748683154893e-06, + "loss": 0.5598163604736328, + "step": 5245 + }, + { + "epoch": 1.5339961982746015, + "grad_norm": 1.8330291281030966, + "learning_rate": 2.8288044803909896e-06, + "loss": 0.5836831331253052, + "step": 5246 + }, + { + "epoch": 1.5342886386898669, + "grad_norm": 1.6637462764959579, + "learning_rate": 2.8254357686898404e-06, + "loss": 0.5308898687362671, + "step": 5247 + }, + { + "epoch": 1.5345810791051324, + "grad_norm": 1.7589253104867197, + "learning_rate": 2.822068733999459e-06, + "loss": 0.6104828119277954, + "step": 5248 + }, + { + "epoch": 1.5348735195203977, + "grad_norm": 1.9266285032289332, + "learning_rate": 2.8187033771068685e-06, + "loss": 0.48373985290527344, + "step": 5249 + }, + { + "epoch": 1.535165959935663, + "grad_norm": 1.745809860715047, + "learning_rate": 2.8153396987987e-06, + "loss": 0.5213532447814941, + "step": 5250 + }, + { + "epoch": 1.5354584003509286, 
+ "grad_norm": 1.7052291407432676, + "learning_rate": 2.811977699861195e-06, + "loss": 0.5241051912307739, + "step": 5251 + }, + { + "epoch": 1.5357508407661937, + "grad_norm": 1.54399807563896, + "learning_rate": 2.8086173810801974e-06, + "loss": 0.48321712017059326, + "step": 5252 + }, + { + "epoch": 1.5360432811814593, + "grad_norm": 1.831716416150244, + "learning_rate": 2.8052587432411626e-06, + "loss": 0.5352765917778015, + "step": 5253 + }, + { + "epoch": 1.5363357215967248, + "grad_norm": 1.7051244593885417, + "learning_rate": 2.8019017871291522e-06, + "loss": 0.5402188301086426, + "step": 5254 + }, + { + "epoch": 1.53662816201199, + "grad_norm": 1.5780940900489064, + "learning_rate": 2.798546513528837e-06, + "loss": 0.4398813545703888, + "step": 5255 + }, + { + "epoch": 1.5369206024272555, + "grad_norm": 1.6682503262337565, + "learning_rate": 2.7951929232244855e-06, + "loss": 0.5661803483963013, + "step": 5256 + }, + { + "epoch": 1.5372130428425208, + "grad_norm": 1.9389870116334766, + "learning_rate": 2.791841016999982e-06, + "loss": 0.5051732063293457, + "step": 5257 + }, + { + "epoch": 1.5375054832577861, + "grad_norm": 1.7323475801875265, + "learning_rate": 2.788490795638815e-06, + "loss": 0.5712389945983887, + "step": 5258 + }, + { + "epoch": 1.5377979236730517, + "grad_norm": 1.7189716580722423, + "learning_rate": 2.7851422599240773e-06, + "loss": 0.6257319450378418, + "step": 5259 + }, + { + "epoch": 1.538090364088317, + "grad_norm": 1.7862483931054027, + "learning_rate": 2.7817954106384704e-06, + "loss": 0.5788396596908569, + "step": 5260 + }, + { + "epoch": 1.5383828045035823, + "grad_norm": 1.508089974245087, + "learning_rate": 2.7784502485642985e-06, + "loss": 0.37253260612487793, + "step": 5261 + }, + { + "epoch": 1.5386752449188479, + "grad_norm": 2.206166372523085, + "learning_rate": 2.7751067744834726e-06, + "loss": 0.6547001004219055, + "step": 5262 + }, + { + "epoch": 1.5389676853341132, + "grad_norm": 1.551783656656575, + "learning_rate": 
2.77176498917751e-06, + "loss": 0.510914146900177, + "step": 5263 + }, + { + "epoch": 1.5392601257493785, + "grad_norm": 1.731638922465708, + "learning_rate": 2.7684248934275327e-06, + "loss": 0.4387754201889038, + "step": 5264 + }, + { + "epoch": 1.539552566164644, + "grad_norm": 1.573259655998941, + "learning_rate": 2.765086488014268e-06, + "loss": 0.5640195608139038, + "step": 5265 + }, + { + "epoch": 1.5398450065799092, + "grad_norm": 2.3327619392306684, + "learning_rate": 2.7617497737180508e-06, + "loss": 0.5780993103981018, + "step": 5266 + }, + { + "epoch": 1.5401374469951747, + "grad_norm": 1.7296077762304434, + "learning_rate": 2.758414751318813e-06, + "loss": 0.5190057158470154, + "step": 5267 + }, + { + "epoch": 1.54042988741044, + "grad_norm": 1.6180118608432006, + "learning_rate": 2.7550814215960964e-06, + "loss": 0.4204869270324707, + "step": 5268 + }, + { + "epoch": 1.5407223278257054, + "grad_norm": 1.5345717637092124, + "learning_rate": 2.7517497853290477e-06, + "loss": 0.5649294853210449, + "step": 5269 + }, + { + "epoch": 1.541014768240971, + "grad_norm": 1.8541084629609554, + "learning_rate": 2.748419843296416e-06, + "loss": 0.49142545461654663, + "step": 5270 + }, + { + "epoch": 1.5413072086562363, + "grad_norm": 2.006144774477858, + "learning_rate": 2.745091596276557e-06, + "loss": 0.483539879322052, + "step": 5271 + }, + { + "epoch": 1.5415996490715016, + "grad_norm": 1.8772157933692841, + "learning_rate": 2.7417650450474253e-06, + "loss": 0.5400283336639404, + "step": 5272 + }, + { + "epoch": 1.5418920894867671, + "grad_norm": 1.6915167892784866, + "learning_rate": 2.7384401903865844e-06, + "loss": 0.5490765571594238, + "step": 5273 + }, + { + "epoch": 1.5421845299020325, + "grad_norm": 2.267512124400057, + "learning_rate": 2.7351170330711975e-06, + "loss": 0.5434873700141907, + "step": 5274 + }, + { + "epoch": 1.5424769703172978, + "grad_norm": 1.8064402200670897, + "learning_rate": 2.7317955738780333e-06, + "loss": 0.6195025444030762, + 
"step": 5275 + }, + { + "epoch": 1.5427694107325634, + "grad_norm": 1.6751288499310806, + "learning_rate": 2.728475813583462e-06, + "loss": 0.5552260875701904, + "step": 5276 + }, + { + "epoch": 1.5430618511478287, + "grad_norm": 1.8146552227089312, + "learning_rate": 2.725157752963461e-06, + "loss": 0.5430501699447632, + "step": 5277 + }, + { + "epoch": 1.543354291563094, + "grad_norm": 2.1339271947469047, + "learning_rate": 2.7218413927936006e-06, + "loss": 0.633337676525116, + "step": 5278 + }, + { + "epoch": 1.5436467319783596, + "grad_norm": 1.6483089945499043, + "learning_rate": 2.718526733849062e-06, + "loss": 0.4974183738231659, + "step": 5279 + }, + { + "epoch": 1.5439391723936247, + "grad_norm": 2.06701718299293, + "learning_rate": 2.715213776904628e-06, + "loss": 0.5840449929237366, + "step": 5280 + }, + { + "epoch": 1.5442316128088902, + "grad_norm": 1.480832016038464, + "learning_rate": 2.7119025227346807e-06, + "loss": 0.4684101343154907, + "step": 5281 + }, + { + "epoch": 1.5445240532241555, + "grad_norm": 1.5849030043466241, + "learning_rate": 2.7085929721132078e-06, + "loss": 0.48402637243270874, + "step": 5282 + }, + { + "epoch": 1.5448164936394209, + "grad_norm": 1.6449199299919448, + "learning_rate": 2.7052851258137936e-06, + "loss": 0.6122831106185913, + "step": 5283 + }, + { + "epoch": 1.5451089340546864, + "grad_norm": 1.6951661547391625, + "learning_rate": 2.701978984609629e-06, + "loss": 0.5731217861175537, + "step": 5284 + }, + { + "epoch": 1.5454013744699517, + "grad_norm": 1.869052563685483, + "learning_rate": 2.6986745492735044e-06, + "loss": 0.5610803961753845, + "step": 5285 + }, + { + "epoch": 1.545693814885217, + "grad_norm": 1.4190791359210344, + "learning_rate": 2.695371820577811e-06, + "loss": 0.46112626791000366, + "step": 5286 + }, + { + "epoch": 1.5459862553004826, + "grad_norm": 2.1150576387004247, + "learning_rate": 2.692070799294542e-06, + "loss": 0.5368741154670715, + "step": 5287 + }, + { + "epoch": 1.546278695715748, + 
"grad_norm": 1.905327182706658, + "learning_rate": 2.688771486195293e-06, + "loss": 0.5991438627243042, + "step": 5288 + }, + { + "epoch": 1.5465711361310133, + "grad_norm": 1.9084615434749013, + "learning_rate": 2.685473882051254e-06, + "loss": 0.5751149654388428, + "step": 5289 + }, + { + "epoch": 1.5468635765462788, + "grad_norm": 2.0751264575493247, + "learning_rate": 2.682177987633221e-06, + "loss": 0.6055437326431274, + "step": 5290 + }, + { + "epoch": 1.547156016961544, + "grad_norm": 1.8883429200709412, + "learning_rate": 2.6788838037115916e-06, + "loss": 0.6009221076965332, + "step": 5291 + }, + { + "epoch": 1.5474484573768095, + "grad_norm": 1.8170478309101001, + "learning_rate": 2.6755913310563585e-06, + "loss": 0.6071531772613525, + "step": 5292 + }, + { + "epoch": 1.547740897792075, + "grad_norm": 1.4851824864906211, + "learning_rate": 2.6723005704371164e-06, + "loss": 0.4102080464363098, + "step": 5293 + }, + { + "epoch": 1.5480333382073401, + "grad_norm": 1.861843061560023, + "learning_rate": 2.6690115226230663e-06, + "loss": 0.48021870851516724, + "step": 5294 + }, + { + "epoch": 1.5483257786226057, + "grad_norm": 1.916351154521063, + "learning_rate": 2.665724188382999e-06, + "loss": 0.4893236458301544, + "step": 5295 + }, + { + "epoch": 1.548618219037871, + "grad_norm": 1.611822755629755, + "learning_rate": 2.6624385684853095e-06, + "loss": 0.6365019083023071, + "step": 5296 + }, + { + "epoch": 1.5489106594531363, + "grad_norm": 1.8901541843584413, + "learning_rate": 2.659154663697995e-06, + "loss": 0.46510767936706543, + "step": 5297 + }, + { + "epoch": 1.5492030998684019, + "grad_norm": 1.4887188273793392, + "learning_rate": 2.655872474788641e-06, + "loss": 0.4355175495147705, + "step": 5298 + }, + { + "epoch": 1.5494955402836672, + "grad_norm": 1.3536753107928572, + "learning_rate": 2.6525920025244432e-06, + "loss": 0.5180836915969849, + "step": 5299 + }, + { + "epoch": 1.5497879806989325, + "grad_norm": 1.9072335806805663, + "learning_rate": 
2.6493132476721927e-06, + "loss": 0.5597968101501465, + "step": 5300 + }, + { + "epoch": 1.550080421114198, + "grad_norm": 1.7134796878533993, + "learning_rate": 2.646036210998276e-06, + "loss": 0.6581016778945923, + "step": 5301 + }, + { + "epoch": 1.5503728615294634, + "grad_norm": 1.8671635537156963, + "learning_rate": 2.642760893268684e-06, + "loss": 0.4875848889350891, + "step": 5302 + }, + { + "epoch": 1.5506653019447287, + "grad_norm": 1.571897962721608, + "learning_rate": 2.639487295248999e-06, + "loss": 0.4410843253135681, + "step": 5303 + }, + { + "epoch": 1.5509577423599943, + "grad_norm": 1.8113376757557438, + "learning_rate": 2.6362154177044076e-06, + "loss": 0.5829580426216125, + "step": 5304 + }, + { + "epoch": 1.5512501827752594, + "grad_norm": 1.6979805053981243, + "learning_rate": 2.6329452613996886e-06, + "loss": 0.6281459927558899, + "step": 5305 + }, + { + "epoch": 1.551542623190525, + "grad_norm": 1.6778942363253981, + "learning_rate": 2.629676827099222e-06, + "loss": 0.525640606880188, + "step": 5306 + }, + { + "epoch": 1.5518350636057903, + "grad_norm": 1.710219412838542, + "learning_rate": 2.626410115566985e-06, + "loss": 0.5219406485557556, + "step": 5307 + }, + { + "epoch": 1.5521275040210556, + "grad_norm": 1.7812622188686809, + "learning_rate": 2.623145127566555e-06, + "loss": 0.5120927691459656, + "step": 5308 + }, + { + "epoch": 1.5524199444363211, + "grad_norm": 1.856533490372594, + "learning_rate": 2.6198818638610967e-06, + "loss": 0.586410641670227, + "step": 5309 + }, + { + "epoch": 1.5527123848515865, + "grad_norm": 1.726189213717832, + "learning_rate": 2.6166203252133803e-06, + "loss": 0.5014485120773315, + "step": 5310 + }, + { + "epoch": 1.5530048252668518, + "grad_norm": 1.7251785105103856, + "learning_rate": 2.6133605123857707e-06, + "loss": 0.5087070465087891, + "step": 5311 + }, + { + "epoch": 1.5532972656821173, + "grad_norm": 1.9411711444593984, + "learning_rate": 2.610102426140231e-06, + "loss": 0.5829774737358093, + 
"step": 5312 + }, + { + "epoch": 1.5535897060973827, + "grad_norm": 1.9403338817582965, + "learning_rate": 2.6068460672383166e-06, + "loss": 0.5273870229721069, + "step": 5313 + }, + { + "epoch": 1.553882146512648, + "grad_norm": 1.6781304796241345, + "learning_rate": 2.603591436441183e-06, + "loss": 0.528778076171875, + "step": 5314 + }, + { + "epoch": 1.5541745869279135, + "grad_norm": 1.6477790459502455, + "learning_rate": 2.600338534509581e-06, + "loss": 0.4914259612560272, + "step": 5315 + }, + { + "epoch": 1.5544670273431789, + "grad_norm": 1.5838952242674544, + "learning_rate": 2.597087362203855e-06, + "loss": 0.48063480854034424, + "step": 5316 + }, + { + "epoch": 1.5547594677584442, + "grad_norm": 1.6948007690415343, + "learning_rate": 2.593837920283949e-06, + "loss": 0.4406088888645172, + "step": 5317 + }, + { + "epoch": 1.5550519081737098, + "grad_norm": 1.5839061375343884, + "learning_rate": 2.590590209509398e-06, + "loss": 0.5027159452438354, + "step": 5318 + }, + { + "epoch": 1.5553443485889749, + "grad_norm": 1.447462212774582, + "learning_rate": 2.5873442306393357e-06, + "loss": 0.3894188404083252, + "step": 5319 + }, + { + "epoch": 1.5556367890042404, + "grad_norm": 1.8834380096125083, + "learning_rate": 2.584099984432492e-06, + "loss": 0.5393104553222656, + "step": 5320 + }, + { + "epoch": 1.5559292294195057, + "grad_norm": 1.640256381642302, + "learning_rate": 2.580857471647186e-06, + "loss": 0.5701737999916077, + "step": 5321 + }, + { + "epoch": 1.556221669834771, + "grad_norm": 1.9050066043706444, + "learning_rate": 2.577616693041336e-06, + "loss": 0.6173145174980164, + "step": 5322 + }, + { + "epoch": 1.5565141102500366, + "grad_norm": 1.718666562714064, + "learning_rate": 2.5743776493724548e-06, + "loss": 0.534600555896759, + "step": 5323 + }, + { + "epoch": 1.556806550665302, + "grad_norm": 1.7258193752543447, + "learning_rate": 2.571140341397651e-06, + "loss": 0.5205268859863281, + "step": 5324 + }, + { + "epoch": 1.5570989910805673, + 
"grad_norm": 1.9160383524514086, + "learning_rate": 2.5679047698736224e-06, + "loss": 0.5631835460662842, + "step": 5325 + }, + { + "epoch": 1.5573914314958328, + "grad_norm": 1.786367865175988, + "learning_rate": 2.564670935556667e-06, + "loss": 0.5855015516281128, + "step": 5326 + }, + { + "epoch": 1.5576838719110981, + "grad_norm": 1.538967985462843, + "learning_rate": 2.5614388392026735e-06, + "loss": 0.5219928026199341, + "step": 5327 + }, + { + "epoch": 1.5579763123263635, + "grad_norm": 1.6118392863192783, + "learning_rate": 2.5582084815671225e-06, + "loss": 0.50178462266922, + "step": 5328 + }, + { + "epoch": 1.558268752741629, + "grad_norm": 1.65351304969076, + "learning_rate": 2.554979863405094e-06, + "loss": 0.643866777420044, + "step": 5329 + }, + { + "epoch": 1.5585611931568941, + "grad_norm": 1.6117676019433484, + "learning_rate": 2.5517529854712543e-06, + "loss": 0.4976714849472046, + "step": 5330 + }, + { + "epoch": 1.5588536335721597, + "grad_norm": 1.6012275122207043, + "learning_rate": 2.5485278485198716e-06, + "loss": 0.47352612018585205, + "step": 5331 + }, + { + "epoch": 1.5591460739874252, + "grad_norm": 1.5967917267320113, + "learning_rate": 2.5453044533047955e-06, + "loss": 0.6319230794906616, + "step": 5332 + }, + { + "epoch": 1.5594385144026903, + "grad_norm": 1.9005541524381997, + "learning_rate": 2.5420828005794786e-06, + "loss": 0.724555253982544, + "step": 5333 + }, + { + "epoch": 1.5597309548179559, + "grad_norm": 1.987695201205215, + "learning_rate": 2.5388628910969625e-06, + "loss": 0.6235928535461426, + "step": 5334 + }, + { + "epoch": 1.5600233952332212, + "grad_norm": 1.9501926966829706, + "learning_rate": 2.5356447256098805e-06, + "loss": 0.47880417108535767, + "step": 5335 + }, + { + "epoch": 1.5603158356484865, + "grad_norm": 1.451114547860928, + "learning_rate": 2.53242830487046e-06, + "loss": 0.3986828029155731, + "step": 5336 + }, + { + "epoch": 1.560608276063752, + "grad_norm": 1.747029246487311, + "learning_rate": 
2.529213629630519e-06, + "loss": 0.515389084815979, + "step": 5337 + }, + { + "epoch": 1.5609007164790174, + "grad_norm": 1.4773319281213657, + "learning_rate": 2.52600070064147e-06, + "loss": 0.611845076084137, + "step": 5338 + }, + { + "epoch": 1.5611931568942827, + "grad_norm": 1.4758258492307896, + "learning_rate": 2.522789518654314e-06, + "loss": 0.4417461156845093, + "step": 5339 + }, + { + "epoch": 1.5614855973095483, + "grad_norm": 1.819505142519117, + "learning_rate": 2.519580084419646e-06, + "loss": 0.5082979798316956, + "step": 5340 + }, + { + "epoch": 1.5617780377248136, + "grad_norm": 1.6547823991622836, + "learning_rate": 2.516372398687652e-06, + "loss": 0.4535973072052002, + "step": 5341 + }, + { + "epoch": 1.562070478140079, + "grad_norm": 1.5836674832459754, + "learning_rate": 2.513166462208111e-06, + "loss": 0.5528950095176697, + "step": 5342 + }, + { + "epoch": 1.5623629185553445, + "grad_norm": 1.9642626952112248, + "learning_rate": 2.5099622757303865e-06, + "loss": 0.6272662281990051, + "step": 5343 + }, + { + "epoch": 1.5626553589706096, + "grad_norm": 1.6065246572629583, + "learning_rate": 2.506759840003439e-06, + "loss": 0.602135181427002, + "step": 5344 + }, + { + "epoch": 1.5629477993858751, + "grad_norm": 1.6289588222907745, + "learning_rate": 2.5035591557758197e-06, + "loss": 0.6336733102798462, + "step": 5345 + }, + { + "epoch": 1.5632402398011405, + "grad_norm": 1.6487862192612195, + "learning_rate": 2.500360223795668e-06, + "loss": 0.5819063186645508, + "step": 5346 + }, + { + "epoch": 1.5635326802164058, + "grad_norm": 1.9625665043715836, + "learning_rate": 2.4971630448107166e-06, + "loss": 0.6384624242782593, + "step": 5347 + }, + { + "epoch": 1.5638251206316713, + "grad_norm": 1.7408709214756897, + "learning_rate": 2.493967619568285e-06, + "loss": 0.5495754480361938, + "step": 5348 + }, + { + "epoch": 1.5641175610469367, + "grad_norm": 1.7544921790911043, + "learning_rate": 2.490773948815284e-06, + "loss": 0.5661545395851135, + 
"step": 5349 + }, + { + "epoch": 1.564410001462202, + "grad_norm": 1.6122536544450556, + "learning_rate": 2.487582033298217e-06, + "loss": 0.47731083631515503, + "step": 5350 + }, + { + "epoch": 1.5647024418774675, + "grad_norm": 1.6660059461046859, + "learning_rate": 2.4843918737631724e-06, + "loss": 0.5081999897956848, + "step": 5351 + }, + { + "epoch": 1.5649948822927329, + "grad_norm": 1.7409567692793637, + "learning_rate": 2.481203470955832e-06, + "loss": 0.4803314208984375, + "step": 5352 + }, + { + "epoch": 1.5652873227079982, + "grad_norm": 1.5751543533365946, + "learning_rate": 2.4780168256214687e-06, + "loss": 0.5049692392349243, + "step": 5353 + }, + { + "epoch": 1.5655797631232637, + "grad_norm": 1.5980094392584046, + "learning_rate": 2.4748319385049346e-06, + "loss": 0.46404945850372314, + "step": 5354 + }, + { + "epoch": 1.565872203538529, + "grad_norm": 1.8809652221147528, + "learning_rate": 2.471648810350681e-06, + "loss": 0.426737904548645, + "step": 5355 + }, + { + "epoch": 1.5661646439537944, + "grad_norm": 1.8658447876398343, + "learning_rate": 2.4684674419027445e-06, + "loss": 0.511459231376648, + "step": 5356 + }, + { + "epoch": 1.56645708436906, + "grad_norm": 1.6030611377734088, + "learning_rate": 2.4652878339047516e-06, + "loss": 0.5199254155158997, + "step": 5357 + }, + { + "epoch": 1.566749524784325, + "grad_norm": 1.8647690278368902, + "learning_rate": 2.4621099870999156e-06, + "loss": 0.6220999360084534, + "step": 5358 + }, + { + "epoch": 1.5670419651995906, + "grad_norm": 1.6243824818203765, + "learning_rate": 2.4589339022310386e-06, + "loss": 0.598499059677124, + "step": 5359 + }, + { + "epoch": 1.567334405614856, + "grad_norm": 1.6070369897776633, + "learning_rate": 2.455759580040512e-06, + "loss": 0.4726351499557495, + "step": 5360 + }, + { + "epoch": 1.5676268460301213, + "grad_norm": 1.5276631939356082, + "learning_rate": 2.452587021270314e-06, + "loss": 0.4492379426956177, + "step": 5361 + }, + { + "epoch": 1.5679192864453868, + 
"grad_norm": 1.5322598639207448, + "learning_rate": 2.4494162266620105e-06, + "loss": 0.46546655893325806, + "step": 5362 + }, + { + "epoch": 1.5682117268606521, + "grad_norm": 1.5784589531224524, + "learning_rate": 2.446247196956756e-06, + "loss": 0.45048198103904724, + "step": 5363 + }, + { + "epoch": 1.5685041672759175, + "grad_norm": 1.7001549698958467, + "learning_rate": 2.4430799328952935e-06, + "loss": 0.543383002281189, + "step": 5364 + }, + { + "epoch": 1.568796607691183, + "grad_norm": 1.881054972907132, + "learning_rate": 2.4399144352179484e-06, + "loss": 0.560661256313324, + "step": 5365 + }, + { + "epoch": 1.5690890481064483, + "grad_norm": 1.7380225532335671, + "learning_rate": 2.4367507046646367e-06, + "loss": 0.4915887117385864, + "step": 5366 + }, + { + "epoch": 1.5693814885217137, + "grad_norm": 3.6756946542988396, + "learning_rate": 2.433588741974863e-06, + "loss": 0.576668918132782, + "step": 5367 + }, + { + "epoch": 1.5696739289369792, + "grad_norm": 1.9696979271734443, + "learning_rate": 2.4304285478877134e-06, + "loss": 0.615422248840332, + "step": 5368 + }, + { + "epoch": 1.5699663693522443, + "grad_norm": 1.7262412669866045, + "learning_rate": 2.4272701231418706e-06, + "loss": 0.505649209022522, + "step": 5369 + }, + { + "epoch": 1.5702588097675099, + "grad_norm": 1.6721925296757776, + "learning_rate": 2.424113468475593e-06, + "loss": 0.4803265929222107, + "step": 5370 + }, + { + "epoch": 1.5705512501827754, + "grad_norm": 1.5546849518292136, + "learning_rate": 2.4209585846267293e-06, + "loss": 0.43251073360443115, + "step": 5371 + }, + { + "epoch": 1.5708436905980405, + "grad_norm": 1.517432850414526, + "learning_rate": 2.417805472332716e-06, + "loss": 0.6021081209182739, + "step": 5372 + }, + { + "epoch": 1.571136131013306, + "grad_norm": 1.5438721648404399, + "learning_rate": 2.414654132330575e-06, + "loss": 0.5236715078353882, + "step": 5373 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 1.7272971424194805, + "learning_rate": 
2.4115045653569092e-06, + "loss": 0.45632290840148926, + "step": 5374 + }, + { + "epoch": 1.5717210118438367, + "grad_norm": 1.51681371819029, + "learning_rate": 2.408356772147912e-06, + "loss": 0.5745086669921875, + "step": 5375 + }, + { + "epoch": 1.5720134522591023, + "grad_norm": 1.7235832219181546, + "learning_rate": 2.405210753439361e-06, + "loss": 0.6032901406288147, + "step": 5376 + }, + { + "epoch": 1.5723058926743676, + "grad_norm": 1.9887425059975659, + "learning_rate": 2.40206650996662e-06, + "loss": 0.579899787902832, + "step": 5377 + }, + { + "epoch": 1.572598333089633, + "grad_norm": 1.84593228973349, + "learning_rate": 2.3989240424646355e-06, + "loss": 0.5920897722244263, + "step": 5378 + }, + { + "epoch": 1.5728907735048985, + "grad_norm": 1.6814027292095717, + "learning_rate": 2.395783351667941e-06, + "loss": 0.5080469846725464, + "step": 5379 + }, + { + "epoch": 1.5731832139201638, + "grad_norm": 1.6852885660534134, + "learning_rate": 2.392644438310654e-06, + "loss": 0.6438730955123901, + "step": 5380 + }, + { + "epoch": 1.5734756543354291, + "grad_norm": 1.5835392817230642, + "learning_rate": 2.389507303126475e-06, + "loss": 0.6496621370315552, + "step": 5381 + }, + { + "epoch": 1.5737680947506947, + "grad_norm": 2.056471050614057, + "learning_rate": 2.3863719468486925e-06, + "loss": 0.5780459642410278, + "step": 5382 + }, + { + "epoch": 1.5740605351659598, + "grad_norm": 1.6854861118133662, + "learning_rate": 2.3832383702101747e-06, + "loss": 0.47817176580429077, + "step": 5383 + }, + { + "epoch": 1.5743529755812253, + "grad_norm": 1.8294128359408837, + "learning_rate": 2.3801065739433816e-06, + "loss": 0.565629243850708, + "step": 5384 + }, + { + "epoch": 1.5746454159964907, + "grad_norm": 1.6612699899563574, + "learning_rate": 2.376976558780343e-06, + "loss": 0.6291453838348389, + "step": 5385 + }, + { + "epoch": 1.574937856411756, + "grad_norm": 1.538236610732314, + "learning_rate": 2.3738483254526856e-06, + "loss": 0.5309170484542847, + 
"step": 5386 + }, + { + "epoch": 1.5752302968270215, + "grad_norm": 1.5901478294831086, + "learning_rate": 2.370721874691614e-06, + "loss": 0.36860692501068115, + "step": 5387 + }, + { + "epoch": 1.5755227372422869, + "grad_norm": 1.4970687777761233, + "learning_rate": 2.3675972072279172e-06, + "loss": 0.4871997833251953, + "step": 5388 + }, + { + "epoch": 1.5758151776575522, + "grad_norm": 1.7243858787556505, + "learning_rate": 2.3644743237919674e-06, + "loss": 0.5318939685821533, + "step": 5389 + }, + { + "epoch": 1.5761076180728177, + "grad_norm": 1.6509311118620078, + "learning_rate": 2.3613532251137205e-06, + "loss": 0.5851289629936218, + "step": 5390 + }, + { + "epoch": 1.576400058488083, + "grad_norm": 1.7554122423009038, + "learning_rate": 2.358233911922713e-06, + "loss": 0.5535321235656738, + "step": 5391 + }, + { + "epoch": 1.5766924989033484, + "grad_norm": 1.6614076147074466, + "learning_rate": 2.3551163849480664e-06, + "loss": 0.5443980693817139, + "step": 5392 + }, + { + "epoch": 1.576984939318614, + "grad_norm": 1.7236213464789372, + "learning_rate": 2.352000644918483e-06, + "loss": 0.6381241083145142, + "step": 5393 + }, + { + "epoch": 1.5772773797338793, + "grad_norm": 1.7284545309348427, + "learning_rate": 2.348886692562248e-06, + "loss": 0.5710772275924683, + "step": 5394 + }, + { + "epoch": 1.5775698201491446, + "grad_norm": 1.5430684665624785, + "learning_rate": 2.3457745286072307e-06, + "loss": 0.5507428050041199, + "step": 5395 + }, + { + "epoch": 1.5778622605644101, + "grad_norm": 1.4206197407713899, + "learning_rate": 2.342664153780878e-06, + "loss": 0.4475744366645813, + "step": 5396 + }, + { + "epoch": 1.5781547009796753, + "grad_norm": 1.636583588423456, + "learning_rate": 2.339555568810221e-06, + "loss": 0.5237560868263245, + "step": 5397 + }, + { + "epoch": 1.5784471413949408, + "grad_norm": 1.8224385271688819, + "learning_rate": 2.3364487744218735e-06, + "loss": 0.513353705406189, + "step": 5398 + }, + { + "epoch": 1.5787395818102061, 
+ "grad_norm": 1.7286392562782233, + "learning_rate": 2.3333437713420305e-06, + "loss": 0.5986731052398682, + "step": 5399 + }, + { + "epoch": 1.5790320222254715, + "grad_norm": 1.5907081834202914, + "learning_rate": 2.330240560296466e-06, + "loss": 0.5834506750106812, + "step": 5400 + }, + { + "epoch": 1.579324462640737, + "grad_norm": 1.4316449017872799, + "learning_rate": 2.3271391420105384e-06, + "loss": 0.4756021499633789, + "step": 5401 + }, + { + "epoch": 1.5796169030560023, + "grad_norm": 1.828748410964233, + "learning_rate": 2.3240395172091847e-06, + "loss": 0.5524263978004456, + "step": 5402 + }, + { + "epoch": 1.5799093434712677, + "grad_norm": 1.7797701447484084, + "learning_rate": 2.320941686616922e-06, + "loss": 0.5689926743507385, + "step": 5403 + }, + { + "epoch": 1.5802017838865332, + "grad_norm": 2.079791124123793, + "learning_rate": 2.317845650957852e-06, + "loss": 0.5737600326538086, + "step": 5404 + }, + { + "epoch": 1.5804942243017985, + "grad_norm": 2.1591480990218406, + "learning_rate": 2.314751410955652e-06, + "loss": 0.585626482963562, + "step": 5405 + }, + { + "epoch": 1.5807866647170639, + "grad_norm": 1.3475179143489473, + "learning_rate": 2.3116589673335833e-06, + "loss": 0.4410518407821655, + "step": 5406 + }, + { + "epoch": 1.5810791051323294, + "grad_norm": 1.4002471500541231, + "learning_rate": 2.308568320814487e-06, + "loss": 0.49071764945983887, + "step": 5407 + }, + { + "epoch": 1.5813715455475945, + "grad_norm": 1.7384943405251394, + "learning_rate": 2.3054794721207796e-06, + "loss": 0.5332186818122864, + "step": 5408 + }, + { + "epoch": 1.58166398596286, + "grad_norm": 1.672632129609112, + "learning_rate": 2.3023924219744607e-06, + "loss": 0.4655637741088867, + "step": 5409 + }, + { + "epoch": 1.5819564263781256, + "grad_norm": 1.8700821530052487, + "learning_rate": 2.2993071710971115e-06, + "loss": 0.4226027727127075, + "step": 5410 + }, + { + "epoch": 1.5822488667933907, + "grad_norm": 1.662889108823369, + "learning_rate": 
2.2962237202098903e-06, + "loss": 0.5582948923110962, + "step": 5411 + }, + { + "epoch": 1.5825413072086563, + "grad_norm": 1.9177043486104604, + "learning_rate": 2.293142070033535e-06, + "loss": 0.6695314645767212, + "step": 5412 + }, + { + "epoch": 1.5828337476239216, + "grad_norm": 1.3346239854361734, + "learning_rate": 2.2900622212883617e-06, + "loss": 0.39315858483314514, + "step": 5413 + }, + { + "epoch": 1.583126188039187, + "grad_norm": 1.6781692583647863, + "learning_rate": 2.2869841746942666e-06, + "loss": 0.5034759044647217, + "step": 5414 + }, + { + "epoch": 1.5834186284544525, + "grad_norm": 1.9091862181504, + "learning_rate": 2.2839079309707256e-06, + "loss": 0.6739548444747925, + "step": 5415 + }, + { + "epoch": 1.5837110688697178, + "grad_norm": 1.700292089346711, + "learning_rate": 2.2808334908367914e-06, + "loss": 0.4091438949108124, + "step": 5416 + }, + { + "epoch": 1.5840035092849831, + "grad_norm": 1.9132208987373394, + "learning_rate": 2.277760855011094e-06, + "loss": 0.5543409585952759, + "step": 5417 + }, + { + "epoch": 1.5842959497002487, + "grad_norm": 1.5448108643055853, + "learning_rate": 2.2746900242118487e-06, + "loss": 0.44680702686309814, + "step": 5418 + }, + { + "epoch": 1.584588390115514, + "grad_norm": 1.812422444695138, + "learning_rate": 2.271620999156837e-06, + "loss": 0.604156494140625, + "step": 5419 + }, + { + "epoch": 1.5848808305307793, + "grad_norm": 1.7746704953171426, + "learning_rate": 2.268553780563427e-06, + "loss": 0.6055774688720703, + "step": 5420 + }, + { + "epoch": 1.5851732709460449, + "grad_norm": 1.6413153541100303, + "learning_rate": 2.265488369148563e-06, + "loss": 0.5826502442359924, + "step": 5421 + }, + { + "epoch": 1.58546571136131, + "grad_norm": 1.6438604610732335, + "learning_rate": 2.2624247656287658e-06, + "loss": 0.61782306432724, + "step": 5422 + }, + { + "epoch": 1.5857581517765755, + "grad_norm": 1.6412325546038886, + "learning_rate": 2.2593629707201348e-06, + "loss": 0.5561526417732239, + 
"step": 5423 + }, + { + "epoch": 1.5860505921918409, + "grad_norm": 1.934339107757701, + "learning_rate": 2.2563029851383447e-06, + "loss": 0.6122138500213623, + "step": 5424 + }, + { + "epoch": 1.5863430326071062, + "grad_norm": 1.7721974769204, + "learning_rate": 2.2532448095986504e-06, + "loss": 0.5694067478179932, + "step": 5425 + }, + { + "epoch": 1.5866354730223717, + "grad_norm": 2.0424311158796145, + "learning_rate": 2.2501884448158804e-06, + "loss": 0.5243874788284302, + "step": 5426 + }, + { + "epoch": 1.586927913437637, + "grad_norm": 1.8166715080001115, + "learning_rate": 2.2471338915044414e-06, + "loss": 0.5144485831260681, + "step": 5427 + }, + { + "epoch": 1.5872203538529024, + "grad_norm": 1.853424108367526, + "learning_rate": 2.244081150378318e-06, + "loss": 0.5013881325721741, + "step": 5428 + }, + { + "epoch": 1.587512794268168, + "grad_norm": 1.7554305935150418, + "learning_rate": 2.2410302221510704e-06, + "loss": 0.45199382305145264, + "step": 5429 + }, + { + "epoch": 1.5878052346834333, + "grad_norm": 1.7321007114143003, + "learning_rate": 2.2379811075358315e-06, + "loss": 0.4699060022830963, + "step": 5430 + }, + { + "epoch": 1.5880976750986986, + "grad_norm": 1.6542253790144112, + "learning_rate": 2.234933807245314e-06, + "loss": 0.6530928611755371, + "step": 5431 + }, + { + "epoch": 1.5883901155139641, + "grad_norm": 2.1734435533671337, + "learning_rate": 2.2318883219918075e-06, + "loss": 0.653563380241394, + "step": 5432 + }, + { + "epoch": 1.5886825559292295, + "grad_norm": 1.6977334736027891, + "learning_rate": 2.2288446524871743e-06, + "loss": 0.5283595323562622, + "step": 5433 + }, + { + "epoch": 1.5889749963444948, + "grad_norm": 1.8947978942641126, + "learning_rate": 2.2258027994428543e-06, + "loss": 0.4382442831993103, + "step": 5434 + }, + { + "epoch": 1.5892674367597603, + "grad_norm": 1.5530659953902877, + "learning_rate": 2.2227627635698624e-06, + "loss": 0.427448570728302, + "step": 5435 + }, + { + "epoch": 1.5895598771750254, 
+ "grad_norm": 1.9154573086486193, + "learning_rate": 2.2197245455787875e-06, + "loss": 0.5794345140457153, + "step": 5436 + }, + { + "epoch": 1.589852317590291, + "grad_norm": 1.7112908574959096, + "learning_rate": 2.2166881461797953e-06, + "loss": 0.4996277987957001, + "step": 5437 + }, + { + "epoch": 1.5901447580055563, + "grad_norm": 1.6191576283665394, + "learning_rate": 2.213653566082625e-06, + "loss": 0.580248236656189, + "step": 5438 + }, + { + "epoch": 1.5904371984208217, + "grad_norm": 1.7775881602556973, + "learning_rate": 2.210620805996594e-06, + "loss": 0.5173758864402771, + "step": 5439 + }, + { + "epoch": 1.5907296388360872, + "grad_norm": 1.9899233221127093, + "learning_rate": 2.2075898666305908e-06, + "loss": 0.5336873531341553, + "step": 5440 + }, + { + "epoch": 1.5910220792513525, + "grad_norm": 1.6076505919691177, + "learning_rate": 2.204560748693074e-06, + "loss": 0.44921910762786865, + "step": 5441 + }, + { + "epoch": 1.5913145196666179, + "grad_norm": 1.6059480320951056, + "learning_rate": 2.201533452892086e-06, + "loss": 0.46475526690483093, + "step": 5442 + }, + { + "epoch": 1.5916069600818834, + "grad_norm": 1.9029311381102771, + "learning_rate": 2.1985079799352383e-06, + "loss": 0.6213991045951843, + "step": 5443 + }, + { + "epoch": 1.5918994004971487, + "grad_norm": 1.7215123021526133, + "learning_rate": 2.1954843305297138e-06, + "loss": 0.5271334648132324, + "step": 5444 + }, + { + "epoch": 1.592191840912414, + "grad_norm": 2.0224569757299333, + "learning_rate": 2.192462505382277e-06, + "loss": 0.6957610249519348, + "step": 5445 + }, + { + "epoch": 1.5924842813276796, + "grad_norm": 1.474394106198892, + "learning_rate": 2.1894425051992587e-06, + "loss": 0.4935681223869324, + "step": 5446 + }, + { + "epoch": 1.5927767217429447, + "grad_norm": 1.8779887346615283, + "learning_rate": 2.1864243306865663e-06, + "loss": 0.7389976978302002, + "step": 5447 + }, + { + "epoch": 1.5930691621582103, + "grad_norm": 1.6663547791548505, + 
"learning_rate": 2.183407982549679e-06, + "loss": 0.4711039662361145, + "step": 5448 + }, + { + "epoch": 1.5933616025734758, + "grad_norm": 1.7966119865723598, + "learning_rate": 2.180393461493654e-06, + "loss": 0.5640024542808533, + "step": 5449 + }, + { + "epoch": 1.593654042988741, + "grad_norm": 1.8836976245237465, + "learning_rate": 2.1773807682231095e-06, + "loss": 0.5471343994140625, + "step": 5450 + }, + { + "epoch": 1.5939464834040065, + "grad_norm": 1.948314000978572, + "learning_rate": 2.1743699034422483e-06, + "loss": 0.4971361756324768, + "step": 5451 + }, + { + "epoch": 1.5942389238192718, + "grad_norm": 1.816359724345797, + "learning_rate": 2.1713608678548414e-06, + "loss": 0.6338681578636169, + "step": 5452 + }, + { + "epoch": 1.5945313642345371, + "grad_norm": 1.5944974938870278, + "learning_rate": 2.168353662164233e-06, + "loss": 0.5218038558959961, + "step": 5453 + }, + { + "epoch": 1.5948238046498027, + "grad_norm": 1.4835669226996004, + "learning_rate": 2.165348287073339e-06, + "loss": 0.44414108991622925, + "step": 5454 + }, + { + "epoch": 1.595116245065068, + "grad_norm": 1.741912524884312, + "learning_rate": 2.162344743284647e-06, + "loss": 0.5994665622711182, + "step": 5455 + }, + { + "epoch": 1.5954086854803333, + "grad_norm": 1.8002543813503216, + "learning_rate": 2.159343031500217e-06, + "loss": 0.6745023727416992, + "step": 5456 + }, + { + "epoch": 1.5957011258955989, + "grad_norm": 1.8583415901166598, + "learning_rate": 2.1563431524216825e-06, + "loss": 0.4678364396095276, + "step": 5457 + }, + { + "epoch": 1.5959935663108642, + "grad_norm": 1.5733048792098263, + "learning_rate": 2.1533451067502464e-06, + "loss": 0.5792031288146973, + "step": 5458 + }, + { + "epoch": 1.5962860067261295, + "grad_norm": 1.575360514250564, + "learning_rate": 2.1503488951866822e-06, + "loss": 0.48152512311935425, + "step": 5459 + }, + { + "epoch": 1.596578447141395, + "grad_norm": 1.6753593421486697, + "learning_rate": 2.147354518431339e-06, + "loss": 
0.4407780170440674, + "step": 5460 + }, + { + "epoch": 1.5968708875566602, + "grad_norm": 1.5845476508430212, + "learning_rate": 2.1443619771841308e-06, + "loss": 0.41062241792678833, + "step": 5461 + }, + { + "epoch": 1.5971633279719257, + "grad_norm": 1.6329985009235597, + "learning_rate": 2.1413712721445478e-06, + "loss": 0.4564778208732605, + "step": 5462 + }, + { + "epoch": 1.597455768387191, + "grad_norm": 1.7819738842734478, + "learning_rate": 2.1383824040116474e-06, + "loss": 0.4347888231277466, + "step": 5463 + }, + { + "epoch": 1.5977482088024564, + "grad_norm": 1.7547086253653914, + "learning_rate": 2.1353953734840615e-06, + "loss": 0.574216902256012, + "step": 5464 + }, + { + "epoch": 1.598040649217722, + "grad_norm": 1.5449681232026575, + "learning_rate": 2.1324101812599884e-06, + "loss": 0.46540650725364685, + "step": 5465 + }, + { + "epoch": 1.5983330896329873, + "grad_norm": 1.7330971380509632, + "learning_rate": 2.129426828037201e-06, + "loss": 0.5446870923042297, + "step": 5466 + }, + { + "epoch": 1.5986255300482526, + "grad_norm": 1.5387720739202952, + "learning_rate": 2.126445314513038e-06, + "loss": 0.5442406535148621, + "step": 5467 + }, + { + "epoch": 1.5989179704635181, + "grad_norm": 1.5552673745283687, + "learning_rate": 2.1234656413844114e-06, + "loss": 0.48960334062576294, + "step": 5468 + }, + { + "epoch": 1.5992104108787835, + "grad_norm": 1.6554781479614895, + "learning_rate": 2.1204878093477998e-06, + "loss": 0.5053935647010803, + "step": 5469 + }, + { + "epoch": 1.5995028512940488, + "grad_norm": 1.9853825289751812, + "learning_rate": 2.117511819099256e-06, + "loss": 0.5984711647033691, + "step": 5470 + }, + { + "epoch": 1.5997952917093143, + "grad_norm": 1.7887732493049897, + "learning_rate": 2.1145376713344e-06, + "loss": 0.6060935258865356, + "step": 5471 + }, + { + "epoch": 1.6000877321245797, + "grad_norm": 1.7731884284372257, + "learning_rate": 2.111565366748416e-06, + "loss": 0.5640311241149902, + "step": 5472 + }, + { + 
"epoch": 1.600380172539845, + "grad_norm": 1.4780823569090165, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.5127131342887878, + "step": 5473 + }, + { + "epoch": 1.6006726129551105, + "grad_norm": 1.7137118890776333, + "learning_rate": 2.1056262898916747e-06, + "loss": 0.5630159378051758, + "step": 5474 + }, + { + "epoch": 1.6009650533703756, + "grad_norm": 1.6419339983794916, + "learning_rate": 2.1026595190091403e-06, + "loss": 0.4511195421218872, + "step": 5475 + }, + { + "epoch": 1.6012574937856412, + "grad_norm": 1.5933389134682139, + "learning_rate": 2.099694594081927e-06, + "loss": 0.47073638439178467, + "step": 5476 + }, + { + "epoch": 1.6015499342009065, + "grad_norm": 1.7678159005173808, + "learning_rate": 2.0967315158030675e-06, + "loss": 0.47757452726364136, + "step": 5477 + }, + { + "epoch": 1.6018423746161718, + "grad_norm": 1.612539233178663, + "learning_rate": 2.093770284865164e-06, + "loss": 0.4703200161457062, + "step": 5478 + }, + { + "epoch": 1.6021348150314374, + "grad_norm": 1.7112390228319339, + "learning_rate": 2.090810901960385e-06, + "loss": 0.47457796335220337, + "step": 5479 + }, + { + "epoch": 1.6024272554467027, + "grad_norm": 1.6069409002673796, + "learning_rate": 2.087853367780469e-06, + "loss": 0.4907105267047882, + "step": 5480 + }, + { + "epoch": 1.602719695861968, + "grad_norm": 1.8859078577608002, + "learning_rate": 2.0848976830167224e-06, + "loss": 0.5329782962799072, + "step": 5481 + }, + { + "epoch": 1.6030121362772336, + "grad_norm": 1.8407304692969428, + "learning_rate": 2.0819438483600197e-06, + "loss": 0.45858579874038696, + "step": 5482 + }, + { + "epoch": 1.603304576692499, + "grad_norm": 1.7103287599993058, + "learning_rate": 2.0789918645007977e-06, + "loss": 0.47545814514160156, + "step": 5483 + }, + { + "epoch": 1.6035970171077643, + "grad_norm": 1.7521375813446352, + "learning_rate": 2.076041732129066e-06, + "loss": 0.5482660531997681, + "step": 5484 + }, + { + "epoch": 1.6038894575230298, + "grad_norm": 
1.650951498750666, + "learning_rate": 2.0730934519344025e-06, + "loss": 0.5252633094787598, + "step": 5485 + }, + { + "epoch": 1.604181897938295, + "grad_norm": 2.7727108215969882, + "learning_rate": 2.0701470246059472e-06, + "loss": 0.5400367379188538, + "step": 5486 + }, + { + "epoch": 1.6044743383535605, + "grad_norm": 1.5423948281806983, + "learning_rate": 2.0672024508324107e-06, + "loss": 0.4788953363895416, + "step": 5487 + }, + { + "epoch": 1.604766778768826, + "grad_norm": 1.6092306606930025, + "learning_rate": 2.0642597313020685e-06, + "loss": 0.5430850982666016, + "step": 5488 + }, + { + "epoch": 1.6050592191840911, + "grad_norm": 1.8683302543522238, + "learning_rate": 2.061318866702765e-06, + "loss": 0.5833520293235779, + "step": 5489 + }, + { + "epoch": 1.6053516595993567, + "grad_norm": 1.7369107165445012, + "learning_rate": 2.058379857721908e-06, + "loss": 0.5854958295822144, + "step": 5490 + }, + { + "epoch": 1.605644100014622, + "grad_norm": 1.6603772170749127, + "learning_rate": 2.0554427050464742e-06, + "loss": 0.5577352643013, + "step": 5491 + }, + { + "epoch": 1.6059365404298873, + "grad_norm": 1.6757677840410201, + "learning_rate": 2.052507409363004e-06, + "loss": 0.5328816175460815, + "step": 5492 + }, + { + "epoch": 1.6062289808451529, + "grad_norm": 1.7643397031335737, + "learning_rate": 2.0495739713576046e-06, + "loss": 0.5606744289398193, + "step": 5493 + }, + { + "epoch": 1.6065214212604182, + "grad_norm": 1.7836115172074085, + "learning_rate": 2.0466423917159526e-06, + "loss": 0.541358470916748, + "step": 5494 + }, + { + "epoch": 1.6068138616756835, + "grad_norm": 2.1455011977132714, + "learning_rate": 2.0437126711232826e-06, + "loss": 0.6578946709632874, + "step": 5495 + }, + { + "epoch": 1.607106302090949, + "grad_norm": 1.9512378226148355, + "learning_rate": 2.0407848102644002e-06, + "loss": 0.5967978239059448, + "step": 5496 + }, + { + "epoch": 1.6073987425062144, + "grad_norm": 1.623105883994405, + "learning_rate": 
2.037858809823675e-06, + "loss": 0.46947693824768066, + "step": 5497 + }, + { + "epoch": 1.6076911829214797, + "grad_norm": 1.5763151196056784, + "learning_rate": 2.0349346704850436e-06, + "loss": 0.5014760494232178, + "step": 5498 + }, + { + "epoch": 1.6079836233367453, + "grad_norm": 1.5417734514532708, + "learning_rate": 2.0320123929320033e-06, + "loss": 0.4399675726890564, + "step": 5499 + }, + { + "epoch": 1.6082760637520104, + "grad_norm": 1.8719036359624468, + "learning_rate": 2.0290919778476214e-06, + "loss": 0.4729107618331909, + "step": 5500 + }, + { + "epoch": 1.608568504167276, + "grad_norm": 1.5894079730285777, + "learning_rate": 2.0261734259145248e-06, + "loss": 0.5669134259223938, + "step": 5501 + }, + { + "epoch": 1.6088609445825413, + "grad_norm": 1.554035864612711, + "learning_rate": 2.0232567378149082e-06, + "loss": 0.4200817942619324, + "step": 5502 + }, + { + "epoch": 1.6091533849978066, + "grad_norm": 1.8154865090092227, + "learning_rate": 2.0203419142305303e-06, + "loss": 0.6057849526405334, + "step": 5503 + }, + { + "epoch": 1.6094458254130721, + "grad_norm": 1.7156552575659618, + "learning_rate": 2.017428955842713e-06, + "loss": 0.5644170045852661, + "step": 5504 + }, + { + "epoch": 1.6097382658283375, + "grad_norm": 1.9102243104698693, + "learning_rate": 2.014517863332345e-06, + "loss": 0.6368730068206787, + "step": 5505 + }, + { + "epoch": 1.6100307062436028, + "grad_norm": 1.5712918255487374, + "learning_rate": 2.0116086373798704e-06, + "loss": 0.4829355478286743, + "step": 5506 + }, + { + "epoch": 1.6103231466588683, + "grad_norm": 1.642541904242283, + "learning_rate": 2.0087012786653072e-06, + "loss": 0.5604796409606934, + "step": 5507 + }, + { + "epoch": 1.6106155870741337, + "grad_norm": 1.8591393596163848, + "learning_rate": 2.005795787868232e-06, + "loss": 0.5594274997711182, + "step": 5508 + }, + { + "epoch": 1.610908027489399, + "grad_norm": 1.607362999733334, + "learning_rate": 2.0028921656677857e-06, + "loss": 
0.5553449988365173, + "step": 5509 + }, + { + "epoch": 1.6112004679046645, + "grad_norm": 1.7968941470299316, + "learning_rate": 1.999990412742673e-06, + "loss": 0.5056631565093994, + "step": 5510 + }, + { + "epoch": 1.6114929083199299, + "grad_norm": 1.5654499452702673, + "learning_rate": 1.9970905297711606e-06, + "loss": 0.432037353515625, + "step": 5511 + }, + { + "epoch": 1.6117853487351952, + "grad_norm": 1.6991047972494284, + "learning_rate": 1.9941925174310773e-06, + "loss": 0.5152974128723145, + "step": 5512 + }, + { + "epoch": 1.6120777891504607, + "grad_norm": 2.1530610582321015, + "learning_rate": 1.9912963763998185e-06, + "loss": 0.59015291929245, + "step": 5513 + }, + { + "epoch": 1.6123702295657258, + "grad_norm": 2.024675130869183, + "learning_rate": 1.9884021073543368e-06, + "loss": 0.564031720161438, + "step": 5514 + }, + { + "epoch": 1.6126626699809914, + "grad_norm": 1.560415326953441, + "learning_rate": 1.985509710971152e-06, + "loss": 0.5930228233337402, + "step": 5515 + }, + { + "epoch": 1.6129551103962567, + "grad_norm": 1.6853261691368011, + "learning_rate": 1.9826191879263446e-06, + "loss": 0.540229082107544, + "step": 5516 + }, + { + "epoch": 1.613247550811522, + "grad_norm": 1.3918983021829734, + "learning_rate": 1.9797305388955547e-06, + "loss": 0.5473166704177856, + "step": 5517 + }, + { + "epoch": 1.6135399912267876, + "grad_norm": 1.888180196247059, + "learning_rate": 1.976843764553986e-06, + "loss": 0.5814535617828369, + "step": 5518 + }, + { + "epoch": 1.613832431642053, + "grad_norm": 1.556089571926902, + "learning_rate": 1.973958865576403e-06, + "loss": 0.4892576038837433, + "step": 5519 + }, + { + "epoch": 1.6141248720573182, + "grad_norm": 2.0461002845877454, + "learning_rate": 1.97107584263714e-06, + "loss": 0.5416869521141052, + "step": 5520 + }, + { + "epoch": 1.6144173124725838, + "grad_norm": 1.9685372161480885, + "learning_rate": 1.9681946964100807e-06, + "loss": 0.5956105589866638, + "step": 5521 + }, + { + "epoch": 
1.6147097528878491, + "grad_norm": 1.7885384988170376, + "learning_rate": 1.9653154275686782e-06, + "loss": 0.5722565650939941, + "step": 5522 + }, + { + "epoch": 1.6150021933031145, + "grad_norm": 1.7917880328936266, + "learning_rate": 1.962438036785942e-06, + "loss": 0.3984888195991516, + "step": 5523 + }, + { + "epoch": 1.61529463371838, + "grad_norm": 1.6334267618118792, + "learning_rate": 1.959562524734445e-06, + "loss": 0.601211428642273, + "step": 5524 + }, + { + "epoch": 1.615587074133645, + "grad_norm": 1.8080265301577823, + "learning_rate": 1.9566888920863247e-06, + "loss": 0.4803691506385803, + "step": 5525 + }, + { + "epoch": 1.6158795145489107, + "grad_norm": 1.7017865626810558, + "learning_rate": 1.9538171395132688e-06, + "loss": 0.6914256811141968, + "step": 5526 + }, + { + "epoch": 1.6161719549641762, + "grad_norm": 1.6511977253132817, + "learning_rate": 1.950947267686536e-06, + "loss": 0.49076569080352783, + "step": 5527 + }, + { + "epoch": 1.6164643953794413, + "grad_norm": 1.747888743558531, + "learning_rate": 1.9480792772769384e-06, + "loss": 0.45781368017196655, + "step": 5528 + }, + { + "epoch": 1.6167568357947069, + "grad_norm": 1.6564602036044371, + "learning_rate": 1.9452131689548547e-06, + "loss": 0.5257985591888428, + "step": 5529 + }, + { + "epoch": 1.6170492762099722, + "grad_norm": 1.6440311298220422, + "learning_rate": 1.9423489433902186e-06, + "loss": 0.4170517921447754, + "step": 5530 + }, + { + "epoch": 1.6173417166252375, + "grad_norm": 2.0566981290938386, + "learning_rate": 1.939486601252525e-06, + "loss": 0.5612319707870483, + "step": 5531 + }, + { + "epoch": 1.617634157040503, + "grad_norm": 1.6971941543602107, + "learning_rate": 1.93662614321083e-06, + "loss": 0.4543185234069824, + "step": 5532 + }, + { + "epoch": 1.6179265974557684, + "grad_norm": 1.8366998724664239, + "learning_rate": 1.933767569933749e-06, + "loss": 0.5506256222724915, + "step": 5533 + }, + { + "epoch": 1.6182190378710337, + "grad_norm": 1.9975995427991473, 
+ "learning_rate": 1.930910882089454e-06, + "loss": 0.5411139130592346, + "step": 5534 + }, + { + "epoch": 1.6185114782862993, + "grad_norm": 1.5549343206880035, + "learning_rate": 1.9280560803456794e-06, + "loss": 0.5332196950912476, + "step": 5535 + }, + { + "epoch": 1.6188039187015646, + "grad_norm": 1.6015028384804206, + "learning_rate": 1.92520316536972e-06, + "loss": 0.5159808993339539, + "step": 5536 + }, + { + "epoch": 1.61909635911683, + "grad_norm": 1.6182793083642761, + "learning_rate": 1.9223521378284227e-06, + "loss": 0.5483378767967224, + "step": 5537 + }, + { + "epoch": 1.6193887995320955, + "grad_norm": 1.748287896704832, + "learning_rate": 1.9195029983882008e-06, + "loss": 0.6451961994171143, + "step": 5538 + }, + { + "epoch": 1.6196812399473606, + "grad_norm": 1.799346834276764, + "learning_rate": 1.9166557477150227e-06, + "loss": 0.5904289484024048, + "step": 5539 + }, + { + "epoch": 1.6199736803626261, + "grad_norm": 1.7892510148499787, + "learning_rate": 1.9138103864744164e-06, + "loss": 0.6688845753669739, + "step": 5540 + }, + { + "epoch": 1.6202661207778914, + "grad_norm": 2.050448659373089, + "learning_rate": 1.910966915331467e-06, + "loss": 0.6299211382865906, + "step": 5541 + }, + { + "epoch": 1.6205585611931568, + "grad_norm": 1.7426964698819027, + "learning_rate": 1.908125334950819e-06, + "loss": 0.5502864122390747, + "step": 5542 + }, + { + "epoch": 1.6208510016084223, + "grad_norm": 1.7235241145346871, + "learning_rate": 1.905285645996674e-06, + "loss": 0.5332865118980408, + "step": 5543 + }, + { + "epoch": 1.6211434420236877, + "grad_norm": 1.6383658575716402, + "learning_rate": 1.9024478491327936e-06, + "loss": 0.43371304869651794, + "step": 5544 + }, + { + "epoch": 1.621435882438953, + "grad_norm": 1.6217362880484214, + "learning_rate": 1.8996119450224936e-06, + "loss": 0.6992501616477966, + "step": 5545 + }, + { + "epoch": 1.6217283228542185, + "grad_norm": 1.6128570910751827, + "learning_rate": 1.8967779343286507e-06, + "loss": 
0.46558254957199097, + "step": 5546 + }, + { + "epoch": 1.6220207632694839, + "grad_norm": 1.8944422030763228, + "learning_rate": 1.8939458177136994e-06, + "loss": 0.48943620920181274, + "step": 5547 + }, + { + "epoch": 1.6223132036847492, + "grad_norm": 1.7707340805275773, + "learning_rate": 1.8911155958396256e-06, + "loss": 0.6041419506072998, + "step": 5548 + }, + { + "epoch": 1.6226056441000147, + "grad_norm": 1.9134744412177116, + "learning_rate": 1.8882872693679787e-06, + "loss": 0.5695589780807495, + "step": 5549 + }, + { + "epoch": 1.62289808451528, + "grad_norm": 1.5970321508292495, + "learning_rate": 1.8854608389598616e-06, + "loss": 0.45147764682769775, + "step": 5550 + }, + { + "epoch": 1.6231905249305454, + "grad_norm": 1.6145559649024994, + "learning_rate": 1.8826363052759367e-06, + "loss": 0.45560893416404724, + "step": 5551 + }, + { + "epoch": 1.623482965345811, + "grad_norm": 2.0039561405471855, + "learning_rate": 1.8798136689764213e-06, + "loss": 0.5714661478996277, + "step": 5552 + }, + { + "epoch": 1.623775405761076, + "grad_norm": 1.963169578207157, + "learning_rate": 1.8769929307210889e-06, + "loss": 0.6074325442314148, + "step": 5553 + }, + { + "epoch": 1.6240678461763416, + "grad_norm": 1.8912554250379197, + "learning_rate": 1.8741740911692708e-06, + "loss": 0.5406322479248047, + "step": 5554 + }, + { + "epoch": 1.624360286591607, + "grad_norm": 1.6201303288999382, + "learning_rate": 1.8713571509798524e-06, + "loss": 0.6098664999008179, + "step": 5555 + }, + { + "epoch": 1.6246527270068722, + "grad_norm": 1.5939995677707521, + "learning_rate": 1.8685421108112778e-06, + "loss": 0.4424859881401062, + "step": 5556 + }, + { + "epoch": 1.6249451674221378, + "grad_norm": 1.939602482231334, + "learning_rate": 1.8657289713215442e-06, + "loss": 0.5893913507461548, + "step": 5557 + }, + { + "epoch": 1.6252376078374031, + "grad_norm": 1.7158163961672592, + "learning_rate": 1.862917733168208e-06, + "loss": 0.5462610125541687, + "step": 5558 + }, + { + 
"epoch": 1.6255300482526684, + "grad_norm": 1.6886650692922842, + "learning_rate": 1.8601083970083766e-06, + "loss": 0.6044303178787231, + "step": 5559 + }, + { + "epoch": 1.625822488667934, + "grad_norm": 1.9164005939081288, + "learning_rate": 1.857300963498715e-06, + "loss": 0.4110264778137207, + "step": 5560 + }, + { + "epoch": 1.6261149290831993, + "grad_norm": 1.863152431496286, + "learning_rate": 1.8544954332954445e-06, + "loss": 0.5640783309936523, + "step": 5561 + }, + { + "epoch": 1.6264073694984647, + "grad_norm": 1.7604141548514003, + "learning_rate": 1.851691807054342e-06, + "loss": 0.43247851729393005, + "step": 5562 + }, + { + "epoch": 1.6266998099137302, + "grad_norm": 1.5534888737518595, + "learning_rate": 1.8488900854307367e-06, + "loss": 0.4909735918045044, + "step": 5563 + }, + { + "epoch": 1.6269922503289953, + "grad_norm": 2.0368143734269983, + "learning_rate": 1.8460902690795135e-06, + "loss": 0.5705426335334778, + "step": 5564 + }, + { + "epoch": 1.6272846907442609, + "grad_norm": 1.7974229709801126, + "learning_rate": 1.8432923586551144e-06, + "loss": 0.6455575823783875, + "step": 5565 + }, + { + "epoch": 1.6275771311595264, + "grad_norm": 1.5854644304225498, + "learning_rate": 1.8404963548115318e-06, + "loss": 0.4156351089477539, + "step": 5566 + }, + { + "epoch": 1.6278695715747915, + "grad_norm": 1.5858505218040218, + "learning_rate": 1.8377022582023163e-06, + "loss": 0.5497896075248718, + "step": 5567 + }, + { + "epoch": 1.628162011990057, + "grad_norm": 1.5247094519796704, + "learning_rate": 1.8349100694805711e-06, + "loss": 0.5237758159637451, + "step": 5568 + }, + { + "epoch": 1.6284544524053224, + "grad_norm": 1.8340347816856337, + "learning_rate": 1.832119789298954e-06, + "loss": 0.5140771865844727, + "step": 5569 + }, + { + "epoch": 1.6287468928205877, + "grad_norm": 2.04051717357991, + "learning_rate": 1.8293314183096721e-06, + "loss": 0.5942349433898926, + "step": 5570 + }, + { + "epoch": 1.6290393332358533, + "grad_norm": 
1.7868670881272706, + "learning_rate": 1.8265449571644933e-06, + "loss": 0.6316613554954529, + "step": 5571 + }, + { + "epoch": 1.6293317736511186, + "grad_norm": 1.7168155291178147, + "learning_rate": 1.823760406514735e-06, + "loss": 0.4789954423904419, + "step": 5572 + }, + { + "epoch": 1.629624214066384, + "grad_norm": 1.67674259516067, + "learning_rate": 1.8209777670112706e-06, + "loss": 0.596744179725647, + "step": 5573 + }, + { + "epoch": 1.6299166544816495, + "grad_norm": 1.7162317239554103, + "learning_rate": 1.8181970393045223e-06, + "loss": 0.5785890817642212, + "step": 5574 + }, + { + "epoch": 1.6302090948969148, + "grad_norm": 1.72408279785472, + "learning_rate": 1.8154182240444706e-06, + "loss": 0.5399461388587952, + "step": 5575 + }, + { + "epoch": 1.6305015353121801, + "grad_norm": 1.789842505433769, + "learning_rate": 1.812641321880645e-06, + "loss": 0.5251961946487427, + "step": 5576 + }, + { + "epoch": 1.6307939757274457, + "grad_norm": 1.5658430659550284, + "learning_rate": 1.8098663334621314e-06, + "loss": 0.6094855070114136, + "step": 5577 + }, + { + "epoch": 1.6310864161427108, + "grad_norm": 1.7839781318616403, + "learning_rate": 1.8070932594375656e-06, + "loss": 0.5586157441139221, + "step": 5578 + }, + { + "epoch": 1.6313788565579763, + "grad_norm": 1.6074136925381057, + "learning_rate": 1.804322100455136e-06, + "loss": 0.5572035312652588, + "step": 5579 + }, + { + "epoch": 1.6316712969732416, + "grad_norm": 1.8419935059375991, + "learning_rate": 1.801552857162585e-06, + "loss": 0.5567929148674011, + "step": 5580 + }, + { + "epoch": 1.631963737388507, + "grad_norm": 1.6633256712541593, + "learning_rate": 1.79878553020721e-06, + "loss": 0.4823629558086395, + "step": 5581 + }, + { + "epoch": 1.6322561778037725, + "grad_norm": 1.6367496398860508, + "learning_rate": 1.7960201202358495e-06, + "loss": 0.52935791015625, + "step": 5582 + }, + { + "epoch": 1.6325486182190379, + "grad_norm": 1.612437469487566, + "learning_rate": 
1.7932566278949049e-06, + "loss": 0.5486055016517639, + "step": 5583 + }, + { + "epoch": 1.6328410586343032, + "grad_norm": 1.638977663987494, + "learning_rate": 1.7904950538303256e-06, + "loss": 0.5606030225753784, + "step": 5584 + }, + { + "epoch": 1.6331334990495687, + "grad_norm": 1.5693957426770746, + "learning_rate": 1.7877353986876134e-06, + "loss": 0.5394873023033142, + "step": 5585 + }, + { + "epoch": 1.633425939464834, + "grad_norm": 1.7113121312436326, + "learning_rate": 1.7849776631118198e-06, + "loss": 0.6015416383743286, + "step": 5586 + }, + { + "epoch": 1.6337183798800994, + "grad_norm": 1.308708455891742, + "learning_rate": 1.7822218477475496e-06, + "loss": 0.3476119041442871, + "step": 5587 + }, + { + "epoch": 1.634010820295365, + "grad_norm": 1.6872606261874499, + "learning_rate": 1.7794679532389569e-06, + "loss": 0.43062901496887207, + "step": 5588 + }, + { + "epoch": 1.6343032607106303, + "grad_norm": 1.5715829289628913, + "learning_rate": 1.7767159802297497e-06, + "loss": 0.5267136096954346, + "step": 5589 + }, + { + "epoch": 1.6345957011258956, + "grad_norm": 1.5843839322860915, + "learning_rate": 1.7739659293631828e-06, + "loss": 0.40477365255355835, + "step": 5590 + }, + { + "epoch": 1.6348881415411611, + "grad_norm": 1.5464703907052304, + "learning_rate": 1.7712178012820657e-06, + "loss": 0.5166594386100769, + "step": 5591 + }, + { + "epoch": 1.6351805819564262, + "grad_norm": 1.8725681057880097, + "learning_rate": 1.768471596628757e-06, + "loss": 0.577332615852356, + "step": 5592 + }, + { + "epoch": 1.6354730223716918, + "grad_norm": 1.9361068668488919, + "learning_rate": 1.7657273160451626e-06, + "loss": 0.6265558004379272, + "step": 5593 + }, + { + "epoch": 1.6357654627869571, + "grad_norm": 1.359788014623014, + "learning_rate": 1.7629849601727422e-06, + "loss": 0.46483689546585083, + "step": 5594 + }, + { + "epoch": 1.6360579032022224, + "grad_norm": 1.763922790196176, + "learning_rate": 1.760244529652504e-06, + "loss": 
0.5217114090919495, + "step": 5595 + }, + { + "epoch": 1.636350343617488, + "grad_norm": 1.6050490395737056, + "learning_rate": 1.7575060251250098e-06, + "loss": 0.40754032135009766, + "step": 5596 + }, + { + "epoch": 1.6366427840327533, + "grad_norm": 1.8321306870013994, + "learning_rate": 1.7547694472303677e-06, + "loss": 0.5153856873512268, + "step": 5597 + }, + { + "epoch": 1.6369352244480186, + "grad_norm": 1.7719174136737381, + "learning_rate": 1.7520347966082352e-06, + "loss": 0.47374534606933594, + "step": 5598 + }, + { + "epoch": 1.6372276648632842, + "grad_norm": 1.85303064846871, + "learning_rate": 1.7493020738978205e-06, + "loss": 0.375232070684433, + "step": 5599 + }, + { + "epoch": 1.6375201052785495, + "grad_norm": 1.6844665277996391, + "learning_rate": 1.746571279737884e-06, + "loss": 0.5731218457221985, + "step": 5600 + }, + { + "epoch": 1.6378125456938148, + "grad_norm": 1.7604017420749336, + "learning_rate": 1.7438424147667267e-06, + "loss": 0.4908478260040283, + "step": 5601 + }, + { + "epoch": 1.6381049861090804, + "grad_norm": 1.759771030770569, + "learning_rate": 1.741115479622205e-06, + "loss": 0.6114420890808105, + "step": 5602 + }, + { + "epoch": 1.6383974265243455, + "grad_norm": 1.6860969538693165, + "learning_rate": 1.738390474941727e-06, + "loss": 0.6207842826843262, + "step": 5603 + }, + { + "epoch": 1.638689866939611, + "grad_norm": 1.8627303036453442, + "learning_rate": 1.7356674013622431e-06, + "loss": 0.4745057225227356, + "step": 5604 + }, + { + "epoch": 1.6389823073548766, + "grad_norm": 1.7448279838579288, + "learning_rate": 1.7329462595202573e-06, + "loss": 0.5501791834831238, + "step": 5605 + }, + { + "epoch": 1.6392747477701417, + "grad_norm": 1.5723514930569527, + "learning_rate": 1.7302270500518181e-06, + "loss": 0.5497169494628906, + "step": 5606 + }, + { + "epoch": 1.6395671881854073, + "grad_norm": 1.5582550082102102, + "learning_rate": 1.7275097735925239e-06, + "loss": 0.4439499080181122, + "step": 5607 + }, + { + 
"epoch": 1.6398596286006726, + "grad_norm": 1.5421642594165323, + "learning_rate": 1.7247944307775245e-06, + "loss": 0.5869239568710327, + "step": 5608 + }, + { + "epoch": 1.640152069015938, + "grad_norm": 1.8811101308859866, + "learning_rate": 1.722081022241512e-06, + "loss": 0.6979252099990845, + "step": 5609 + }, + { + "epoch": 1.6404445094312035, + "grad_norm": 1.6052357503320651, + "learning_rate": 1.719369548618729e-06, + "loss": 0.43291550874710083, + "step": 5610 + }, + { + "epoch": 1.6407369498464688, + "grad_norm": 1.9414841639869573, + "learning_rate": 1.7166600105429676e-06, + "loss": 0.5670255422592163, + "step": 5611 + }, + { + "epoch": 1.6410293902617341, + "grad_norm": 1.8236286685742322, + "learning_rate": 1.7139524086475679e-06, + "loss": 0.5956759452819824, + "step": 5612 + }, + { + "epoch": 1.6413218306769997, + "grad_norm": 1.8379778243010318, + "learning_rate": 1.71124674356541e-06, + "loss": 0.624202311038971, + "step": 5613 + }, + { + "epoch": 1.641614271092265, + "grad_norm": 1.546976601945301, + "learning_rate": 1.7085430159289295e-06, + "loss": 0.5394845604896545, + "step": 5614 + }, + { + "epoch": 1.6419067115075303, + "grad_norm": 1.689395226298913, + "learning_rate": 1.7058412263701063e-06, + "loss": 0.5320364236831665, + "step": 5615 + }, + { + "epoch": 1.6421991519227959, + "grad_norm": 1.719103506089404, + "learning_rate": 1.7031413755204673e-06, + "loss": 0.5889087915420532, + "step": 5616 + }, + { + "epoch": 1.642491592338061, + "grad_norm": 2.1274597082343103, + "learning_rate": 1.7004434640110857e-06, + "loss": 0.63529372215271, + "step": 5617 + }, + { + "epoch": 1.6427840327533265, + "grad_norm": 1.4153766033649497, + "learning_rate": 1.6977474924725823e-06, + "loss": 0.48696887493133545, + "step": 5618 + }, + { + "epoch": 1.6430764731685918, + "grad_norm": 1.592412158520241, + "learning_rate": 1.6950534615351234e-06, + "loss": 0.5998564958572388, + "step": 5619 + }, + { + "epoch": 1.6433689135838572, + "grad_norm": 
2.0156836237169142, + "learning_rate": 1.6923613718284237e-06, + "loss": 0.5256673693656921, + "step": 5620 + }, + { + "epoch": 1.6436613539991227, + "grad_norm": 1.6125826678096948, + "learning_rate": 1.6896712239817425e-06, + "loss": 0.4609792232513428, + "step": 5621 + }, + { + "epoch": 1.643953794414388, + "grad_norm": 1.6601641389435113, + "learning_rate": 1.6869830186238846e-06, + "loss": 0.6816249489784241, + "step": 5622 + }, + { + "epoch": 1.6442462348296534, + "grad_norm": 1.7917988570441037, + "learning_rate": 1.6842967563832036e-06, + "loss": 0.4622993767261505, + "step": 5623 + }, + { + "epoch": 1.644538675244919, + "grad_norm": 1.7039192593388794, + "learning_rate": 1.6816124378875942e-06, + "loss": 0.5089092254638672, + "step": 5624 + }, + { + "epoch": 1.6448311156601843, + "grad_norm": 1.804753905417491, + "learning_rate": 1.6789300637645e-06, + "loss": 0.49178463220596313, + "step": 5625 + }, + { + "epoch": 1.6451235560754496, + "grad_norm": 1.5823189990427826, + "learning_rate": 1.676249634640912e-06, + "loss": 0.4943847358226776, + "step": 5626 + }, + { + "epoch": 1.6454159964907151, + "grad_norm": 1.368767643177251, + "learning_rate": 1.6735711511433606e-06, + "loss": 0.38509243726730347, + "step": 5627 + }, + { + "epoch": 1.6457084369059805, + "grad_norm": 1.8368901340386043, + "learning_rate": 1.6708946138979288e-06, + "loss": 0.4765651822090149, + "step": 5628 + }, + { + "epoch": 1.6460008773212458, + "grad_norm": 1.9577136986762462, + "learning_rate": 1.6682200235302383e-06, + "loss": 0.5667406916618347, + "step": 5629 + }, + { + "epoch": 1.6462933177365113, + "grad_norm": 1.7936789387136831, + "learning_rate": 1.66554738066546e-06, + "loss": 0.702905535697937, + "step": 5630 + }, + { + "epoch": 1.6465857581517764, + "grad_norm": 1.8218045279879265, + "learning_rate": 1.6628766859283064e-06, + "loss": 0.5056663155555725, + "step": 5631 + }, + { + "epoch": 1.646878198567042, + "grad_norm": 1.5059989898819282, + "learning_rate": 
1.660207939943037e-06, + "loss": 0.3949700593948364, + "step": 5632 + }, + { + "epoch": 1.6471706389823073, + "grad_norm": 1.8365180821647582, + "learning_rate": 1.6575411433334553e-06, + "loss": 0.5562522411346436, + "step": 5633 + }, + { + "epoch": 1.6474630793975726, + "grad_norm": 1.853282636299631, + "learning_rate": 1.6548762967229104e-06, + "loss": 0.5046012997627258, + "step": 5634 + }, + { + "epoch": 1.6477555198128382, + "grad_norm": 1.873405273649113, + "learning_rate": 1.6522134007342894e-06, + "loss": 0.510586678981781, + "step": 5635 + }, + { + "epoch": 1.6480479602281035, + "grad_norm": 1.7244709678320052, + "learning_rate": 1.649552455990031e-06, + "loss": 0.5587502717971802, + "step": 5636 + }, + { + "epoch": 1.6483404006433688, + "grad_norm": 1.4244703013642708, + "learning_rate": 1.6468934631121147e-06, + "loss": 0.4774302840232849, + "step": 5637 + }, + { + "epoch": 1.6486328410586344, + "grad_norm": 1.7840577383362874, + "learning_rate": 1.644236422722063e-06, + "loss": 0.5969966650009155, + "step": 5638 + }, + { + "epoch": 1.6489252814738997, + "grad_norm": 1.9166026144170052, + "learning_rate": 1.6415813354409438e-06, + "loss": 0.6344267129898071, + "step": 5639 + }, + { + "epoch": 1.649217721889165, + "grad_norm": 1.6455866581497667, + "learning_rate": 1.638928201889367e-06, + "loss": 0.5252394676208496, + "step": 5640 + }, + { + "epoch": 1.6495101623044306, + "grad_norm": 1.7211145798255698, + "learning_rate": 1.636277022687488e-06, + "loss": 0.5092496871948242, + "step": 5641 + }, + { + "epoch": 1.6498026027196957, + "grad_norm": 1.797167898340461, + "learning_rate": 1.633627798455002e-06, + "loss": 0.6530938148498535, + "step": 5642 + }, + { + "epoch": 1.6500950431349612, + "grad_norm": 1.9009398203220143, + "learning_rate": 1.6309805298111492e-06, + "loss": 0.5152128338813782, + "step": 5643 + }, + { + "epoch": 1.6503874835502268, + "grad_norm": 1.4233567646508596, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.4670040011405945, 
+ "step": 5644 + }, + { + "epoch": 1.650679923965492, + "grad_norm": 1.718468492793745, + "learning_rate": 1.625691861764024e-06, + "loss": 0.47373896837234497, + "step": 5645 + }, + { + "epoch": 1.6509723643807575, + "grad_norm": 1.7609405530750961, + "learning_rate": 1.6230504635969413e-06, + "loss": 0.44277217984199524, + "step": 5646 + }, + { + "epoch": 1.6512648047960228, + "grad_norm": 1.9781946361433098, + "learning_rate": 1.6204110234908798e-06, + "loss": 0.6000313758850098, + "step": 5647 + }, + { + "epoch": 1.651557245211288, + "grad_norm": 1.7535547863968097, + "learning_rate": 1.6177735420627939e-06, + "loss": 0.5950880646705627, + "step": 5648 + }, + { + "epoch": 1.6518496856265537, + "grad_norm": 1.6566653852646755, + "learning_rate": 1.6151380199291767e-06, + "loss": 0.6308536529541016, + "step": 5649 + }, + { + "epoch": 1.652142126041819, + "grad_norm": 1.6807098070808562, + "learning_rate": 1.6125044577060667e-06, + "loss": 0.619708240032196, + "step": 5650 + }, + { + "epoch": 1.6524345664570843, + "grad_norm": 1.5330415165514617, + "learning_rate": 1.6098728560090438e-06, + "loss": 0.4912105202674866, + "step": 5651 + }, + { + "epoch": 1.6527270068723499, + "grad_norm": 1.6147329529882195, + "learning_rate": 1.607243215453227e-06, + "loss": 0.5078046917915344, + "step": 5652 + }, + { + "epoch": 1.6530194472876152, + "grad_norm": 1.562057881587638, + "learning_rate": 1.60461553665328e-06, + "loss": 0.4845188856124878, + "step": 5653 + }, + { + "epoch": 1.6533118877028805, + "grad_norm": 1.7846716530712952, + "learning_rate": 1.6019898202234075e-06, + "loss": 0.6323055028915405, + "step": 5654 + }, + { + "epoch": 1.653604328118146, + "grad_norm": 1.6998274797888833, + "learning_rate": 1.5993660667773524e-06, + "loss": 0.5700039863586426, + "step": 5655 + }, + { + "epoch": 1.6538967685334112, + "grad_norm": 1.567032216624363, + "learning_rate": 1.596744276928406e-06, + "loss": 0.5415322780609131, + "step": 5656 + }, + { + "epoch": 1.6541892089486767, 
+ "grad_norm": 1.5705725451812174, + "learning_rate": 1.5941244512893894e-06, + "loss": 0.47339457273483276, + "step": 5657 + }, + { + "epoch": 1.654481649363942, + "grad_norm": 1.7467509306260278, + "learning_rate": 1.5915065904726735e-06, + "loss": 0.5391967296600342, + "step": 5658 + }, + { + "epoch": 1.6547740897792074, + "grad_norm": 1.4866752002795596, + "learning_rate": 1.5888906950901683e-06, + "loss": 0.4832335114479065, + "step": 5659 + }, + { + "epoch": 1.655066530194473, + "grad_norm": 1.6779111992749078, + "learning_rate": 1.5862767657533217e-06, + "loss": 0.4539526104927063, + "step": 5660 + }, + { + "epoch": 1.6553589706097382, + "grad_norm": 1.6528644114250834, + "learning_rate": 1.583664803073125e-06, + "loss": 0.5261383056640625, + "step": 5661 + }, + { + "epoch": 1.6556514110250036, + "grad_norm": 1.497445031852123, + "learning_rate": 1.5810548076601096e-06, + "loss": 0.44060665369033813, + "step": 5662 + }, + { + "epoch": 1.6559438514402691, + "grad_norm": 1.5492116356252563, + "learning_rate": 1.578446780124344e-06, + "loss": 0.4202715754508972, + "step": 5663 + }, + { + "epoch": 1.6562362918555344, + "grad_norm": 1.6539664834530166, + "learning_rate": 1.57584072107544e-06, + "loss": 0.4736124873161316, + "step": 5664 + }, + { + "epoch": 1.6565287322707998, + "grad_norm": 1.71312353367257, + "learning_rate": 1.5732366311225466e-06, + "loss": 0.46696585416793823, + "step": 5665 + }, + { + "epoch": 1.6568211726860653, + "grad_norm": 1.9879711761174368, + "learning_rate": 1.570634510874356e-06, + "loss": 0.566236138343811, + "step": 5666 + }, + { + "epoch": 1.6571136131013307, + "grad_norm": 1.8521270783851422, + "learning_rate": 1.568034360939098e-06, + "loss": 0.5486587882041931, + "step": 5667 + }, + { + "epoch": 1.657406053516596, + "grad_norm": 1.7248593346342498, + "learning_rate": 1.5654361819245423e-06, + "loss": 0.49735748767852783, + "step": 5668 + }, + { + "epoch": 1.6576984939318615, + "grad_norm": 1.7146086057727925, + 
"learning_rate": 1.562839974437993e-06, + "loss": 0.6306062340736389, + "step": 5669 + }, + { + "epoch": 1.6579909343471266, + "grad_norm": 1.524921725608077, + "learning_rate": 1.5602457390863e-06, + "loss": 0.5062750577926636, + "step": 5670 + }, + { + "epoch": 1.6582833747623922, + "grad_norm": 1.511094812843301, + "learning_rate": 1.5576534764758522e-06, + "loss": 0.5037271976470947, + "step": 5671 + }, + { + "epoch": 1.6585758151776575, + "grad_norm": 1.5731242321067331, + "learning_rate": 1.5550631872125743e-06, + "loss": 0.5749099254608154, + "step": 5672 + }, + { + "epoch": 1.6588682555929228, + "grad_norm": 1.7434864310127793, + "learning_rate": 1.5524748719019312e-06, + "loss": 0.5241814255714417, + "step": 5673 + }, + { + "epoch": 1.6591606960081884, + "grad_norm": 1.6599956977784314, + "learning_rate": 1.5498885311489243e-06, + "loss": 0.5410301685333252, + "step": 5674 + }, + { + "epoch": 1.6594531364234537, + "grad_norm": 1.6011154372945764, + "learning_rate": 1.5473041655580956e-06, + "loss": 0.6363968253135681, + "step": 5675 + }, + { + "epoch": 1.659745576838719, + "grad_norm": 1.7262614943682328, + "learning_rate": 1.5447217757335264e-06, + "loss": 0.5476758480072021, + "step": 5676 + }, + { + "epoch": 1.6600380172539846, + "grad_norm": 2.1890795829733465, + "learning_rate": 1.5421413622788328e-06, + "loss": 0.5375553369522095, + "step": 5677 + }, + { + "epoch": 1.66033045766925, + "grad_norm": 1.6882335328441256, + "learning_rate": 1.53956292579717e-06, + "loss": 0.6227232217788696, + "step": 5678 + }, + { + "epoch": 1.6606228980845152, + "grad_norm": 1.6718724900526085, + "learning_rate": 1.5369864668912327e-06, + "loss": 0.5981070399284363, + "step": 5679 + }, + { + "epoch": 1.6609153384997808, + "grad_norm": 1.6935493861773532, + "learning_rate": 1.5344119861632535e-06, + "loss": 0.5535466074943542, + "step": 5680 + }, + { + "epoch": 1.661207778915046, + "grad_norm": 1.5610811166933365, + "learning_rate": 1.5318394842150009e-06, + "loss": 
0.5562780499458313, + "step": 5681 + }, + { + "epoch": 1.6615002193303114, + "grad_norm": 2.8725866349660683, + "learning_rate": 1.5292689616477808e-06, + "loss": 0.5937552452087402, + "step": 5682 + }, + { + "epoch": 1.661792659745577, + "grad_norm": 1.578580766715999, + "learning_rate": 1.526700419062439e-06, + "loss": 0.5154576301574707, + "step": 5683 + }, + { + "epoch": 1.662085100160842, + "grad_norm": 1.691867883182686, + "learning_rate": 1.5241338570593557e-06, + "loss": 0.6887973546981812, + "step": 5684 + }, + { + "epoch": 1.6623775405761076, + "grad_norm": 1.6575847736482525, + "learning_rate": 1.5215692762384481e-06, + "loss": 0.5365385413169861, + "step": 5685 + }, + { + "epoch": 1.662669980991373, + "grad_norm": 1.8209681086307343, + "learning_rate": 1.519006677199173e-06, + "loss": 0.5906165838241577, + "step": 5686 + }, + { + "epoch": 1.6629624214066383, + "grad_norm": 1.5861180854035228, + "learning_rate": 1.5164460605405252e-06, + "loss": 0.5752634406089783, + "step": 5687 + }, + { + "epoch": 1.6632548618219039, + "grad_norm": 1.5319119666926662, + "learning_rate": 1.5138874268610259e-06, + "loss": 0.6265667676925659, + "step": 5688 + }, + { + "epoch": 1.6635473022371692, + "grad_norm": 1.9588164368468703, + "learning_rate": 1.5113307767587449e-06, + "loss": 0.5032769441604614, + "step": 5689 + }, + { + "epoch": 1.6638397426524345, + "grad_norm": 1.799708728422752, + "learning_rate": 1.5087761108312837e-06, + "loss": 0.5740037560462952, + "step": 5690 + }, + { + "epoch": 1.6641321830677, + "grad_norm": 1.7236751091410876, + "learning_rate": 1.5062234296757782e-06, + "loss": 0.5745523571968079, + "step": 5691 + }, + { + "epoch": 1.6644246234829654, + "grad_norm": 1.4898534105858117, + "learning_rate": 1.5036727338889035e-06, + "loss": 0.4448510408401489, + "step": 5692 + }, + { + "epoch": 1.6647170638982307, + "grad_norm": 1.7482222251428332, + "learning_rate": 1.5011240240668678e-06, + "loss": 0.5142196416854858, + "step": 5693 + }, + { + "epoch": 
1.6650095043134963, + "grad_norm": 1.484675519827308, + "learning_rate": 1.4985773008054184e-06, + "loss": 0.3317479193210602, + "step": 5694 + }, + { + "epoch": 1.6653019447287614, + "grad_norm": 1.9471413048062423, + "learning_rate": 1.4960325646998353e-06, + "loss": 0.5721619129180908, + "step": 5695 + }, + { + "epoch": 1.665594385144027, + "grad_norm": 1.7546330993452042, + "learning_rate": 1.4934898163449341e-06, + "loss": 0.4937021732330322, + "step": 5696 + }, + { + "epoch": 1.6658868255592922, + "grad_norm": 1.9665727915679332, + "learning_rate": 1.4909490563350694e-06, + "loss": 0.6414870023727417, + "step": 5697 + }, + { + "epoch": 1.6661792659745576, + "grad_norm": 2.144217430639584, + "learning_rate": 1.4884102852641258e-06, + "loss": 0.6265281438827515, + "step": 5698 + }, + { + "epoch": 1.6664717063898231, + "grad_norm": 1.9171094003176723, + "learning_rate": 1.48587350372553e-06, + "loss": 0.5908917784690857, + "step": 5699 + }, + { + "epoch": 1.6667641468050884, + "grad_norm": 1.5794126433874063, + "learning_rate": 1.4833387123122334e-06, + "loss": 0.6098382472991943, + "step": 5700 + }, + { + "epoch": 1.6670565872203538, + "grad_norm": 1.7930907914682574, + "learning_rate": 1.4808059116167306e-06, + "loss": 0.5106536746025085, + "step": 5701 + }, + { + "epoch": 1.6673490276356193, + "grad_norm": 1.9054848074745216, + "learning_rate": 1.4782751022310481e-06, + "loss": 0.5548620820045471, + "step": 5702 + }, + { + "epoch": 1.6676414680508846, + "grad_norm": 1.5332665904029121, + "learning_rate": 1.4757462847467475e-06, + "loss": 0.4596245288848877, + "step": 5703 + }, + { + "epoch": 1.66793390846615, + "grad_norm": 1.98056012220508, + "learning_rate": 1.4732194597549244e-06, + "loss": 0.6000612378120422, + "step": 5704 + }, + { + "epoch": 1.6682263488814155, + "grad_norm": 1.53003821533968, + "learning_rate": 1.4706946278462097e-06, + "loss": 0.5522277355194092, + "step": 5705 + }, + { + "epoch": 1.6685187892966808, + "grad_norm": 1.741978737228361, 
+ "learning_rate": 1.468171789610766e-06, + "loss": 0.4765724837779999, + "step": 5706 + }, + { + "epoch": 1.6688112297119462, + "grad_norm": 1.737640693413614, + "learning_rate": 1.4656509456382927e-06, + "loss": 0.564188539981842, + "step": 5707 + }, + { + "epoch": 1.6691036701272117, + "grad_norm": 1.8586603218062736, + "learning_rate": 1.4631320965180208e-06, + "loss": 0.4910390377044678, + "step": 5708 + }, + { + "epoch": 1.6693961105424768, + "grad_norm": 1.8232002468100077, + "learning_rate": 1.4606152428387166e-06, + "loss": 0.5992041826248169, + "step": 5709 + }, + { + "epoch": 1.6696885509577424, + "grad_norm": 1.7676126822410316, + "learning_rate": 1.4581003851886811e-06, + "loss": 0.4873291850090027, + "step": 5710 + }, + { + "epoch": 1.6699809913730077, + "grad_norm": 1.7366762629360202, + "learning_rate": 1.4555875241557426e-06, + "loss": 0.6487013101577759, + "step": 5711 + }, + { + "epoch": 1.670273431788273, + "grad_norm": 1.5436242943423213, + "learning_rate": 1.4530766603272695e-06, + "loss": 0.4624609351158142, + "step": 5712 + }, + { + "epoch": 1.6705658722035386, + "grad_norm": 1.8279691880026145, + "learning_rate": 1.4505677942901609e-06, + "loss": 0.5765592455863953, + "step": 5713 + }, + { + "epoch": 1.670858312618804, + "grad_norm": 1.6171369345434061, + "learning_rate": 1.4480609266308488e-06, + "loss": 0.6730339527130127, + "step": 5714 + }, + { + "epoch": 1.6711507530340692, + "grad_norm": 1.7616383708984635, + "learning_rate": 1.445556057935299e-06, + "loss": 0.6381770372390747, + "step": 5715 + }, + { + "epoch": 1.6714431934493348, + "grad_norm": 1.8270298461203718, + "learning_rate": 1.4430531887890076e-06, + "loss": 0.6236029863357544, + "step": 5716 + }, + { + "epoch": 1.6717356338646001, + "grad_norm": 1.8837930420569144, + "learning_rate": 1.4405523197770076e-06, + "loss": 0.521639347076416, + "step": 5717 + }, + { + "epoch": 1.6720280742798654, + "grad_norm": 1.651464369232987, + "learning_rate": 1.4380534514838596e-06, + 
"loss": 0.5912468433380127, + "step": 5718 + }, + { + "epoch": 1.672320514695131, + "grad_norm": 1.7062190862435904, + "learning_rate": 1.4355565844936602e-06, + "loss": 0.5533329248428345, + "step": 5719 + }, + { + "epoch": 1.672612955110396, + "grad_norm": 1.8368834191923704, + "learning_rate": 1.4330617193900365e-06, + "loss": 0.5901006460189819, + "step": 5720 + }, + { + "epoch": 1.6729053955256616, + "grad_norm": 1.7501848609248272, + "learning_rate": 1.4305688567561503e-06, + "loss": 0.5083344578742981, + "step": 5721 + }, + { + "epoch": 1.6731978359409272, + "grad_norm": 1.359271864269329, + "learning_rate": 1.4280779971746894e-06, + "loss": 0.4443317651748657, + "step": 5722 + }, + { + "epoch": 1.6734902763561923, + "grad_norm": 1.931591797384203, + "learning_rate": 1.4255891412278778e-06, + "loss": 0.6355078220367432, + "step": 5723 + }, + { + "epoch": 1.6737827167714578, + "grad_norm": 1.751894451134603, + "learning_rate": 1.423102289497471e-06, + "loss": 0.5403381586074829, + "step": 5724 + }, + { + "epoch": 1.6740751571867232, + "grad_norm": 1.7703719298346523, + "learning_rate": 1.4206174425647556e-06, + "loss": 0.5272151231765747, + "step": 5725 + }, + { + "epoch": 1.6743675976019885, + "grad_norm": 1.6453865684399975, + "learning_rate": 1.41813460101055e-06, + "loss": 0.6750346422195435, + "step": 5726 + }, + { + "epoch": 1.674660038017254, + "grad_norm": 1.7471006401236766, + "learning_rate": 1.4156537654152026e-06, + "loss": 0.552655816078186, + "step": 5727 + }, + { + "epoch": 1.6749524784325194, + "grad_norm": 1.5245393452927156, + "learning_rate": 1.4131749363585933e-06, + "loss": 0.3947732448577881, + "step": 5728 + }, + { + "epoch": 1.6752449188477847, + "grad_norm": 1.4115886314708204, + "learning_rate": 1.4106981144201337e-06, + "loss": 0.4910270571708679, + "step": 5729 + }, + { + "epoch": 1.6755373592630503, + "grad_norm": 1.9268335112778272, + "learning_rate": 1.408223300178767e-06, + "loss": 0.5717943906784058, + "step": 5730 + }, + { + 
"epoch": 1.6758297996783156, + "grad_norm": 1.6782914146067396, + "learning_rate": 1.4057504942129652e-06, + "loss": 0.4993055462837219, + "step": 5731 + }, + { + "epoch": 1.676122240093581, + "grad_norm": 1.851203153701759, + "learning_rate": 1.4032796971007322e-06, + "loss": 0.4772619605064392, + "step": 5732 + }, + { + "epoch": 1.6764146805088465, + "grad_norm": 1.6250346930838577, + "learning_rate": 1.400810909419601e-06, + "loss": 0.3824518322944641, + "step": 5733 + }, + { + "epoch": 1.6767071209241116, + "grad_norm": 1.815105841906862, + "learning_rate": 1.398344131746634e-06, + "loss": 0.5302368402481079, + "step": 5734 + }, + { + "epoch": 1.676999561339377, + "grad_norm": 1.738439871277175, + "learning_rate": 1.3958793646584279e-06, + "loss": 0.5776697397232056, + "step": 5735 + }, + { + "epoch": 1.6772920017546424, + "grad_norm": 1.7943869884408015, + "learning_rate": 1.3934166087311063e-06, + "loss": 0.53890061378479, + "step": 5736 + }, + { + "epoch": 1.6775844421699078, + "grad_norm": 1.71821325954837, + "learning_rate": 1.3909558645403243e-06, + "loss": 0.47210827469825745, + "step": 5737 + }, + { + "epoch": 1.6778768825851733, + "grad_norm": 1.8689211559459666, + "learning_rate": 1.388497132661264e-06, + "loss": 0.6020913124084473, + "step": 5738 + }, + { + "epoch": 1.6781693230004386, + "grad_norm": 1.8612626444994878, + "learning_rate": 1.3860404136686411e-06, + "loss": 0.4244590997695923, + "step": 5739 + }, + { + "epoch": 1.678461763415704, + "grad_norm": 1.8281285744352933, + "learning_rate": 1.3835857081366965e-06, + "loss": 0.5969624519348145, + "step": 5740 + }, + { + "epoch": 1.6787542038309695, + "grad_norm": 1.774375495611947, + "learning_rate": 1.3811330166392057e-06, + "loss": 0.6573030352592468, + "step": 5741 + }, + { + "epoch": 1.6790466442462348, + "grad_norm": 1.641770470616675, + "learning_rate": 1.3786823397494675e-06, + "loss": 0.4251132905483246, + "step": 5742 + }, + { + "epoch": 1.6793390846615002, + "grad_norm": 
1.504067842347657, + "learning_rate": 1.3762336780403163e-06, + "loss": 0.5555700659751892, + "step": 5743 + }, + { + "epoch": 1.6796315250767657, + "grad_norm": 2.1042602241156128, + "learning_rate": 1.3737870320841073e-06, + "loss": 0.5651364326477051, + "step": 5744 + }, + { + "epoch": 1.679923965492031, + "grad_norm": 1.7135873981198582, + "learning_rate": 1.371342402452731e-06, + "loss": 0.6283698678016663, + "step": 5745 + }, + { + "epoch": 1.6802164059072964, + "grad_norm": 1.4720135811876174, + "learning_rate": 1.3688997897176037e-06, + "loss": 0.47864413261413574, + "step": 5746 + }, + { + "epoch": 1.680508846322562, + "grad_norm": 1.9268157997034314, + "learning_rate": 1.366459194449674e-06, + "loss": 0.6254131197929382, + "step": 5747 + }, + { + "epoch": 1.680801286737827, + "grad_norm": 1.4626288934383243, + "learning_rate": 1.364020617219415e-06, + "loss": 0.35147637128829956, + "step": 5748 + }, + { + "epoch": 1.6810937271530926, + "grad_norm": 1.9627536321629568, + "learning_rate": 1.3615840585968287e-06, + "loss": 0.6126410961151123, + "step": 5749 + }, + { + "epoch": 1.681386167568358, + "grad_norm": 1.7732443093164585, + "learning_rate": 1.359149519151447e-06, + "loss": 0.5807974338531494, + "step": 5750 + }, + { + "epoch": 1.6816786079836232, + "grad_norm": 1.5024396656291241, + "learning_rate": 1.3567169994523277e-06, + "loss": 0.5033349990844727, + "step": 5751 + }, + { + "epoch": 1.6819710483988888, + "grad_norm": 1.5694630419560385, + "learning_rate": 1.3542865000680604e-06, + "loss": 0.47656023502349854, + "step": 5752 + }, + { + "epoch": 1.682263488814154, + "grad_norm": 1.6495545571688441, + "learning_rate": 1.3518580215667542e-06, + "loss": 0.5137293338775635, + "step": 5753 + }, + { + "epoch": 1.6825559292294194, + "grad_norm": 1.5950800812601422, + "learning_rate": 1.3494315645160539e-06, + "loss": 0.4636800289154053, + "step": 5754 + }, + { + "epoch": 1.682848369644685, + "grad_norm": 1.937366310273075, + "learning_rate": 
1.3470071294831289e-06, + "loss": 0.5825523138046265, + "step": 5755 + }, + { + "epoch": 1.6831408100599503, + "grad_norm": 1.798274160020649, + "learning_rate": 1.344584717034677e-06, + "loss": 0.49282288551330566, + "step": 5756 + }, + { + "epoch": 1.6834332504752156, + "grad_norm": 1.6393172330125654, + "learning_rate": 1.3421643277369211e-06, + "loss": 0.5551935434341431, + "step": 5757 + }, + { + "epoch": 1.6837256908904812, + "grad_norm": 1.7844394306187494, + "learning_rate": 1.339745962155613e-06, + "loss": 0.6423832178115845, + "step": 5758 + }, + { + "epoch": 1.6840181313057463, + "grad_norm": 1.7061841923170233, + "learning_rate": 1.3373296208560316e-06, + "loss": 0.6178075671195984, + "step": 5759 + }, + { + "epoch": 1.6843105717210118, + "grad_norm": 1.7099687312543272, + "learning_rate": 1.3349153044029816e-06, + "loss": 0.6781176328659058, + "step": 5760 + }, + { + "epoch": 1.6846030121362774, + "grad_norm": 1.837996192806761, + "learning_rate": 1.332503013360794e-06, + "loss": 0.6511910557746887, + "step": 5761 + }, + { + "epoch": 1.6848954525515425, + "grad_norm": 1.3861294384859772, + "learning_rate": 1.3300927482933279e-06, + "loss": 0.4980696141719818, + "step": 5762 + }, + { + "epoch": 1.685187892966808, + "grad_norm": 1.6100202697936232, + "learning_rate": 1.3276845097639702e-06, + "loss": 0.49176928400993347, + "step": 5763 + }, + { + "epoch": 1.6854803333820734, + "grad_norm": 1.893755486996651, + "learning_rate": 1.3252782983356272e-06, + "loss": 0.5198799967765808, + "step": 5764 + }, + { + "epoch": 1.6857727737973387, + "grad_norm": 1.4629269004624288, + "learning_rate": 1.322874114570739e-06, + "loss": 0.5058869123458862, + "step": 5765 + }, + { + "epoch": 1.6860652142126042, + "grad_norm": 1.7429625548536576, + "learning_rate": 1.3204719590312698e-06, + "loss": 0.46573105454444885, + "step": 5766 + }, + { + "epoch": 1.6863576546278696, + "grad_norm": 1.702952537068074, + "learning_rate": 1.3180718322787067e-06, + "loss": 
0.5033260583877563, + "step": 5767 + }, + { + "epoch": 1.686650095043135, + "grad_norm": 1.4816614067920655, + "learning_rate": 1.3156737348740655e-06, + "loss": 0.5306515693664551, + "step": 5768 + }, + { + "epoch": 1.6869425354584004, + "grad_norm": 1.5781671263541353, + "learning_rate": 1.313277667377888e-06, + "loss": 0.45660221576690674, + "step": 5769 + }, + { + "epoch": 1.6872349758736658, + "grad_norm": 1.8450654821638361, + "learning_rate": 1.3108836303502392e-06, + "loss": 0.5353757739067078, + "step": 5770 + }, + { + "epoch": 1.687527416288931, + "grad_norm": 1.719850536675987, + "learning_rate": 1.3084916243507118e-06, + "loss": 0.5415239930152893, + "step": 5771 + }, + { + "epoch": 1.6878198567041967, + "grad_norm": 1.7512787251419633, + "learning_rate": 1.3061016499384217e-06, + "loss": 0.5860229730606079, + "step": 5772 + }, + { + "epoch": 1.6881122971194618, + "grad_norm": 1.7755386406909037, + "learning_rate": 1.3037137076720107e-06, + "loss": 0.5524891018867493, + "step": 5773 + }, + { + "epoch": 1.6884047375347273, + "grad_norm": 1.9368287931198411, + "learning_rate": 1.3013277981096484e-06, + "loss": 0.5557498931884766, + "step": 5774 + }, + { + "epoch": 1.6886971779499926, + "grad_norm": 1.6092314723070207, + "learning_rate": 1.2989439218090227e-06, + "loss": 0.45877397060394287, + "step": 5775 + }, + { + "epoch": 1.688989618365258, + "grad_norm": 1.587163397973365, + "learning_rate": 1.2965620793273515e-06, + "loss": 0.5310335159301758, + "step": 5776 + }, + { + "epoch": 1.6892820587805235, + "grad_norm": 1.4561579735469703, + "learning_rate": 1.294182271221377e-06, + "loss": 0.4855915904045105, + "step": 5777 + }, + { + "epoch": 1.6895744991957888, + "grad_norm": 1.5707049211364605, + "learning_rate": 1.2918044980473643e-06, + "loss": 0.6070747971534729, + "step": 5778 + }, + { + "epoch": 1.6898669396110542, + "grad_norm": 1.6739979529118527, + "learning_rate": 1.2894287603611033e-06, + "loss": 0.5108609795570374, + "step": 5779 + }, + { + 
"epoch": 1.6901593800263197, + "grad_norm": 1.7647615756485302, + "learning_rate": 1.2870550587179087e-06, + "loss": 0.49141189455986023, + "step": 5780 + }, + { + "epoch": 1.690451820441585, + "grad_norm": 1.4176033732152467, + "learning_rate": 1.2846833936726178e-06, + "loss": 0.4239678382873535, + "step": 5781 + }, + { + "epoch": 1.6907442608568504, + "grad_norm": 1.8431022697656632, + "learning_rate": 1.2823137657795948e-06, + "loss": 0.6348937153816223, + "step": 5782 + }, + { + "epoch": 1.691036701272116, + "grad_norm": 1.4853353146024342, + "learning_rate": 1.2799461755927233e-06, + "loss": 0.4561845064163208, + "step": 5783 + }, + { + "epoch": 1.6913291416873812, + "grad_norm": 2.1521785942560197, + "learning_rate": 1.2775806236654153e-06, + "loss": 0.5663880109786987, + "step": 5784 + }, + { + "epoch": 1.6916215821026466, + "grad_norm": 1.5729874297711008, + "learning_rate": 1.275217110550604e-06, + "loss": 0.5200550556182861, + "step": 5785 + }, + { + "epoch": 1.6919140225179121, + "grad_norm": 1.7072937541006934, + "learning_rate": 1.2728556368007461e-06, + "loss": 0.5401214361190796, + "step": 5786 + }, + { + "epoch": 1.6922064629331772, + "grad_norm": 2.0089316276908917, + "learning_rate": 1.2704962029678202e-06, + "loss": 0.5409752130508423, + "step": 5787 + }, + { + "epoch": 1.6924989033484428, + "grad_norm": 1.6551832796501305, + "learning_rate": 1.2681388096033298e-06, + "loss": 0.46215158700942993, + "step": 5788 + }, + { + "epoch": 1.692791343763708, + "grad_norm": 1.508586050733543, + "learning_rate": 1.2657834572583027e-06, + "loss": 0.44687867164611816, + "step": 5789 + }, + { + "epoch": 1.6930837841789734, + "grad_norm": 1.5112474922130816, + "learning_rate": 1.2634301464832877e-06, + "loss": 0.47882723808288574, + "step": 5790 + }, + { + "epoch": 1.693376224594239, + "grad_norm": 2.0838428918534264, + "learning_rate": 1.2610788778283567e-06, + "loss": 0.6108201742172241, + "step": 5791 + }, + { + "epoch": 1.6936686650095043, + "grad_norm": 
1.4370335670353505, + "learning_rate": 1.2587296518431036e-06, + "loss": 0.45024657249450684, + "step": 5792 + }, + { + "epoch": 1.6939611054247696, + "grad_norm": 2.263053324487421, + "learning_rate": 1.256382469076648e-06, + "loss": 0.6746254563331604, + "step": 5793 + }, + { + "epoch": 1.6942535458400352, + "grad_norm": 1.7423805800598553, + "learning_rate": 1.2540373300776264e-06, + "loss": 0.6439248323440552, + "step": 5794 + }, + { + "epoch": 1.6945459862553005, + "grad_norm": 1.5101648188878154, + "learning_rate": 1.251694235394204e-06, + "loss": 0.467510461807251, + "step": 5795 + }, + { + "epoch": 1.6948384266705658, + "grad_norm": 2.0083904845815117, + "learning_rate": 1.2493531855740626e-06, + "loss": 0.5509516596794128, + "step": 5796 + }, + { + "epoch": 1.6951308670858314, + "grad_norm": 1.6291523574406077, + "learning_rate": 1.247014181164412e-06, + "loss": 0.49178194999694824, + "step": 5797 + }, + { + "epoch": 1.6954233075010965, + "grad_norm": 1.6626228068208797, + "learning_rate": 1.2446772227119753e-06, + "loss": 0.4825005531311035, + "step": 5798 + }, + { + "epoch": 1.695715747916362, + "grad_norm": 1.7609982762736733, + "learning_rate": 1.242342310763005e-06, + "loss": 0.7441064715385437, + "step": 5799 + }, + { + "epoch": 1.6960081883316276, + "grad_norm": 1.6166055740202077, + "learning_rate": 1.2400094458632717e-06, + "loss": 0.5020110011100769, + "step": 5800 + }, + { + "epoch": 1.6963006287468927, + "grad_norm": 1.6328086702132818, + "learning_rate": 1.237678628558069e-06, + "loss": 0.5439830422401428, + "step": 5801 + }, + { + "epoch": 1.6965930691621582, + "grad_norm": 2.0549338843530136, + "learning_rate": 1.235349859392211e-06, + "loss": 0.6235179901123047, + "step": 5802 + }, + { + "epoch": 1.6968855095774236, + "grad_norm": 1.7141848290041162, + "learning_rate": 1.2330231389100323e-06, + "loss": 0.6176612377166748, + "step": 5803 + }, + { + "epoch": 1.697177949992689, + "grad_norm": 1.7381769122607003, + "learning_rate": 
1.2306984676553924e-06, + "loss": 0.5956840515136719, + "step": 5804 + }, + { + "epoch": 1.6974703904079544, + "grad_norm": 1.6857909163061566, + "learning_rate": 1.2283758461716667e-06, + "loss": 0.5025947690010071, + "step": 5805 + }, + { + "epoch": 1.6977628308232198, + "grad_norm": 1.507035347865144, + "learning_rate": 1.2260552750017551e-06, + "loss": 0.5772436857223511, + "step": 5806 + }, + { + "epoch": 1.698055271238485, + "grad_norm": 1.6665432076063584, + "learning_rate": 1.223736754688075e-06, + "loss": 0.4336615204811096, + "step": 5807 + }, + { + "epoch": 1.6983477116537506, + "grad_norm": 1.695081220374435, + "learning_rate": 1.221420285772572e-06, + "loss": 0.5697668790817261, + "step": 5808 + }, + { + "epoch": 1.698640152069016, + "grad_norm": 1.8545475442236217, + "learning_rate": 1.2191058687966995e-06, + "loss": 0.4966861605644226, + "step": 5809 + }, + { + "epoch": 1.6989325924842813, + "grad_norm": 1.777484506048346, + "learning_rate": 1.2167935043014411e-06, + "loss": 0.5805951952934265, + "step": 5810 + }, + { + "epoch": 1.6992250328995469, + "grad_norm": 1.6055305498040644, + "learning_rate": 1.2144831928272994e-06, + "loss": 0.4669906497001648, + "step": 5811 + }, + { + "epoch": 1.699517473314812, + "grad_norm": 1.7730179282571827, + "learning_rate": 1.212174934914294e-06, + "loss": 0.5630965828895569, + "step": 5812 + }, + { + "epoch": 1.6998099137300775, + "grad_norm": 1.7272395334456936, + "learning_rate": 1.2098687311019663e-06, + "loss": 0.5345104932785034, + "step": 5813 + }, + { + "epoch": 1.7001023541453428, + "grad_norm": 1.9547814584710963, + "learning_rate": 1.207564581929378e-06, + "loss": 0.5760249495506287, + "step": 5814 + }, + { + "epoch": 1.7003947945606082, + "grad_norm": 1.563397994600299, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.506635308265686, + "step": 5815 + }, + { + "epoch": 1.7006872349758737, + "grad_norm": 1.792775034126629, + "learning_rate": 1.2029624496572622e-06, + "loss": 0.5107032656669617, + 
"step": 5816 + }, + { + "epoch": 1.700979675391139, + "grad_norm": 1.5891211780153636, + "learning_rate": 1.2006644676334557e-06, + "loss": 0.5888187885284424, + "step": 5817 + }, + { + "epoch": 1.7012721158064044, + "grad_norm": 1.8008314810247776, + "learning_rate": 1.1983685424008285e-06, + "loss": 0.5326075553894043, + "step": 5818 + }, + { + "epoch": 1.70156455622167, + "grad_norm": 1.6515493940564925, + "learning_rate": 1.1960746744960417e-06, + "loss": 0.5097993612289429, + "step": 5819 + }, + { + "epoch": 1.7018569966369352, + "grad_norm": 1.6532256911128915, + "learning_rate": 1.1937828644552696e-06, + "loss": 0.6001093983650208, + "step": 5820 + }, + { + "epoch": 1.7021494370522006, + "grad_norm": 1.7728326525757572, + "learning_rate": 1.1914931128142072e-06, + "loss": 0.513684093952179, + "step": 5821 + }, + { + "epoch": 1.7024418774674661, + "grad_norm": 1.6118848482453871, + "learning_rate": 1.189205420108076e-06, + "loss": 0.4688597321510315, + "step": 5822 + }, + { + "epoch": 1.7027343178827314, + "grad_norm": 1.6755720349462948, + "learning_rate": 1.1869197868716075e-06, + "loss": 0.4537498354911804, + "step": 5823 + }, + { + "epoch": 1.7030267582979968, + "grad_norm": 1.625108439053771, + "learning_rate": 1.1846362136390531e-06, + "loss": 0.43031078577041626, + "step": 5824 + }, + { + "epoch": 1.7033191987132623, + "grad_norm": 1.9244406734438975, + "learning_rate": 1.182354700944187e-06, + "loss": 0.5139330625534058, + "step": 5825 + }, + { + "epoch": 1.7036116391285274, + "grad_norm": 1.7897651312393703, + "learning_rate": 1.180075249320296e-06, + "loss": 0.6542010307312012, + "step": 5826 + }, + { + "epoch": 1.703904079543793, + "grad_norm": 1.6365189888188503, + "learning_rate": 1.1777978593001903e-06, + "loss": 0.5371676087379456, + "step": 5827 + }, + { + "epoch": 1.7041965199590583, + "grad_norm": 1.6793659914593386, + "learning_rate": 1.1755225314161967e-06, + "loss": 0.47583359479904175, + "step": 5828 + }, + { + "epoch": 
1.7044889603743236, + "grad_norm": 1.7363884838234833, + "learning_rate": 1.173249266200156e-06, + "loss": 0.5471247434616089, + "step": 5829 + }, + { + "epoch": 1.7047814007895892, + "grad_norm": 1.850508925320166, + "learning_rate": 1.1709780641834323e-06, + "loss": 0.5095713138580322, + "step": 5830 + }, + { + "epoch": 1.7050738412048545, + "grad_norm": 1.5373790027628114, + "learning_rate": 1.1687089258969041e-06, + "loss": 0.41944777965545654, + "step": 5831 + }, + { + "epoch": 1.7053662816201198, + "grad_norm": 1.5434472143224902, + "learning_rate": 1.1664418518709697e-06, + "loss": 0.42380404472351074, + "step": 5832 + }, + { + "epoch": 1.7056587220353854, + "grad_norm": 1.8798510100106, + "learning_rate": 1.1641768426355427e-06, + "loss": 0.5688038468360901, + "step": 5833 + }, + { + "epoch": 1.7059511624506507, + "grad_norm": 1.6396391570153137, + "learning_rate": 1.1619138987200562e-06, + "loss": 0.5432788133621216, + "step": 5834 + }, + { + "epoch": 1.706243602865916, + "grad_norm": 1.699260651340017, + "learning_rate": 1.1596530206534606e-06, + "loss": 0.5408512949943542, + "step": 5835 + }, + { + "epoch": 1.7065360432811816, + "grad_norm": 1.5364052920051108, + "learning_rate": 1.1573942089642198e-06, + "loss": 0.5149247646331787, + "step": 5836 + }, + { + "epoch": 1.7068284836964467, + "grad_norm": 1.6490213140214325, + "learning_rate": 1.1551374641803193e-06, + "loss": 0.36905592679977417, + "step": 5837 + }, + { + "epoch": 1.7071209241117122, + "grad_norm": 1.7960598101415164, + "learning_rate": 1.152882786829259e-06, + "loss": 0.5370720624923706, + "step": 5838 + }, + { + "epoch": 1.7074133645269778, + "grad_norm": 1.5874644037104577, + "learning_rate": 1.1506301774380578e-06, + "loss": 0.4535629153251648, + "step": 5839 + }, + { + "epoch": 1.7077058049422429, + "grad_norm": 1.79916689116012, + "learning_rate": 1.1483796365332455e-06, + "loss": 0.5456075668334961, + "step": 5840 + }, + { + "epoch": 1.7079982453575084, + "grad_norm": 
1.4286640626946725, + "learning_rate": 1.1461311646408756e-06, + "loss": 0.5884554386138916, + "step": 5841 + }, + { + "epoch": 1.7082906857727738, + "grad_norm": 1.6397329737807809, + "learning_rate": 1.1438847622865125e-06, + "loss": 0.605168879032135, + "step": 5842 + }, + { + "epoch": 1.708583126188039, + "grad_norm": 1.5178839829112376, + "learning_rate": 1.14164042999524e-06, + "loss": 0.43739163875579834, + "step": 5843 + }, + { + "epoch": 1.7088755666033046, + "grad_norm": 1.46949260133067, + "learning_rate": 1.1393981682916578e-06, + "loss": 0.4508574306964874, + "step": 5844 + }, + { + "epoch": 1.70916800701857, + "grad_norm": 1.822138537734332, + "learning_rate": 1.1371579776998798e-06, + "loss": 0.5918034315109253, + "step": 5845 + }, + { + "epoch": 1.7094604474338353, + "grad_norm": 2.0746386130567873, + "learning_rate": 1.1349198587435373e-06, + "loss": 0.5668582320213318, + "step": 5846 + }, + { + "epoch": 1.7097528878491008, + "grad_norm": 1.9282537614980426, + "learning_rate": 1.1326838119457784e-06, + "loss": 0.6374846696853638, + "step": 5847 + }, + { + "epoch": 1.7100453282643662, + "grad_norm": 1.8903891011788552, + "learning_rate": 1.130449837829264e-06, + "loss": 0.5074985027313232, + "step": 5848 + }, + { + "epoch": 1.7103377686796315, + "grad_norm": 1.5190057242638555, + "learning_rate": 1.1282179369161717e-06, + "loss": 0.5012484788894653, + "step": 5849 + }, + { + "epoch": 1.710630209094897, + "grad_norm": 1.632090745734556, + "learning_rate": 1.1259881097281977e-06, + "loss": 0.4417869746685028, + "step": 5850 + }, + { + "epoch": 1.7109226495101622, + "grad_norm": 1.8294483106085377, + "learning_rate": 1.1237603567865452e-06, + "loss": 0.6032637357711792, + "step": 5851 + }, + { + "epoch": 1.7112150899254277, + "grad_norm": 1.688609377749929, + "learning_rate": 1.121534678611942e-06, + "loss": 0.5790234804153442, + "step": 5852 + }, + { + "epoch": 1.711507530340693, + "grad_norm": 1.591889646615377, + "learning_rate": 
1.1193110757246251e-06, + "loss": 0.5436397194862366, + "step": 5853 + }, + { + "epoch": 1.7117999707559584, + "grad_norm": 1.7117652881589365, + "learning_rate": 1.11708954864435e-06, + "loss": 0.5088083744049072, + "step": 5854 + }, + { + "epoch": 1.712092411171224, + "grad_norm": 1.889980799223528, + "learning_rate": 1.1148700978903826e-06, + "loss": 0.5907719135284424, + "step": 5855 + }, + { + "epoch": 1.7123848515864892, + "grad_norm": 1.567722853126729, + "learning_rate": 1.1126527239815078e-06, + "loss": 0.4744384288787842, + "step": 5856 + }, + { + "epoch": 1.7126772920017546, + "grad_norm": 1.9312865174889629, + "learning_rate": 1.110437427436023e-06, + "loss": 0.6644346714019775, + "step": 5857 + }, + { + "epoch": 1.71296973241702, + "grad_norm": 1.6765623554239069, + "learning_rate": 1.10822420877174e-06, + "loss": 0.4926042854785919, + "step": 5858 + }, + { + "epoch": 1.7132621728322854, + "grad_norm": 1.6722485452227753, + "learning_rate": 1.1060130685059845e-06, + "loss": 0.47684335708618164, + "step": 5859 + }, + { + "epoch": 1.7135546132475508, + "grad_norm": 1.8519560247307543, + "learning_rate": 1.1038040071555988e-06, + "loss": 0.5574014186859131, + "step": 5860 + }, + { + "epoch": 1.7138470536628163, + "grad_norm": 1.737717748392033, + "learning_rate": 1.101597025236939e-06, + "loss": 0.6276485323905945, + "step": 5861 + }, + { + "epoch": 1.7141394940780816, + "grad_norm": 1.7853097232505406, + "learning_rate": 1.099392123265869e-06, + "loss": 0.558611273765564, + "step": 5862 + }, + { + "epoch": 1.714431934493347, + "grad_norm": 1.8318989515664625, + "learning_rate": 1.097189301757773e-06, + "loss": 0.5561566948890686, + "step": 5863 + }, + { + "epoch": 1.7147243749086125, + "grad_norm": 1.7772127580066208, + "learning_rate": 1.094988561227548e-06, + "loss": 0.5360273122787476, + "step": 5864 + }, + { + "epoch": 1.7150168153238776, + "grad_norm": 1.9869672499266697, + "learning_rate": 1.0927899021896038e-06, + "loss": 0.5572026968002319, + 
"step": 5865 + }, + { + "epoch": 1.7153092557391432, + "grad_norm": 1.696631763346783, + "learning_rate": 1.0905933251578626e-06, + "loss": 0.4593105912208557, + "step": 5866 + }, + { + "epoch": 1.7156016961544085, + "grad_norm": 1.7954251083275348, + "learning_rate": 1.0883988306457627e-06, + "loss": 0.5017558336257935, + "step": 5867 + }, + { + "epoch": 1.7158941365696738, + "grad_norm": 1.6294086582619267, + "learning_rate": 1.0862064191662524e-06, + "loss": 0.4982030391693115, + "step": 5868 + }, + { + "epoch": 1.7161865769849394, + "grad_norm": 1.5832146918310879, + "learning_rate": 1.0840160912317943e-06, + "loss": 0.5563114881515503, + "step": 5869 + }, + { + "epoch": 1.7164790174002047, + "grad_norm": 1.6522408781609719, + "learning_rate": 1.0818278473543652e-06, + "loss": 0.4817348122596741, + "step": 5870 + }, + { + "epoch": 1.71677145781547, + "grad_norm": 1.6923338540004815, + "learning_rate": 1.079641688045453e-06, + "loss": 0.47907108068466187, + "step": 5871 + }, + { + "epoch": 1.7170638982307356, + "grad_norm": 1.985278987997586, + "learning_rate": 1.0774576138160596e-06, + "loss": 0.6158252954483032, + "step": 5872 + }, + { + "epoch": 1.717356338646001, + "grad_norm": 1.712800633970283, + "learning_rate": 1.0752756251767015e-06, + "loss": 0.5336505174636841, + "step": 5873 + }, + { + "epoch": 1.7176487790612662, + "grad_norm": 1.6889119428738892, + "learning_rate": 1.0730957226374006e-06, + "loss": 0.5806115865707397, + "step": 5874 + }, + { + "epoch": 1.7179412194765318, + "grad_norm": 1.7163109676688793, + "learning_rate": 1.070917906707698e-06, + "loss": 0.3701411485671997, + "step": 5875 + }, + { + "epoch": 1.7182336598917969, + "grad_norm": 1.5519162070562529, + "learning_rate": 1.0687421778966445e-06, + "loss": 0.5779517292976379, + "step": 5876 + }, + { + "epoch": 1.7185261003070624, + "grad_norm": 1.5444011974555767, + "learning_rate": 1.0665685367128041e-06, + "loss": 0.43965232372283936, + "step": 5877 + }, + { + "epoch": 
1.718818540722328, + "grad_norm": 1.7154722678485648, + "learning_rate": 1.064396983664253e-06, + "loss": 0.4768058657646179, + "step": 5878 + }, + { + "epoch": 1.719110981137593, + "grad_norm": 1.6286437020829267, + "learning_rate": 1.0622275192585773e-06, + "loss": 0.5331600904464722, + "step": 5879 + }, + { + "epoch": 1.7194034215528586, + "grad_norm": 1.6603687606186237, + "learning_rate": 1.0600601440028758e-06, + "loss": 0.5495625734329224, + "step": 5880 + }, + { + "epoch": 1.719695861968124, + "grad_norm": 1.6915455937474744, + "learning_rate": 1.0578948584037608e-06, + "loss": 0.4244312345981598, + "step": 5881 + }, + { + "epoch": 1.7199883023833893, + "grad_norm": 1.7562786480710206, + "learning_rate": 1.0557316629673531e-06, + "loss": 0.4618447721004486, + "step": 5882 + }, + { + "epoch": 1.7202807427986548, + "grad_norm": 1.3835850144546908, + "learning_rate": 1.0535705581992873e-06, + "loss": 0.4226785898208618, + "step": 5883 + }, + { + "epoch": 1.7205731832139202, + "grad_norm": 1.8373576265806915, + "learning_rate": 1.0514115446047101e-06, + "loss": 0.5813404321670532, + "step": 5884 + }, + { + "epoch": 1.7208656236291855, + "grad_norm": 1.774672318962678, + "learning_rate": 1.0492546226882738e-06, + "loss": 0.6700260639190674, + "step": 5885 + }, + { + "epoch": 1.721158064044451, + "grad_norm": 1.8100136828076652, + "learning_rate": 1.0470997929541494e-06, + "loss": 0.6024131178855896, + "step": 5886 + }, + { + "epoch": 1.7214505044597164, + "grad_norm": 1.8033126749427817, + "learning_rate": 1.0449470559060125e-06, + "loss": 0.6015123724937439, + "step": 5887 + }, + { + "epoch": 1.7217429448749817, + "grad_norm": 1.714487906410119, + "learning_rate": 1.0427964120470534e-06, + "loss": 0.6631267070770264, + "step": 5888 + }, + { + "epoch": 1.7220353852902472, + "grad_norm": 1.7445362923992234, + "learning_rate": 1.0406478618799731e-06, + "loss": 0.5267488956451416, + "step": 5889 + }, + { + "epoch": 1.7223278257055123, + "grad_norm": 
1.6683876570881706, + "learning_rate": 1.038501405906982e-06, + "loss": 0.5190263986587524, + "step": 5890 + }, + { + "epoch": 1.722620266120778, + "grad_norm": 1.6678272928853268, + "learning_rate": 1.0363570446297999e-06, + "loss": 0.5253189206123352, + "step": 5891 + }, + { + "epoch": 1.7229127065360432, + "grad_norm": 1.6306770585402846, + "learning_rate": 1.0342147785496581e-06, + "loss": 0.5271278619766235, + "step": 5892 + }, + { + "epoch": 1.7232051469513086, + "grad_norm": 1.7373442044536598, + "learning_rate": 1.0320746081672994e-06, + "loss": 0.5284109711647034, + "step": 5893 + }, + { + "epoch": 1.723497587366574, + "grad_norm": 1.764049872395232, + "learning_rate": 1.0299365339829747e-06, + "loss": 0.6119050979614258, + "step": 5894 + }, + { + "epoch": 1.7237900277818394, + "grad_norm": 1.583925980020329, + "learning_rate": 1.0278005564964488e-06, + "loss": 0.42297711968421936, + "step": 5895 + }, + { + "epoch": 1.7240824681971048, + "grad_norm": 1.7105013452989373, + "learning_rate": 1.02566667620699e-06, + "loss": 0.5923792123794556, + "step": 5896 + }, + { + "epoch": 1.7243749086123703, + "grad_norm": 1.6831938137571334, + "learning_rate": 1.023534893613377e-06, + "loss": 0.4999189078807831, + "step": 5897 + }, + { + "epoch": 1.7246673490276356, + "grad_norm": 1.6907699986400666, + "learning_rate": 1.0214052092139082e-06, + "loss": 0.49083560705184937, + "step": 5898 + }, + { + "epoch": 1.724959789442901, + "grad_norm": 3.9391609144586437, + "learning_rate": 1.0192776235063795e-06, + "loss": 0.6001632213592529, + "step": 5899 + }, + { + "epoch": 1.7252522298581665, + "grad_norm": 1.473933103211581, + "learning_rate": 1.0171521369881044e-06, + "loss": 0.4897228479385376, + "step": 5900 + }, + { + "epoch": 1.7255446702734318, + "grad_norm": 1.6762179044603425, + "learning_rate": 1.0150287501558997e-06, + "loss": 0.44784292578697205, + "step": 5901 + }, + { + "epoch": 1.7258371106886972, + "grad_norm": 1.542625612657722, + "learning_rate": 
1.0129074635060943e-06, + "loss": 0.46105432510375977, + "step": 5902 + }, + { + "epoch": 1.7261295511039627, + "grad_norm": 1.9028079699425045, + "learning_rate": 1.0107882775345278e-06, + "loss": 0.5805546045303345, + "step": 5903 + }, + { + "epoch": 1.7264219915192278, + "grad_norm": 1.719859761694945, + "learning_rate": 1.0086711927365488e-06, + "loss": 0.560761570930481, + "step": 5904 + }, + { + "epoch": 1.7267144319344934, + "grad_norm": 1.475103420661766, + "learning_rate": 1.006556209607007e-06, + "loss": 0.533979058265686, + "step": 5905 + }, + { + "epoch": 1.7270068723497587, + "grad_norm": 1.7039894712110264, + "learning_rate": 1.004443328640271e-06, + "loss": 0.5742807984352112, + "step": 5906 + }, + { + "epoch": 1.727299312765024, + "grad_norm": 1.9394101910903232, + "learning_rate": 1.0023325503302129e-06, + "loss": 0.5617523789405823, + "step": 5907 + }, + { + "epoch": 1.7275917531802896, + "grad_norm": 1.620137966655423, + "learning_rate": 1.0002238751702143e-06, + "loss": 0.45596855878829956, + "step": 5908 + }, + { + "epoch": 1.727884193595555, + "grad_norm": 1.523715810181856, + "learning_rate": 9.981173036531655e-07, + "loss": 0.4917908012866974, + "step": 5909 + }, + { + "epoch": 1.7281766340108202, + "grad_norm": 1.8089323806924067, + "learning_rate": 9.960128362714637e-07, + "loss": 0.6204911470413208, + "step": 5910 + }, + { + "epoch": 1.7284690744260858, + "grad_norm": 1.615074466715287, + "learning_rate": 9.93910473517018e-07, + "loss": 0.47288352251052856, + "step": 5911 + }, + { + "epoch": 1.728761514841351, + "grad_norm": 1.9414111913595387, + "learning_rate": 9.918102158812404e-07, + "loss": 0.48668670654296875, + "step": 5912 + }, + { + "epoch": 1.7290539552566164, + "grad_norm": 1.723740686191889, + "learning_rate": 9.89712063855054e-07, + "loss": 0.43311381340026855, + "step": 5913 + }, + { + "epoch": 1.729346395671882, + "grad_norm": 1.748275288399291, + "learning_rate": 9.876160179288886e-07, + "loss": 0.5066087245941162, + 
"step": 5914 + }, + { + "epoch": 1.729638836087147, + "grad_norm": 1.6099318260759374, + "learning_rate": 9.855220785926856e-07, + "loss": 0.6022528409957886, + "step": 5915 + }, + { + "epoch": 1.7299312765024126, + "grad_norm": 1.6809686879748886, + "learning_rate": 9.834302463358858e-07, + "loss": 0.5288707613945007, + "step": 5916 + }, + { + "epoch": 1.7302237169176782, + "grad_norm": 1.7087060764928856, + "learning_rate": 9.813405216474436e-07, + "loss": 0.6150302290916443, + "step": 5917 + }, + { + "epoch": 1.7305161573329433, + "grad_norm": 1.7234099983807605, + "learning_rate": 9.792529050158218e-07, + "loss": 0.5431156158447266, + "step": 5918 + }, + { + "epoch": 1.7308085977482088, + "grad_norm": 1.7871856102017598, + "learning_rate": 9.771673969289851e-07, + "loss": 0.6844080686569214, + "step": 5919 + }, + { + "epoch": 1.7311010381634742, + "grad_norm": 1.9483136158091563, + "learning_rate": 9.750839978744098e-07, + "loss": 0.4778372049331665, + "step": 5920 + }, + { + "epoch": 1.7313934785787395, + "grad_norm": 1.803034120095624, + "learning_rate": 9.73002708339077e-07, + "loss": 0.6913809776306152, + "step": 5921 + }, + { + "epoch": 1.731685918994005, + "grad_norm": 1.5934425203745812, + "learning_rate": 9.709235288094765e-07, + "loss": 0.6289864778518677, + "step": 5922 + }, + { + "epoch": 1.7319783594092704, + "grad_norm": 1.7803434049533318, + "learning_rate": 9.68846459771604e-07, + "loss": 0.4735794961452484, + "step": 5923 + }, + { + "epoch": 1.7322707998245357, + "grad_norm": 1.7329775832839742, + "learning_rate": 9.667715017109614e-07, + "loss": 0.53554767370224, + "step": 5924 + }, + { + "epoch": 1.7325632402398012, + "grad_norm": 1.9726609824515038, + "learning_rate": 9.64698655112557e-07, + "loss": 0.5118460655212402, + "step": 5925 + }, + { + "epoch": 1.7328556806550666, + "grad_norm": 1.819236864509276, + "learning_rate": 9.626279204609079e-07, + "loss": 0.5739814043045044, + "step": 5926 + }, + { + "epoch": 1.733148121070332, + 
"grad_norm": 1.6784156679062403, + "learning_rate": 9.605592982400325e-07, + "loss": 0.5716123580932617, + "step": 5927 + }, + { + "epoch": 1.7334405614855974, + "grad_norm": 1.7916971306174196, + "learning_rate": 9.584927889334605e-07, + "loss": 0.5091898441314697, + "step": 5928 + }, + { + "epoch": 1.7337330019008625, + "grad_norm": 1.6267503374739263, + "learning_rate": 9.564283930242258e-07, + "loss": 0.46946650743484497, + "step": 5929 + }, + { + "epoch": 1.734025442316128, + "grad_norm": 1.6625890698419732, + "learning_rate": 9.543661109948688e-07, + "loss": 0.6238217353820801, + "step": 5930 + }, + { + "epoch": 1.7343178827313934, + "grad_norm": 1.8870256552743607, + "learning_rate": 9.52305943327434e-07, + "loss": 0.7464175224304199, + "step": 5931 + }, + { + "epoch": 1.7346103231466588, + "grad_norm": 2.417177332317345, + "learning_rate": 9.502478905034751e-07, + "loss": 0.6064578294754028, + "step": 5932 + }, + { + "epoch": 1.7349027635619243, + "grad_norm": 1.5711166860907437, + "learning_rate": 9.481919530040484e-07, + "loss": 0.5703303813934326, + "step": 5933 + }, + { + "epoch": 1.7351952039771896, + "grad_norm": 1.773413757718004, + "learning_rate": 9.461381313097162e-07, + "loss": 0.5570278167724609, + "step": 5934 + }, + { + "epoch": 1.735487644392455, + "grad_norm": 1.795987369299435, + "learning_rate": 9.440864259005477e-07, + "loss": 0.54972243309021, + "step": 5935 + }, + { + "epoch": 1.7357800848077205, + "grad_norm": 1.6140356285907533, + "learning_rate": 9.420368372561161e-07, + "loss": 0.5670010447502136, + "step": 5936 + }, + { + "epoch": 1.7360725252229858, + "grad_norm": 1.642113144044588, + "learning_rate": 9.399893658555026e-07, + "loss": 0.5306927561759949, + "step": 5937 + }, + { + "epoch": 1.7363649656382512, + "grad_norm": 1.5565759572639428, + "learning_rate": 9.379440121772876e-07, + "loss": 0.5080308318138123, + "step": 5938 + }, + { + "epoch": 1.7366574060535167, + "grad_norm": 1.5576076668453387, + "learning_rate": 
9.359007766995609e-07, + "loss": 0.5444519519805908, + "step": 5939 + }, + { + "epoch": 1.736949846468782, + "grad_norm": 1.737287044912212, + "learning_rate": 9.338596598999172e-07, + "loss": 0.5353262424468994, + "step": 5940 + }, + { + "epoch": 1.7372422868840474, + "grad_norm": 1.5405646785157867, + "learning_rate": 9.318206622554549e-07, + "loss": 0.4766794443130493, + "step": 5941 + }, + { + "epoch": 1.737534727299313, + "grad_norm": 1.5818937282065444, + "learning_rate": 9.29783784242777e-07, + "loss": 0.4913482666015625, + "step": 5942 + }, + { + "epoch": 1.737827167714578, + "grad_norm": 1.5030657740252151, + "learning_rate": 9.277490263379918e-07, + "loss": 0.47637009620666504, + "step": 5943 + }, + { + "epoch": 1.7381196081298436, + "grad_norm": 1.8131560819786492, + "learning_rate": 9.25716389016712e-07, + "loss": 0.5122126936912537, + "step": 5944 + }, + { + "epoch": 1.738412048545109, + "grad_norm": 1.51801570238093, + "learning_rate": 9.236858727540543e-07, + "loss": 0.5263532400131226, + "step": 5945 + }, + { + "epoch": 1.7387044889603742, + "grad_norm": 1.607505719698849, + "learning_rate": 9.216574780246379e-07, + "loss": 0.5214182734489441, + "step": 5946 + }, + { + "epoch": 1.7389969293756398, + "grad_norm": 1.877073258708154, + "learning_rate": 9.196312053025891e-07, + "loss": 0.5955429077148438, + "step": 5947 + }, + { + "epoch": 1.739289369790905, + "grad_norm": 1.6543213511410424, + "learning_rate": 9.176070550615379e-07, + "loss": 0.4809807538986206, + "step": 5948 + }, + { + "epoch": 1.7395818102061704, + "grad_norm": 1.882804975326707, + "learning_rate": 9.155850277746148e-07, + "loss": 0.4769969582557678, + "step": 5949 + }, + { + "epoch": 1.739874250621436, + "grad_norm": 2.4545379886365954, + "learning_rate": 9.135651239144561e-07, + "loss": 0.48527538776397705, + "step": 5950 + }, + { + "epoch": 1.7401666910367013, + "grad_norm": 1.696389032166004, + "learning_rate": 9.115473439532041e-07, + "loss": 0.6703393459320068, + "step": 5951 
+ }, + { + "epoch": 1.7404591314519666, + "grad_norm": 1.807366721076005, + "learning_rate": 9.095316883625016e-07, + "loss": 0.5742951035499573, + "step": 5952 + }, + { + "epoch": 1.7407515718672322, + "grad_norm": 1.9552666950039521, + "learning_rate": 9.075181576134961e-07, + "loss": 0.6285614967346191, + "step": 5953 + }, + { + "epoch": 1.7410440122824973, + "grad_norm": 1.6961990538831806, + "learning_rate": 9.055067521768379e-07, + "loss": 0.5872488021850586, + "step": 5954 + }, + { + "epoch": 1.7413364526977628, + "grad_norm": 1.6900638369260592, + "learning_rate": 9.034974725226808e-07, + "loss": 0.5483776330947876, + "step": 5955 + }, + { + "epoch": 1.7416288931130284, + "grad_norm": 1.838843211951185, + "learning_rate": 9.014903191206825e-07, + "loss": 0.4913061261177063, + "step": 5956 + }, + { + "epoch": 1.7419213335282935, + "grad_norm": 1.6413412279440867, + "learning_rate": 8.994852924400022e-07, + "loss": 0.5431212186813354, + "step": 5957 + }, + { + "epoch": 1.742213773943559, + "grad_norm": 1.735940615294129, + "learning_rate": 8.974823929493015e-07, + "loss": 0.5391141176223755, + "step": 5958 + }, + { + "epoch": 1.7425062143588244, + "grad_norm": 1.455007956070738, + "learning_rate": 8.954816211167483e-07, + "loss": 0.48980265855789185, + "step": 5959 + }, + { + "epoch": 1.7427986547740897, + "grad_norm": 1.6465722416646151, + "learning_rate": 8.934829774100118e-07, + "loss": 0.6747336387634277, + "step": 5960 + }, + { + "epoch": 1.7430910951893552, + "grad_norm": 2.048914745001018, + "learning_rate": 8.914864622962582e-07, + "loss": 0.4911282956600189, + "step": 5961 + }, + { + "epoch": 1.7433835356046206, + "grad_norm": 1.6999465895023511, + "learning_rate": 8.894920762421644e-07, + "loss": 0.5863965153694153, + "step": 5962 + }, + { + "epoch": 1.7436759760198859, + "grad_norm": 1.6964011957158196, + "learning_rate": 8.87499819713904e-07, + "loss": 0.5413792729377747, + "step": 5963 + }, + { + "epoch": 1.7439684164351514, + "grad_norm": 
1.8650590121272839, + "learning_rate": 8.855096931771568e-07, + "loss": 0.5288723707199097, + "step": 5964 + }, + { + "epoch": 1.7442608568504168, + "grad_norm": 1.72339918808855, + "learning_rate": 8.835216970971006e-07, + "loss": 0.5129783749580383, + "step": 5965 + }, + { + "epoch": 1.744553297265682, + "grad_norm": 1.7489856693904517, + "learning_rate": 8.815358319384193e-07, + "loss": 0.5606918334960938, + "step": 5966 + }, + { + "epoch": 1.7448457376809476, + "grad_norm": 1.9036006380739827, + "learning_rate": 8.79552098165296e-07, + "loss": 0.6277288198471069, + "step": 5967 + }, + { + "epoch": 1.7451381780962127, + "grad_norm": 1.7432749923566282, + "learning_rate": 8.775704962414167e-07, + "loss": 0.5390176773071289, + "step": 5968 + }, + { + "epoch": 1.7454306185114783, + "grad_norm": 1.8645943677337757, + "learning_rate": 8.755910266299684e-07, + "loss": 0.680462121963501, + "step": 5969 + }, + { + "epoch": 1.7457230589267436, + "grad_norm": 1.678166381653785, + "learning_rate": 8.736136897936398e-07, + "loss": 0.5134397149085999, + "step": 5970 + }, + { + "epoch": 1.746015499342009, + "grad_norm": 1.796274905651791, + "learning_rate": 8.716384861946248e-07, + "loss": 0.6280460357666016, + "step": 5971 + }, + { + "epoch": 1.7463079397572745, + "grad_norm": 1.8396010080181593, + "learning_rate": 8.696654162946094e-07, + "loss": 0.5425370931625366, + "step": 5972 + }, + { + "epoch": 1.7466003801725398, + "grad_norm": 1.7657169836698012, + "learning_rate": 8.676944805547882e-07, + "loss": 0.5831055045127869, + "step": 5973 + }, + { + "epoch": 1.7468928205878052, + "grad_norm": 1.3865571188404813, + "learning_rate": 8.657256794358592e-07, + "loss": 0.4615570306777954, + "step": 5974 + }, + { + "epoch": 1.7471852610030707, + "grad_norm": 1.6631260131171646, + "learning_rate": 8.637590133980145e-07, + "loss": 0.5727440118789673, + "step": 5975 + }, + { + "epoch": 1.747477701418336, + "grad_norm": 1.6981377401436724, + "learning_rate": 8.617944829009517e-07, + 
"loss": 0.5652801990509033, + "step": 5976 + }, + { + "epoch": 1.7477701418336014, + "grad_norm": 1.6704888560345945, + "learning_rate": 8.59832088403868e-07, + "loss": 0.42455562949180603, + "step": 5977 + }, + { + "epoch": 1.748062582248867, + "grad_norm": 1.8565352683598422, + "learning_rate": 8.578718303654588e-07, + "loss": 0.526951789855957, + "step": 5978 + }, + { + "epoch": 1.7483550226641322, + "grad_norm": 1.5113931171346078, + "learning_rate": 8.559137092439252e-07, + "loss": 0.45547354221343994, + "step": 5979 + }, + { + "epoch": 1.7486474630793976, + "grad_norm": 1.843493314178274, + "learning_rate": 8.539577254969667e-07, + "loss": 0.5470790863037109, + "step": 5980 + }, + { + "epoch": 1.748939903494663, + "grad_norm": 1.6766357010415907, + "learning_rate": 8.520038795817798e-07, + "loss": 0.5608032941818237, + "step": 5981 + }, + { + "epoch": 1.7492323439099282, + "grad_norm": 1.7251948475523264, + "learning_rate": 8.500521719550648e-07, + "loss": 0.5243809223175049, + "step": 5982 + }, + { + "epoch": 1.7495247843251938, + "grad_norm": 1.7843504248813373, + "learning_rate": 8.481026030730222e-07, + "loss": 0.5040958523750305, + "step": 5983 + }, + { + "epoch": 1.749817224740459, + "grad_norm": 1.9016982717852353, + "learning_rate": 8.461551733913509e-07, + "loss": 0.5026291012763977, + "step": 5984 + }, + { + "epoch": 1.7501096651557244, + "grad_norm": 1.578287817505696, + "learning_rate": 8.442098833652523e-07, + "loss": 0.5273059010505676, + "step": 5985 + }, + { + "epoch": 1.75040210557099, + "grad_norm": 1.7872787423587175, + "learning_rate": 8.42266733449425e-07, + "loss": 0.5811910629272461, + "step": 5986 + }, + { + "epoch": 1.7506945459862553, + "grad_norm": 1.9383664928260165, + "learning_rate": 8.4032572409807e-07, + "loss": 0.6078274250030518, + "step": 5987 + }, + { + "epoch": 1.7509869864015206, + "grad_norm": 1.8956639494069205, + "learning_rate": 8.383868557648833e-07, + "loss": 0.5214031338691711, + "step": 5988 + }, + { + "epoch": 
1.7512794268167862, + "grad_norm": 1.8686899180431094, + "learning_rate": 8.364501289030669e-07, + "loss": 0.5464918613433838, + "step": 5989 + }, + { + "epoch": 1.7515718672320515, + "grad_norm": 1.9577387999849984, + "learning_rate": 8.345155439653175e-07, + "loss": 0.48296278715133667, + "step": 5990 + }, + { + "epoch": 1.7518643076473168, + "grad_norm": 1.6097156536359971, + "learning_rate": 8.325831014038344e-07, + "loss": 0.5441919565200806, + "step": 5991 + }, + { + "epoch": 1.7521567480625824, + "grad_norm": 1.524249865256617, + "learning_rate": 8.306528016703097e-07, + "loss": 0.4623905420303345, + "step": 5992 + }, + { + "epoch": 1.7524491884778475, + "grad_norm": 1.7850630013083288, + "learning_rate": 8.287246452159437e-07, + "loss": 0.5671495795249939, + "step": 5993 + }, + { + "epoch": 1.752741628893113, + "grad_norm": 1.7371249179959158, + "learning_rate": 8.267986324914278e-07, + "loss": 0.5400685667991638, + "step": 5994 + }, + { + "epoch": 1.7530340693083786, + "grad_norm": 1.7239850907759944, + "learning_rate": 8.24874763946959e-07, + "loss": 0.4362148642539978, + "step": 5995 + }, + { + "epoch": 1.7533265097236437, + "grad_norm": 1.7548276097653166, + "learning_rate": 8.229530400322283e-07, + "loss": 0.554877519607544, + "step": 5996 + }, + { + "epoch": 1.7536189501389092, + "grad_norm": 1.6421753593232726, + "learning_rate": 8.210334611964266e-07, + "loss": 0.5239896774291992, + "step": 5997 + }, + { + "epoch": 1.7539113905541746, + "grad_norm": 1.9442998633220852, + "learning_rate": 8.191160278882438e-07, + "loss": 0.4729669988155365, + "step": 5998 + }, + { + "epoch": 1.7542038309694399, + "grad_norm": 1.5789777380903094, + "learning_rate": 8.172007405558702e-07, + "loss": 0.5449322462081909, + "step": 5999 + }, + { + "epoch": 1.7544962713847054, + "grad_norm": 1.6329056939447448, + "learning_rate": 8.1528759964699e-07, + "loss": 0.5422194600105286, + "step": 6000 + }, + { + "epoch": 1.7547887117999708, + "grad_norm": 1.4174806038648198, + 
"learning_rate": 8.1337660560879e-07, + "loss": 0.3854302763938904, + "step": 6001 + }, + { + "epoch": 1.755081152215236, + "grad_norm": 1.7209418471597917, + "learning_rate": 8.114677588879549e-07, + "loss": 0.4678449034690857, + "step": 6002 + }, + { + "epoch": 1.7553735926305016, + "grad_norm": 1.5464176931987315, + "learning_rate": 8.095610599306614e-07, + "loss": 0.5462471842765808, + "step": 6003 + }, + { + "epoch": 1.755666033045767, + "grad_norm": 2.204727836372247, + "learning_rate": 8.076565091825916e-07, + "loss": 0.6314511299133301, + "step": 6004 + }, + { + "epoch": 1.7559584734610323, + "grad_norm": 1.5748030564701405, + "learning_rate": 8.057541070889229e-07, + "loss": 0.6373077630996704, + "step": 6005 + }, + { + "epoch": 1.7562509138762978, + "grad_norm": 1.3531361411828478, + "learning_rate": 8.038538540943297e-07, + "loss": 0.5279273986816406, + "step": 6006 + }, + { + "epoch": 1.756543354291563, + "grad_norm": 1.8888532901747122, + "learning_rate": 8.019557506429843e-07, + "loss": 0.5645443201065063, + "step": 6007 + }, + { + "epoch": 1.7568357947068285, + "grad_norm": 1.776791412383221, + "learning_rate": 8.000597971785573e-07, + "loss": 0.552385151386261, + "step": 6008 + }, + { + "epoch": 1.757128235122094, + "grad_norm": 1.8476397874412334, + "learning_rate": 7.981659941442154e-07, + "loss": 0.5790541172027588, + "step": 6009 + }, + { + "epoch": 1.7574206755373591, + "grad_norm": 1.8909444917759248, + "learning_rate": 7.962743419826247e-07, + "loss": 0.550809383392334, + "step": 6010 + }, + { + "epoch": 1.7577131159526247, + "grad_norm": 1.845124979293289, + "learning_rate": 7.943848411359479e-07, + "loss": 0.4659814238548279, + "step": 6011 + }, + { + "epoch": 1.75800555636789, + "grad_norm": 1.8856668900422473, + "learning_rate": 7.924974920458428e-07, + "loss": 0.5099040269851685, + "step": 6012 + }, + { + "epoch": 1.7582979967831553, + "grad_norm": 1.8389637809818669, + "learning_rate": 7.906122951534678e-07, + "loss": 
0.4819038510322571, + "step": 6013 + }, + { + "epoch": 1.758590437198421, + "grad_norm": 1.6198962208116707, + "learning_rate": 7.887292508994737e-07, + "loss": 0.4033840298652649, + "step": 6014 + }, + { + "epoch": 1.7588828776136862, + "grad_norm": 2.0694906070649397, + "learning_rate": 7.868483597240117e-07, + "loss": 0.6316418647766113, + "step": 6015 + }, + { + "epoch": 1.7591753180289516, + "grad_norm": 1.574018695954754, + "learning_rate": 7.84969622066728e-07, + "loss": 0.5141040682792664, + "step": 6016 + }, + { + "epoch": 1.759467758444217, + "grad_norm": 1.625714616568423, + "learning_rate": 7.830930383667668e-07, + "loss": 0.44808077812194824, + "step": 6017 + }, + { + "epoch": 1.7597601988594824, + "grad_norm": 1.6060850378753004, + "learning_rate": 7.812186090627694e-07, + "loss": 0.5661089420318604, + "step": 6018 + }, + { + "epoch": 1.7600526392747478, + "grad_norm": 1.553528332849771, + "learning_rate": 7.793463345928697e-07, + "loss": 0.487590491771698, + "step": 6019 + }, + { + "epoch": 1.7603450796900133, + "grad_norm": 1.9699234516767667, + "learning_rate": 7.774762153947024e-07, + "loss": 0.5775022506713867, + "step": 6020 + }, + { + "epoch": 1.7606375201052784, + "grad_norm": 1.7091892859281639, + "learning_rate": 7.756082519053965e-07, + "loss": 0.5714563131332397, + "step": 6021 + }, + { + "epoch": 1.760929960520544, + "grad_norm": 1.8764497127243964, + "learning_rate": 7.73742444561576e-07, + "loss": 0.6063593626022339, + "step": 6022 + }, + { + "epoch": 1.7612224009358093, + "grad_norm": 1.7254357706950765, + "learning_rate": 7.718787937993622e-07, + "loss": 0.48034632205963135, + "step": 6023 + }, + { + "epoch": 1.7615148413510746, + "grad_norm": 1.4591503666123415, + "learning_rate": 7.700173000543742e-07, + "loss": 0.6003588438034058, + "step": 6024 + }, + { + "epoch": 1.7618072817663402, + "grad_norm": 1.6378303717233282, + "learning_rate": 7.681579637617209e-07, + "loss": 0.42842140793800354, + "step": 6025 + }, + { + "epoch": 
1.7620997221816055, + "grad_norm": 1.502196803812996, + "learning_rate": 7.663007853560145e-07, + "loss": 0.5235859155654907, + "step": 6026 + }, + { + "epoch": 1.7623921625968708, + "grad_norm": 1.6904284507093605, + "learning_rate": 7.644457652713566e-07, + "loss": 0.47140365839004517, + "step": 6027 + }, + { + "epoch": 1.7626846030121364, + "grad_norm": 1.6014343948293654, + "learning_rate": 7.625929039413483e-07, + "loss": 0.53680419921875, + "step": 6028 + }, + { + "epoch": 1.7629770434274017, + "grad_norm": 1.6173156649426792, + "learning_rate": 7.60742201799084e-07, + "loss": 0.5280998349189758, + "step": 6029 + }, + { + "epoch": 1.763269483842667, + "grad_norm": 1.58299525140219, + "learning_rate": 7.588936592771545e-07, + "loss": 0.49653276801109314, + "step": 6030 + }, + { + "epoch": 1.7635619242579326, + "grad_norm": 1.6031325431493386, + "learning_rate": 7.570472768076464e-07, + "loss": 0.511070966720581, + "step": 6031 + }, + { + "epoch": 1.7638543646731977, + "grad_norm": 1.9173967106238505, + "learning_rate": 7.552030548221379e-07, + "loss": 0.6601030826568604, + "step": 6032 + }, + { + "epoch": 1.7641468050884632, + "grad_norm": 1.7630822043129881, + "learning_rate": 7.533609937517072e-07, + "loss": 0.6216480731964111, + "step": 6033 + }, + { + "epoch": 1.7644392455037288, + "grad_norm": 2.1444282721386765, + "learning_rate": 7.515210940269224e-07, + "loss": 0.7237618565559387, + "step": 6034 + }, + { + "epoch": 1.7647316859189939, + "grad_norm": 1.9895778147794236, + "learning_rate": 7.496833560778527e-07, + "loss": 0.4979498088359833, + "step": 6035 + }, + { + "epoch": 1.7650241263342594, + "grad_norm": 1.5816967377469986, + "learning_rate": 7.478477803340533e-07, + "loss": 0.49408191442489624, + "step": 6036 + }, + { + "epoch": 1.7653165667495248, + "grad_norm": 1.8439349693338256, + "learning_rate": 7.460143672245823e-07, + "loss": 0.524259626865387, + "step": 6037 + }, + { + "epoch": 1.76560900716479, + "grad_norm": 1.8574151410796558, + 
"learning_rate": 7.441831171779878e-07, + "loss": 0.625320315361023, + "step": 6038 + }, + { + "epoch": 1.7659014475800556, + "grad_norm": 1.7217980866482836, + "learning_rate": 7.42354030622312e-07, + "loss": 0.5971028804779053, + "step": 6039 + }, + { + "epoch": 1.766193887995321, + "grad_norm": 1.5069481360511938, + "learning_rate": 7.405271079850951e-07, + "loss": 0.48935002088546753, + "step": 6040 + }, + { + "epoch": 1.7664863284105863, + "grad_norm": 1.7616973297205794, + "learning_rate": 7.387023496933687e-07, + "loss": 0.46346336603164673, + "step": 6041 + }, + { + "epoch": 1.7667787688258518, + "grad_norm": 1.5425066644175864, + "learning_rate": 7.368797561736574e-07, + "loss": 0.5135314464569092, + "step": 6042 + }, + { + "epoch": 1.7670712092411172, + "grad_norm": 1.7938719309176694, + "learning_rate": 7.350593278519824e-07, + "loss": 0.45815128087997437, + "step": 6043 + }, + { + "epoch": 1.7673636496563825, + "grad_norm": 1.8253657375894647, + "learning_rate": 7.332410651538591e-07, + "loss": 0.5663015246391296, + "step": 6044 + }, + { + "epoch": 1.767656090071648, + "grad_norm": 1.6737365706300193, + "learning_rate": 7.314249685042929e-07, + "loss": 0.5323490500450134, + "step": 6045 + }, + { + "epoch": 1.7679485304869131, + "grad_norm": 1.8380863614801877, + "learning_rate": 7.296110383277866e-07, + "loss": 0.5489768981933594, + "step": 6046 + }, + { + "epoch": 1.7682409709021787, + "grad_norm": 1.867533811207324, + "learning_rate": 7.277992750483364e-07, + "loss": 0.5951086282730103, + "step": 6047 + }, + { + "epoch": 1.7685334113174442, + "grad_norm": 1.6688539257267474, + "learning_rate": 7.259896790894271e-07, + "loss": 0.48228102922439575, + "step": 6048 + }, + { + "epoch": 1.7688258517327093, + "grad_norm": 1.7579049817410466, + "learning_rate": 7.241822508740448e-07, + "loss": 0.6318891644477844, + "step": 6049 + }, + { + "epoch": 1.769118292147975, + "grad_norm": 1.967894881109258, + "learning_rate": 7.223769908246636e-07, + "loss": 
0.4966656267642975, + "step": 6050 + }, + { + "epoch": 1.7694107325632402, + "grad_norm": 1.7465352091582635, + "learning_rate": 7.205738993632516e-07, + "loss": 0.5645290613174438, + "step": 6051 + }, + { + "epoch": 1.7697031729785055, + "grad_norm": 1.8324400656837103, + "learning_rate": 7.187729769112717e-07, + "loss": 0.560075044631958, + "step": 6052 + }, + { + "epoch": 1.769995613393771, + "grad_norm": 1.658346896913261, + "learning_rate": 7.169742238896771e-07, + "loss": 0.6375163793563843, + "step": 6053 + }, + { + "epoch": 1.7702880538090364, + "grad_norm": 1.9991114191844357, + "learning_rate": 7.15177640718916e-07, + "loss": 0.5620392560958862, + "step": 6054 + }, + { + "epoch": 1.7705804942243017, + "grad_norm": 1.7885795694198106, + "learning_rate": 7.133832278189301e-07, + "loss": 0.5382653474807739, + "step": 6055 + }, + { + "epoch": 1.7708729346395673, + "grad_norm": 1.954649524899457, + "learning_rate": 7.115909856091497e-07, + "loss": 0.502597451210022, + "step": 6056 + }, + { + "epoch": 1.7711653750548326, + "grad_norm": 1.782753780230982, + "learning_rate": 7.098009145085016e-07, + "loss": 0.5876599550247192, + "step": 6057 + }, + { + "epoch": 1.771457815470098, + "grad_norm": 1.7624219528533958, + "learning_rate": 7.080130149354048e-07, + "loss": 0.5164280533790588, + "step": 6058 + }, + { + "epoch": 1.7717502558853635, + "grad_norm": 1.7004652166347358, + "learning_rate": 7.062272873077691e-07, + "loss": 0.5192137360572815, + "step": 6059 + }, + { + "epoch": 1.7720426963006286, + "grad_norm": 1.6924472823946135, + "learning_rate": 7.044437320429987e-07, + "loss": 0.5298370122909546, + "step": 6060 + }, + { + "epoch": 1.7723351367158942, + "grad_norm": 1.671988873461514, + "learning_rate": 7.026623495579876e-07, + "loss": 0.5099462270736694, + "step": 6061 + }, + { + "epoch": 1.7726275771311595, + "grad_norm": 1.8314661737989666, + "learning_rate": 7.00883140269123e-07, + "loss": 0.6061269640922546, + "step": 6062 + }, + { + "epoch": 
1.7729200175464248, + "grad_norm": 1.9189229950794147, + "learning_rate": 6.991061045922854e-07, + "loss": 0.683641254901886, + "step": 6063 + }, + { + "epoch": 1.7732124579616904, + "grad_norm": 2.089118565246571, + "learning_rate": 6.973312429428458e-07, + "loss": 0.6294830441474915, + "step": 6064 + }, + { + "epoch": 1.7735048983769557, + "grad_norm": 1.6252098698149335, + "learning_rate": 6.95558555735667e-07, + "loss": 0.40493613481521606, + "step": 6065 + }, + { + "epoch": 1.773797338792221, + "grad_norm": 1.7745752298261492, + "learning_rate": 6.93788043385103e-07, + "loss": 0.501255452632904, + "step": 6066 + }, + { + "epoch": 1.7740897792074866, + "grad_norm": 1.7883463098117711, + "learning_rate": 6.920197063050038e-07, + "loss": 0.6004104614257812, + "step": 6067 + }, + { + "epoch": 1.7743822196227519, + "grad_norm": 1.5939834110995985, + "learning_rate": 6.902535449087023e-07, + "loss": 0.48683321475982666, + "step": 6068 + }, + { + "epoch": 1.7746746600380172, + "grad_norm": 1.7279814402431617, + "learning_rate": 6.884895596090302e-07, + "loss": 0.6048111319541931, + "step": 6069 + }, + { + "epoch": 1.7749671004532828, + "grad_norm": 1.8759604993064984, + "learning_rate": 6.867277508183101e-07, + "loss": 0.5532732009887695, + "step": 6070 + }, + { + "epoch": 1.7752595408685479, + "grad_norm": 2.066556008321799, + "learning_rate": 6.849681189483515e-07, + "loss": 0.544552206993103, + "step": 6071 + }, + { + "epoch": 1.7755519812838134, + "grad_norm": 1.9161876673278242, + "learning_rate": 6.832106644104586e-07, + "loss": 0.5114158391952515, + "step": 6072 + }, + { + "epoch": 1.775844421699079, + "grad_norm": 1.6996182780694216, + "learning_rate": 6.814553876154273e-07, + "loss": 0.45777493715286255, + "step": 6073 + }, + { + "epoch": 1.776136862114344, + "grad_norm": 1.6209289540377791, + "learning_rate": 6.797022889735405e-07, + "loss": 0.5449005365371704, + "step": 6074 + }, + { + "epoch": 1.7764293025296096, + "grad_norm": 1.8749070330960134, + 
"learning_rate": 6.779513688945749e-07, + "loss": 0.6308485865592957, + "step": 6075 + }, + { + "epoch": 1.776721742944875, + "grad_norm": 1.951122544814841, + "learning_rate": 6.762026277877986e-07, + "loss": 0.5904842019081116, + "step": 6076 + }, + { + "epoch": 1.7770141833601403, + "grad_norm": 1.8358819377761475, + "learning_rate": 6.744560660619681e-07, + "loss": 0.6681115627288818, + "step": 6077 + }, + { + "epoch": 1.7773066237754058, + "grad_norm": 1.7337774705028348, + "learning_rate": 6.727116841253334e-07, + "loss": 0.5084429979324341, + "step": 6078 + }, + { + "epoch": 1.7775990641906712, + "grad_norm": 1.706737040250044, + "learning_rate": 6.709694823856305e-07, + "loss": 0.5705291032791138, + "step": 6079 + }, + { + "epoch": 1.7778915046059365, + "grad_norm": 1.541912819246542, + "learning_rate": 6.692294612500894e-07, + "loss": 0.6481744050979614, + "step": 6080 + }, + { + "epoch": 1.778183945021202, + "grad_norm": 1.5164317234096627, + "learning_rate": 6.67491621125429e-07, + "loss": 0.5236573815345764, + "step": 6081 + }, + { + "epoch": 1.7784763854364674, + "grad_norm": 1.761941770239031, + "learning_rate": 6.657559624178611e-07, + "loss": 0.5169326663017273, + "step": 6082 + }, + { + "epoch": 1.7787688258517327, + "grad_norm": 1.7653960525219785, + "learning_rate": 6.640224855330824e-07, + "loss": 0.5304254293441772, + "step": 6083 + }, + { + "epoch": 1.7790612662669982, + "grad_norm": 1.7073706399680681, + "learning_rate": 6.622911908762852e-07, + "loss": 0.457882285118103, + "step": 6084 + }, + { + "epoch": 1.7793537066822633, + "grad_norm": 1.4459810475641077, + "learning_rate": 6.605620788521472e-07, + "loss": 0.48427796363830566, + "step": 6085 + }, + { + "epoch": 1.7796461470975289, + "grad_norm": 1.7511368613506917, + "learning_rate": 6.588351498648382e-07, + "loss": 0.598512589931488, + "step": 6086 + }, + { + "epoch": 1.7799385875127944, + "grad_norm": 1.6445184894388314, + "learning_rate": 6.571104043180188e-07, + "loss": 
0.5065094232559204, + "step": 6087 + }, + { + "epoch": 1.7802310279280595, + "grad_norm": 1.7505635404599922, + "learning_rate": 6.553878426148364e-07, + "loss": 0.5493142008781433, + "step": 6088 + }, + { + "epoch": 1.780523468343325, + "grad_norm": 1.5236545905427594, + "learning_rate": 6.5366746515793e-07, + "loss": 0.40520578622817993, + "step": 6089 + }, + { + "epoch": 1.7808159087585904, + "grad_norm": 1.6562045226817075, + "learning_rate": 6.51949272349427e-07, + "loss": 0.5416547656059265, + "step": 6090 + }, + { + "epoch": 1.7811083491738557, + "grad_norm": 1.5389792406208165, + "learning_rate": 6.502332645909438e-07, + "loss": 0.4531989097595215, + "step": 6091 + }, + { + "epoch": 1.7814007895891213, + "grad_norm": 1.9811412419033423, + "learning_rate": 6.485194422835872e-07, + "loss": 0.6385304927825928, + "step": 6092 + }, + { + "epoch": 1.7816932300043866, + "grad_norm": 1.631678357707061, + "learning_rate": 6.468078058279537e-07, + "loss": 0.5503095388412476, + "step": 6093 + }, + { + "epoch": 1.781985670419652, + "grad_norm": 1.810992666384156, + "learning_rate": 6.450983556241264e-07, + "loss": 0.5184366703033447, + "step": 6094 + }, + { + "epoch": 1.7822781108349175, + "grad_norm": 1.8021498649724184, + "learning_rate": 6.433910920716813e-07, + "loss": 0.5211689472198486, + "step": 6095 + }, + { + "epoch": 1.7825705512501828, + "grad_norm": 1.5495698877916986, + "learning_rate": 6.416860155696781e-07, + "loss": 0.7357909679412842, + "step": 6096 + }, + { + "epoch": 1.7828629916654481, + "grad_norm": 1.6814949660424658, + "learning_rate": 6.399831265166689e-07, + "loss": 0.6283953189849854, + "step": 6097 + }, + { + "epoch": 1.7831554320807137, + "grad_norm": 1.7274003515879492, + "learning_rate": 6.382824253106945e-07, + "loss": 0.45040953159332275, + "step": 6098 + }, + { + "epoch": 1.7834478724959788, + "grad_norm": 1.9179221464776945, + "learning_rate": 6.365839123492834e-07, + "loss": 0.5056609511375427, + "step": 6099 + }, + { + "epoch": 
1.7837403129112444, + "grad_norm": 1.4295507016254647, + "learning_rate": 6.348875880294536e-07, + "loss": 0.4940416216850281, + "step": 6100 + }, + { + "epoch": 1.7840327533265097, + "grad_norm": 1.487738102541406, + "learning_rate": 6.33193452747708e-07, + "loss": 0.45796072483062744, + "step": 6101 + }, + { + "epoch": 1.784325193741775, + "grad_norm": 1.5314389713015535, + "learning_rate": 6.315015069000408e-07, + "loss": 0.4828432500362396, + "step": 6102 + }, + { + "epoch": 1.7846176341570406, + "grad_norm": 1.7652995666195541, + "learning_rate": 6.298117508819357e-07, + "loss": 0.5564515590667725, + "step": 6103 + }, + { + "epoch": 1.7849100745723059, + "grad_norm": 1.7672116497467336, + "learning_rate": 6.281241850883624e-07, + "loss": 0.5160977840423584, + "step": 6104 + }, + { + "epoch": 1.7852025149875712, + "grad_norm": 1.6835388368372863, + "learning_rate": 6.264388099137775e-07, + "loss": 0.585543155670166, + "step": 6105 + }, + { + "epoch": 1.7854949554028368, + "grad_norm": 1.9025389414417693, + "learning_rate": 6.247556257521303e-07, + "loss": 0.5377194881439209, + "step": 6106 + }, + { + "epoch": 1.785787395818102, + "grad_norm": 1.6124331818311004, + "learning_rate": 6.230746329968518e-07, + "loss": 0.46788060665130615, + "step": 6107 + }, + { + "epoch": 1.7860798362333674, + "grad_norm": 1.481941465563148, + "learning_rate": 6.213958320408664e-07, + "loss": 0.511722207069397, + "step": 6108 + }, + { + "epoch": 1.786372276648633, + "grad_norm": 1.7380505303184415, + "learning_rate": 6.197192232765814e-07, + "loss": 0.5609079599380493, + "step": 6109 + }, + { + "epoch": 1.786664717063898, + "grad_norm": 1.5715739237199864, + "learning_rate": 6.180448070958955e-07, + "loss": 0.47641855478286743, + "step": 6110 + }, + { + "epoch": 1.7869571574791636, + "grad_norm": 1.4072609352957208, + "learning_rate": 6.163725838901946e-07, + "loss": 0.4209919273853302, + "step": 6111 + }, + { + "epoch": 1.7872495978944292, + "grad_norm": 1.7120783337900378, + 
"learning_rate": 6.147025540503459e-07, + "loss": 0.6012829542160034, + "step": 6112 + }, + { + "epoch": 1.7875420383096943, + "grad_norm": 1.8789998564305304, + "learning_rate": 6.130347179667129e-07, + "loss": 0.6112918853759766, + "step": 6113 + }, + { + "epoch": 1.7878344787249598, + "grad_norm": 1.8641199827985835, + "learning_rate": 6.113690760291402e-07, + "loss": 0.6370030641555786, + "step": 6114 + }, + { + "epoch": 1.7881269191402251, + "grad_norm": 1.837749741108103, + "learning_rate": 6.097056286269631e-07, + "loss": 0.5385129451751709, + "step": 6115 + }, + { + "epoch": 1.7884193595554905, + "grad_norm": 1.7733960362556163, + "learning_rate": 6.080443761490007e-07, + "loss": 0.4707196354866028, + "step": 6116 + }, + { + "epoch": 1.788711799970756, + "grad_norm": 1.8302621423982353, + "learning_rate": 6.063853189835611e-07, + "loss": 0.5361602306365967, + "step": 6117 + }, + { + "epoch": 1.7890042403860213, + "grad_norm": 1.592603561791519, + "learning_rate": 6.047284575184398e-07, + "loss": 0.48841261863708496, + "step": 6118 + }, + { + "epoch": 1.7892966808012867, + "grad_norm": 1.6413123655048356, + "learning_rate": 6.030737921409169e-07, + "loss": 0.47491973638534546, + "step": 6119 + }, + { + "epoch": 1.7895891212165522, + "grad_norm": 1.608045516338794, + "learning_rate": 6.014213232377608e-07, + "loss": 0.4579542875289917, + "step": 6120 + }, + { + "epoch": 1.7898815616318176, + "grad_norm": 1.7739986275669979, + "learning_rate": 5.997710511952259e-07, + "loss": 0.4517485499382019, + "step": 6121 + }, + { + "epoch": 1.7901740020470829, + "grad_norm": 1.61243285020885, + "learning_rate": 5.981229763990559e-07, + "loss": 0.5656695365905762, + "step": 6122 + }, + { + "epoch": 1.7904664424623484, + "grad_norm": 1.8328920976142473, + "learning_rate": 5.964770992344737e-07, + "loss": 0.5000064373016357, + "step": 6123 + }, + { + "epoch": 1.7907588828776135, + "grad_norm": 1.691423776793607, + "learning_rate": 5.948334200861927e-07, + "loss": 
0.4823925495147705, + "step": 6124 + }, + { + "epoch": 1.791051323292879, + "grad_norm": 1.6081373509153076, + "learning_rate": 5.931919393384189e-07, + "loss": 0.45079779624938965, + "step": 6125 + }, + { + "epoch": 1.7913437637081446, + "grad_norm": 1.7368976771393152, + "learning_rate": 5.915526573748331e-07, + "loss": 0.5887237787246704, + "step": 6126 + }, + { + "epoch": 1.7916362041234097, + "grad_norm": 1.5326002891728705, + "learning_rate": 5.8991557457861e-07, + "loss": 0.5625102519989014, + "step": 6127 + }, + { + "epoch": 1.7919286445386753, + "grad_norm": 1.773152580661058, + "learning_rate": 5.882806913324079e-07, + "loss": 0.5290789604187012, + "step": 6128 + }, + { + "epoch": 1.7922210849539406, + "grad_norm": 1.8240731968563617, + "learning_rate": 5.86648008018369e-07, + "loss": 0.47694748640060425, + "step": 6129 + }, + { + "epoch": 1.792513525369206, + "grad_norm": 1.7480468996944738, + "learning_rate": 5.850175250181244e-07, + "loss": 0.6297628879547119, + "step": 6130 + }, + { + "epoch": 1.7928059657844715, + "grad_norm": 1.767468792446569, + "learning_rate": 5.833892427127908e-07, + "loss": 0.5748087167739868, + "step": 6131 + }, + { + "epoch": 1.7930984061997368, + "grad_norm": 2.0367130445902313, + "learning_rate": 5.817631614829666e-07, + "loss": 0.552059531211853, + "step": 6132 + }, + { + "epoch": 1.7933908466150021, + "grad_norm": 1.881082319886368, + "learning_rate": 5.801392817087392e-07, + "loss": 0.5980287790298462, + "step": 6133 + }, + { + "epoch": 1.7936832870302677, + "grad_norm": 1.7948740811393897, + "learning_rate": 5.785176037696815e-07, + "loss": 0.5682743191719055, + "step": 6134 + }, + { + "epoch": 1.793975727445533, + "grad_norm": 1.6227048981437364, + "learning_rate": 5.768981280448494e-07, + "loss": 0.6907520294189453, + "step": 6135 + }, + { + "epoch": 1.7942681678607983, + "grad_norm": 1.82613812962419, + "learning_rate": 5.752808549127875e-07, + "loss": 0.5939712524414062, + "step": 6136 + }, + { + "epoch": 
1.794560608276064, + "grad_norm": 1.961952469296216, + "learning_rate": 5.736657847515215e-07, + "loss": 0.5169910192489624, + "step": 6137 + }, + { + "epoch": 1.794853048691329, + "grad_norm": 1.7101466149490088, + "learning_rate": 5.720529179385659e-07, + "loss": 0.5795155167579651, + "step": 6138 + }, + { + "epoch": 1.7951454891065945, + "grad_norm": 1.6643593680063449, + "learning_rate": 5.704422548509181e-07, + "loss": 0.4296284317970276, + "step": 6139 + }, + { + "epoch": 1.7954379295218599, + "grad_norm": 1.780840768711558, + "learning_rate": 5.688337958650603e-07, + "loss": 0.5175303220748901, + "step": 6140 + }, + { + "epoch": 1.7957303699371252, + "grad_norm": 1.5534990300027502, + "learning_rate": 5.672275413569605e-07, + "loss": 0.49900466203689575, + "step": 6141 + }, + { + "epoch": 1.7960228103523908, + "grad_norm": 1.741229060320259, + "learning_rate": 5.65623491702072e-07, + "loss": 0.5047665238380432, + "step": 6142 + }, + { + "epoch": 1.796315250767656, + "grad_norm": 1.6004175896698871, + "learning_rate": 5.64021647275329e-07, + "loss": 0.5309686660766602, + "step": 6143 + }, + { + "epoch": 1.7966076911829214, + "grad_norm": 1.84753723892279, + "learning_rate": 5.624220084511544e-07, + "loss": 0.7270892858505249, + "step": 6144 + }, + { + "epoch": 1.796900131598187, + "grad_norm": 1.8607152469266723, + "learning_rate": 5.608245756034536e-07, + "loss": 0.515272319316864, + "step": 6145 + }, + { + "epoch": 1.7971925720134523, + "grad_norm": 1.5111910050436628, + "learning_rate": 5.592293491056167e-07, + "loss": 0.4919237196445465, + "step": 6146 + }, + { + "epoch": 1.7974850124287176, + "grad_norm": 1.8345189418412804, + "learning_rate": 5.576363293305187e-07, + "loss": 0.5812259316444397, + "step": 6147 + }, + { + "epoch": 1.7977774528439832, + "grad_norm": 1.7464814721572284, + "learning_rate": 5.560455166505185e-07, + "loss": 0.434345006942749, + "step": 6148 + }, + { + "epoch": 1.7980698932592483, + "grad_norm": 1.6287087584719833, + 
"learning_rate": 5.544569114374588e-07, + "loss": 0.4670771360397339, + "step": 6149 + }, + { + "epoch": 1.7983623336745138, + "grad_norm": 1.5038620849892772, + "learning_rate": 5.528705140626667e-07, + "loss": 0.5867526531219482, + "step": 6150 + }, + { + "epoch": 1.7986547740897794, + "grad_norm": 1.8981858755166237, + "learning_rate": 5.512863248969513e-07, + "loss": 0.5453605651855469, + "step": 6151 + }, + { + "epoch": 1.7989472145050445, + "grad_norm": 1.9030067654858334, + "learning_rate": 5.497043443106087e-07, + "loss": 0.5535463690757751, + "step": 6152 + }, + { + "epoch": 1.79923965492031, + "grad_norm": 1.72031713178446, + "learning_rate": 5.481245726734174e-07, + "loss": 0.6250847578048706, + "step": 6153 + }, + { + "epoch": 1.7995320953355753, + "grad_norm": 1.625961067284692, + "learning_rate": 5.465470103546399e-07, + "loss": 0.45504581928253174, + "step": 6154 + }, + { + "epoch": 1.7998245357508407, + "grad_norm": 2.039802523536217, + "learning_rate": 5.449716577230202e-07, + "loss": 0.6192604303359985, + "step": 6155 + }, + { + "epoch": 1.8001169761661062, + "grad_norm": 1.8695276161806251, + "learning_rate": 5.433985151467869e-07, + "loss": 0.5624358654022217, + "step": 6156 + }, + { + "epoch": 1.8004094165813715, + "grad_norm": 1.7494457460727728, + "learning_rate": 5.418275829936537e-07, + "loss": 0.5759576559066772, + "step": 6157 + }, + { + "epoch": 1.8007018569966369, + "grad_norm": 1.752894288026352, + "learning_rate": 5.402588616308169e-07, + "loss": 0.5710508227348328, + "step": 6158 + }, + { + "epoch": 1.8009942974119024, + "grad_norm": 1.6781697189669698, + "learning_rate": 5.386923514249542e-07, + "loss": 0.6146141290664673, + "step": 6159 + }, + { + "epoch": 1.8012867378271677, + "grad_norm": 1.618055518270054, + "learning_rate": 5.371280527422296e-07, + "loss": 0.425834983587265, + "step": 6160 + }, + { + "epoch": 1.801579178242433, + "grad_norm": 1.8062077594882358, + "learning_rate": 5.35565965948287e-07, + "loss": 
0.4353194236755371, + "step": 6161 + }, + { + "epoch": 1.8018716186576986, + "grad_norm": 2.0598668441022037, + "learning_rate": 5.340060914082546e-07, + "loss": 0.7202355861663818, + "step": 6162 + }, + { + "epoch": 1.8021640590729637, + "grad_norm": 1.552014134498689, + "learning_rate": 5.324484294867449e-07, + "loss": 0.5371845960617065, + "step": 6163 + }, + { + "epoch": 1.8024564994882293, + "grad_norm": 1.7812688374701713, + "learning_rate": 5.308929805478513e-07, + "loss": 0.4995431900024414, + "step": 6164 + }, + { + "epoch": 1.8027489399034948, + "grad_norm": 1.9376433940202618, + "learning_rate": 5.293397449551519e-07, + "loss": 0.6503393650054932, + "step": 6165 + }, + { + "epoch": 1.80304138031876, + "grad_norm": 1.608511841040304, + "learning_rate": 5.277887230717027e-07, + "loss": 0.5083032250404358, + "step": 6166 + }, + { + "epoch": 1.8033338207340255, + "grad_norm": 1.7910725457082355, + "learning_rate": 5.262399152600473e-07, + "loss": 0.6067851781845093, + "step": 6167 + }, + { + "epoch": 1.8036262611492908, + "grad_norm": 1.6601362559713981, + "learning_rate": 5.246933218822104e-07, + "loss": 0.6446479558944702, + "step": 6168 + }, + { + "epoch": 1.8039187015645561, + "grad_norm": 1.9668874595165033, + "learning_rate": 5.231489432996984e-07, + "loss": 0.6940749883651733, + "step": 6169 + }, + { + "epoch": 1.8042111419798217, + "grad_norm": 1.6254914024201104, + "learning_rate": 5.216067798735014e-07, + "loss": 0.558691143989563, + "step": 6170 + }, + { + "epoch": 1.804503582395087, + "grad_norm": 1.706821795047188, + "learning_rate": 5.2006683196409e-07, + "loss": 0.4561213254928589, + "step": 6171 + }, + { + "epoch": 1.8047960228103523, + "grad_norm": 1.5741713506995776, + "learning_rate": 5.185290999314174e-07, + "loss": 0.514278769493103, + "step": 6172 + }, + { + "epoch": 1.805088463225618, + "grad_norm": 1.7438493762338294, + "learning_rate": 5.169935841349194e-07, + "loss": 0.41933614015579224, + "step": 6173 + }, + { + "epoch": 
1.8053809036408832, + "grad_norm": 1.5639626592195386, + "learning_rate": 5.154602849335133e-07, + "loss": 0.5590407848358154, + "step": 6174 + }, + { + "epoch": 1.8056733440561485, + "grad_norm": 1.7923343761763981, + "learning_rate": 5.139292026855991e-07, + "loss": 0.49428898096084595, + "step": 6175 + }, + { + "epoch": 1.805965784471414, + "grad_norm": 1.6980318077322492, + "learning_rate": 5.124003377490582e-07, + "loss": 0.4737596809864044, + "step": 6176 + }, + { + "epoch": 1.8062582248866792, + "grad_norm": 1.6716862203734568, + "learning_rate": 5.108736904812517e-07, + "loss": 0.5017397403717041, + "step": 6177 + }, + { + "epoch": 1.8065506653019447, + "grad_norm": 1.733919571237643, + "learning_rate": 5.09349261239026e-07, + "loss": 0.4509057402610779, + "step": 6178 + }, + { + "epoch": 1.80684310571721, + "grad_norm": 1.9095997808768526, + "learning_rate": 5.078270503787053e-07, + "loss": 0.4440206289291382, + "step": 6179 + }, + { + "epoch": 1.8071355461324754, + "grad_norm": 1.6672235625660048, + "learning_rate": 5.063070582560991e-07, + "loss": 0.4981609582901001, + "step": 6180 + }, + { + "epoch": 1.807427986547741, + "grad_norm": 1.4041701397189061, + "learning_rate": 5.047892852264946e-07, + "loss": 0.4057808518409729, + "step": 6181 + }, + { + "epoch": 1.8077204269630063, + "grad_norm": 1.8238388895662465, + "learning_rate": 5.032737316446634e-07, + "loss": 0.5770435333251953, + "step": 6182 + }, + { + "epoch": 1.8080128673782716, + "grad_norm": 1.5817149529336438, + "learning_rate": 5.017603978648567e-07, + "loss": 0.5431563258171082, + "step": 6183 + }, + { + "epoch": 1.8083053077935372, + "grad_norm": 1.7959973431061746, + "learning_rate": 5.002492842408058e-07, + "loss": 0.469868928194046, + "step": 6184 + }, + { + "epoch": 1.8085977482088025, + "grad_norm": 1.6470575782998251, + "learning_rate": 4.98740391125726e-07, + "loss": 0.4581238925457001, + "step": 6185 + }, + { + "epoch": 1.8088901886240678, + "grad_norm": 1.5613704220145663, + 
"learning_rate": 4.972337188723108e-07, + "loss": 0.43255913257598877, + "step": 6186 + }, + { + "epoch": 1.8091826290393334, + "grad_norm": 1.6405804521880538, + "learning_rate": 4.957292678327374e-07, + "loss": 0.5817975997924805, + "step": 6187 + }, + { + "epoch": 1.8094750694545985, + "grad_norm": 1.701175567145501, + "learning_rate": 4.9422703835866e-07, + "loss": 0.506614089012146, + "step": 6188 + }, + { + "epoch": 1.809767509869864, + "grad_norm": 1.8093255501568073, + "learning_rate": 4.927270308012155e-07, + "loss": 0.5245084762573242, + "step": 6189 + }, + { + "epoch": 1.8100599502851296, + "grad_norm": 1.9638481802757681, + "learning_rate": 4.912292455110235e-07, + "loss": 0.48700785636901855, + "step": 6190 + }, + { + "epoch": 1.8103523907003947, + "grad_norm": 1.7084108143801102, + "learning_rate": 4.897336828381794e-07, + "loss": 0.5512829422950745, + "step": 6191 + }, + { + "epoch": 1.8106448311156602, + "grad_norm": 1.9425355962156208, + "learning_rate": 4.882403431322647e-07, + "loss": 0.444965660572052, + "step": 6192 + }, + { + "epoch": 1.8109372715309255, + "grad_norm": 1.6773870360526466, + "learning_rate": 4.86749226742338e-07, + "loss": 0.49120527505874634, + "step": 6193 + }, + { + "epoch": 1.8112297119461909, + "grad_norm": 1.5444026883137385, + "learning_rate": 4.852603340169371e-07, + "loss": 0.47114405035972595, + "step": 6194 + }, + { + "epoch": 1.8115221523614564, + "grad_norm": 1.3641759741105037, + "learning_rate": 4.837736653040825e-07, + "loss": 0.41404014825820923, + "step": 6195 + }, + { + "epoch": 1.8118145927767217, + "grad_norm": 1.5779692763243462, + "learning_rate": 4.822892209512742e-07, + "loss": 0.5773917436599731, + "step": 6196 + }, + { + "epoch": 1.812107033191987, + "grad_norm": 1.5867022738126413, + "learning_rate": 4.808070013054911e-07, + "loss": 0.5048927068710327, + "step": 6197 + }, + { + "epoch": 1.8123994736072526, + "grad_norm": 1.4880382186782968, + "learning_rate": 4.793270067131961e-07, + "loss": 
0.48112595081329346, + "step": 6198 + }, + { + "epoch": 1.812691914022518, + "grad_norm": 1.5982708355484612, + "learning_rate": 4.778492375203236e-07, + "loss": 0.465067982673645, + "step": 6199 + }, + { + "epoch": 1.8129843544377833, + "grad_norm": 2.10382956966043, + "learning_rate": 4.763736940722985e-07, + "loss": 0.5456488132476807, + "step": 6200 + }, + { + "epoch": 1.8132767948530488, + "grad_norm": 1.7197696401081977, + "learning_rate": 4.74900376714017e-07, + "loss": 0.5078476071357727, + "step": 6201 + }, + { + "epoch": 1.813569235268314, + "grad_norm": 1.8035895737751002, + "learning_rate": 4.7342928578985814e-07, + "loss": 0.5087896585464478, + "step": 6202 + }, + { + "epoch": 1.8138616756835795, + "grad_norm": 1.8289842367399733, + "learning_rate": 4.719604216436824e-07, + "loss": 0.5734537243843079, + "step": 6203 + }, + { + "epoch": 1.814154116098845, + "grad_norm": 1.8255387764821909, + "learning_rate": 4.704937846188262e-07, + "loss": 0.5163359045982361, + "step": 6204 + }, + { + "epoch": 1.8144465565141101, + "grad_norm": 1.7367361746759034, + "learning_rate": 4.6902937505810765e-07, + "loss": 0.5884007811546326, + "step": 6205 + }, + { + "epoch": 1.8147389969293757, + "grad_norm": 1.459881439563451, + "learning_rate": 4.675671933038228e-07, + "loss": 0.454215407371521, + "step": 6206 + }, + { + "epoch": 1.815031437344641, + "grad_norm": 1.4834270754413148, + "learning_rate": 4.661072396977506e-07, + "loss": 0.4380212426185608, + "step": 6207 + }, + { + "epoch": 1.8153238777599063, + "grad_norm": 1.5724796080178702, + "learning_rate": 4.646495145811425e-07, + "loss": 0.6138126850128174, + "step": 6208 + }, + { + "epoch": 1.8156163181751719, + "grad_norm": 1.7578891144089137, + "learning_rate": 4.6319401829473366e-07, + "loss": 0.560515284538269, + "step": 6209 + }, + { + "epoch": 1.8159087585904372, + "grad_norm": 1.6717823771103892, + "learning_rate": 4.6174075117873976e-07, + "loss": 0.4744090735912323, + "step": 6210 + }, + { + "epoch": 
1.8162011990057025, + "grad_norm": 1.566667953265204, + "learning_rate": 4.6028971357285126e-07, + "loss": 0.4508114457130432, + "step": 6211 + }, + { + "epoch": 1.816493639420968, + "grad_norm": 1.6686159118306128, + "learning_rate": 4.5884090581623906e-07, + "loss": 0.5437598824501038, + "step": 6212 + }, + { + "epoch": 1.8167860798362334, + "grad_norm": 1.871048661690424, + "learning_rate": 4.5739432824755456e-07, + "loss": 0.608635425567627, + "step": 6213 + }, + { + "epoch": 1.8170785202514987, + "grad_norm": 1.683927429440131, + "learning_rate": 4.5594998120492505e-07, + "loss": 0.45614784955978394, + "step": 6214 + }, + { + "epoch": 1.8173709606667643, + "grad_norm": 1.8175326303925177, + "learning_rate": 4.5450786502595933e-07, + "loss": 0.46722525358200073, + "step": 6215 + }, + { + "epoch": 1.8176634010820294, + "grad_norm": 1.6729337536988582, + "learning_rate": 4.5306798004774333e-07, + "loss": 0.5424127578735352, + "step": 6216 + }, + { + "epoch": 1.817955841497295, + "grad_norm": 1.8512870023540355, + "learning_rate": 4.5163032660684e-07, + "loss": 0.4360300302505493, + "step": 6217 + }, + { + "epoch": 1.8182482819125603, + "grad_norm": 1.4671759860658016, + "learning_rate": 4.5019490503929395e-07, + "loss": 0.43406206369400024, + "step": 6218 + }, + { + "epoch": 1.8185407223278256, + "grad_norm": 1.5669201854687904, + "learning_rate": 4.4876171568062346e-07, + "loss": 0.5435998439788818, + "step": 6219 + }, + { + "epoch": 1.8188331627430911, + "grad_norm": 1.7571994730111475, + "learning_rate": 4.4733075886583043e-07, + "loss": 0.4555914103984833, + "step": 6220 + }, + { + "epoch": 1.8191256031583565, + "grad_norm": 1.9267993644134682, + "learning_rate": 4.4590203492939076e-07, + "loss": 0.5246081352233887, + "step": 6221 + }, + { + "epoch": 1.8194180435736218, + "grad_norm": 1.4234567063452161, + "learning_rate": 4.4447554420525954e-07, + "loss": 0.5093664526939392, + "step": 6222 + }, + { + "epoch": 1.8197104839888874, + "grad_norm": 
1.9251138549109805, + "learning_rate": 4.430512870268733e-07, + "loss": 0.5759550333023071, + "step": 6223 + }, + { + "epoch": 1.8200029244041527, + "grad_norm": 2.2446814471076184, + "learning_rate": 4.416292637271402e-07, + "loss": 0.5477207899093628, + "step": 6224 + }, + { + "epoch": 1.820295364819418, + "grad_norm": 1.7579783947323675, + "learning_rate": 4.402094746384511e-07, + "loss": 0.5786882638931274, + "step": 6225 + }, + { + "epoch": 1.8205878052346836, + "grad_norm": 1.6652775403735034, + "learning_rate": 4.3879192009267266e-07, + "loss": 0.36909428238868713, + "step": 6226 + }, + { + "epoch": 1.8208802456499487, + "grad_norm": 1.6359565015929571, + "learning_rate": 4.3737660042114993e-07, + "loss": 0.5471982955932617, + "step": 6227 + }, + { + "epoch": 1.8211726860652142, + "grad_norm": 1.633893653092529, + "learning_rate": 4.3596351595470596e-07, + "loss": 0.49737733602523804, + "step": 6228 + }, + { + "epoch": 1.8214651264804798, + "grad_norm": 1.8445639233475513, + "learning_rate": 4.3455266702363997e-07, + "loss": 0.70830237865448, + "step": 6229 + }, + { + "epoch": 1.8217575668957449, + "grad_norm": 1.5312305470870462, + "learning_rate": 4.331440539577281e-07, + "loss": 0.5844424962997437, + "step": 6230 + }, + { + "epoch": 1.8220500073110104, + "grad_norm": 1.5427896071730656, + "learning_rate": 4.317376770862269e-07, + "loss": 0.42457354068756104, + "step": 6231 + }, + { + "epoch": 1.8223424477262757, + "grad_norm": 2.058390634719774, + "learning_rate": 4.3033353673786695e-07, + "loss": 0.5154321193695068, + "step": 6232 + }, + { + "epoch": 1.822634888141541, + "grad_norm": 1.7898699548834731, + "learning_rate": 4.2893163324085886e-07, + "loss": 0.5896856784820557, + "step": 6233 + }, + { + "epoch": 1.8229273285568066, + "grad_norm": 1.8303948048078211, + "learning_rate": 4.2753196692288835e-07, + "loss": 0.5032835006713867, + "step": 6234 + }, + { + "epoch": 1.823219768972072, + "grad_norm": 1.8584560183845538, + "learning_rate": 
4.2613453811111814e-07, + "loss": 0.4691713750362396, + "step": 6235 + }, + { + "epoch": 1.8235122093873373, + "grad_norm": 1.5627513261590378, + "learning_rate": 4.2473934713219033e-07, + "loss": 0.595095694065094, + "step": 6236 + }, + { + "epoch": 1.8238046498026028, + "grad_norm": 1.6531612719483142, + "learning_rate": 4.233463943122218e-07, + "loss": 0.5004895329475403, + "step": 6237 + }, + { + "epoch": 1.8240970902178681, + "grad_norm": 1.7047690953050751, + "learning_rate": 4.2195567997680654e-07, + "loss": 0.4924081563949585, + "step": 6238 + }, + { + "epoch": 1.8243895306331335, + "grad_norm": 1.7572886707576447, + "learning_rate": 4.2056720445101565e-07, + "loss": 0.5350006818771362, + "step": 6239 + }, + { + "epoch": 1.824681971048399, + "grad_norm": 1.9485734179206806, + "learning_rate": 4.191809680593961e-07, + "loss": 0.5404629707336426, + "step": 6240 + }, + { + "epoch": 1.8249744114636641, + "grad_norm": 1.6023324600099473, + "learning_rate": 4.177969711259744e-07, + "loss": 0.727859377861023, + "step": 6241 + }, + { + "epoch": 1.8252668518789297, + "grad_norm": 1.553973004264676, + "learning_rate": 4.164152139742494e-07, + "loss": 0.4805057644844055, + "step": 6242 + }, + { + "epoch": 1.8255592922941952, + "grad_norm": 1.7536116301732134, + "learning_rate": 4.1503569692719847e-07, + "loss": 0.5520761013031006, + "step": 6243 + }, + { + "epoch": 1.8258517327094603, + "grad_norm": 1.8327055737656117, + "learning_rate": 4.1365842030727576e-07, + "loss": 0.6130107641220093, + "step": 6244 + }, + { + "epoch": 1.8261441731247259, + "grad_norm": 1.7887203227793926, + "learning_rate": 4.122833844364116e-07, + "loss": 0.6048229932785034, + "step": 6245 + }, + { + "epoch": 1.8264366135399912, + "grad_norm": 1.717414490213998, + "learning_rate": 4.1091058963601214e-07, + "loss": 0.667324960231781, + "step": 6246 + }, + { + "epoch": 1.8267290539552565, + "grad_norm": 2.083699506724501, + "learning_rate": 4.095400362269597e-07, + "loss": 0.45595815777778625, + 
"step": 6247 + }, + { + "epoch": 1.827021494370522, + "grad_norm": 1.7162831332631867, + "learning_rate": 4.081717245296124e-07, + "loss": 0.49015533924102783, + "step": 6248 + }, + { + "epoch": 1.8273139347857874, + "grad_norm": 2.1906207360630763, + "learning_rate": 4.068056548638055e-07, + "loss": 0.5230038166046143, + "step": 6249 + }, + { + "epoch": 1.8276063752010527, + "grad_norm": 1.6860531929221865, + "learning_rate": 4.054418275488492e-07, + "loss": 0.5025942325592041, + "step": 6250 + }, + { + "epoch": 1.8278988156163183, + "grad_norm": 1.736980191753769, + "learning_rate": 4.0408024290352955e-07, + "loss": 0.5136677026748657, + "step": 6251 + }, + { + "epoch": 1.8281912560315836, + "grad_norm": 1.7988212644666006, + "learning_rate": 4.0272090124611086e-07, + "loss": 0.6209211945533752, + "step": 6252 + }, + { + "epoch": 1.828483696446849, + "grad_norm": 1.9742781188768104, + "learning_rate": 4.0136380289432784e-07, + "loss": 0.5913738012313843, + "step": 6253 + }, + { + "epoch": 1.8287761368621145, + "grad_norm": 1.9710058674803597, + "learning_rate": 4.000089481653946e-07, + "loss": 0.5745095610618591, + "step": 6254 + }, + { + "epoch": 1.8290685772773796, + "grad_norm": 1.4867167586867893, + "learning_rate": 3.9865633737600105e-07, + "loss": 0.4566704034805298, + "step": 6255 + }, + { + "epoch": 1.8293610176926451, + "grad_norm": 1.672257025513455, + "learning_rate": 3.9730597084231105e-07, + "loss": 0.49784860014915466, + "step": 6256 + }, + { + "epoch": 1.8296534581079105, + "grad_norm": 1.7381596787517106, + "learning_rate": 3.9595784887996647e-07, + "loss": 0.4489399790763855, + "step": 6257 + }, + { + "epoch": 1.8299458985231758, + "grad_norm": 1.9703484082158151, + "learning_rate": 3.946119718040797e-07, + "loss": 0.6335956454277039, + "step": 6258 + }, + { + "epoch": 1.8302383389384413, + "grad_norm": 1.4097270774574866, + "learning_rate": 3.932683399292436e-07, + "loss": 0.44865918159484863, + "step": 6259 + }, + { + "epoch": 
1.8305307793537067, + "grad_norm": 1.6485718017332285, + "learning_rate": 3.919269535695225e-07, + "loss": 0.4328421354293823, + "step": 6260 + }, + { + "epoch": 1.830823219768972, + "grad_norm": 1.6528043958881276, + "learning_rate": 3.9058781303845886e-07, + "loss": 0.463814377784729, + "step": 6261 + }, + { + "epoch": 1.8311156601842375, + "grad_norm": 1.9336577936651187, + "learning_rate": 3.892509186490667e-07, + "loss": 0.5857536196708679, + "step": 6262 + }, + { + "epoch": 1.8314081005995029, + "grad_norm": 1.4512027972560333, + "learning_rate": 3.879162707138395e-07, + "loss": 0.4873831272125244, + "step": 6263 + }, + { + "epoch": 1.8317005410147682, + "grad_norm": 1.89367526659171, + "learning_rate": 3.8658386954474104e-07, + "loss": 0.5428040027618408, + "step": 6264 + }, + { + "epoch": 1.8319929814300338, + "grad_norm": 1.759804366679343, + "learning_rate": 3.852537154532121e-07, + "loss": 0.49092623591423035, + "step": 6265 + }, + { + "epoch": 1.8322854218452989, + "grad_norm": 1.7919708064212196, + "learning_rate": 3.839258087501685e-07, + "loss": 0.5515817999839783, + "step": 6266 + }, + { + "epoch": 1.8325778622605644, + "grad_norm": 1.5550731443697672, + "learning_rate": 3.8260014974600077e-07, + "loss": 0.48080340027809143, + "step": 6267 + }, + { + "epoch": 1.83287030267583, + "grad_norm": 2.292962123842254, + "learning_rate": 3.812767387505734e-07, + "loss": 0.6129888296127319, + "step": 6268 + }, + { + "epoch": 1.833162743091095, + "grad_norm": 1.8203026764024284, + "learning_rate": 3.7995557607322543e-07, + "loss": 0.5843402147293091, + "step": 6269 + }, + { + "epoch": 1.8334551835063606, + "grad_norm": 1.9423893526281284, + "learning_rate": 3.7863666202276996e-07, + "loss": 0.5573143362998962, + "step": 6270 + }, + { + "epoch": 1.833747623921626, + "grad_norm": 1.9386384718546945, + "learning_rate": 3.773199969074959e-07, + "loss": 0.552756667137146, + "step": 6271 + }, + { + "epoch": 1.8340400643368913, + "grad_norm": 1.7629811878645265, + 
"learning_rate": 3.7600558103516706e-07, + "loss": 0.5559083223342896, + "step": 6272 + }, + { + "epoch": 1.8343325047521568, + "grad_norm": 1.9388416947858518, + "learning_rate": 3.746934147130177e-07, + "loss": 0.5388067364692688, + "step": 6273 + }, + { + "epoch": 1.8346249451674221, + "grad_norm": 1.694909278172827, + "learning_rate": 3.7338349824776133e-07, + "loss": 0.5816110968589783, + "step": 6274 + }, + { + "epoch": 1.8349173855826875, + "grad_norm": 1.9312358476553817, + "learning_rate": 3.720758319455786e-07, + "loss": 0.5720102787017822, + "step": 6275 + }, + { + "epoch": 1.835209825997953, + "grad_norm": 1.5440220572809102, + "learning_rate": 3.707704161121328e-07, + "loss": 0.46005699038505554, + "step": 6276 + }, + { + "epoch": 1.8355022664132183, + "grad_norm": 2.0613584980065776, + "learning_rate": 3.6946725105255656e-07, + "loss": 0.5602168440818787, + "step": 6277 + }, + { + "epoch": 1.8357947068284837, + "grad_norm": 1.6156922208810771, + "learning_rate": 3.68166337071455e-07, + "loss": 0.5390583276748657, + "step": 6278 + }, + { + "epoch": 1.8360871472437492, + "grad_norm": 1.558407958302267, + "learning_rate": 3.668676744729094e-07, + "loss": 0.48980700969696045, + "step": 6279 + }, + { + "epoch": 1.8363795876590143, + "grad_norm": 1.5853357453165142, + "learning_rate": 3.655712635604747e-07, + "loss": 0.6565061807632446, + "step": 6280 + }, + { + "epoch": 1.8366720280742799, + "grad_norm": 1.5692146512642422, + "learning_rate": 3.642771046371785e-07, + "loss": 0.465609610080719, + "step": 6281 + }, + { + "epoch": 1.8369644684895454, + "grad_norm": 1.7219983092976099, + "learning_rate": 3.6298519800552434e-07, + "loss": 0.5698891282081604, + "step": 6282 + }, + { + "epoch": 1.8372569089048105, + "grad_norm": 2.103680074754177, + "learning_rate": 3.616955439674863e-07, + "loss": 0.5885399580001831, + "step": 6283 + }, + { + "epoch": 1.837549349320076, + "grad_norm": 1.7028861151189467, + "learning_rate": 3.60408142824511e-07, + "loss": 
0.5158063173294067, + "step": 6284 + }, + { + "epoch": 1.8378417897353414, + "grad_norm": 1.6728867893623607, + "learning_rate": 3.5912299487752434e-07, + "loss": 0.49203822016716003, + "step": 6285 + }, + { + "epoch": 1.8381342301506067, + "grad_norm": 1.991753525300203, + "learning_rate": 3.578401004269183e-07, + "loss": 0.5756489038467407, + "step": 6286 + }, + { + "epoch": 1.8384266705658723, + "grad_norm": 1.9424738806131756, + "learning_rate": 3.565594597725652e-07, + "loss": 0.5970584154129028, + "step": 6287 + }, + { + "epoch": 1.8387191109811376, + "grad_norm": 1.4438564684738853, + "learning_rate": 3.552810732138046e-07, + "loss": 0.48702481389045715, + "step": 6288 + }, + { + "epoch": 1.839011551396403, + "grad_norm": 1.6632334435868308, + "learning_rate": 3.540049410494517e-07, + "loss": 0.4818963408470154, + "step": 6289 + }, + { + "epoch": 1.8393039918116685, + "grad_norm": 1.6617150886827665, + "learning_rate": 3.5273106357779585e-07, + "loss": 0.389699786901474, + "step": 6290 + }, + { + "epoch": 1.8395964322269338, + "grad_norm": 1.7654595369504777, + "learning_rate": 3.514594410965977e-07, + "loss": 0.6438174247741699, + "step": 6291 + }, + { + "epoch": 1.8398888726421991, + "grad_norm": 1.9409260673022277, + "learning_rate": 3.501900739030906e-07, + "loss": 0.654021143913269, + "step": 6292 + }, + { + "epoch": 1.8401813130574647, + "grad_norm": 1.921461492738401, + "learning_rate": 3.489229622939827e-07, + "loss": 0.748673677444458, + "step": 6293 + }, + { + "epoch": 1.8404737534727298, + "grad_norm": 1.850157344469969, + "learning_rate": 3.476581065654527e-07, + "loss": 0.47883105278015137, + "step": 6294 + }, + { + "epoch": 1.8407661938879953, + "grad_norm": 1.555147241743972, + "learning_rate": 3.4639550701315303e-07, + "loss": 0.5221554040908813, + "step": 6295 + }, + { + "epoch": 1.8410586343032607, + "grad_norm": 1.7256564846330384, + "learning_rate": 3.451351639322087e-07, + "loss": 0.482231080532074, + "step": 6296 + }, + { + "epoch": 
1.841351074718526, + "grad_norm": 1.797442509245834, + "learning_rate": 3.4387707761721625e-07, + "loss": 0.5407366752624512, + "step": 6297 + }, + { + "epoch": 1.8416435151337915, + "grad_norm": 1.9177358417772523, + "learning_rate": 3.426212483622482e-07, + "loss": 0.626631498336792, + "step": 6298 + }, + { + "epoch": 1.8419359555490569, + "grad_norm": 1.4729327167263073, + "learning_rate": 3.4136767646084424e-07, + "loss": 0.4401513338088989, + "step": 6299 + }, + { + "epoch": 1.8422283959643222, + "grad_norm": 1.756926078765411, + "learning_rate": 3.4011636220602106e-07, + "loss": 0.48130229115486145, + "step": 6300 + }, + { + "epoch": 1.8425208363795877, + "grad_norm": 1.9010914484665373, + "learning_rate": 3.3886730589026475e-07, + "loss": 0.7132935523986816, + "step": 6301 + }, + { + "epoch": 1.842813276794853, + "grad_norm": 1.692313625720156, + "learning_rate": 3.37620507805535e-07, + "loss": 0.6665343642234802, + "step": 6302 + }, + { + "epoch": 1.8431057172101184, + "grad_norm": 1.7909091838212496, + "learning_rate": 3.3637596824326435e-07, + "loss": 0.4313231408596039, + "step": 6303 + }, + { + "epoch": 1.843398157625384, + "grad_norm": 1.6745971926171657, + "learning_rate": 3.3513368749435447e-07, + "loss": 0.6263744235038757, + "step": 6304 + }, + { + "epoch": 1.843690598040649, + "grad_norm": 1.6133043168174617, + "learning_rate": 3.3389366584918313e-07, + "loss": 0.6215947866439819, + "step": 6305 + }, + { + "epoch": 1.8439830384559146, + "grad_norm": 1.6349014502820445, + "learning_rate": 3.3265590359759517e-07, + "loss": 0.45956021547317505, + "step": 6306 + }, + { + "epoch": 1.8442754788711802, + "grad_norm": 1.6194578088821072, + "learning_rate": 3.3142040102891126e-07, + "loss": 0.5363642573356628, + "step": 6307 + }, + { + "epoch": 1.8445679192864453, + "grad_norm": 1.7115305858843777, + "learning_rate": 3.3018715843192273e-07, + "loss": 0.4574592709541321, + "step": 6308 + }, + { + "epoch": 1.8448603597017108, + "grad_norm": 
1.6684239678735615, + "learning_rate": 3.2895617609489337e-07, + "loss": 0.43236005306243896, + "step": 6309 + }, + { + "epoch": 1.8451528001169761, + "grad_norm": 1.574172974777944, + "learning_rate": 3.277274543055564e-07, + "loss": 0.46349820494651794, + "step": 6310 + }, + { + "epoch": 1.8454452405322415, + "grad_norm": 1.9135327602518888, + "learning_rate": 3.265009933511176e-07, + "loss": 0.5233386754989624, + "step": 6311 + }, + { + "epoch": 1.845737680947507, + "grad_norm": 1.5165768096310508, + "learning_rate": 3.252767935182566e-07, + "loss": 0.44902727007865906, + "step": 6312 + }, + { + "epoch": 1.8460301213627723, + "grad_norm": 1.9281348385682333, + "learning_rate": 3.240548550931222e-07, + "loss": 0.709855854511261, + "step": 6313 + }, + { + "epoch": 1.8463225617780377, + "grad_norm": 1.8532989008830933, + "learning_rate": 3.228351783613348e-07, + "loss": 0.5194632411003113, + "step": 6314 + }, + { + "epoch": 1.8466150021933032, + "grad_norm": 1.750242735396334, + "learning_rate": 3.2161776360798535e-07, + "loss": 0.6027804017066956, + "step": 6315 + }, + { + "epoch": 1.8469074426085685, + "grad_norm": 1.591118544218686, + "learning_rate": 3.2040261111763946e-07, + "loss": 0.5047632455825806, + "step": 6316 + }, + { + "epoch": 1.8471998830238339, + "grad_norm": 2.082041129535105, + "learning_rate": 3.1918972117433e-07, + "loss": 0.5763708353042603, + "step": 6317 + }, + { + "epoch": 1.8474923234390994, + "grad_norm": 1.7701935148884373, + "learning_rate": 3.1797909406156234e-07, + "loss": 0.4725028872489929, + "step": 6318 + }, + { + "epoch": 1.8477847638543645, + "grad_norm": 1.5419878667068574, + "learning_rate": 3.167707300623135e-07, + "loss": 0.523047924041748, + "step": 6319 + }, + { + "epoch": 1.84807720426963, + "grad_norm": 1.6321175932285703, + "learning_rate": 3.15564629459032e-07, + "loss": 0.5100070238113403, + "step": 6320 + }, + { + "epoch": 1.8483696446848956, + "grad_norm": 1.7375024362733555, + "learning_rate": 
3.143607925336356e-07, + "loss": 0.6019359827041626, + "step": 6321 + }, + { + "epoch": 1.8486620851001607, + "grad_norm": 1.8195133886893664, + "learning_rate": 3.1315921956751483e-07, + "loss": 0.5514570474624634, + "step": 6322 + }, + { + "epoch": 1.8489545255154263, + "grad_norm": 1.6002643586013279, + "learning_rate": 3.1195991084152944e-07, + "loss": 0.49585646390914917, + "step": 6323 + }, + { + "epoch": 1.8492469659306916, + "grad_norm": 1.724322382501938, + "learning_rate": 3.1076286663601076e-07, + "loss": 0.5738509297370911, + "step": 6324 + }, + { + "epoch": 1.849539406345957, + "grad_norm": 1.8621720995112787, + "learning_rate": 3.095680872307605e-07, + "loss": 0.5149112939834595, + "step": 6325 + }, + { + "epoch": 1.8498318467612225, + "grad_norm": 1.6738148879498993, + "learning_rate": 3.0837557290505083e-07, + "loss": 0.45808184146881104, + "step": 6326 + }, + { + "epoch": 1.8501242871764878, + "grad_norm": 1.6155317269058609, + "learning_rate": 3.0718532393762435e-07, + "loss": 0.5173396468162537, + "step": 6327 + }, + { + "epoch": 1.8504167275917531, + "grad_norm": 1.6905273546590853, + "learning_rate": 3.059973406066963e-07, + "loss": 0.6229383945465088, + "step": 6328 + }, + { + "epoch": 1.8507091680070187, + "grad_norm": 1.6794531990129002, + "learning_rate": 3.0481162318994894e-07, + "loss": 0.45520371198654175, + "step": 6329 + }, + { + "epoch": 1.851001608422284, + "grad_norm": 1.5024073523898138, + "learning_rate": 3.036281719645373e-07, + "loss": 0.43216121196746826, + "step": 6330 + }, + { + "epoch": 1.8512940488375493, + "grad_norm": 1.9238309164883824, + "learning_rate": 3.0244698720708456e-07, + "loss": 0.5440583825111389, + "step": 6331 + }, + { + "epoch": 1.8515864892528149, + "grad_norm": 1.8189444343843324, + "learning_rate": 3.0126806919368756e-07, + "loss": 0.5474626421928406, + "step": 6332 + }, + { + "epoch": 1.85187892966808, + "grad_norm": 1.7800420936387606, + "learning_rate": 3.000914181999093e-07, + "loss": 
0.5122883915901184, + "step": 6333 + }, + { + "epoch": 1.8521713700833455, + "grad_norm": 1.776220435476035, + "learning_rate": 2.989170345007852e-07, + "loss": 0.48304370045661926, + "step": 6334 + }, + { + "epoch": 1.8524638104986109, + "grad_norm": 1.6949801188317577, + "learning_rate": 2.977449183708214e-07, + "loss": 0.566180408000946, + "step": 6335 + }, + { + "epoch": 1.8527562509138762, + "grad_norm": 1.7482351137010406, + "learning_rate": 2.96575070083992e-07, + "loss": 0.5218988656997681, + "step": 6336 + }, + { + "epoch": 1.8530486913291417, + "grad_norm": 1.8289145949576808, + "learning_rate": 2.954074899137427e-07, + "loss": 0.49669283628463745, + "step": 6337 + }, + { + "epoch": 1.853341131744407, + "grad_norm": 1.6012219042297557, + "learning_rate": 2.942421781329874e-07, + "loss": 0.5505487322807312, + "step": 6338 + }, + { + "epoch": 1.8536335721596724, + "grad_norm": 1.6156483149639533, + "learning_rate": 2.930791350141116e-07, + "loss": 0.5386735200881958, + "step": 6339 + }, + { + "epoch": 1.853926012574938, + "grad_norm": 2.0764057670166776, + "learning_rate": 2.919183608289689e-07, + "loss": 0.5266523957252502, + "step": 6340 + }, + { + "epoch": 1.8542184529902033, + "grad_norm": 1.573480922837112, + "learning_rate": 2.907598558488822e-07, + "loss": 0.5335103273391724, + "step": 6341 + }, + { + "epoch": 1.8545108934054686, + "grad_norm": 1.8447961626822076, + "learning_rate": 2.896036203446473e-07, + "loss": 0.6155405044555664, + "step": 6342 + }, + { + "epoch": 1.8548033338207341, + "grad_norm": 1.5602039082453873, + "learning_rate": 2.884496545865245e-07, + "loss": 0.5258159041404724, + "step": 6343 + }, + { + "epoch": 1.8550957742359993, + "grad_norm": 1.7894466773590292, + "learning_rate": 2.8729795884424927e-07, + "loss": 0.5428795218467712, + "step": 6344 + }, + { + "epoch": 1.8553882146512648, + "grad_norm": 1.4344098630811726, + "learning_rate": 2.8614853338702066e-07, + "loss": 0.4876418709754944, + "step": 6345 + }, + { + "epoch": 
1.8556806550665303, + "grad_norm": 1.606511441088432, + "learning_rate": 2.850013784835115e-07, + "loss": 0.49640393257141113, + "step": 6346 + }, + { + "epoch": 1.8559730954817955, + "grad_norm": 1.8316843043903746, + "learning_rate": 2.838564944018618e-07, + "loss": 0.5726122260093689, + "step": 6347 + }, + { + "epoch": 1.856265535897061, + "grad_norm": 1.653087716973347, + "learning_rate": 2.827138814096819e-07, + "loss": 0.5106557011604309, + "step": 6348 + }, + { + "epoch": 1.8565579763123263, + "grad_norm": 1.5025453294784719, + "learning_rate": 2.8157353977405044e-07, + "loss": 0.45941129326820374, + "step": 6349 + }, + { + "epoch": 1.8568504167275917, + "grad_norm": 1.781767756464568, + "learning_rate": 2.8043546976151414e-07, + "loss": 0.488609254360199, + "step": 6350 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 1.764244860072195, + "learning_rate": 2.7929967163809135e-07, + "loss": 0.639745831489563, + "step": 6351 + }, + { + "epoch": 1.8574352975581225, + "grad_norm": 1.498822179909691, + "learning_rate": 2.7816614566926747e-07, + "loss": 0.45327228307724, + "step": 6352 + }, + { + "epoch": 1.8577277379733879, + "grad_norm": 1.625074504661963, + "learning_rate": 2.7703489211999725e-07, + "loss": 0.5606091022491455, + "step": 6353 + }, + { + "epoch": 1.8580201783886534, + "grad_norm": 1.7312129624633084, + "learning_rate": 2.759059112547047e-07, + "loss": 0.5078528523445129, + "step": 6354 + }, + { + "epoch": 1.8583126188039187, + "grad_norm": 1.6687258508972733, + "learning_rate": 2.74779203337282e-07, + "loss": 0.5558253526687622, + "step": 6355 + }, + { + "epoch": 1.858605059219184, + "grad_norm": 1.7014892476807573, + "learning_rate": 2.7365476863108974e-07, + "loss": 0.3962102234363556, + "step": 6356 + }, + { + "epoch": 1.8588974996344496, + "grad_norm": 1.6957699860554467, + "learning_rate": 2.725326073989587e-07, + "loss": 0.4737718105316162, + "step": 6357 + }, + { + "epoch": 1.8591899400497147, + "grad_norm": 3.313281560384309, + 
"learning_rate": 2.7141271990318576e-07, + "loss": 0.5389090180397034, + "step": 6358 + }, + { + "epoch": 1.8594823804649803, + "grad_norm": 1.7840378938084138, + "learning_rate": 2.7029510640554033e-07, + "loss": 0.5311479568481445, + "step": 6359 + }, + { + "epoch": 1.8597748208802458, + "grad_norm": 1.631290291956445, + "learning_rate": 2.691797671672558e-07, + "loss": 0.4753482937812805, + "step": 6360 + }, + { + "epoch": 1.860067261295511, + "grad_norm": 1.581254208029566, + "learning_rate": 2.6806670244903577e-07, + "loss": 0.5192427635192871, + "step": 6361 + }, + { + "epoch": 1.8603597017107765, + "grad_norm": 1.9540580966263197, + "learning_rate": 2.6695591251105214e-07, + "loss": 0.5910875797271729, + "step": 6362 + }, + { + "epoch": 1.8606521421260418, + "grad_norm": 1.7486575397054567, + "learning_rate": 2.658473976129472e-07, + "loss": 0.5465212464332581, + "step": 6363 + }, + { + "epoch": 1.8609445825413071, + "grad_norm": 1.7446293681201037, + "learning_rate": 2.647411580138282e-07, + "loss": 0.43188267946243286, + "step": 6364 + }, + { + "epoch": 1.8612370229565727, + "grad_norm": 2.144472636918694, + "learning_rate": 2.636371939722715e-07, + "loss": 0.5723724365234375, + "step": 6365 + }, + { + "epoch": 1.861529463371838, + "grad_norm": 1.6310859619397844, + "learning_rate": 2.62535505746323e-07, + "loss": 0.47383856773376465, + "step": 6366 + }, + { + "epoch": 1.8618219037871033, + "grad_norm": 1.764378835172625, + "learning_rate": 2.6143609359349566e-07, + "loss": 0.502855658531189, + "step": 6367 + }, + { + "epoch": 1.8621143442023689, + "grad_norm": 2.265501418087609, + "learning_rate": 2.6033895777077043e-07, + "loss": 0.5934205055236816, + "step": 6368 + }, + { + "epoch": 1.8624067846176342, + "grad_norm": 1.469455820490925, + "learning_rate": 2.5924409853459455e-07, + "loss": 0.4157971143722534, + "step": 6369 + }, + { + "epoch": 1.8626992250328995, + "grad_norm": 1.8051847044948597, + "learning_rate": 2.5815151614088764e-07, + "loss": 
0.5944307446479797, + "step": 6370 + }, + { + "epoch": 1.862991665448165, + "grad_norm": 2.0081645135491812, + "learning_rate": 2.57061210845031e-07, + "loss": 0.5603153705596924, + "step": 6371 + }, + { + "epoch": 1.8632841058634302, + "grad_norm": 1.752999497142634, + "learning_rate": 2.559731829018786e-07, + "loss": 0.49231380224227905, + "step": 6372 + }, + { + "epoch": 1.8635765462786957, + "grad_norm": 1.666251917997058, + "learning_rate": 2.548874325657502e-07, + "loss": 0.46984565258026123, + "step": 6373 + }, + { + "epoch": 1.863868986693961, + "grad_norm": 1.7373025752546019, + "learning_rate": 2.5380396009043297e-07, + "loss": 0.5088338255882263, + "step": 6374 + }, + { + "epoch": 1.8641614271092264, + "grad_norm": 1.7554684094014161, + "learning_rate": 2.52722765729182e-07, + "loss": 0.4760589599609375, + "step": 6375 + }, + { + "epoch": 1.864453867524492, + "grad_norm": 1.6521606786384044, + "learning_rate": 2.5164384973471954e-07, + "loss": 0.44232040643692017, + "step": 6376 + }, + { + "epoch": 1.8647463079397573, + "grad_norm": 1.736903879415624, + "learning_rate": 2.505672123592373e-07, + "loss": 0.46714338660240173, + "step": 6377 + }, + { + "epoch": 1.8650387483550226, + "grad_norm": 1.9333860177281759, + "learning_rate": 2.494928538543917e-07, + "loss": 0.5527149438858032, + "step": 6378 + }, + { + "epoch": 1.8653311887702881, + "grad_norm": 1.690422887605866, + "learning_rate": 2.484207744713074e-07, + "loss": 0.5006313323974609, + "step": 6379 + }, + { + "epoch": 1.8656236291855535, + "grad_norm": 1.5247883016042734, + "learning_rate": 2.473509744605751e-07, + "loss": 0.5007860660552979, + "step": 6380 + }, + { + "epoch": 1.8659160696008188, + "grad_norm": 1.683063597354387, + "learning_rate": 2.4628345407225804e-07, + "loss": 0.4354132413864136, + "step": 6381 + }, + { + "epoch": 1.8662085100160843, + "grad_norm": 1.718309113338333, + "learning_rate": 2.452182135558789e-07, + "loss": 0.5199555158615112, + "step": 6382 + }, + { + "epoch": 
1.8665009504313494, + "grad_norm": 1.6260046663066803, + "learning_rate": 2.441552531604319e-07, + "loss": 0.5117326974868774, + "step": 6383 + }, + { + "epoch": 1.866793390846615, + "grad_norm": 1.803024051218915, + "learning_rate": 2.43094573134377e-07, + "loss": 0.5169814825057983, + "step": 6384 + }, + { + "epoch": 1.8670858312618805, + "grad_norm": 1.7012998015666523, + "learning_rate": 2.420361737256438e-07, + "loss": 0.563339352607727, + "step": 6385 + }, + { + "epoch": 1.8673782716771457, + "grad_norm": 2.1248949598274325, + "learning_rate": 2.409800551816255e-07, + "loss": 0.710465133190155, + "step": 6386 + }, + { + "epoch": 1.8676707120924112, + "grad_norm": 1.6580658731053397, + "learning_rate": 2.3992621774918343e-07, + "loss": 0.6894562244415283, + "step": 6387 + }, + { + "epoch": 1.8679631525076765, + "grad_norm": 1.7380197058585787, + "learning_rate": 2.388746616746462e-07, + "loss": 0.5105183124542236, + "step": 6388 + }, + { + "epoch": 1.8682555929229419, + "grad_norm": 2.0034985048956684, + "learning_rate": 2.3782538720380722e-07, + "loss": 0.4602908492088318, + "step": 6389 + }, + { + "epoch": 1.8685480333382074, + "grad_norm": 1.7787197864367217, + "learning_rate": 2.3677839458192908e-07, + "loss": 0.5395161509513855, + "step": 6390 + }, + { + "epoch": 1.8688404737534727, + "grad_norm": 1.6121023481071262, + "learning_rate": 2.3573368405374054e-07, + "loss": 0.5842725038528442, + "step": 6391 + }, + { + "epoch": 1.869132914168738, + "grad_norm": 1.6354709739233064, + "learning_rate": 2.346912558634362e-07, + "loss": 0.5837947130203247, + "step": 6392 + }, + { + "epoch": 1.8694253545840036, + "grad_norm": 1.8136211176417363, + "learning_rate": 2.3365111025467568e-07, + "loss": 0.5255596041679382, + "step": 6393 + }, + { + "epoch": 1.869717794999269, + "grad_norm": 1.5586602271443384, + "learning_rate": 2.326132474705889e-07, + "loss": 0.5614485144615173, + "step": 6394 + }, + { + "epoch": 1.8700102354145343, + "grad_norm": 1.5895893761997042, + 
"learning_rate": 2.3157766775376733e-07, + "loss": 0.5510128736495972, + "step": 6395 + }, + { + "epoch": 1.8703026758297998, + "grad_norm": 2.295988070565878, + "learning_rate": 2.3054437134627406e-07, + "loss": 0.690884530544281, + "step": 6396 + }, + { + "epoch": 1.870595116245065, + "grad_norm": 1.94960784120805, + "learning_rate": 2.2951335848963364e-07, + "loss": 0.637476921081543, + "step": 6397 + }, + { + "epoch": 1.8708875566603305, + "grad_norm": 1.6526446878259382, + "learning_rate": 2.2848462942484108e-07, + "loss": 0.5254319906234741, + "step": 6398 + }, + { + "epoch": 1.871179997075596, + "grad_norm": 1.7552717813182315, + "learning_rate": 2.27458184392354e-07, + "loss": 0.5038233995437622, + "step": 6399 + }, + { + "epoch": 1.8714724374908611, + "grad_norm": 1.4123258498894362, + "learning_rate": 2.2643402363209832e-07, + "loss": 0.43701431155204773, + "step": 6400 + }, + { + "epoch": 1.8717648779061267, + "grad_norm": 1.8138198755485717, + "learning_rate": 2.2541214738346583e-07, + "loss": 0.5490877628326416, + "step": 6401 + }, + { + "epoch": 1.872057318321392, + "grad_norm": 1.5452561215431913, + "learning_rate": 2.2439255588531327e-07, + "loss": 0.48393410444259644, + "step": 6402 + }, + { + "epoch": 1.8723497587366573, + "grad_norm": 1.6213926610567049, + "learning_rate": 2.2337524937596444e-07, + "loss": 0.5439243912696838, + "step": 6403 + }, + { + "epoch": 1.8726421991519229, + "grad_norm": 1.6026974016529494, + "learning_rate": 2.22360228093208e-07, + "loss": 0.5272157192230225, + "step": 6404 + }, + { + "epoch": 1.8729346395671882, + "grad_norm": 1.6750451870732375, + "learning_rate": 2.2134749227429864e-07, + "loss": 0.6323473453521729, + "step": 6405 + }, + { + "epoch": 1.8732270799824535, + "grad_norm": 1.6749139186520705, + "learning_rate": 2.2033704215595808e-07, + "loss": 0.4568995237350464, + "step": 6406 + }, + { + "epoch": 1.873519520397719, + "grad_norm": 1.8331627672377568, + "learning_rate": 2.1932887797437296e-07, + "loss": 
0.5817153453826904, + "step": 6407 + }, + { + "epoch": 1.8738119608129844, + "grad_norm": 1.4674902238035163, + "learning_rate": 2.183229999651948e-07, + "loss": 0.5104260444641113, + "step": 6408 + }, + { + "epoch": 1.8741044012282497, + "grad_norm": 1.7946613600749395, + "learning_rate": 2.1731940836354105e-07, + "loss": 0.44944921135902405, + "step": 6409 + }, + { + "epoch": 1.8743968416435153, + "grad_norm": 1.794977484250215, + "learning_rate": 2.163181034039974e-07, + "loss": 0.6935169696807861, + "step": 6410 + }, + { + "epoch": 1.8746892820587804, + "grad_norm": 1.7330999339843873, + "learning_rate": 2.1531908532060998e-07, + "loss": 0.55609130859375, + "step": 6411 + }, + { + "epoch": 1.874981722474046, + "grad_norm": 1.6428359107019144, + "learning_rate": 2.143223543468953e-07, + "loss": 0.5402215719223022, + "step": 6412 + }, + { + "epoch": 1.8752741628893113, + "grad_norm": 1.8163043216263146, + "learning_rate": 2.1332791071583258e-07, + "loss": 0.5669365525245667, + "step": 6413 + }, + { + "epoch": 1.8755666033045766, + "grad_norm": 2.2122008806914044, + "learning_rate": 2.123357546598659e-07, + "loss": 0.46257615089416504, + "step": 6414 + }, + { + "epoch": 1.8758590437198421, + "grad_norm": 1.6308794717153283, + "learning_rate": 2.1134588641090858e-07, + "loss": 0.4596136212348938, + "step": 6415 + }, + { + "epoch": 1.8761514841351075, + "grad_norm": 1.6758615624094995, + "learning_rate": 2.1035830620033227e-07, + "loss": 0.5086819529533386, + "step": 6416 + }, + { + "epoch": 1.8764439245503728, + "grad_norm": 1.8974547658257448, + "learning_rate": 2.0937301425898115e-07, + "loss": 0.6008501052856445, + "step": 6417 + }, + { + "epoch": 1.8767363649656383, + "grad_norm": 1.8448672190670345, + "learning_rate": 2.0839001081715882e-07, + "loss": 0.5943784713745117, + "step": 6418 + }, + { + "epoch": 1.8770288053809037, + "grad_norm": 1.3203141385144623, + "learning_rate": 2.0740929610463813e-07, + "loss": 0.5006660223007202, + "step": 6419 + }, + { + 
"epoch": 1.877321245796169, + "grad_norm": 1.7508035137785818, + "learning_rate": 2.0643087035065458e-07, + "loss": 0.5434073805809021, + "step": 6420 + }, + { + "epoch": 1.8776136862114345, + "grad_norm": 1.8446497118213794, + "learning_rate": 2.0545473378390858e-07, + "loss": 0.6426963210105896, + "step": 6421 + }, + { + "epoch": 1.8779061266266996, + "grad_norm": 1.7388169538440008, + "learning_rate": 2.044808866325676e-07, + "loss": 0.5190218687057495, + "step": 6422 + }, + { + "epoch": 1.8781985670419652, + "grad_norm": 1.5291942184143035, + "learning_rate": 2.035093291242607e-07, + "loss": 0.40918534994125366, + "step": 6423 + }, + { + "epoch": 1.8784910074572307, + "grad_norm": 1.719713887519883, + "learning_rate": 2.0254006148608507e-07, + "loss": 0.5403652191162109, + "step": 6424 + }, + { + "epoch": 1.8787834478724958, + "grad_norm": 1.3839892041506006, + "learning_rate": 2.0157308394460062e-07, + "loss": 0.49781516194343567, + "step": 6425 + }, + { + "epoch": 1.8790758882877614, + "grad_norm": 1.8332751958303748, + "learning_rate": 2.006083967258321e-07, + "loss": 0.5841303467750549, + "step": 6426 + }, + { + "epoch": 1.8793683287030267, + "grad_norm": 1.679945923485487, + "learning_rate": 1.9964600005527024e-07, + "loss": 0.5054808855056763, + "step": 6427 + }, + { + "epoch": 1.879660769118292, + "grad_norm": 1.7695393284467882, + "learning_rate": 1.9868589415786843e-07, + "loss": 0.4801362454891205, + "step": 6428 + }, + { + "epoch": 1.8799532095335576, + "grad_norm": 1.8547174560912147, + "learning_rate": 1.9772807925804494e-07, + "loss": 0.4709380269050598, + "step": 6429 + }, + { + "epoch": 1.880245649948823, + "grad_norm": 1.8447220446699908, + "learning_rate": 1.9677255557968511e-07, + "loss": 0.665968120098114, + "step": 6430 + }, + { + "epoch": 1.8805380903640883, + "grad_norm": 1.7494009698963573, + "learning_rate": 1.9581932334613585e-07, + "loss": 0.515839159488678, + "step": 6431 + }, + { + "epoch": 1.8808305307793538, + "grad_norm": 
1.6699738562759978, + "learning_rate": 1.948683827802089e-07, + "loss": 0.5399242043495178, + "step": 6432 + }, + { + "epoch": 1.8811229711946191, + "grad_norm": 1.7478095955612059, + "learning_rate": 1.9391973410418097e-07, + "loss": 0.6167087554931641, + "step": 6433 + }, + { + "epoch": 1.8814154116098845, + "grad_norm": 1.826500337038364, + "learning_rate": 1.9297337753979462e-07, + "loss": 0.6139745116233826, + "step": 6434 + }, + { + "epoch": 1.88170785202515, + "grad_norm": 2.0873679343118257, + "learning_rate": 1.9202931330825292e-07, + "loss": 0.7103149890899658, + "step": 6435 + }, + { + "epoch": 1.8820002924404151, + "grad_norm": 1.6777685812633742, + "learning_rate": 1.9108754163022602e-07, + "loss": 0.5958741903305054, + "step": 6436 + }, + { + "epoch": 1.8822927328556807, + "grad_norm": 1.2489160599157765, + "learning_rate": 1.9014806272584673e-07, + "loss": 0.32660478353500366, + "step": 6437 + }, + { + "epoch": 1.8825851732709462, + "grad_norm": 1.822465954469875, + "learning_rate": 1.8921087681471272e-07, + "loss": 0.49485981464385986, + "step": 6438 + }, + { + "epoch": 1.8828776136862113, + "grad_norm": 1.5404253681507418, + "learning_rate": 1.8827598411588544e-07, + "loss": 0.5106277465820312, + "step": 6439 + }, + { + "epoch": 1.8831700541014769, + "grad_norm": 1.5696470040532076, + "learning_rate": 1.8734338484789115e-07, + "loss": 0.50006502866745, + "step": 6440 + }, + { + "epoch": 1.8834624945167422, + "grad_norm": 1.5827360977472946, + "learning_rate": 1.8641307922871887e-07, + "loss": 0.47097745537757874, + "step": 6441 + }, + { + "epoch": 1.8837549349320075, + "grad_norm": 1.718260594389779, + "learning_rate": 1.854850674758213e-07, + "loss": 0.5874402523040771, + "step": 6442 + }, + { + "epoch": 1.884047375347273, + "grad_norm": 1.7055917291229012, + "learning_rate": 1.8455934980611602e-07, + "loss": 0.45705318450927734, + "step": 6443 + }, + { + "epoch": 1.8843398157625384, + "grad_norm": 1.8262667617041222, + "learning_rate": 
1.8363592643598328e-07, + "loss": 0.4949952960014343, + "step": 6444 + }, + { + "epoch": 1.8846322561778037, + "grad_norm": 2.0005095204142056, + "learning_rate": 1.827147975812693e-07, + "loss": 0.5311721563339233, + "step": 6445 + }, + { + "epoch": 1.8849246965930693, + "grad_norm": 1.8075375628836245, + "learning_rate": 1.817959634572819e-07, + "loss": 0.5652828216552734, + "step": 6446 + }, + { + "epoch": 1.8852171370083346, + "grad_norm": 1.7007026167846622, + "learning_rate": 1.8087942427879146e-07, + "loss": 0.4856044054031372, + "step": 6447 + }, + { + "epoch": 1.8855095774236, + "grad_norm": 1.6920105837383546, + "learning_rate": 1.799651802600344e-07, + "loss": 0.55420982837677, + "step": 6448 + }, + { + "epoch": 1.8858020178388655, + "grad_norm": 1.8804834035548856, + "learning_rate": 1.7905323161470867e-07, + "loss": 0.5869326591491699, + "step": 6449 + }, + { + "epoch": 1.8860944582541306, + "grad_norm": 1.761061751635786, + "learning_rate": 1.781435785559793e-07, + "loss": 0.4505504369735718, + "step": 6450 + }, + { + "epoch": 1.8863868986693961, + "grad_norm": 1.7194415376329713, + "learning_rate": 1.7723622129646955e-07, + "loss": 0.5460773706436157, + "step": 6451 + }, + { + "epoch": 1.8866793390846615, + "grad_norm": 1.7253684204963688, + "learning_rate": 1.7633116004826978e-07, + "loss": 0.6214778423309326, + "step": 6452 + }, + { + "epoch": 1.8869717794999268, + "grad_norm": 1.786722853658628, + "learning_rate": 1.7542839502293297e-07, + "loss": 0.4900703430175781, + "step": 6453 + }, + { + "epoch": 1.8872642199151923, + "grad_norm": 1.8351888114829378, + "learning_rate": 1.7452792643147364e-07, + "loss": 0.5177547931671143, + "step": 6454 + }, + { + "epoch": 1.8875566603304577, + "grad_norm": 1.6033594290974305, + "learning_rate": 1.7362975448437236e-07, + "loss": 0.3914458453655243, + "step": 6455 + }, + { + "epoch": 1.887849100745723, + "grad_norm": 1.7306995937297311, + "learning_rate": 1.7273387939157116e-07, + "loss": 0.5222523212432861, + 
"step": 6456 + }, + { + "epoch": 1.8881415411609885, + "grad_norm": 1.8351026582741266, + "learning_rate": 1.7184030136247477e-07, + "loss": 0.5097587704658508, + "step": 6457 + }, + { + "epoch": 1.8884339815762539, + "grad_norm": 1.711376264331189, + "learning_rate": 1.7094902060595053e-07, + "loss": 0.517410397529602, + "step": 6458 + }, + { + "epoch": 1.8887264219915192, + "grad_norm": 1.5054067124169248, + "learning_rate": 1.7006003733033182e-07, + "loss": 0.4951689839363098, + "step": 6459 + }, + { + "epoch": 1.8890188624067847, + "grad_norm": 1.8698243351971042, + "learning_rate": 1.6917335174341242e-07, + "loss": 0.5530004501342773, + "step": 6460 + }, + { + "epoch": 1.8893113028220498, + "grad_norm": 1.3793759581483827, + "learning_rate": 1.6828896405244988e-07, + "loss": 0.5231990814208984, + "step": 6461 + }, + { + "epoch": 1.8896037432373154, + "grad_norm": 1.7109665283076239, + "learning_rate": 1.6740687446416326e-07, + "loss": 0.5142268538475037, + "step": 6462 + }, + { + "epoch": 1.889896183652581, + "grad_norm": 1.5939124952252972, + "learning_rate": 1.6652708318473765e-07, + "loss": 0.4803999364376068, + "step": 6463 + }, + { + "epoch": 1.890188624067846, + "grad_norm": 1.8261203070041963, + "learning_rate": 1.6564959041981743e-07, + "loss": 0.38822099566459656, + "step": 6464 + }, + { + "epoch": 1.8904810644831116, + "grad_norm": 1.7158195687276572, + "learning_rate": 1.6477439637451186e-07, + "loss": 0.4778556823730469, + "step": 6465 + }, + { + "epoch": 1.890773504898377, + "grad_norm": 1.548976438279917, + "learning_rate": 1.6390150125339178e-07, + "loss": 0.5083664059638977, + "step": 6466 + }, + { + "epoch": 1.8910659453136422, + "grad_norm": 2.298817115631298, + "learning_rate": 1.6303090526049058e-07, + "loss": 0.6592142581939697, + "step": 6467 + }, + { + "epoch": 1.8913583857289078, + "grad_norm": 1.7188849828284447, + "learning_rate": 1.6216260859930776e-07, + "loss": 0.6350588798522949, + "step": 6468 + }, + { + "epoch": 
1.8916508261441731, + "grad_norm": 1.900981319900476, + "learning_rate": 1.6129661147279763e-07, + "loss": 0.5542852282524109, + "step": 6469 + }, + { + "epoch": 1.8919432665594385, + "grad_norm": 1.7094379727839777, + "learning_rate": 1.6043291408338602e-07, + "loss": 0.572988748550415, + "step": 6470 + }, + { + "epoch": 1.892235706974704, + "grad_norm": 1.578693569659532, + "learning_rate": 1.5957151663295367e-07, + "loss": 0.4801466763019562, + "step": 6471 + }, + { + "epoch": 1.8925281473899693, + "grad_norm": 2.0149025268161207, + "learning_rate": 1.5871241932284953e-07, + "loss": 0.6286160349845886, + "step": 6472 + }, + { + "epoch": 1.8928205878052347, + "grad_norm": 1.8739502258074872, + "learning_rate": 1.5785562235388074e-07, + "loss": 0.5731645822525024, + "step": 6473 + }, + { + "epoch": 1.8931130282205002, + "grad_norm": 2.02559646967304, + "learning_rate": 1.5700112592631933e-07, + "loss": 0.47890836000442505, + "step": 6474 + }, + { + "epoch": 1.8934054686357653, + "grad_norm": 1.8833158182705436, + "learning_rate": 1.5614893023989886e-07, + "loss": 0.4379703998565674, + "step": 6475 + }, + { + "epoch": 1.8936979090510309, + "grad_norm": 1.886508266764503, + "learning_rate": 1.5529903549381331e-07, + "loss": 0.5629044771194458, + "step": 6476 + }, + { + "epoch": 1.8939903494662964, + "grad_norm": 1.6388873220258502, + "learning_rate": 1.5445144188672268e-07, + "loss": 0.4995439052581787, + "step": 6477 + }, + { + "epoch": 1.8942827898815615, + "grad_norm": 1.54762620576383, + "learning_rate": 1.5360614961674403e-07, + "loss": 0.5350549221038818, + "step": 6478 + }, + { + "epoch": 1.894575230296827, + "grad_norm": 1.636976407400752, + "learning_rate": 1.5276315888146266e-07, + "loss": 0.5245925188064575, + "step": 6479 + }, + { + "epoch": 1.8948676707120924, + "grad_norm": 1.870112790684546, + "learning_rate": 1.519224698779198e-07, + "loss": 0.5159675478935242, + "step": 6480 + }, + { + "epoch": 1.8951601111273577, + "grad_norm": 1.6327790205426773, 
+ "learning_rate": 1.5108408280262276e-07, + "loss": 0.5046014189720154, + "step": 6481 + }, + { + "epoch": 1.8954525515426233, + "grad_norm": 1.5658787677393426, + "learning_rate": 1.502479978515381e-07, + "loss": 0.35977911949157715, + "step": 6482 + }, + { + "epoch": 1.8957449919578886, + "grad_norm": 1.6374646749200208, + "learning_rate": 1.4941421522009725e-07, + "loss": 0.4689600467681885, + "step": 6483 + }, + { + "epoch": 1.896037432373154, + "grad_norm": 1.713919299692529, + "learning_rate": 1.485827351031899e-07, + "loss": 0.5729683637619019, + "step": 6484 + }, + { + "epoch": 1.8963298727884195, + "grad_norm": 2.081397285004385, + "learning_rate": 1.4775355769517163e-07, + "loss": 0.5929673314094543, + "step": 6485 + }, + { + "epoch": 1.8966223132036848, + "grad_norm": 1.6003411415494537, + "learning_rate": 1.4692668318985636e-07, + "loss": 0.43075594305992126, + "step": 6486 + }, + { + "epoch": 1.8969147536189501, + "grad_norm": 1.7646064022155787, + "learning_rate": 1.461021117805217e-07, + "loss": 0.5247992277145386, + "step": 6487 + }, + { + "epoch": 1.8972071940342157, + "grad_norm": 1.7947400732319756, + "learning_rate": 1.4527984365990455e-07, + "loss": 0.4930630326271057, + "step": 6488 + }, + { + "epoch": 1.8974996344494808, + "grad_norm": 2.490399223660391, + "learning_rate": 1.4445987902020676e-07, + "loss": 0.7183758616447449, + "step": 6489 + }, + { + "epoch": 1.8977920748647463, + "grad_norm": 1.6603594705802933, + "learning_rate": 1.4364221805309052e-07, + "loss": 0.4766094982624054, + "step": 6490 + }, + { + "epoch": 1.8980845152800117, + "grad_norm": 1.7401365125544646, + "learning_rate": 1.4282686094967747e-07, + "loss": 0.43594151735305786, + "step": 6491 + }, + { + "epoch": 1.898376955695277, + "grad_norm": 1.4953976915814553, + "learning_rate": 1.4201380790055397e-07, + "loss": 0.49320366978645325, + "step": 6492 + }, + { + "epoch": 1.8986693961105425, + "grad_norm": 1.7835092237734465, + "learning_rate": 1.4120305909576359e-07, + 
"loss": 0.600296139717102, + "step": 6493 + }, + { + "epoch": 1.8989618365258079, + "grad_norm": 1.8325915671317163, + "learning_rate": 1.4039461472481696e-07, + "loss": 0.6692827939987183, + "step": 6494 + }, + { + "epoch": 1.8992542769410732, + "grad_norm": 1.5707947665490356, + "learning_rate": 1.395884749766807e-07, + "loss": 0.49206262826919556, + "step": 6495 + }, + { + "epoch": 1.8995467173563387, + "grad_norm": 1.5801197568349268, + "learning_rate": 1.3878464003978741e-07, + "loss": 0.4987361431121826, + "step": 6496 + }, + { + "epoch": 1.899839157771604, + "grad_norm": 1.5345056226134064, + "learning_rate": 1.3798311010202681e-07, + "loss": 0.5020350217819214, + "step": 6497 + }, + { + "epoch": 1.9001315981868694, + "grad_norm": 1.804856300616187, + "learning_rate": 1.3718388535075123e-07, + "loss": 0.5906451344490051, + "step": 6498 + }, + { + "epoch": 1.900424038602135, + "grad_norm": 1.7402170644717794, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.5089905858039856, + "step": 6499 + }, + { + "epoch": 1.9007164790174, + "grad_norm": 1.7322257732687294, + "learning_rate": 1.3559235215437672e-07, + "loss": 0.4633820056915283, + "step": 6500 + }, + { + "epoch": 1.9010089194326656, + "grad_norm": 1.760704522549711, + "learning_rate": 1.34800044081288e-07, + "loss": 0.4214053750038147, + "step": 6501 + }, + { + "epoch": 1.9013013598479311, + "grad_norm": 1.8502899980656935, + "learning_rate": 1.3401004193870694e-07, + "loss": 0.6652689576148987, + "step": 6502 + }, + { + "epoch": 1.9015938002631962, + "grad_norm": 1.9902189127655232, + "learning_rate": 1.3322234591129247e-07, + "loss": 0.610877275466919, + "step": 6503 + }, + { + "epoch": 1.9018862406784618, + "grad_norm": 1.8174576160077263, + "learning_rate": 1.324369561831651e-07, + "loss": 0.5051777958869934, + "step": 6504 + }, + { + "epoch": 1.9021786810937271, + "grad_norm": 1.679424427708786, + "learning_rate": 1.3165387293790133e-07, + "loss": 0.5004675984382629, + "step": 6505 + }, + { + 
"epoch": 1.9024711215089924, + "grad_norm": 1.7771913721647656, + "learning_rate": 1.3087309635854583e-07, + "loss": 0.5778615474700928, + "step": 6506 + }, + { + "epoch": 1.902763561924258, + "grad_norm": 2.0292378822767807, + "learning_rate": 1.300946266275982e-07, + "loss": 0.5282145738601685, + "step": 6507 + }, + { + "epoch": 1.9030560023395233, + "grad_norm": 1.7978860546574749, + "learning_rate": 1.2931846392702174e-07, + "loss": 0.5965359807014465, + "step": 6508 + }, + { + "epoch": 1.9033484427547886, + "grad_norm": 1.6652110616876246, + "learning_rate": 1.2854460843823912e-07, + "loss": 0.5891281366348267, + "step": 6509 + }, + { + "epoch": 1.9036408831700542, + "grad_norm": 1.6248785946895787, + "learning_rate": 1.2777306034213677e-07, + "loss": 0.516204297542572, + "step": 6510 + }, + { + "epoch": 1.9039333235853195, + "grad_norm": 1.6814946652270708, + "learning_rate": 1.2700381981905486e-07, + "loss": 0.5148355960845947, + "step": 6511 + }, + { + "epoch": 1.9042257640005849, + "grad_norm": 1.8013354973349966, + "learning_rate": 1.2623688704880287e-07, + "loss": 0.5599791407585144, + "step": 6512 + }, + { + "epoch": 1.9045182044158504, + "grad_norm": 1.4831223871376102, + "learning_rate": 1.2547226221064412e-07, + "loss": 0.44349417090415955, + "step": 6513 + }, + { + "epoch": 1.9048106448311155, + "grad_norm": 1.8442502212828862, + "learning_rate": 1.2470994548330672e-07, + "loss": 0.5919830799102783, + "step": 6514 + }, + { + "epoch": 1.905103085246381, + "grad_norm": 1.5907952124169482, + "learning_rate": 1.2394993704497592e-07, + "loss": 0.4615499675273895, + "step": 6515 + }, + { + "epoch": 1.9053955256616466, + "grad_norm": 2.080938429630683, + "learning_rate": 1.2319223707330074e-07, + "loss": 0.5217719674110413, + "step": 6516 + }, + { + "epoch": 1.9056879660769117, + "grad_norm": 1.7218384779241716, + "learning_rate": 1.2243684574538838e-07, + "loss": 0.510722279548645, + "step": 6517 + }, + { + "epoch": 1.9059804064921773, + "grad_norm": 
1.7577870608967676, + "learning_rate": 1.2168376323780652e-07, + "loss": 0.6744403839111328, + "step": 6518 + }, + { + "epoch": 1.9062728469074426, + "grad_norm": 1.852387461765699, + "learning_rate": 1.209329897265832e-07, + "loss": 0.4991394281387329, + "step": 6519 + }, + { + "epoch": 1.906565287322708, + "grad_norm": 1.5123359691224252, + "learning_rate": 1.2018452538720805e-07, + "loss": 0.43237754702568054, + "step": 6520 + }, + { + "epoch": 1.9068577277379735, + "grad_norm": 1.929873331270624, + "learning_rate": 1.1943837039463112e-07, + "loss": 0.6042662262916565, + "step": 6521 + }, + { + "epoch": 1.9071501681532388, + "grad_norm": 1.5924831654811167, + "learning_rate": 1.186945249232585e-07, + "loss": 0.4275910556316376, + "step": 6522 + }, + { + "epoch": 1.9074426085685041, + "grad_norm": 1.6404715584331906, + "learning_rate": 1.1795298914696219e-07, + "loss": 0.4368266463279724, + "step": 6523 + }, + { + "epoch": 1.9077350489837697, + "grad_norm": 1.7092717646033881, + "learning_rate": 1.172137632390713e-07, + "loss": 0.49492496252059937, + "step": 6524 + }, + { + "epoch": 1.908027489399035, + "grad_norm": 1.587478317321713, + "learning_rate": 1.164768473723743e-07, + "loss": 0.4296407103538513, + "step": 6525 + }, + { + "epoch": 1.9083199298143003, + "grad_norm": 1.746911434628144, + "learning_rate": 1.1574224171912118e-07, + "loss": 0.4609370231628418, + "step": 6526 + }, + { + "epoch": 1.9086123702295659, + "grad_norm": 1.687321204236502, + "learning_rate": 1.1500994645102237e-07, + "loss": 0.5201660394668579, + "step": 6527 + }, + { + "epoch": 1.908904810644831, + "grad_norm": 1.686028014701993, + "learning_rate": 1.1427996173924649e-07, + "loss": 0.49946731328964233, + "step": 6528 + }, + { + "epoch": 1.9091972510600965, + "grad_norm": 1.700984250030961, + "learning_rate": 1.1355228775442262e-07, + "loss": 0.5479187369346619, + "step": 6529 + }, + { + "epoch": 1.9094896914753618, + "grad_norm": 1.6485232123504545, + "learning_rate": 
1.1282692466664247e-07, + "loss": 0.5227243900299072, + "step": 6530 + }, + { + "epoch": 1.9097821318906272, + "grad_norm": 1.896983089459967, + "learning_rate": 1.1210387264545264e-07, + "loss": 0.42863208055496216, + "step": 6531 + }, + { + "epoch": 1.9100745723058927, + "grad_norm": 1.552171259240321, + "learning_rate": 1.113831318598635e-07, + "loss": 0.37858498096466064, + "step": 6532 + }, + { + "epoch": 1.910367012721158, + "grad_norm": 1.852509398879224, + "learning_rate": 1.1066470247834471e-07, + "loss": 0.6447315216064453, + "step": 6533 + }, + { + "epoch": 1.9106594531364234, + "grad_norm": 1.4833597844037574, + "learning_rate": 1.0994858466882197e-07, + "loss": 0.4159877300262451, + "step": 6534 + }, + { + "epoch": 1.910951893551689, + "grad_norm": 1.7056274655886765, + "learning_rate": 1.0923477859868581e-07, + "loss": 0.5042530298233032, + "step": 6535 + }, + { + "epoch": 1.9112443339669543, + "grad_norm": 1.6939120860687955, + "learning_rate": 1.0852328443478278e-07, + "loss": 0.35955798625946045, + "step": 6536 + }, + { + "epoch": 1.9115367743822196, + "grad_norm": 1.6272843503399623, + "learning_rate": 1.0781410234342093e-07, + "loss": 0.561823308467865, + "step": 6537 + }, + { + "epoch": 1.9118292147974851, + "grad_norm": 1.6724333597123697, + "learning_rate": 1.0710723249036659e-07, + "loss": 0.44518136978149414, + "step": 6538 + }, + { + "epoch": 1.9121216552127502, + "grad_norm": 2.0012454930429397, + "learning_rate": 1.0640267504084756e-07, + "loss": 0.5657057166099548, + "step": 6539 + }, + { + "epoch": 1.9124140956280158, + "grad_norm": 1.5762808769057957, + "learning_rate": 1.0570043015954989e-07, + "loss": 0.5659947395324707, + "step": 6540 + }, + { + "epoch": 1.9127065360432813, + "grad_norm": 1.5811137035723222, + "learning_rate": 1.0500049801061784e-07, + "loss": 0.45648419857025146, + "step": 6541 + }, + { + "epoch": 1.9129989764585464, + "grad_norm": 1.8646406465839787, + "learning_rate": 1.0430287875765611e-07, + "loss": 
0.4978141784667969, + "step": 6542 + }, + { + "epoch": 1.913291416873812, + "grad_norm": 1.54273033799953, + "learning_rate": 1.0360757256372977e-07, + "loss": 0.5397627949714661, + "step": 6543 + }, + { + "epoch": 1.9135838572890773, + "grad_norm": 1.8918413526412523, + "learning_rate": 1.029145795913633e-07, + "loss": 0.6359304189682007, + "step": 6544 + }, + { + "epoch": 1.9138762977043426, + "grad_norm": 1.70706044627556, + "learning_rate": 1.0222390000253824e-07, + "loss": 0.5023899078369141, + "step": 6545 + }, + { + "epoch": 1.9141687381196082, + "grad_norm": 1.8668808073409142, + "learning_rate": 1.0153553395869654e-07, + "loss": 0.5231877565383911, + "step": 6546 + }, + { + "epoch": 1.9144611785348735, + "grad_norm": 1.7146199886416342, + "learning_rate": 1.008494816207406e-07, + "loss": 0.5925711393356323, + "step": 6547 + }, + { + "epoch": 1.9147536189501388, + "grad_norm": 1.5881527564838034, + "learning_rate": 1.0016574314902993e-07, + "loss": 0.42732810974121094, + "step": 6548 + }, + { + "epoch": 1.9150460593654044, + "grad_norm": 1.8539790257850415, + "learning_rate": 9.948431870338559e-08, + "loss": 0.5011821985244751, + "step": 6549 + }, + { + "epoch": 1.9153384997806697, + "grad_norm": 1.7063021653673758, + "learning_rate": 9.88052084430846e-08, + "loss": 0.5112487077713013, + "step": 6550 + }, + { + "epoch": 1.915630940195935, + "grad_norm": 1.8003514575818433, + "learning_rate": 9.812841252686667e-08, + "loss": 0.4751431345939636, + "step": 6551 + }, + { + "epoch": 1.9159233806112006, + "grad_norm": 1.9933791417538373, + "learning_rate": 9.745393111292745e-08, + "loss": 0.5343109369277954, + "step": 6552 + }, + { + "epoch": 1.9162158210264657, + "grad_norm": 1.4980785147509508, + "learning_rate": 9.678176435892417e-08, + "loss": 0.4602724015712738, + "step": 6553 + }, + { + "epoch": 1.9165082614417313, + "grad_norm": 1.5436966250785777, + "learning_rate": 9.611191242197005e-08, + "loss": 0.4756245017051697, + "step": 6554 + }, + { + "epoch": 
1.9168007018569968, + "grad_norm": 1.6531719135209273, + "learning_rate": 9.544437545864093e-08, + "loss": 0.5291459560394287, + "step": 6555 + }, + { + "epoch": 1.917093142272262, + "grad_norm": 2.0976196168420946, + "learning_rate": 9.47791536249676e-08, + "loss": 0.5357412099838257, + "step": 6556 + }, + { + "epoch": 1.9173855826875275, + "grad_norm": 2.238353466121697, + "learning_rate": 9.411624707644229e-08, + "loss": 0.6298913955688477, + "step": 6557 + }, + { + "epoch": 1.9176780231027928, + "grad_norm": 1.4485326554294644, + "learning_rate": 9.345565596801553e-08, + "loss": 0.5150517225265503, + "step": 6558 + }, + { + "epoch": 1.917970463518058, + "grad_norm": 1.8563821954536717, + "learning_rate": 9.279738045409603e-08, + "loss": 0.6264858245849609, + "step": 6559 + }, + { + "epoch": 1.9182629039333237, + "grad_norm": 1.46383829182073, + "learning_rate": 9.214142068855292e-08, + "loss": 0.33123475313186646, + "step": 6560 + }, + { + "epoch": 1.918555344348589, + "grad_norm": 1.6384165039446617, + "learning_rate": 9.148777682471133e-08, + "loss": 0.5540212392807007, + "step": 6561 + }, + { + "epoch": 1.9188477847638543, + "grad_norm": 1.8427168178125763, + "learning_rate": 9.083644901535793e-08, + "loss": 0.5633922219276428, + "step": 6562 + }, + { + "epoch": 1.9191402251791199, + "grad_norm": 1.7743383669625796, + "learning_rate": 9.018743741273428e-08, + "loss": 0.58629310131073, + "step": 6563 + }, + { + "epoch": 1.9194326655943852, + "grad_norm": 1.8674136448530827, + "learning_rate": 8.95407421685457e-08, + "loss": 0.5985243320465088, + "step": 6564 + }, + { + "epoch": 1.9197251060096505, + "grad_norm": 1.6803719834498339, + "learning_rate": 8.889636343395235e-08, + "loss": 0.5344138741493225, + "step": 6565 + }, + { + "epoch": 1.920017546424916, + "grad_norm": 1.783895238536977, + "learning_rate": 8.825430135957381e-08, + "loss": 0.6139744520187378, + "step": 6566 + }, + { + "epoch": 1.9203099868401812, + "grad_norm": 1.4220884637268112, + 
"learning_rate": 8.761455609548663e-08, + "loss": 0.46376854181289673, + "step": 6567 + }, + { + "epoch": 1.9206024272554467, + "grad_norm": 1.7412635159811354, + "learning_rate": 8.697712779122902e-08, + "loss": 0.5053622722625732, + "step": 6568 + }, + { + "epoch": 1.920894867670712, + "grad_norm": 1.52795636278423, + "learning_rate": 8.634201659579622e-08, + "loss": 0.4363771080970764, + "step": 6569 + }, + { + "epoch": 1.9211873080859774, + "grad_norm": 1.6799265353987254, + "learning_rate": 8.570922265764059e-08, + "loss": 0.4167904853820801, + "step": 6570 + }, + { + "epoch": 1.921479748501243, + "grad_norm": 1.7506509667217935, + "learning_rate": 8.507874612467382e-08, + "loss": 0.525320291519165, + "step": 6571 + }, + { + "epoch": 1.9217721889165083, + "grad_norm": 1.5127507314447914, + "learning_rate": 8.445058714426691e-08, + "loss": 0.4087376594543457, + "step": 6572 + }, + { + "epoch": 1.9220646293317736, + "grad_norm": 1.975359435328043, + "learning_rate": 8.382474586324796e-08, + "loss": 0.471457839012146, + "step": 6573 + }, + { + "epoch": 1.9223570697470391, + "grad_norm": 1.5584377744842253, + "learning_rate": 8.32012224279033e-08, + "loss": 0.6125116348266602, + "step": 6574 + }, + { + "epoch": 1.9226495101623045, + "grad_norm": 1.8527915049964467, + "learning_rate": 8.258001698397744e-08, + "loss": 0.3800301253795624, + "step": 6575 + }, + { + "epoch": 1.9229419505775698, + "grad_norm": 1.7927235022665284, + "learning_rate": 8.196112967667313e-08, + "loss": 0.561034083366394, + "step": 6576 + }, + { + "epoch": 1.9232343909928353, + "grad_norm": 1.8012018638552385, + "learning_rate": 8.134456065065354e-08, + "loss": 0.5768460631370544, + "step": 6577 + }, + { + "epoch": 1.9235268314081004, + "grad_norm": 1.809882879975094, + "learning_rate": 8.073031005003562e-08, + "loss": 0.47440657019615173, + "step": 6578 + }, + { + "epoch": 1.923819271823366, + "grad_norm": 1.4902012429082565, + "learning_rate": 8.011837801839672e-08, + "loss": 
0.5315208435058594, + "step": 6579 + }, + { + "epoch": 1.9241117122386315, + "grad_norm": 1.7054296975282524, + "learning_rate": 7.950876469877467e-08, + "loss": 0.4587036371231079, + "step": 6580 + }, + { + "epoch": 1.9244041526538966, + "grad_norm": 1.6717861291166198, + "learning_rate": 7.890147023366101e-08, + "loss": 0.5356466770172119, + "step": 6581 + }, + { + "epoch": 1.9246965930691622, + "grad_norm": 1.8066170712430372, + "learning_rate": 7.829649476500667e-08, + "loss": 0.48034095764160156, + "step": 6582 + }, + { + "epoch": 1.9249890334844275, + "grad_norm": 1.9403707417182101, + "learning_rate": 7.769383843422185e-08, + "loss": 0.502929151058197, + "step": 6583 + }, + { + "epoch": 1.9252814738996928, + "grad_norm": 1.5994546211401888, + "learning_rate": 7.709350138217386e-08, + "loss": 0.44771361351013184, + "step": 6584 + }, + { + "epoch": 1.9255739143149584, + "grad_norm": 1.7058923530240673, + "learning_rate": 7.649548374918824e-08, + "loss": 0.462479829788208, + "step": 6585 + }, + { + "epoch": 1.9258663547302237, + "grad_norm": 1.7481939511400157, + "learning_rate": 7.589978567504763e-08, + "loss": 0.4758496880531311, + "step": 6586 + }, + { + "epoch": 1.926158795145489, + "grad_norm": 1.8447645858435646, + "learning_rate": 7.530640729899174e-08, + "loss": 0.521172285079956, + "step": 6587 + }, + { + "epoch": 1.9264512355607546, + "grad_norm": 1.685029384432281, + "learning_rate": 7.471534875971964e-08, + "loss": 0.5274392366409302, + "step": 6588 + }, + { + "epoch": 1.92674367597602, + "grad_norm": 1.5547682278755586, + "learning_rate": 7.412661019538858e-08, + "loss": 0.4350961446762085, + "step": 6589 + }, + { + "epoch": 1.9270361163912852, + "grad_norm": 1.5773569785123847, + "learning_rate": 7.354019174361183e-08, + "loss": 0.6298524737358093, + "step": 6590 + }, + { + "epoch": 1.9273285568065508, + "grad_norm": 1.7494178023153484, + "learning_rate": 7.295609354146194e-08, + "loss": 0.5451292395591736, + "step": 6591 + }, + { + "epoch": 
1.927620997221816, + "grad_norm": 1.8824055292173802, + "learning_rate": 7.23743157254675e-08, + "loss": 0.5371264219284058, + "step": 6592 + }, + { + "epoch": 1.9279134376370815, + "grad_norm": 1.714393478017535, + "learning_rate": 7.179485843161526e-08, + "loss": 0.5805129408836365, + "step": 6593 + }, + { + "epoch": 1.928205878052347, + "grad_norm": 1.9692321834579947, + "learning_rate": 7.121772179535135e-08, + "loss": 0.5542718172073364, + "step": 6594 + }, + { + "epoch": 1.928498318467612, + "grad_norm": 1.7503350699121312, + "learning_rate": 7.064290595157675e-08, + "loss": 0.5668192505836487, + "step": 6595 + }, + { + "epoch": 1.9287907588828777, + "grad_norm": 1.6293975396756264, + "learning_rate": 7.007041103465062e-08, + "loss": 0.5107895731925964, + "step": 6596 + }, + { + "epoch": 1.929083199298143, + "grad_norm": 1.847055531354174, + "learning_rate": 6.950023717839261e-08, + "loss": 0.47974276542663574, + "step": 6597 + }, + { + "epoch": 1.9293756397134083, + "grad_norm": 1.5624753949857668, + "learning_rate": 6.893238451607387e-08, + "loss": 0.5641148090362549, + "step": 6598 + }, + { + "epoch": 1.9296680801286739, + "grad_norm": 1.7181332365296518, + "learning_rate": 6.836685318042935e-08, + "loss": 0.5940253734588623, + "step": 6599 + }, + { + "epoch": 1.9299605205439392, + "grad_norm": 1.6880020580834156, + "learning_rate": 6.780364330364775e-08, + "loss": 0.46844422817230225, + "step": 6600 + }, + { + "epoch": 1.9302529609592045, + "grad_norm": 1.6235992853167036, + "learning_rate": 6.724275501737487e-08, + "loss": 0.3933336138725281, + "step": 6601 + }, + { + "epoch": 1.93054540137447, + "grad_norm": 1.4538666395679365, + "learning_rate": 6.668418845271695e-08, + "loss": 0.4786602258682251, + "step": 6602 + }, + { + "epoch": 1.9308378417897354, + "grad_norm": 1.798637107768398, + "learning_rate": 6.612794374023402e-08, + "loss": 0.49695518612861633, + "step": 6603 + }, + { + "epoch": 1.9311302822050007, + "grad_norm": 1.5049309556488495, + 
"learning_rate": 6.557402100994426e-08, + "loss": 0.4798729121685028, + "step": 6604 + }, + { + "epoch": 1.9314227226202663, + "grad_norm": 1.7300127457609986, + "learning_rate": 6.502242039132634e-08, + "loss": 0.4187319278717041, + "step": 6605 + }, + { + "epoch": 1.9317151630355314, + "grad_norm": 2.050722935709042, + "learning_rate": 6.447314201331156e-08, + "loss": 0.4945526719093323, + "step": 6606 + }, + { + "epoch": 1.932007603450797, + "grad_norm": 1.8976456851513979, + "learning_rate": 6.392618600429057e-08, + "loss": 0.5721586346626282, + "step": 6607 + }, + { + "epoch": 1.9323000438660622, + "grad_norm": 1.6286185694607815, + "learning_rate": 6.338155249211109e-08, + "loss": 0.45542022585868835, + "step": 6608 + }, + { + "epoch": 1.9325924842813276, + "grad_norm": 1.7597762099762242, + "learning_rate": 6.283924160407796e-08, + "loss": 0.5627170205116272, + "step": 6609 + }, + { + "epoch": 1.9328849246965931, + "grad_norm": 1.6951677907486626, + "learning_rate": 6.22992534669542e-08, + "loss": 0.5369620323181152, + "step": 6610 + }, + { + "epoch": 1.9331773651118584, + "grad_norm": 1.619968087818578, + "learning_rate": 6.176158820695665e-08, + "loss": 0.5268368124961853, + "step": 6611 + }, + { + "epoch": 1.9334698055271238, + "grad_norm": 1.6828649754520415, + "learning_rate": 6.122624594976257e-08, + "loss": 0.5734575986862183, + "step": 6612 + }, + { + "epoch": 1.9337622459423893, + "grad_norm": 1.86766787540182, + "learning_rate": 6.069322682050516e-08, + "loss": 0.5066978931427002, + "step": 6613 + }, + { + "epoch": 1.9340546863576547, + "grad_norm": 1.68962846891993, + "learning_rate": 6.016253094377366e-08, + "loss": 0.5462731719017029, + "step": 6614 + }, + { + "epoch": 1.93434712677292, + "grad_norm": 1.8689912619353801, + "learning_rate": 5.963415844361553e-08, + "loss": 0.5407041311264038, + "step": 6615 + }, + { + "epoch": 1.9346395671881855, + "grad_norm": 1.792133188360025, + "learning_rate": 5.910810944353418e-08, + "loss": 
0.48977869749069214, + "step": 6616 + }, + { + "epoch": 1.9349320076034506, + "grad_norm": 1.8900630995604775, + "learning_rate": 5.858438406649125e-08, + "loss": 0.5320937037467957, + "step": 6617 + }, + { + "epoch": 1.9352244480187162, + "grad_norm": 1.6602834270947344, + "learning_rate": 5.806298243490327e-08, + "loss": 0.5860059261322021, + "step": 6618 + }, + { + "epoch": 1.9355168884339817, + "grad_norm": 1.7299178033338176, + "learning_rate": 5.7543904670644965e-08, + "loss": 0.49517208337783813, + "step": 6619 + }, + { + "epoch": 1.9358093288492468, + "grad_norm": 1.4975030277698207, + "learning_rate": 5.7027150895049286e-08, + "loss": 0.5060882568359375, + "step": 6620 + }, + { + "epoch": 1.9361017692645124, + "grad_norm": 1.7387399518104565, + "learning_rate": 5.651272122890184e-08, + "loss": 0.5887798070907593, + "step": 6621 + }, + { + "epoch": 1.9363942096797777, + "grad_norm": 2.006477050241073, + "learning_rate": 5.600061579244753e-08, + "loss": 0.6567577123641968, + "step": 6622 + }, + { + "epoch": 1.936686650095043, + "grad_norm": 1.7419376875296542, + "learning_rate": 5.549083470538952e-08, + "loss": 0.5672584176063538, + "step": 6623 + }, + { + "epoch": 1.9369790905103086, + "grad_norm": 1.6312975104255192, + "learning_rate": 5.4983378086885806e-08, + "loss": 0.5166369676589966, + "step": 6624 + }, + { + "epoch": 1.937271530925574, + "grad_norm": 1.7351407182284893, + "learning_rate": 5.447824605555041e-08, + "loss": 0.5157661437988281, + "step": 6625 + }, + { + "epoch": 1.9375639713408392, + "grad_norm": 1.5452343867654343, + "learning_rate": 5.397543872945443e-08, + "loss": 0.5001711845397949, + "step": 6626 + }, + { + "epoch": 1.9378564117561048, + "grad_norm": 1.5666441918912, + "learning_rate": 5.34749562261272e-08, + "loss": 0.48944878578186035, + "step": 6627 + }, + { + "epoch": 1.9381488521713701, + "grad_norm": 1.8943450842549039, + "learning_rate": 5.297679866255401e-08, + "loss": 0.5400780439376831, + "step": 6628 + }, + { + "epoch": 
1.9384412925866354, + "grad_norm": 1.6944930575034618, + "learning_rate": 5.248096615517395e-08, + "loss": 0.544346809387207, + "step": 6629 + }, + { + "epoch": 1.938733733001901, + "grad_norm": 1.8360261063384646, + "learning_rate": 5.1987458819886535e-08, + "loss": 0.5283153653144836, + "step": 6630 + }, + { + "epoch": 1.939026173417166, + "grad_norm": 1.8162414803988312, + "learning_rate": 5.149627677204616e-08, + "loss": 0.555808424949646, + "step": 6631 + }, + { + "epoch": 1.9393186138324316, + "grad_norm": 1.7068645601820531, + "learning_rate": 5.10074201264632e-08, + "loss": 0.5230466723442078, + "step": 6632 + }, + { + "epoch": 1.9396110542476972, + "grad_norm": 1.592321180041504, + "learning_rate": 5.052088899740515e-08, + "loss": 0.4810416102409363, + "step": 6633 + }, + { + "epoch": 1.9399034946629623, + "grad_norm": 1.2489690563293379, + "learning_rate": 5.0036683498594365e-08, + "loss": 0.35233962535858154, + "step": 6634 + }, + { + "epoch": 1.9401959350782279, + "grad_norm": 1.5949248677680616, + "learning_rate": 4.955480374321253e-08, + "loss": 0.5250035524368286, + "step": 6635 + }, + { + "epoch": 1.9404883754934932, + "grad_norm": 1.5547636594172098, + "learning_rate": 4.907524984389622e-08, + "loss": 0.5896221399307251, + "step": 6636 + }, + { + "epoch": 1.9407808159087585, + "grad_norm": 1.5725705573586048, + "learning_rate": 4.859802191273688e-08, + "loss": 0.5410518050193787, + "step": 6637 + }, + { + "epoch": 1.941073256324024, + "grad_norm": 1.5273512663488045, + "learning_rate": 4.812312006128528e-08, + "loss": 0.5044152736663818, + "step": 6638 + }, + { + "epoch": 1.9413656967392894, + "grad_norm": 1.6537481992077037, + "learning_rate": 4.765054440054484e-08, + "loss": 0.5388177633285522, + "step": 6639 + }, + { + "epoch": 1.9416581371545547, + "grad_norm": 2.0702365693466485, + "learning_rate": 4.718029504097943e-08, + "loss": 0.5074491500854492, + "step": 6640 + }, + { + "epoch": 1.9419505775698203, + "grad_norm": 1.6224415285858116, + 
"learning_rate": 4.671237209250557e-08, + "loss": 0.47772669792175293, + "step": 6641 + }, + { + "epoch": 1.9422430179850856, + "grad_norm": 1.6570845374645817, + "learning_rate": 4.624677566449798e-08, + "loss": 0.4682825207710266, + "step": 6642 + }, + { + "epoch": 1.942535458400351, + "grad_norm": 1.5100328644654928, + "learning_rate": 4.578350586578628e-08, + "loss": 0.48880642652511597, + "step": 6643 + }, + { + "epoch": 1.9428278988156165, + "grad_norm": 1.6890744037677652, + "learning_rate": 4.532256280465719e-08, + "loss": 0.4590389132499695, + "step": 6644 + }, + { + "epoch": 1.9431203392308816, + "grad_norm": 1.903981857624826, + "learning_rate": 4.48639465888534e-08, + "loss": 0.5893105268478394, + "step": 6645 + }, + { + "epoch": 1.9434127796461471, + "grad_norm": 1.7274912065627603, + "learning_rate": 4.4407657325574725e-08, + "loss": 0.561900794506073, + "step": 6646 + }, + { + "epoch": 1.9437052200614124, + "grad_norm": 1.662019693277273, + "learning_rate": 4.395369512147474e-08, + "loss": 0.4140210747718811, + "step": 6647 + }, + { + "epoch": 1.9439976604766778, + "grad_norm": 1.7955978434650512, + "learning_rate": 4.350206008266522e-08, + "loss": 0.6220303773880005, + "step": 6648 + }, + { + "epoch": 1.9442901008919433, + "grad_norm": 1.771531678180808, + "learning_rate": 4.3052752314712844e-08, + "loss": 0.4903472065925598, + "step": 6649 + }, + { + "epoch": 1.9445825413072086, + "grad_norm": 1.889992657698585, + "learning_rate": 4.260577192263915e-08, + "loss": 0.4519340991973877, + "step": 6650 + }, + { + "epoch": 1.944874981722474, + "grad_norm": 1.7435292517018475, + "learning_rate": 4.216111901092501e-08, + "loss": 0.49067920446395874, + "step": 6651 + }, + { + "epoch": 1.9451674221377395, + "grad_norm": 1.8654652047797853, + "learning_rate": 4.1718793683505066e-08, + "loss": 0.5935854911804199, + "step": 6652 + }, + { + "epoch": 1.9454598625530048, + "grad_norm": 1.7744411864937968, + "learning_rate": 4.127879604376883e-08, + "loss": 
0.5209576487541199, + "step": 6653 + }, + { + "epoch": 1.9457523029682702, + "grad_norm": 1.50564473891113, + "learning_rate": 4.084112619456515e-08, + "loss": 0.4454221725463867, + "step": 6654 + }, + { + "epoch": 1.9460447433835357, + "grad_norm": 1.8157940398905494, + "learning_rate": 4.0405784238194415e-08, + "loss": 0.5129591226577759, + "step": 6655 + }, + { + "epoch": 1.9463371837988008, + "grad_norm": 1.63185696744402, + "learning_rate": 3.997277027641744e-08, + "loss": 0.48704665899276733, + "step": 6656 + }, + { + "epoch": 1.9466296242140664, + "grad_norm": 1.8037751571098388, + "learning_rate": 3.95420844104466e-08, + "loss": 0.4510651230812073, + "step": 6657 + }, + { + "epoch": 1.946922064629332, + "grad_norm": 1.7817975919339482, + "learning_rate": 3.911372674095249e-08, + "loss": 0.5116807222366333, + "step": 6658 + }, + { + "epoch": 1.947214505044597, + "grad_norm": 1.7985765763419883, + "learning_rate": 3.868769736806277e-08, + "loss": 0.592056393623352, + "step": 6659 + }, + { + "epoch": 1.9475069454598626, + "grad_norm": 1.7881377609654638, + "learning_rate": 3.8263996391357805e-08, + "loss": 0.579146146774292, + "step": 6660 + }, + { + "epoch": 1.947799385875128, + "grad_norm": 1.6202416659647267, + "learning_rate": 3.784262390987503e-08, + "loss": 0.5253209471702576, + "step": 6661 + }, + { + "epoch": 1.9480918262903932, + "grad_norm": 2.008309380522338, + "learning_rate": 3.742358002210789e-08, + "loss": 0.5614888072013855, + "step": 6662 + }, + { + "epoch": 1.9483842667056588, + "grad_norm": 1.6491223001780133, + "learning_rate": 3.7006864826005796e-08, + "loss": 0.5630952715873718, + "step": 6663 + }, + { + "epoch": 1.9486767071209241, + "grad_norm": 2.1390311477096944, + "learning_rate": 3.659247841897306e-08, + "loss": 0.5990846157073975, + "step": 6664 + }, + { + "epoch": 1.9489691475361894, + "grad_norm": 1.6162006621933969, + "learning_rate": 3.6180420897868886e-08, + "loss": 0.5290813446044922, + "step": 6665 + }, + { + "epoch": 
1.949261587951455, + "grad_norm": 2.6144126732722803, + "learning_rate": 3.577069235901176e-08, + "loss": 0.6710211038589478, + "step": 6666 + }, + { + "epoch": 1.9495540283667203, + "grad_norm": 1.71689411729531, + "learning_rate": 3.536329289817064e-08, + "loss": 0.4802299737930298, + "step": 6667 + }, + { + "epoch": 1.9498464687819856, + "grad_norm": 1.6268319596207468, + "learning_rate": 3.495822261057491e-08, + "loss": 0.5432649850845337, + "step": 6668 + }, + { + "epoch": 1.9501389091972512, + "grad_norm": 1.9426982793491434, + "learning_rate": 3.4555481590905495e-08, + "loss": 0.5824951529502869, + "step": 6669 + }, + { + "epoch": 1.9504313496125163, + "grad_norm": 1.5773733844612365, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.48428961634635925, + "step": 6670 + }, + { + "epoch": 1.9507237900277818, + "grad_norm": 1.7258198741312958, + "learning_rate": 3.375698773135705e-08, + "loss": 0.5684780478477478, + "step": 6671 + }, + { + "epoch": 1.9510162304430474, + "grad_norm": 1.7742355369350526, + "learning_rate": 3.336123507811983e-08, + "loss": 0.5658689737319946, + "step": 6672 + }, + { + "epoch": 1.9513086708583125, + "grad_norm": 1.7743474017748566, + "learning_rate": 3.2967812066097006e-08, + "loss": 0.6265745162963867, + "step": 6673 + }, + { + "epoch": 1.951601111273578, + "grad_norm": 1.768397532537575, + "learning_rate": 3.257671878724722e-08, + "loss": 0.5732975006103516, + "step": 6674 + }, + { + "epoch": 1.9518935516888434, + "grad_norm": 2.3801499199920273, + "learning_rate": 3.218795533298624e-08, + "loss": 0.46968942880630493, + "step": 6675 + }, + { + "epoch": 1.9521859921041087, + "grad_norm": 1.9250466851177817, + "learning_rate": 3.180152179418472e-08, + "loss": 0.5651586055755615, + "step": 6676 + }, + { + "epoch": 1.9524784325193743, + "grad_norm": 1.4699414350235678, + "learning_rate": 3.141741826117151e-08, + "loss": 0.46789437532424927, + "step": 6677 + }, + { + "epoch": 1.9527708729346396, + "grad_norm": 1.6701838665271502, 
+ "learning_rate": 3.1035644823725896e-08, + "loss": 0.5332610011100769, + "step": 6678 + }, + { + "epoch": 1.953063313349905, + "grad_norm": 1.825129394239336, + "learning_rate": 3.06562015710854e-08, + "loss": 0.49613600969314575, + "step": 6679 + }, + { + "epoch": 1.9533557537651705, + "grad_norm": 2.1340240197713265, + "learning_rate": 3.027908859194351e-08, + "loss": 0.5498408079147339, + "step": 6680 + }, + { + "epoch": 1.9536481941804358, + "grad_norm": 1.8887907896186948, + "learning_rate": 2.99043059744486e-08, + "loss": 0.6802657842636108, + "step": 6681 + }, + { + "epoch": 1.953940634595701, + "grad_norm": 1.8609256911752867, + "learning_rate": 2.9531853806201716e-08, + "loss": 0.5149989724159241, + "step": 6682 + }, + { + "epoch": 1.9542330750109667, + "grad_norm": 1.7262483706342455, + "learning_rate": 2.9161732174263212e-08, + "loss": 0.5249730944633484, + "step": 6683 + }, + { + "epoch": 1.9545255154262318, + "grad_norm": 1.7003943133697261, + "learning_rate": 2.8793941165147222e-08, + "loss": 0.5711483359336853, + "step": 6684 + }, + { + "epoch": 1.9548179558414973, + "grad_norm": 1.7303037823896377, + "learning_rate": 2.842848086482053e-08, + "loss": 0.4591020345687866, + "step": 6685 + }, + { + "epoch": 1.9551103962567626, + "grad_norm": 1.887004603599524, + "learning_rate": 2.8065351358708136e-08, + "loss": 0.575869083404541, + "step": 6686 + }, + { + "epoch": 1.955402836672028, + "grad_norm": 1.7563501117497715, + "learning_rate": 2.7704552731688816e-08, + "loss": 0.5664101839065552, + "step": 6687 + }, + { + "epoch": 1.9556952770872935, + "grad_norm": 1.5280681451949298, + "learning_rate": 2.7346085068098437e-08, + "loss": 0.5739811062812805, + "step": 6688 + }, + { + "epoch": 1.9559877175025588, + "grad_norm": 1.64304520297204, + "learning_rate": 2.6989948451726643e-08, + "loss": 0.4707348942756653, + "step": 6689 + }, + { + "epoch": 1.9562801579178242, + "grad_norm": 1.4347028954089904, + "learning_rate": 2.6636142965816848e-08, + "loss": 
0.38842523097991943, + "step": 6690 + }, + { + "epoch": 1.9565725983330897, + "grad_norm": 1.9429266961932796, + "learning_rate": 2.628466869306956e-08, + "loss": 0.4295673668384552, + "step": 6691 + }, + { + "epoch": 1.956865038748355, + "grad_norm": 1.9886421076178336, + "learning_rate": 2.5935525715640176e-08, + "loss": 0.5358999967575073, + "step": 6692 + }, + { + "epoch": 1.9571574791636204, + "grad_norm": 1.8207487442928234, + "learning_rate": 2.5588714115137857e-08, + "loss": 0.49730730056762695, + "step": 6693 + }, + { + "epoch": 1.957449919578886, + "grad_norm": 1.8975782350563493, + "learning_rate": 2.5244233972627762e-08, + "loss": 0.5368232131004333, + "step": 6694 + }, + { + "epoch": 1.957742359994151, + "grad_norm": 1.6616905607648789, + "learning_rate": 2.4902085368632144e-08, + "loss": 0.48084500432014465, + "step": 6695 + }, + { + "epoch": 1.9580348004094166, + "grad_norm": 1.6503756551181779, + "learning_rate": 2.45622683831237e-08, + "loss": 0.5197296142578125, + "step": 6696 + }, + { + "epoch": 1.9583272408246821, + "grad_norm": 1.7005704554604877, + "learning_rate": 2.4224783095532224e-08, + "loss": 0.4807678163051605, + "step": 6697 + }, + { + "epoch": 1.9586196812399472, + "grad_norm": 1.5200854711140026, + "learning_rate": 2.388962958474461e-08, + "loss": 0.5117641687393188, + "step": 6698 + }, + { + "epoch": 1.9589121216552128, + "grad_norm": 1.5153035364420055, + "learning_rate": 2.355680792910153e-08, + "loss": 0.5318149328231812, + "step": 6699 + }, + { + "epoch": 1.959204562070478, + "grad_norm": 1.642749755305391, + "learning_rate": 2.3226318206395206e-08, + "loss": 0.5590193271636963, + "step": 6700 + }, + { + "epoch": 1.9594970024857434, + "grad_norm": 1.9273854799208605, + "learning_rate": 2.2898160493878275e-08, + "loss": 0.7686688899993896, + "step": 6701 + }, + { + "epoch": 1.959789442901009, + "grad_norm": 1.7479554033366604, + "learning_rate": 2.257233486825383e-08, + "loss": 0.5085177421569824, + "step": 6702 + }, + { + 
"epoch": 1.9600818833162743, + "grad_norm": 1.4224817781801729, + "learning_rate": 2.2248841405683176e-08, + "loss": 0.44002413749694824, + "step": 6703 + }, + { + "epoch": 1.9603743237315396, + "grad_norm": 1.6541616903883845, + "learning_rate": 2.1927680181779154e-08, + "loss": 0.5369126796722412, + "step": 6704 + }, + { + "epoch": 1.9606667641468052, + "grad_norm": 1.5811100430561291, + "learning_rate": 2.1608851271612828e-08, + "loss": 0.516021728515625, + "step": 6705 + }, + { + "epoch": 1.9609592045620705, + "grad_norm": 1.577385822778267, + "learning_rate": 2.1292354749707922e-08, + "loss": 0.5215185284614563, + "step": 6706 + }, + { + "epoch": 1.9612516449773358, + "grad_norm": 1.7926842955012665, + "learning_rate": 2.0978190690043032e-08, + "loss": 0.6051908731460571, + "step": 6707 + }, + { + "epoch": 1.9615440853926014, + "grad_norm": 1.8529134419730404, + "learning_rate": 2.066635916605386e-08, + "loss": 0.5426267385482788, + "step": 6708 + }, + { + "epoch": 1.9618365258078665, + "grad_norm": 2.064852719580073, + "learning_rate": 2.0356860250626554e-08, + "loss": 0.5888626575469971, + "step": 6709 + }, + { + "epoch": 1.962128966223132, + "grad_norm": 1.4287637894797525, + "learning_rate": 2.004969401610657e-08, + "loss": 0.5225001573562622, + "step": 6710 + }, + { + "epoch": 1.9624214066383976, + "grad_norm": 1.616132198436982, + "learning_rate": 1.974486053429092e-08, + "loss": 0.5735136270523071, + "step": 6711 + }, + { + "epoch": 1.9627138470536627, + "grad_norm": 1.6327567238976746, + "learning_rate": 1.9442359876433724e-08, + "loss": 0.5302764177322388, + "step": 6712 + }, + { + "epoch": 1.9630062874689282, + "grad_norm": 1.625182085046959, + "learning_rate": 1.9142192113241752e-08, + "loss": 0.5078837871551514, + "step": 6713 + }, + { + "epoch": 1.9632987278841936, + "grad_norm": 1.7110845788062152, + "learning_rate": 1.884435731487888e-08, + "loss": 0.5772985219955444, + "step": 6714 + }, + { + "epoch": 1.963591168299459, + "grad_norm": 
2.2561904758082925, + "learning_rate": 1.8548855550959423e-08, + "loss": 0.5974931716918945, + "step": 6715 + }, + { + "epoch": 1.9638836087147244, + "grad_norm": 1.8105323667501525, + "learning_rate": 1.8255686890558123e-08, + "loss": 0.5065072774887085, + "step": 6716 + }, + { + "epoch": 1.9641760491299898, + "grad_norm": 1.4374806170365766, + "learning_rate": 1.7964851402199058e-08, + "loss": 0.4729428291320801, + "step": 6717 + }, + { + "epoch": 1.964468489545255, + "grad_norm": 1.7924892088352824, + "learning_rate": 1.7676349153864515e-08, + "loss": 0.46363723278045654, + "step": 6718 + }, + { + "epoch": 1.9647609299605207, + "grad_norm": 1.6096201158909726, + "learning_rate": 1.7390180212990547e-08, + "loss": 0.5436959266662598, + "step": 6719 + }, + { + "epoch": 1.965053370375786, + "grad_norm": 1.8570609869736334, + "learning_rate": 1.7106344646465877e-08, + "loss": 0.7571452856063843, + "step": 6720 + }, + { + "epoch": 1.9653458107910513, + "grad_norm": 1.7203125443062617, + "learning_rate": 1.682484252063632e-08, + "loss": 0.5724680423736572, + "step": 6721 + }, + { + "epoch": 1.9656382512063169, + "grad_norm": 1.5552868811193872, + "learning_rate": 1.654567390130146e-08, + "loss": 0.46937745809555054, + "step": 6722 + }, + { + "epoch": 1.965930691621582, + "grad_norm": 1.4639592826813614, + "learning_rate": 1.6268838853713552e-08, + "loss": 0.5764822363853455, + "step": 6723 + }, + { + "epoch": 1.9662231320368475, + "grad_norm": 1.8890557259087926, + "learning_rate": 1.5994337442584164e-08, + "loss": 0.6074192523956299, + "step": 6724 + }, + { + "epoch": 1.9665155724521128, + "grad_norm": 1.8156005720173343, + "learning_rate": 1.572216973207419e-08, + "loss": 0.6001715064048767, + "step": 6725 + }, + { + "epoch": 1.9668080128673782, + "grad_norm": 1.8635551001096793, + "learning_rate": 1.545233578580163e-08, + "loss": 0.5819540619850159, + "step": 6726 + }, + { + "epoch": 1.9671004532826437, + "grad_norm": 1.509757451229315, + "learning_rate": 
1.518483566683826e-08, + "loss": 0.4745405912399292, + "step": 6727 + }, + { + "epoch": 1.967392893697909, + "grad_norm": 1.5301158686504193, + "learning_rate": 1.4919669437710725e-08, + "loss": 0.4438042640686035, + "step": 6728 + }, + { + "epoch": 1.9676853341131744, + "grad_norm": 1.6058873643565785, + "learning_rate": 1.465683716040056e-08, + "loss": 0.45798003673553467, + "step": 6729 + }, + { + "epoch": 1.96797777452844, + "grad_norm": 1.5582798501168125, + "learning_rate": 1.4396338896341955e-08, + "loss": 0.3918766379356384, + "step": 6730 + }, + { + "epoch": 1.9682702149437052, + "grad_norm": 1.6253936447718431, + "learning_rate": 1.4138174706426199e-08, + "loss": 0.5266170501708984, + "step": 6731 + }, + { + "epoch": 1.9685626553589706, + "grad_norm": 1.733772185361853, + "learning_rate": 1.3882344650998359e-08, + "loss": 0.5166668891906738, + "step": 6732 + }, + { + "epoch": 1.9688550957742361, + "grad_norm": 1.7595735268115036, + "learning_rate": 1.3628848789853932e-08, + "loss": 0.39324697852134705, + "step": 6733 + }, + { + "epoch": 1.9691475361895012, + "grad_norm": 1.8212233848125128, + "learning_rate": 1.3377687182248855e-08, + "loss": 0.4915732443332672, + "step": 6734 + }, + { + "epoch": 1.9694399766047668, + "grad_norm": 1.7689973508355645, + "learning_rate": 1.31288598868895e-08, + "loss": 0.5416492819786072, + "step": 6735 + }, + { + "epoch": 1.9697324170200323, + "grad_norm": 1.6021351256215517, + "learning_rate": 1.288236696193823e-08, + "loss": 0.4713748097419739, + "step": 6736 + }, + { + "epoch": 1.9700248574352974, + "grad_norm": 1.7411270752119496, + "learning_rate": 1.263820846501118e-08, + "loss": 0.44074663519859314, + "step": 6737 + }, + { + "epoch": 1.970317297850563, + "grad_norm": 1.7164561827524085, + "learning_rate": 1.2396384453179366e-08, + "loss": 0.4694680869579315, + "step": 6738 + }, + { + "epoch": 1.9706097382658283, + "grad_norm": 1.8691907501418656, + "learning_rate": 1.215689498296535e-08, + "loss": 0.553142786026001, 
+ "step": 6739 + }, + { + "epoch": 1.9709021786810936, + "grad_norm": 1.7953149807008746, + "learning_rate": 1.1919740110351019e-08, + "loss": 0.533849835395813, + "step": 6740 + }, + { + "epoch": 1.9711946190963592, + "grad_norm": 1.9503927011602655, + "learning_rate": 1.1684919890768698e-08, + "loss": 0.5448808670043945, + "step": 6741 + }, + { + "epoch": 1.9714870595116245, + "grad_norm": 1.6447356703420446, + "learning_rate": 1.1452434379106703e-08, + "loss": 0.46860289573669434, + "step": 6742 + }, + { + "epoch": 1.9717794999268898, + "grad_norm": 2.0052944353876696, + "learning_rate": 1.122228362970712e-08, + "loss": 0.5552232265472412, + "step": 6743 + }, + { + "epoch": 1.9720719403421554, + "grad_norm": 1.7151457677082285, + "learning_rate": 1.0994467696364698e-08, + "loss": 0.4639692008495331, + "step": 6744 + }, + { + "epoch": 1.9723643807574207, + "grad_norm": 2.0905035821875746, + "learning_rate": 1.076898663233239e-08, + "loss": 0.7129387259483337, + "step": 6745 + }, + { + "epoch": 1.972656821172686, + "grad_norm": 1.6674482501618961, + "learning_rate": 1.0545840490313597e-08, + "loss": 0.6637833118438721, + "step": 6746 + }, + { + "epoch": 1.9729492615879516, + "grad_norm": 2.029336881837252, + "learning_rate": 1.0325029322467705e-08, + "loss": 0.6215991973876953, + "step": 6747 + }, + { + "epoch": 1.9732417020032167, + "grad_norm": 1.878624196936373, + "learning_rate": 1.0106553180407874e-08, + "loss": 0.48594456911087036, + "step": 6748 + }, + { + "epoch": 1.9735341424184822, + "grad_norm": 1.9063825585940108, + "learning_rate": 9.890412115202142e-09, + "loss": 0.5443629622459412, + "step": 6749 + }, + { + "epoch": 1.9738265828337478, + "grad_norm": 1.7053157420855176, + "learning_rate": 9.676606177371207e-09, + "loss": 0.643796443939209, + "step": 6750 + }, + { + "epoch": 1.974119023249013, + "grad_norm": 1.6282972872252912, + "learning_rate": 9.465135416891757e-09, + "loss": 0.6305385828018188, + "step": 6751 + }, + { + "epoch": 
1.9744114636642784, + "grad_norm": 1.5632532849336644, + "learning_rate": 9.255999883193146e-09, + "loss": 0.5120108723640442, + "step": 6752 + }, + { + "epoch": 1.9747039040795438, + "grad_norm": 1.6718955354026932, + "learning_rate": 9.0491996251596e-09, + "loss": 0.5552967190742493, + "step": 6753 + }, + { + "epoch": 1.974996344494809, + "grad_norm": 1.935016742711985, + "learning_rate": 8.84473469113023e-09, + "loss": 0.6341986656188965, + "step": 6754 + }, + { + "epoch": 1.9752887849100746, + "grad_norm": 1.9011990155600869, + "learning_rate": 8.642605128896808e-09, + "loss": 0.5204262137413025, + "step": 6755 + }, + { + "epoch": 1.97558122532534, + "grad_norm": 1.75594319264598, + "learning_rate": 8.442810985705984e-09, + "loss": 0.4980974793434143, + "step": 6756 + }, + { + "epoch": 1.9758736657406053, + "grad_norm": 1.9165104575442982, + "learning_rate": 8.245352308258181e-09, + "loss": 0.5432465076446533, + "step": 6757 + }, + { + "epoch": 1.9761661061558708, + "grad_norm": 1.7852742537308695, + "learning_rate": 8.0502291427087e-09, + "loss": 0.813039243221283, + "step": 6758 + }, + { + "epoch": 1.9764585465711362, + "grad_norm": 1.8018799007975157, + "learning_rate": 7.85744153466661e-09, + "loss": 0.5723720788955688, + "step": 6759 + }, + { + "epoch": 1.9767509869864015, + "grad_norm": 1.8628448153664545, + "learning_rate": 7.666989529193647e-09, + "loss": 0.5562596321105957, + "step": 6760 + }, + { + "epoch": 1.977043427401667, + "grad_norm": 1.796195928066652, + "learning_rate": 7.478873170807532e-09, + "loss": 0.5455175638198853, + "step": 6761 + }, + { + "epoch": 1.9773358678169322, + "grad_norm": 1.791853318736957, + "learning_rate": 7.2930925034797595e-09, + "loss": 0.5753832459449768, + "step": 6762 + }, + { + "epoch": 1.9776283082321977, + "grad_norm": 1.786340662775674, + "learning_rate": 7.109647570634482e-09, + "loss": 0.49962282180786133, + "step": 6763 + }, + { + "epoch": 1.977920748647463, + "grad_norm": 1.4222417158044076, + 
"learning_rate": 6.9285384151507316e-09, + "loss": 0.44443345069885254, + "step": 6764 + }, + { + "epoch": 1.9782131890627284, + "grad_norm": 1.5729694345436978, + "learning_rate": 6.749765079363535e-09, + "loss": 0.3236424922943115, + "step": 6765 + }, + { + "epoch": 1.978505629477994, + "grad_norm": 1.766865850057596, + "learning_rate": 6.573327605057245e-09, + "loss": 0.5246942639350891, + "step": 6766 + }, + { + "epoch": 1.9787980698932592, + "grad_norm": 1.6890664092399734, + "learning_rate": 6.399226033475536e-09, + "loss": 0.6525053381919861, + "step": 6767 + }, + { + "epoch": 1.9790905103085246, + "grad_norm": 1.5450928873923104, + "learning_rate": 6.227460405312524e-09, + "loss": 0.502121090888977, + "step": 6768 + }, + { + "epoch": 1.9793829507237901, + "grad_norm": 2.00727430176714, + "learning_rate": 6.058030760718314e-09, + "loss": 0.6137609481811523, + "step": 6769 + }, + { + "epoch": 1.9796753911390554, + "grad_norm": 2.274345342275455, + "learning_rate": 5.890937139294561e-09, + "loss": 0.6673166751861572, + "step": 6770 + }, + { + "epoch": 1.9799678315543208, + "grad_norm": 1.8444636633461322, + "learning_rate": 5.726179580098912e-09, + "loss": 0.5888657569885254, + "step": 6771 + }, + { + "epoch": 1.9802602719695863, + "grad_norm": 1.6693157475267608, + "learning_rate": 5.563758121642781e-09, + "loss": 0.5239546298980713, + "step": 6772 + }, + { + "epoch": 1.9805527123848514, + "grad_norm": 1.8912704609026834, + "learning_rate": 5.403672801890247e-09, + "loss": 0.5446778535842896, + "step": 6773 + }, + { + "epoch": 1.980845152800117, + "grad_norm": 1.9927071253973727, + "learning_rate": 5.245923658262486e-09, + "loss": 0.6198326349258423, + "step": 6774 + }, + { + "epoch": 1.9811375932153825, + "grad_norm": 2.15235475034657, + "learning_rate": 5.090510727630005e-09, + "loss": 0.586353063583374, + "step": 6775 + }, + { + "epoch": 1.9814300336306476, + "grad_norm": 1.7195990521736408, + "learning_rate": 4.93743404632041e-09, + "loss": 
0.6344239711761475, + "step": 6776 + }, + { + "epoch": 1.9817224740459132, + "grad_norm": 1.7280364585810115, + "learning_rate": 4.7866936501150816e-09, + "loss": 0.529091477394104, + "step": 6777 + }, + { + "epoch": 1.9820149144611785, + "grad_norm": 1.7357230298596742, + "learning_rate": 4.6382895742491665e-09, + "loss": 0.50063157081604, + "step": 6778 + }, + { + "epoch": 1.9823073548764438, + "grad_norm": 1.526019679238999, + "learning_rate": 4.492221853409362e-09, + "loss": 0.48398512601852417, + "step": 6779 + }, + { + "epoch": 1.9825997952917094, + "grad_norm": 1.5319705226915326, + "learning_rate": 4.348490521738358e-09, + "loss": 0.5330454707145691, + "step": 6780 + }, + { + "epoch": 1.9828922357069747, + "grad_norm": 1.829830860451363, + "learning_rate": 4.207095612833723e-09, + "loss": 0.4562032222747803, + "step": 6781 + }, + { + "epoch": 1.98318467612224, + "grad_norm": 1.7011927258883048, + "learning_rate": 4.0680371597456855e-09, + "loss": 0.47456252574920654, + "step": 6782 + }, + { + "epoch": 1.9834771165375056, + "grad_norm": 1.8486724201847988, + "learning_rate": 3.931315194977137e-09, + "loss": 0.6283844709396362, + "step": 6783 + }, + { + "epoch": 1.983769556952771, + "grad_norm": 1.7243813126388492, + "learning_rate": 3.7969297504858445e-09, + "loss": 0.5886485576629639, + "step": 6784 + }, + { + "epoch": 1.9840619973680362, + "grad_norm": 1.679651544361786, + "learning_rate": 3.664880857685571e-09, + "loss": 0.4711921811103821, + "step": 6785 + }, + { + "epoch": 1.9843544377833018, + "grad_norm": 1.8051937774075772, + "learning_rate": 3.5351685474394048e-09, + "loss": 0.5372034311294556, + "step": 6786 + }, + { + "epoch": 1.9846468781985669, + "grad_norm": 1.7143010926050217, + "learning_rate": 3.4077928500686473e-09, + "loss": 0.5314334034919739, + "step": 6787 + }, + { + "epoch": 1.9849393186138324, + "grad_norm": 1.7988305575744603, + "learning_rate": 3.2827537953461496e-09, + "loss": 0.6022863984107971, + "step": 6788 + }, + { + "epoch": 
1.985231759029098, + "grad_norm": 1.844296066004364, + "learning_rate": 3.160051412499643e-09, + "loss": 0.6739746928215027, + "step": 6789 + }, + { + "epoch": 1.985524199444363, + "grad_norm": 1.795022844462659, + "learning_rate": 3.0396857302084082e-09, + "loss": 0.6454254388809204, + "step": 6790 + }, + { + "epoch": 1.9858166398596286, + "grad_norm": 1.7777744811692944, + "learning_rate": 2.9216567766088276e-09, + "loss": 0.567995011806488, + "step": 6791 + }, + { + "epoch": 1.986109080274894, + "grad_norm": 1.7916482396337698, + "learning_rate": 2.8059645792877233e-09, + "loss": 0.568576455116272, + "step": 6792 + }, + { + "epoch": 1.9864015206901593, + "grad_norm": 1.5789903561856604, + "learning_rate": 2.6926091652890175e-09, + "loss": 0.5053816437721252, + "step": 6793 + }, + { + "epoch": 1.9866939611054248, + "grad_norm": 1.4966825154239165, + "learning_rate": 2.5815905611081825e-09, + "loss": 0.47705504298210144, + "step": 6794 + }, + { + "epoch": 1.9869864015206902, + "grad_norm": 1.7555838648022946, + "learning_rate": 2.472908792695572e-09, + "loss": 0.48271438479423523, + "step": 6795 + }, + { + "epoch": 1.9872788419359555, + "grad_norm": 1.675207035758499, + "learning_rate": 2.3665638854541982e-09, + "loss": 0.5694486498832703, + "step": 6796 + }, + { + "epoch": 1.987571282351221, + "grad_norm": 1.6539598401922624, + "learning_rate": 2.2625558642419553e-09, + "loss": 0.4940011501312256, + "step": 6797 + }, + { + "epoch": 1.9878637227664864, + "grad_norm": 1.8961348890729253, + "learning_rate": 2.160884753370507e-09, + "loss": 0.5536549091339111, + "step": 6798 + }, + { + "epoch": 1.9881561631817517, + "grad_norm": 1.923836316704977, + "learning_rate": 2.0615505766041765e-09, + "loss": 0.5354948043823242, + "step": 6799 + }, + { + "epoch": 1.9884486035970173, + "grad_norm": 1.9901895658271425, + "learning_rate": 1.9645533571610585e-09, + "loss": 0.6246936321258545, + "step": 6800 + }, + { + "epoch": 1.9887410440122824, + "grad_norm": 1.9564588316886224, 
+ "learning_rate": 1.869893117715238e-09, + "loss": 0.6690058708190918, + "step": 6801 + }, + { + "epoch": 1.989033484427548, + "grad_norm": 1.5017853956289122, + "learning_rate": 1.7775698803923491e-09, + "loss": 0.4022945761680603, + "step": 6802 + }, + { + "epoch": 1.9893259248428132, + "grad_norm": 1.863253138688696, + "learning_rate": 1.6875836667729073e-09, + "loss": 0.7192882299423218, + "step": 6803 + }, + { + "epoch": 1.9896183652580786, + "grad_norm": 1.7599554073021901, + "learning_rate": 1.5999344978889774e-09, + "loss": 0.4818531274795532, + "step": 6804 + }, + { + "epoch": 1.989910805673344, + "grad_norm": 1.6555564933889482, + "learning_rate": 1.5146223942297256e-09, + "loss": 0.5877143144607544, + "step": 6805 + }, + { + "epoch": 1.9902032460886094, + "grad_norm": 1.699617544549682, + "learning_rate": 1.4316473757347571e-09, + "loss": 0.5317925810813904, + "step": 6806 + }, + { + "epoch": 1.9904956865038748, + "grad_norm": 1.8556459190322732, + "learning_rate": 1.3510094618007785e-09, + "loss": 0.5203319787979126, + "step": 6807 + }, + { + "epoch": 1.9907881269191403, + "grad_norm": 1.8877803824180381, + "learning_rate": 1.2727086712760462e-09, + "loss": 0.5171575546264648, + "step": 6808 + }, + { + "epoch": 1.9910805673344056, + "grad_norm": 1.6004462821959236, + "learning_rate": 1.1967450224614763e-09, + "loss": 0.4570615291595459, + "step": 6809 + }, + { + "epoch": 1.991373007749671, + "grad_norm": 2.16253456274772, + "learning_rate": 1.123118533113976e-09, + "loss": 0.5689741969108582, + "step": 6810 + }, + { + "epoch": 1.9916654481649365, + "grad_norm": 1.796551268093938, + "learning_rate": 1.0518292204442226e-09, + "loss": 0.5029700994491577, + "step": 6811 + }, + { + "epoch": 1.9919578885802016, + "grad_norm": 1.774689424925791, + "learning_rate": 9.828771011144434e-10, + "loss": 0.5461232662200928, + "step": 6812 + }, + { + "epoch": 1.9922503289954672, + "grad_norm": 1.6779660974331405, + "learning_rate": 9.162621912417458e-10, + "loss": 
0.4681328535079956, + "step": 6813 + }, + { + "epoch": 1.9925427694107327, + "grad_norm": 1.6414551197415561, + "learning_rate": 8.519845063970078e-10, + "loss": 0.6356761455535889, + "step": 6814 + }, + { + "epoch": 1.9928352098259978, + "grad_norm": 1.5821864651194355, + "learning_rate": 7.900440616059879e-10, + "loss": 0.48491230607032776, + "step": 6815 + }, + { + "epoch": 1.9931276502412634, + "grad_norm": 1.5771006740515017, + "learning_rate": 7.304408713448841e-10, + "loss": 0.45563238859176636, + "step": 6816 + }, + { + "epoch": 1.9934200906565287, + "grad_norm": 1.773565509502716, + "learning_rate": 6.731749495481054e-10, + "loss": 0.6067036986351013, + "step": 6817 + }, + { + "epoch": 1.993712531071794, + "grad_norm": 1.8573415580854213, + "learning_rate": 6.182463095982805e-10, + "loss": 0.6162583827972412, + "step": 6818 + }, + { + "epoch": 1.9940049714870596, + "grad_norm": 2.0477517064592456, + "learning_rate": 5.656549643373587e-10, + "loss": 0.6621623039245605, + "step": 6819 + }, + { + "epoch": 1.994297411902325, + "grad_norm": 1.4386624972833835, + "learning_rate": 5.154009260566195e-10, + "loss": 0.5374715328216553, + "step": 6820 + }, + { + "epoch": 1.9945898523175902, + "grad_norm": 1.690807663421353, + "learning_rate": 4.674842065033325e-10, + "loss": 0.5164921283721924, + "step": 6821 + }, + { + "epoch": 1.9948822927328558, + "grad_norm": 1.6217517652016564, + "learning_rate": 4.2190481687631736e-10, + "loss": 0.4816705584526062, + "step": 6822 + }, + { + "epoch": 1.995174733148121, + "grad_norm": 1.963964870727347, + "learning_rate": 3.786627678314947e-10, + "loss": 0.5393646955490112, + "step": 6823 + }, + { + "epoch": 1.9954671735633864, + "grad_norm": 1.965068141803477, + "learning_rate": 3.377580694763349e-10, + "loss": 0.6161901950836182, + "step": 6824 + }, + { + "epoch": 1.995759613978652, + "grad_norm": 1.7151080887799663, + "learning_rate": 2.991907313698583e-10, + "loss": 0.45819348096847534, + "step": 6825 + }, + { + "epoch": 
1.996052054393917, + "grad_norm": 1.6568040031723943, + "learning_rate": 2.6296076252929623e-10, + "loss": 0.4111405611038208, + "step": 6826 + }, + { + "epoch": 1.9963444948091826, + "grad_norm": 1.579319709420574, + "learning_rate": 2.2906817142120952e-10, + "loss": 0.5351378917694092, + "step": 6827 + }, + { + "epoch": 1.9966369352244482, + "grad_norm": 1.9986759890358465, + "learning_rate": 1.9751296597037007e-10, + "loss": 0.5349807739257812, + "step": 6828 + }, + { + "epoch": 1.9969293756397133, + "grad_norm": 1.3400965720769549, + "learning_rate": 1.68295153549769e-10, + "loss": 0.3669770061969757, + "step": 6829 + }, + { + "epoch": 1.9972218160549788, + "grad_norm": 1.6049954159966944, + "learning_rate": 1.414147409906086e-10, + "loss": 0.51691073179245, + "step": 6830 + }, + { + "epoch": 1.9975142564702442, + "grad_norm": 1.6369812108335593, + "learning_rate": 1.1687173457564095e-10, + "loss": 0.530505895614624, + "step": 6831 + }, + { + "epoch": 1.9978066968855095, + "grad_norm": 1.7168823925537207, + "learning_rate": 9.466614004138841e-11, + "loss": 0.6562793850898743, + "step": 6832 + }, + { + "epoch": 1.998099137300775, + "grad_norm": 1.7130665778689727, + "learning_rate": 7.479796257925387e-11, + "loss": 0.5174558758735657, + "step": 6833 + }, + { + "epoch": 1.9983915777160404, + "grad_norm": 1.825880314789344, + "learning_rate": 5.726720683219e-11, + "loss": 0.5514833331108093, + "step": 6834 + }, + { + "epoch": 1.9986840181313057, + "grad_norm": 1.4872034884229834, + "learning_rate": 4.207387689803e-11, + "loss": 0.4652816653251648, + "step": 6835 + }, + { + "epoch": 1.9989764585465712, + "grad_norm": 1.497818750132978, + "learning_rate": 2.9217976328377305e-11, + "loss": 0.420850932598114, + "step": 6836 + }, + { + "epoch": 1.9992688989618366, + "grad_norm": 1.7752439390839505, + "learning_rate": 1.8699508128605658e-11, + "loss": 0.5394539833068848, + "step": 6837 + }, + { + "epoch": 1.999561339377102, + "grad_norm": 1.5556477853097161, + 
"learning_rate": 1.051847475674883e-11, + "loss": 0.458107590675354, + "step": 6838 + }, + { + "epoch": 1.9998537797923674, + "grad_norm": 1.6149334678852978, + "learning_rate": 4.6748781246108706e-12, + "loss": 0.552463173866272, + "step": 6839 + }, + { + "epoch": 2.0, + "grad_norm": 3.285366195149545, + "learning_rate": 1.1687195999865453e-12, + "loss": 0.4656301736831665, + "step": 6840 + }, + { + "epoch": 2.0, + "step": 6840, + "total_flos": 2089529852362752.0, + "train_loss": 0.6033765813455596, + "train_runtime": 36066.319, + "train_samples_per_second": 0.758, + "train_steps_per_second": 0.19 + } + ], + "logging_steps": 1, + "max_steps": 6840, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2089529852362752.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..81f2336f2b4301fde755bb2ff1a553c0af833dc6 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18a5144102f6d607705c76873c9b6b52fea03ff40dc71ea0f2bf5e2547fe44f +size 6968 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..6999b4eed987dcc83711d6663906326fbb9152e5 Binary files /dev/null and b/training_loss.png differ